In [4]:
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.agents.dqn import DQNAgent
from keras.layers import Dense, Flatten,Lambda
import tensorflow as tf
import numpy as np
import random
import pygame
import gym


class Env(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.MultiDiscrete([39, 27])
        self.screen = pygame.display.set_mode((800, 600))
        self.PlayerX = 0
        self.PlayerY = 0
        self.FoodX = 0
        self.FoodY = 0
        self.state = [self.FoodX - self.PlayerX + 19, self.FoodY - self.PlayerY + 14]
        self.timeLimit = 1000

    def render(self, mode="human"):
        self.screen.fill((0, 0, 0))
        pygame.draw.rect(self.screen, (255, 255, 255), pygame.Rect(self.PlayerX * 40, self.PlayerY * 40, 40, 40))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.FoodX * 40, self.FoodY * 40, 40, 40))
        pygame.display.update()

    def reset(self):
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
        self.PlayerX = 0
        self.PlayerY = 0
        self.timeLimit = 1000
        return self.state

    def step(self, action):
        self.timeLimit -= 1
        reward = -1

        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1

        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward += 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)

        if self.timeLimit <= 0:
            done = True
        else:
            done = False

        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done, {}


env = Env()

states = env.observation_space.shape
actions = env.action_space.n





def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=(1, states[0])))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    model.add(Lambda(tf.math.argmax, output_shape=1 ))
    
    return model

def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn


model = build_model(states, actions)
DQN = buildAgent(model, actions)

DQN.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
DQN.fit(env, nb_steps=50000, visualize=False, verbose=1)
scores = DQN.test(env, nb_episodes=100, visualize=True)
print(np.mean(scores.history['episode_reward']))
pygame.quit()
model.save('model.h5')

  and should_run_async(code)


TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.

In [2]:
!pip show keras-rl2

Name: keras-rl2
Version: 1.0.5
Summary: Deep Reinforcement Learning for Tensorflow 2 Keras
Home-page: https://github.com/wau/keras-rl2
Author: Taylor McNally
Author-email: None
License: MIT
Location: c:\programdata\anaconda3\lib\site-packages
Requires: tensorflow
Required-by: 
