In [1]:
import matplotlib.pyplot as plt
import gym
import csv
import random
import logging
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
import wandb
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from keras.callbacks import CSVLogger

In [2]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Convolution2D(32, (8, 8), strides=(4, 4),
              activation='relu', input_shape=(3, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [3]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(
    ), attr='eps', value_max=1, value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup = 10000, )
    return dqn

In [4]:
env = gym.make("Freeway-v0")
env = gym.wrappers.ResizeObservation(env, (88, 88))
height, width, channels = env.observation_space.shape
actions = env.action_space.n
model = build_model(height, width, channels, actions)

In [5]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 21, 21, 32)     6176      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 9, 9, 64)       32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 7, 7, 64)       36928     
_________________________________________________________________
flatten (Flatten)            (None, 9408)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               4817408   
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 7

In [6]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=0.001))
dqn.fit(env, nb_steps=10000, visualize=True, verbose=2)

Training for 10000 steps ...
 2745/10000: episode: 1, duration: 21.807s, episode steps: 2745, steps per second: 126, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 0.861 [0.000, 2.000],  loss: --, mean_q: --, mean_eps: --
 5491/10000: episode: 2, duration: 21.537s, episode steps: 2746, steps per second: 128, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 0.629 [0.000, 2.000],  loss: --, mean_q: --, mean_eps: --
 8242/10000: episode: 3, duration: 21.439s, episode steps: 2751, steps per second: 128, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 0.393 [0.000, 2.000],  loss: --, mean_q: --, mean_eps: --
done, took 78.491 seconds


<tensorflow.python.keras.callbacks.History at 0x7f4064255f10>

In [None]:
scores = dqn.test(env, nb_episodes = 10, visualize=True)
print(np.mean(scores.history['episode_reward']))

In [8]:
env.close()

In [20]:
import numpy as np
myarray = np.fromfile('temp.h5f.data-00000-of-00001', dtype=float)
print(myarray)

[ 4.28302234e-270  5.86647438e+025  5.32304524e+006  6.34904233e+025
  2.21253796e+040 -1.76973233e+053  8.56384747e-041  6.58231311e+180
  1.01273734e+267  1.27171878e-076  9.89113690e+005  9.83245612e+025
  1.48489143e+011  1.02990189e+026  2.23825620e-154  2.28393623e-053
  9.35203400e+207  9.89110198e+005  9.83245612e+025  1.48489143e+011
  1.02990189e+026  5.68056423e-270  4.69885250e-294  3.07988422e-300
  1.01444746e-211  3.16199836e-086  1.81635207e+098  3.31891467e+006
  7.91056123e+102  4.27563924e-270  1.23520445e-296  4.17478251e-149
  3.15925543e-086  1.81635207e+098  3.31891467e+006  7.91056123e+102
  4.27568256e-270  4.27563910e-270 -3.38186250e+007  4.56185909e-172
  8.98216983e+164  1.73054346e+098  1.01578389e-080  6.83256069e+025
  1.10601002e-303 -4.51971526e-308  8.29251135e+133  1.14254956e+243
  1.73054346e+098  1.01578389e-080  6.83256069e+025  1.10601002e-303
  1.07582693e-298  1.09005920e-298 -6.68582943e-308  1.13173997e-216
  6.08704465e+247  2.17510881e+011