In [7]:
#imports
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation, Convolution2D, Permute
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint, Visualizer, TrainIntervalLogger, TestLogger

In [8]:
# Run configuration shared by the cells below.
file_name = "test_1"        # tag used in the log, checkpoint, weights and CSV file names
window_size = 4             # number of stacked observations fed to the network / replay memory
NB_STEPS = 5_000_000        # total number of training steps passed to dqn.fit
NB_STEPS_POL = 1_000_000    # presumably the epsilon-annealing length for the policy -- confirm

# Create the environment.
# Other ROM name that has been used here: 'Breakout-ram-v4'
env = gym.make('SpaceInvaders-ram-v4')

# Size of the discrete action space and shape of a single observation.
nb_actions, nb_obs = env.action_space.n, env.observation_space.shape

print("actions:", nb_actions, "   observations:", nb_obs)

actions: 6    observations: (128,)


In [9]:
# Output locations for this run (all derived from file_name).
weights_filename = f"model/{file_name}_weights.h5f"          # final weights
checkpoint_filename = f"model/{file_name}_checkpoint.h5f"    # periodic checkpoints

# Training callbacks.
# Writes the training metrics to a text log; interval=1 is the logger's save interval.
logger = FileLogger(filepath=f'training_logs_{file_name}.txt', interval=1)

# Saves the model weights to checkpoint_filename at an interval of 100000.
checkpoint_callback = ModelIntervalCheckpoint(filepath=checkpoint_filename, interval=100000)

In [10]:
# Build the Q-network: a fully connected stack that maps the flattened window of
# observations to one linear output per action.
model = Sequential()

# Input is (window_size,) + nb_obs; flatten it into a single vector.
model.add(Flatten(input_shape=(window_size,) + nb_obs))

# Hidden layers: each Dense layer is followed by its own ReLU Activation layer,
# with the width halving at every step.
for units in (256, 128, 64, 32, 16, 8):
    model.add(Dense(units))
    model.add(Activation('relu'))

# Output layer: one unbounded (linear) value per action.
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" after the table).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
activation_7 (Activation)    (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               32896     
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 64)                8256      
_________________________________________________________________
activation_9 (Activation)    (None, 64)               

In [11]:
# Set up the DQN agent: replay memory, exploration policy, then the agent itself.

# To continue from earlier training, load saved weights into the model first:
# model.load_weights("model/test_1_checkpoint.h5f")   # from an interval checkpoint
# model.load_weights("model/test_1_weights.h5f")      # from the weights saved after training

# Replay memory; window_length must match the window_size used for the model input.
memory = SequentialMemory(limit=1000000, window_length=window_size)

# Epsilon-greedy policy whose 'eps' attribute is annealed between value_max and
# value_min over NB_STEPS_POL steps; value_test is the value configured for testing.
# The annealing length comes from the config constant instead of a duplicated literal
# so the two cannot drift apart.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=.3,
                              value_min=.1,
                              value_test=.05,
                              nb_steps=NB_STEPS_POL)

# Create the agent. batch_size is intentionally not passed, so the library default is
# used; pass it explicitly here if it needs tuning.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100000,
               target_model_update=10000, policy=policy, gamma=0.99)

In [12]:
# Compile the agent, train it, and save the final weights.
# `learning_rate` is the current name of the optimizer argument; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train for NB_STEPS steps, logging metrics and writing periodic checkpoints via the callbacks.
train_history = dqn.fit(env, nb_steps=NB_STEPS, callbacks=[logger, checkpoint_callback],
                        visualize=False, verbose=2)

# Save the final weights to the path defined next to the callbacks (model/<file_name>_weights.h5f).
dqn.save_weights(weights_filename, overwrite=True)

Training for 5000000 steps ...
     846/5000000: episode: 1, duration: 1.904s, episode steps: 846, steps per second: 444, episode reward: 270.000, mean reward:  0.319 [ 0.000, 30.000], mean action: 3.559 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
    1522/5000000: episode: 2, duration: 1.609s, episode steps: 676, steps per second: 420, episode reward: 245.000, mean reward:  0.362 [ 0.000, 30.000], mean action: 3.629 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
    2449/5000000: episode: 3, duration: 2.019s, episode steps: 927, steps per second: 459, episode reward: 310.000, mean reward:  0.334 [ 0.000, 30.000], mean action: 3.576 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
    3079/5000000: episode: 4, duration: 1.459s, episode steps: 630, steps per second: 432, episode reward: 185.000, mean reward:  0.294 [ 0.000, 30.000], mean action: 3.605 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
    3704/5000000: episode: 5, dur

In [None]:
# Test the agent for a few rendered episodes.
# `learning_rate` is the current name of the optimizer argument; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
try:
    dqn.test(env, nb_episodes=5, visualize=True)
finally:
    # Always release the environment (and its render window), even if testing
    # raises or is interrupted.
    env.close()

In [None]:
import json

import pandas as pd
import matplotlib.pyplot as plt

# Convert the training log into a CSV with one row per logged entry.

# Metric columns expected in the log, in the order they should appear in the CSV.
cols = ["loss", "mae", "mean_q", "mean_eps", "episode_reward", "nb_episode_steps", "nb_steps", "episode", "duration"]

# The log file holds a single JSON object mapping each metric name to a list of values,
# so parse it as JSON instead of stripping keys and brackets by hand. The `with` block
# also guarantees the file handle is closed.
with open(f"training_logs_{file_name}.txt") as log_file:
    log_data = json.load(log_file)  # Python's json accepts the bare NaN tokens found in the log

df = pd.DataFrame(log_data)

# Expected columns first (in `cols` order), followed by any additional metrics in the log.
ordered_cols = [c for c in cols if c in df.columns] + [c for c in df.columns if c not in cols]

# NaN entries are written as 0, matching how this log has always been exported.
df = df[ordered_cols].fillna(0)

df.to_csv(f"{file_name}.csv")