In [1]:
import time
import numpy as np
from numpy import radians
import random
from ipywidgets import IntProgress
from IPython.display import display
from lib.cartpolesystem import CartPoleSystem
from lib.colors import Colors
from lib.cartpolesenv import CartPolesEnv
import rl
from rl.memory import SequentialMemory
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Input, Flatten, Dense
from keras.optimizers import Adam
from sklearn.preprocessing import KBinsDiscretizer

In [2]:
dt = 0.01
g = 9.81

system = CartPoleSystem(
    (0.0, 0.5, 0.05, -0.8, 0.8, Colors.red),
    (0.05, 0.05, 0.01, 0.5, 0.05, -24.0, 24.0, Colors.black),
    [
        (radians(10), 0.2, 0.2, 0.005, Colors.green),
    ],
    g,
    "rk4"
)

env = CartPolesEnv(system, dt, g)
print("Obs", env.observation_space.shape)
print("Actions", env.action_space.shape)

Obs (5,)
Actions (1,)


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [3]:
# setup experience replay buffer
memory = SequentialMemory(limit=50000, window_length=1)

# setup the Linear annealed policy with the EpsGreedyQPolicy as the inner policy
policy =  LinearAnnealedPolicy(inner_policy=  EpsGreedyQPolicy(),   # policy used to select actions
                               attr='eps',                          # attribute in the inner policy to vary             
                               value_max=1.0,                       # maximum value of attribute that is varying
                               value_min=0.1,                       # minimum value of attribute that is varying
                               value_test=0.05,                     # test if the value selected is < 0.05
                               nb_steps=10000)                      # the number of steps between value_max and value_min

#Feed-Forward Neural Network Model for Deep Q Learning (DQN)
model = Sequential()
model.add(Input(shape=(1,env.observation_space.shape[0])))  
#Input is 1 observation vector, and the number of observations in that vector 
model.add(Flatten())
#Hidden layers with 24 nodes each
model.add(Dense(24, activation='relu'))
model.add(Dense(24, activation='relu'))
#Output is the number of actions in the action space
model.add(Dense(env.action_space.shape[0], activation='linear')) 

#Defining DQN Agent for DQN Model
dqn = DQNAgent(model=model,                     # Q-Network model
               nb_actions=env.action_space.shape[0],   # number of actions
               memory=memory,                   # experience replay memory
               nb_steps_warmup=25,              # how many steps are waited before starting experience replay
               target_model_update=1e-2,        # how often the target network is updated
               policy=policy)                   # the action selection policy

#Feed-Forward Neural Network Architecture Summary
print(model.summary())

# Finally, we configure and compile our agent. 
#We can use built-in tensorflow.keras Adam optimizer and evaluation metrics            
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae','accuracy'])

#Finally fit and train the agent
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
     

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 5)                 0         
                                                                 
 dense (Dense)               (None, 24)                144       
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 1)                 25        
                                                                 
Total params: 769
Trainable params: 769
Non-trainable params: 0
_________________________________________________________________
None
Training for 50000 steps ...


ValueError: Error when checking input: expected input_1 to have shape (1, 5) but got array with shape (1, 2)