# Cartpole DQN

#### Import Dependencies

In [1]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os

Using TensorFlow backend.


#### Set Parameters

In [2]:
# Environment and the problem dimensions derived from it.
env = gym.make("CartPole-v0")
state_size = env.observation_space.shape[0]  # length of one observation vector
action_size = env.action_space.n  # number of discrete actions
batch_size = 32  # transitions sampled from memory per replay (gradient-descent) pass
n_episodes = 1001  # maximum number of games (episodes) the agent plays

# Directory that receives the periodic weight checkpoints.
# It used to be an absolute path inside one user's home directory, which fails
# on any other machine. It is now relative to the working directory and can be
# overridden through the CARTPOLE_OUTPUT_DIR environment variable.
# os.path.join(..., "") guarantees a trailing separator, so code that builds
# file names with `output_dir + "weights_..."` keeps working.
output_dir = os.path.join(
    os.environ.get("CARTPOLE_OUTPUT_DIR", os.path.join("model_output", "cartpole")),
    "",
)
os.makedirs(output_dir, exist_ok=True)  # saving weights fails if the folder is missing

#### Define Agent 

In [3]:
class DQNAgent:
    """Q-learning agent backed by a small dense network.

    The agent picks actions epsilon-greedily, stores every transition in a
    bounded replay memory and learns by re-fitting its network on random
    samples drawn from that memory.
    """

    def __init__(self, state_size, action_size):
        """Store the problem dimensions, set hyper-parameters and build the network.

        Args:
            state_size: number of inputs fed to the network.
            action_size: number of actions, i.e. number of network outputs.
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded double-ended queue: once 2000 transitions are stored, the
        # oldest one is dropped for every new one appended.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount factor applied to the next state's best Q-value
        self.epsilon = 1.0  # exploration rate: probability of taking a random action
        self.epsilon_decay = 0.995  # factor applied to epsilon after each replay
        self.epsilon_min = 0.01  # epsilon is no longer decayed once it is at or below this
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the network: two 24-unit ReLU layers and a linear output.

        Returns:
            The compiled Keras ``Sequential`` model (MSE loss, Adam optimizer).
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): `lr` matches the Keras version this notebook was run
        # with; newer releases reportedly spell it `learning_rate` - confirm
        # before upgrading.
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        ``done`` tells whether the episode ended with this transition.
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the index of the largest predicted value for
        ``state`` is returned.

        Args:
            state: network input; the first row of the prediction is used.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        # int(...) so both branches return the same type.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the network on ``batch_size`` randomly sampled transitions.

        Each sampled transition is fitted individually (one ``fit`` call per
        transition). After the pass, epsilon is decayed while it is still
        above ``epsilon_min``.

        If fewer than ``batch_size`` transitions have been remembered, the
        call returns without training or decaying epsilon instead of letting
        ``random.sample`` raise ``ValueError``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            if done:
                # Terminal transition: there is no future value to add.
                target = reward
            else:
                # Q-learning target: reward plus the discounted best predicted
                # value of the next state.
                next_values = self.model.predict(next_state, verbose=0)
                target = reward + self.gamma * np.amax(next_values[0])
            # Start from the current predictions so that only the taken
            # action's value is moved towards the target.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

In [4]:
# Create the agent (this also builds its network) for the environment's
# observation and action sizes.
agent = DQNAgent(state_size=state_size, action_size=action_size)

Instructions for updating:
Colocations handled automatically by placer.


#### Interact With The Environment

In [None]:
# Play n_episodes games. Every step is stored in the agent's memory and, once
# the memory holds more than batch_size transitions, followed by one replay pass.
for e in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])  # the network expects a (1, state_size) row
    # 5000 is only an upper bound on steps per episode; the loop normally ends
    # through the `done` break below.
    for t in range(5000):
        env.render()
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        # Replace the reward of the step that ends the episode with a penalty.
        # NOTE(review): this also penalises episodes that end for any reason
        # other than failure (e.g. a step limit) - confirm that is intended.
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # `t` is the zero-based index of the final step, reported as the score.
            print("Episode: {}/{},score: {}, e: {:.2}".format(e,n_episodes,t,agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    # Checkpoint once per 50th episode. This check used to sit inside the step
    # loop, which rewrote the same file on every step of those episodes.
    if e % 50 == 0:
        agent.save(os.path.join(output_dir, "weights_" + '{:04d}'.format(e) + ".hdf5"))

Instructions for updating:
Use tf.cast instead.
Episode: 0/1001,score: 41, e: 0.96
Episode: 1/1001,score: 11, e: 0.9
Episode: 2/1001,score: 22, e: 0.81
Episode: 3/1001,score: 8, e: 0.78
Episode: 4/1001,score: 9, e: 0.74
Episode: 5/1001,score: 24, e: 0.66
Episode: 6/1001,score: 8, e: 0.63
Episode: 7/1001,score: 20, e: 0.57
Episode: 8/1001,score: 20, e: 0.52
Episode: 9/1001,score: 11, e: 0.49
Episode: 10/1001,score: 15, e: 0.46
Episode: 11/1001,score: 14, e: 0.42
Episode: 12/1001,score: 11, e: 0.4
Episode: 13/1001,score: 17, e: 0.37
Episode: 14/1001,score: 16, e: 0.34
Episode: 15/1001,score: 11, e: 0.32
Episode: 16/1001,score: 14, e: 0.3
Episode: 17/1001,score: 8, e: 0.29
Episode: 18/1001,score: 9, e: 0.28
Episode: 19/1001,score: 11, e: 0.26
Episode: 20/1001,score: 11, e: 0.25
Episode: 21/1001,score: 37, e: 0.21
Episode: 22/1001,score: 40, e: 0.17
Episode: 23/1001,score: 21, e: 0.15
Episode: 24/1001,score: 49, e: 0.12
Episode: 25/1001,score: 40, e: 0.097
Episode: 26/1001,score: 56, e: 0.

Episode: 214/1001,score: 166, e: 0.01
Episode: 215/1001,score: 76, e: 0.01
Episode: 216/1001,score: 109, e: 0.01
Episode: 217/1001,score: 125, e: 0.01
Episode: 218/1001,score: 108, e: 0.01
Episode: 219/1001,score: 164, e: 0.01
Episode: 220/1001,score: 132, e: 0.01
Episode: 221/1001,score: 161, e: 0.01
Episode: 222/1001,score: 140, e: 0.01
Episode: 223/1001,score: 132, e: 0.01
Episode: 224/1001,score: 25, e: 0.01
Episode: 225/1001,score: 11, e: 0.01
Episode: 226/1001,score: 53, e: 0.01
Episode: 227/1001,score: 191, e: 0.01
Episode: 228/1001,score: 199, e: 0.01
Episode: 229/1001,score: 131, e: 0.01
Episode: 230/1001,score: 140, e: 0.01
Episode: 231/1001,score: 102, e: 0.01
Episode: 232/1001,score: 199, e: 0.01
Episode: 233/1001,score: 199, e: 0.01
Episode: 234/1001,score: 186, e: 0.01
Episode: 235/1001,score: 176, e: 0.01
Episode: 236/1001,score: 143, e: 0.01
Episode: 237/1001,score: 122, e: 0.01
Episode: 238/1001,score: 132, e: 0.01
Episode: 239/1001,score: 13, e: 0.01
Episode: 240/1001

Episode: 444/1001,score: 199, e: 0.01
Episode: 445/1001,score: 199, e: 0.01
Episode: 446/1001,score: 174, e: 0.01
Episode: 447/1001,score: 149, e: 0.01
Episode: 448/1001,score: 190, e: 0.01
Episode: 449/1001,score: 185, e: 0.01
Episode: 450/1001,score: 116, e: 0.01
Episode: 451/1001,score: 199, e: 0.01
Episode: 452/1001,score: 44, e: 0.01
Episode: 453/1001,score: 45, e: 0.01
Episode: 454/1001,score: 148, e: 0.01
Episode: 455/1001,score: 199, e: 0.01
Episode: 456/1001,score: 199, e: 0.01
Episode: 457/1001,score: 57, e: 0.01
Episode: 458/1001,score: 72, e: 0.01
Episode: 459/1001,score: 199, e: 0.01
Episode: 460/1001,score: 28, e: 0.01
Episode: 461/1001,score: 199, e: 0.01
Episode: 462/1001,score: 199, e: 0.01
Episode: 463/1001,score: 115, e: 0.01
Episode: 464/1001,score: 199, e: 0.01
Episode: 465/1001,score: 199, e: 0.01
Episode: 466/1001,score: 155, e: 0.01
Episode: 467/1001,score: 199, e: 0.01
Episode: 468/1001,score: 199, e: 0.01
Episode: 469/1001,score: 199, e: 0.01
Episode: 470/1001