In [1]:
import gym
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from collections import deque
import random
import os

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Creating env
env = gym.make('CartPole-v0')

In [3]:
env.reset()
for t in range(10000):
    random_action = env.action_space.sample()
    env.step(random_action)
    env.render()

env.close()



In [4]:
class Agent:
    
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = .95
        self.epsilon = 1.0 # 100% exploration in the beggining
        self.epsilon_decay = .995
        self.epsilon_min = .01
        self.learning_rate = .0001
        self.model = self._create_model()
        
    def _create_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=.0001))
        return model

    def remember(self, state, action, reward, next_state, done):
        
        # Remember past action
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        
        # Epsilon Greedy Method
        if np.random.rand()<=self.epsilon:
            # Take a random action
            return np.random.randint(self.action_size)
        # Ask neural network to suggest suitable action
        return np.argmax(self.model.predict(state)[0])
    
    def train(self, batch_size=32):
        
        # Training using replay buffer
        minibatch = random.sample(self.memory, batch_size)
        
        for experience in minibatch:
            
            state, action, reward, next_state, done = experience
            
            if not done:
                target = reward + self.gamma*np.amax(self.model.predict(next_state)[0])
            else:
                target = reward
                
            target_f = self.model.predict(state)
            target_f[0][action] = target
            
            self.model.fit(state, target_f, epochs=1, verbose=0)
            
        if self.epsilon>self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    def load(self, name):
        self.model.load_weights(name)
    
    def save(self, name):
        self.model.save_weights(name)
                
            

## Training the DQN agent

In [5]:
n_episodes = 1000
output_dir = "cartpole_model/"

try:
    os.makedirs(output_dir)
    print("Directory " + output_dir + " created.")
except FileExistsError:
    print("Directory " + output_dir + " already exits.")

Directory cartpole_model/ created.


In [6]:
agent = Agent(state_size=4, action_size=2)
done = False

state_size = 4

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
for e in range(n_episodes):
    
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    batch_size = 32
    
    for time in range(500):
        env.render()
        action = agent.act(state)
        next_state, reward, done, other_info = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        
        if done:
            print("Game Episode :{}/{} High Score :{} Exploration Rate :{:.2}".format(e, n_episodes, t, agent.epsilon))
            break
        
    if len(agent.memory)>batch_size:
        agent.train(batch_size)
    
    if e%50==0:
        agent.save(output_dir+"weights_"+"{:04d}".format(e)+"hdf5")
        
print("Deep Q-learner model trained")
env.close()

Game Episode :0/1000 High Score :9999 Exploration Rate :1.0
Game Episode :1/1000 High Score :9999 Exploration Rate :1.0
Instructions for updating:
Use tf.cast instead.
Game Episode :2/1000 High Score :9999 Exploration Rate :0.99
Game Episode :3/1000 High Score :9999 Exploration Rate :0.99
Game Episode :4/1000 High Score :9999 Exploration Rate :0.99
Game Episode :5/1000 High Score :9999 Exploration Rate :0.98
Game Episode :6/1000 High Score :9999 Exploration Rate :0.98
Game Episode :7/1000 High Score :9999 Exploration Rate :0.97
Game Episode :8/1000 High Score :9999 Exploration Rate :0.97
Game Episode :9/1000 High Score :9999 Exploration Rate :0.96
Game Episode :10/1000 High Score :9999 Exploration Rate :0.96
Game Episode :11/1000 High Score :9999 Exploration Rate :0.95
Game Episode :12/1000 High Score :9999 Exploration Rate :0.95
Game Episode :13/1000 High Score :9999 Exploration Rate :0.94
Game Episode :14/1000 High Score :9999 Exploration Rate :0.94
Game Episode :15/1000 High Score :

Game Episode :128/1000 High Score :9999 Exploration Rate :0.53
Game Episode :129/1000 High Score :9999 Exploration Rate :0.53
Game Episode :130/1000 High Score :9999 Exploration Rate :0.52
Game Episode :131/1000 High Score :9999 Exploration Rate :0.52
Game Episode :132/1000 High Score :9999 Exploration Rate :0.52
Game Episode :133/1000 High Score :9999 Exploration Rate :0.52
Game Episode :134/1000 High Score :9999 Exploration Rate :0.51
Game Episode :135/1000 High Score :9999 Exploration Rate :0.51
Game Episode :136/1000 High Score :9999 Exploration Rate :0.51
Game Episode :137/1000 High Score :9999 Exploration Rate :0.51
Game Episode :138/1000 High Score :9999 Exploration Rate :0.5
Game Episode :139/1000 High Score :9999 Exploration Rate :0.5
Game Episode :140/1000 High Score :9999 Exploration Rate :0.5
Game Episode :141/1000 High Score :9999 Exploration Rate :0.5
Game Episode :142/1000 High Score :9999 Exploration Rate :0.49
Game Episode :143/1000 High Score :9999 Exploration Rate :0

Game Episode :260/1000 High Score :9999 Exploration Rate :0.27
Game Episode :261/1000 High Score :9999 Exploration Rate :0.27
Game Episode :262/1000 High Score :9999 Exploration Rate :0.27
Game Episode :263/1000 High Score :9999 Exploration Rate :0.27
Game Episode :264/1000 High Score :9999 Exploration Rate :0.27
Game Episode :265/1000 High Score :9999 Exploration Rate :0.27
Game Episode :266/1000 High Score :9999 Exploration Rate :0.26
Game Episode :267/1000 High Score :9999 Exploration Rate :0.26
Game Episode :268/1000 High Score :9999 Exploration Rate :0.26
Game Episode :269/1000 High Score :9999 Exploration Rate :0.26
Game Episode :270/1000 High Score :9999 Exploration Rate :0.26
Game Episode :271/1000 High Score :9999 Exploration Rate :0.26
Game Episode :272/1000 High Score :9999 Exploration Rate :0.26
Game Episode :273/1000 High Score :9999 Exploration Rate :0.26
Game Episode :274/1000 High Score :9999 Exploration Rate :0.25
Game Episode :275/1000 High Score :9999 Exploration Rat

Game Episode :392/1000 High Score :9999 Exploration Rate :0.14
Game Episode :393/1000 High Score :9999 Exploration Rate :0.14
Game Episode :394/1000 High Score :9999 Exploration Rate :0.14
Game Episode :395/1000 High Score :9999 Exploration Rate :0.14
Game Episode :396/1000 High Score :9999 Exploration Rate :0.14
Game Episode :397/1000 High Score :9999 Exploration Rate :0.14
Game Episode :398/1000 High Score :9999 Exploration Rate :0.14
Game Episode :399/1000 High Score :9999 Exploration Rate :0.14
Game Episode :400/1000 High Score :9999 Exploration Rate :0.14
Game Episode :401/1000 High Score :9999 Exploration Rate :0.13
Game Episode :402/1000 High Score :9999 Exploration Rate :0.13
Game Episode :403/1000 High Score :9999 Exploration Rate :0.13
Game Episode :404/1000 High Score :9999 Exploration Rate :0.13
Game Episode :405/1000 High Score :9999 Exploration Rate :0.13
Game Episode :406/1000 High Score :9999 Exploration Rate :0.13
Game Episode :407/1000 High Score :9999 Exploration Rat

Game Episode :522/1000 High Score :9999 Exploration Rate :0.073
Game Episode :523/1000 High Score :9999 Exploration Rate :0.073
Game Episode :524/1000 High Score :9999 Exploration Rate :0.073
Game Episode :525/1000 High Score :9999 Exploration Rate :0.072
Game Episode :526/1000 High Score :9999 Exploration Rate :0.072
Game Episode :527/1000 High Score :9999 Exploration Rate :0.072
Game Episode :528/1000 High Score :9999 Exploration Rate :0.071
Game Episode :529/1000 High Score :9999 Exploration Rate :0.071
Game Episode :530/1000 High Score :9999 Exploration Rate :0.071
Game Episode :531/1000 High Score :9999 Exploration Rate :0.07
Game Episode :532/1000 High Score :9999 Exploration Rate :0.07
Game Episode :533/1000 High Score :9999 Exploration Rate :0.069
Game Episode :534/1000 High Score :9999 Exploration Rate :0.069
Game Episode :535/1000 High Score :9999 Exploration Rate :0.069
Game Episode :536/1000 High Score :9999 Exploration Rate :0.068
Game Episode :537/1000 High Score :9999 Ex

Game Episode :652/1000 High Score :9999 Exploration Rate :0.038
Game Episode :653/1000 High Score :9999 Exploration Rate :0.038
Game Episode :654/1000 High Score :9999 Exploration Rate :0.038
Game Episode :655/1000 High Score :9999 Exploration Rate :0.038
Game Episode :656/1000 High Score :9999 Exploration Rate :0.038
Game Episode :657/1000 High Score :9999 Exploration Rate :0.037
Game Episode :658/1000 High Score :9999 Exploration Rate :0.037
Game Episode :659/1000 High Score :9999 Exploration Rate :0.037
Game Episode :660/1000 High Score :9999 Exploration Rate :0.037
Game Episode :661/1000 High Score :9999 Exploration Rate :0.037
Game Episode :662/1000 High Score :9999 Exploration Rate :0.036
Game Episode :663/1000 High Score :9999 Exploration Rate :0.036
Game Episode :664/1000 High Score :9999 Exploration Rate :0.036
Game Episode :665/1000 High Score :9999 Exploration Rate :0.036
Game Episode :666/1000 High Score :9999 Exploration Rate :0.036
Game Episode :667/1000 High Score :9999 