In [1]:
import gym

In [2]:
# Build the 'CartPole-v0' environment through gym's registry; every later cell reuses `env`.
env = gym.make('CartPole-v0')

In [3]:
# Start a fresh episode. The returned initial observation (4 floats) is echoed as the cell output.
env.reset()

array([ 0.03360264,  0.01063811,  0.03272176, -0.00664356])

In [15]:
# Drive the environment with 1000 random actions, purely to watch it render.
# Start from a fresh episode so the cell does not rely on an earlier reset().
env.reset()
try:
    for t in range(1000):
        observation, reward, done, other_info = env.step(env.action_space.sample())
        env.render()
        if done:
            # Stepping a finished episode is not meaningful (gym warns about it),
            # so begin a new episode before the next action.
            env.reset()
finally:
    # Close the render window even if the loop is interrupted.
    env.close()

# Play Game Randomly

In [19]:
# Play a handful of episodes with uniformly random actions as a baseline.
n_random_episodes = 20   # single source of truth for the loop and the log line
max_random_steps = 200   # upper bound on steps per episode

try:
    for e in range(n_random_episodes):
        observation = env.reset()
        for t in range(max_random_steps):
            env.render()
            action = env.action_space.sample()
            observation,reward,done,other_info = env.step(action)

            if done:
                # t is the zero-based index of the step on which the episode ended.
                print('Game Episode :{}/{} High Score :{}'.format(e,n_random_episodes,t))
                break

    print("ALL episodes over!")
finally:
    # Release the render window even when the cell is interrupted or fails.
    env.close()

Game Episode :0/20 High Score :19
Game Episode :1/20 High Score :13
Game Episode :2/20 High Score :13
Game Episode :3/20 High Score :26
Game Episode :4/20 High Score :26
Game Episode :5/20 High Score :16
Game Episode :6/20 High Score :26
Game Episode :7/20 High Score :26
Game Episode :8/20 High Score :51
Game Episode :9/20 High Score :13
Game Episode :10/20 High Score :23
Game Episode :11/20 High Score :17
Game Episode :12/20 High Score :19
Game Episode :13/20 High Score :10
Game Episode :14/20 High Score :26
Game Episode :15/20 High Score :16
Game Episode :16/20 High Score :38
Game Episode :17/20 High Score :11
Game Episode :18/20 High Score :23
Game Episode :19/20 High Score :13
ALL episodes over!


# Deep Q-Learning

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import random


Using TensorFlow backend.


In [5]:
class Agent:
    """Deep Q-learning agent with an epsilon-greedy policy and experience replay.

    A small fully connected Keras network maps a state to one estimated
    Q-value per action. Transitions are stored in a bounded replay memory and
    random minibatches of them are used to fit the network.
    """

    def __init__(self,state_size,action_size):
        """Store hyper-parameters and build the Q-network.

        Args:
            state_size: Number of features in a state (network input width).
            action_size: Number of discrete actions (network output width).
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay memory: beyond 2000 transitions the oldest are dropped.
        self.memory = deque(maxlen = 2000)
        self.gamma = 0.95            # discount applied to the bootstrapped next-state value

        self.epsilon = 1.0           # probability of a random action; starts fully exploratory
        self.epsilon_decay = 0.995   # multiplicative decay applied once per train() call
        self.epsilon_min = 0.01      # floor that the decay never goes below
        self.learning_rate = 0.001   # step size handed to the Adam optimizer
        self.model = self._create_model()

    def _create_model(self):
        """Build and compile the Q-network.

        Two hidden ReLU layers of 24 units, followed by a linear layer with one
        output per action, trained with mean squared error.

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(24,input_dim=self.state_size,activation='relu'))
        model.add(Dense(24,activation='relu'))
        model.add(Dense(self.action_size,activation='linear'))
        # Use the configured learning rate instead of repeating the literal.
        model.compile(loss='mse',optimizer = Adam(lr=self.learning_rate))
        return model

    def remember(self,state,action,reward,next_state,done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state,action,reward,next_state,done))

    def act(self,state):
        """Choose an action with an epsilon-greedy policy.

        Args:
            state: Input accepted by ``self.model.predict``; row 0 of the
                prediction is used (the training loop passes arrays reshaped
                to ``[1, state_size]``).

        Returns:
            A random action index with probability ``epsilon``, otherwise the
            index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            # Explore: uniformly random action.
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def train(self,batch_size=32):
        """Fit the network on a random minibatch drawn from replay memory.

        For each sampled transition the target for the taken action is the
        reward, plus ``gamma`` times the best predicted next-state value when
        the transition is not terminal. The other actions keep their current
        predictions so only the taken action contributes to the loss.
        Afterwards epsilon is decayed once, never dropping below
        ``epsilon_min``.

        Does nothing when fewer than ``batch_size`` transitions are stored.

        Args:
            batch_size: Number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            # random.sample would raise ValueError; wait for more experience instead.
            return

        minibatch = random.sample(self.memory,batch_size)

        for experience in minibatch:
            state,action,reward,next_state,done = experience

            if not done:
                target = reward + self.gamma*np.amax(self.model.predict(next_state)[0])
            else:
                target = reward

            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state,target_f,epochs=1,verbose=0)

        if self.epsilon>self.epsilon_min:
            # Clamp so the final decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon*self.epsilon_decay)

    def load(self,name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self,name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

In [6]:
# Number of training episodes run by the training loop.
n_episodes = 1000
# Directory prefix for saved weight files (only referenced by the commented-out save call in the training cell).
# NOTE(review): 'carpole' looks like a typo for 'cartpole' — confirm before renaming, files may already use this path.
output_dir = 'Datasets/carpole_model/'

In [7]:
# Problem dimensions and replay batch size, declared once and reused below.
state_size = 4
action_size = 2
batch_size = 32

# Episode-finished flag, initialised before the training loop overwrites it.
done = False

# Build the agent from the named constants rather than repeating the literals.
agent = Agent(state_size=state_size, action_size=action_size)

In [None]:
# Train a fresh agent: play n_episodes episodes, storing every transition in
# replay memory and fitting the network on one random minibatch per episode.
agent = Agent(state_size, action_size) # initialise agent
done = False
# Set to a positive integer (e.g. 50) to save weights every that many episodes;
# None leaves checkpointing disabled.
checkpoint_every = None

try:
    for e in range(n_episodes):
        state = env.reset()
        state = np.reshape(state,[1,state_size])

        for step in range(5000):
            env.render()
            action = agent.act(state) #action is 0 or 1
            next_state,reward,done,other_info = env.step(action)

            # gym versions whose TimeLimit wrapper reports it set
            # info['TimeLimit.truncated'] when the episode was merely cut off by
            # the step cap. That is a success, so it must not be punished or
            # stored as terminal. When the key is absent every `done` is treated
            # as a failure, exactly as before.
            timed_out = done and other_info.get('TimeLimit.truncated', False)
            failed = done and not timed_out
            reward = reward if not failed else -10

            next_state = np.reshape(next_state,[1,state_size])
            agent.remember(state,action,reward,next_state,failed)
            state = next_state

            if done:
                print("Game Episode :{}/{}, High Score:{},Exploration Rate:{:.2}".format(e,n_episodes,step,agent.epsilon))
                break

        # Learn only once the memory holds more than one minibatch of transitions.
        if len(agent.memory)>batch_size:
            agent.train(batch_size)

        if checkpoint_every and e%checkpoint_every==0:
            # Make sure the target directory exists before writing weights.
            os.makedirs(output_dir, exist_ok=True)
            agent.save(output_dir+"weights_"+'{:04d}'.format(e)+".hdf5")
finally:
    # Always release the render window, including when the cell is interrupted.
    env.close()

Game Episode :0/1000, High Score:20,Exploration Rate:1.0
Game Episode :1/1000, High Score:31,Exploration Rate:1.0
Game Episode :2/1000, High Score:22,Exploration Rate:0.99
Game Episode :3/1000, High Score:10,Exploration Rate:0.99
Game Episode :4/1000, High Score:34,Exploration Rate:0.99
Game Episode :5/1000, High Score:11,Exploration Rate:0.98
Game Episode :6/1000, High Score:13,Exploration Rate:0.98
Game Episode :7/1000, High Score:13,Exploration Rate:0.97
Game Episode :8/1000, High Score:14,Exploration Rate:0.97
Game Episode :9/1000, High Score:14,Exploration Rate:0.96
Game Episode :10/1000, High Score:64,Exploration Rate:0.96
Game Episode :11/1000, High Score:12,Exploration Rate:0.95
Game Episode :12/1000, High Score:40,Exploration Rate:0.95
Game Episode :13/1000, High Score:11,Exploration Rate:0.94
Game Episode :14/1000, High Score:19,Exploration Rate:0.94
Game Episode :15/1000, High Score:43,Exploration Rate:0.93
Game Episode :16/1000, High Score:60,Exploration Rate:0.93
Game Epis

Game Episode :139/1000, High Score:42,Exploration Rate:0.5
Game Episode :140/1000, High Score:114,Exploration Rate:0.5
Game Episode :141/1000, High Score:199,Exploration Rate:0.5
Game Episode :142/1000, High Score:65,Exploration Rate:0.49
Game Episode :143/1000, High Score:39,Exploration Rate:0.49
Game Episode :144/1000, High Score:85,Exploration Rate:0.49
Game Episode :145/1000, High Score:57,Exploration Rate:0.49
Game Episode :146/1000, High Score:128,Exploration Rate:0.48
Game Episode :147/1000, High Score:69,Exploration Rate:0.48
Game Episode :148/1000, High Score:41,Exploration Rate:0.48
Game Episode :149/1000, High Score:79,Exploration Rate:0.48
Game Episode :150/1000, High Score:18,Exploration Rate:0.47
Game Episode :151/1000, High Score:16,Exploration Rate:0.47
Game Episode :152/1000, High Score:51,Exploration Rate:0.47
Game Episode :153/1000, High Score:43,Exploration Rate:0.47
Game Episode :154/1000, High Score:104,Exploration Rate:0.46
Game Episode :155/1000, High Score:85,E

Game Episode :275/1000, High Score:199,Exploration Rate:0.25
Game Episode :276/1000, High Score:199,Exploration Rate:0.25
Game Episode :277/1000, High Score:199,Exploration Rate:0.25
Game Episode :278/1000, High Score:199,Exploration Rate:0.25
Game Episode :279/1000, High Score:31,Exploration Rate:0.25
Game Episode :280/1000, High Score:199,Exploration Rate:0.25
Game Episode :281/1000, High Score:130,Exploration Rate:0.25
Game Episode :282/1000, High Score:199,Exploration Rate:0.24
Game Episode :283/1000, High Score:162,Exploration Rate:0.24
Game Episode :284/1000, High Score:188,Exploration Rate:0.24
Game Episode :285/1000, High Score:187,Exploration Rate:0.24
Game Episode :286/1000, High Score:199,Exploration Rate:0.24
Game Episode :287/1000, High Score:199,Exploration Rate:0.24
Game Episode :288/1000, High Score:199,Exploration Rate:0.24
Game Episode :289/1000, High Score:199,Exploration Rate:0.24
Game Episode :290/1000, High Score:199,Exploration Rate:0.23
Game Episode :291/1000, H

Game Episode :410/1000, High Score:130,Exploration Rate:0.13
Game Episode :411/1000, High Score:156,Exploration Rate:0.13
Game Episode :412/1000, High Score:149,Exploration Rate:0.13
Game Episode :413/1000, High Score:41,Exploration Rate:0.13
Game Episode :414/1000, High Score:138,Exploration Rate:0.13
Game Episode :415/1000, High Score:128,Exploration Rate:0.13
Game Episode :416/1000, High Score:137,Exploration Rate:0.12
Game Episode :417/1000, High Score:24,Exploration Rate:0.12
Game Episode :418/1000, High Score:126,Exploration Rate:0.12
Game Episode :419/1000, High Score:117,Exploration Rate:0.12
Game Episode :420/1000, High Score:121,Exploration Rate:0.12
Game Episode :421/1000, High Score:32,Exploration Rate:0.12
Game Episode :422/1000, High Score:128,Exploration Rate:0.12
Game Episode :423/1000, High Score:199,Exploration Rate:0.12
Game Episode :424/1000, High Score:153,Exploration Rate:0.12
Game Episode :425/1000, High Score:143,Exploration Rate:0.12
Game Episode :426/1000, Hig

Game Episode :544/1000, High Score:163,Exploration Rate:0.066
Game Episode :545/1000, High Score:199,Exploration Rate:0.065
Game Episode :546/1000, High Score:151,Exploration Rate:0.065
Game Episode :547/1000, High Score:146,Exploration Rate:0.065
Game Episode :548/1000, High Score:138,Exploration Rate:0.064
Game Episode :549/1000, High Score:199,Exploration Rate:0.064
Game Episode :550/1000, High Score:199,Exploration Rate:0.064
Game Episode :551/1000, High Score:199,Exploration Rate:0.063
Game Episode :552/1000, High Score:169,Exploration Rate:0.063
Game Episode :553/1000, High Score:152,Exploration Rate:0.063
Game Episode :554/1000, High Score:199,Exploration Rate:0.063
Game Episode :555/1000, High Score:199,Exploration Rate:0.062
Game Episode :556/1000, High Score:199,Exploration Rate:0.062
Game Episode :557/1000, High Score:199,Exploration Rate:0.062
Game Episode :558/1000, High Score:199,Exploration Rate:0.061
Game Episode :559/1000, High Score:199,Exploration Rate:0.061
Game Epi

In [9]:
# Close the environment explicitly (the training cell above also ends with env.close()).
env.close()

--2020-04-07 14:57:47--  https://raw.githubusercontent.com/coding-blocks-archives/machine-learning-online-2018/master/Reinforcement%20Learning/Reinforcement_Learning_Tutorial_Deep_Q_Learning.ipynb
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.152.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.152.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 83826 (82K) [text/plain]
Saving to: ‘Reinforcement_Learning_Tutorial_Deep_Q_Learning.ipynb’


2020-04-07 14:57:48 (842 KB/s) - ‘Reinforcement_Learning_Tutorial_Deep_Q_Learning.ipynb’ saved [83826/83826]

