## Reinforcement Learning 

In [1]:
#!pip install gym

In [2]:
import gym

In [3]:
# create Environment 

env = gym.make('CartPole-v0')

### Environment comes with ceratin methods/attributes ,few are 
- action_space - All posible actions. Eg go left, go right,etc.
- observation_space
- reset() - Takes to the intial state
- step()
- render() - gives a pop up of the game



In [4]:
env.reset()

array([ 0.02173096, -0.02197105,  0.03675878,  0.04813006])

In [5]:
env.action_space #2 actions are either move left or right

Discrete(2)

In [6]:
#object of type Box , to represent n dimensional Box
env.observation_space

Box(4,)

## Refer 
https://gym.openai.com/docs/

for t in range(1000):
    random_action = env.action_space.sample()
    env.step(random_action) #step exeutes the step
    env.render()
env.close()

for e in range(20): # e stands for Episode
    #Play 20 games(episodes)
    observation = env.reset() #After every game reset the environment
    for t in range(50): #Rendering the game (Max time range for which we'll play 1 episode)
        env.render()
        action = env.action_space.sample()
        observation,reward,done,other_info = env.step(action)
        
        if done:
            #Game Episode over
            print("Game Episode:{}/{} High Score :{}".format(e,20,t))
            break

env.close()
print("All 20 episodes over!")

## Playing Game with a Random Strategy
- Game Episode - Start of the game to the end of the game.
- Step() Function in More Detail
- Game Over?

### step() gives : 
- observation
- reward
- done
- info

# Q-Learning

# Agent Design & Neural Model

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import random

Using TensorFlow backend.


In [8]:
class Agent:
    def __init__(self,state_size,action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        # Exploration vs Exploitation Tradeoff
        # Exploration : Good in th ebeinning --> helps you try various random things
        # Exploiation : Sample Good experience from the past(memory) --> good in the end
        self.epsilon = 1.0 # 100% Random Exploration in the beginning 
                           # Gradually lowering the value of epsilon for more taking 
                           # actions in the memory rather than random exploration
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._create_model()
        
    
    def _create_model(self):
        model = Sequential()
        model.add(Dense(24,input_dim=self.state_size,activation='relu'))
        model.add(Dense(24,activation='relu'))
        model.add(Dense(self.action_size,activation='linear'))
        model.compile(loss='mse',optimizer=Adam(lr=0.001))
        
        return model
    
    def remember(self,state,action,reward,next_state,done):
        #Remember past experience
        self.memory.append((state,action,reward,next_state,done))
        
    def act(self,state):
        #Sampling according to Epsilon GREEDY Method
        if np.random.rand()<=self.epsilon:
            #Take a Random action
            return random.randrange(self.action_size)
        #Ask neural network to give me the suitable action
        return np.argmax(self.model.predict(state)[0])
    
    def train(self,batch_size=32) :
        #Training with 'Replay Buffer'
        minibatch = random.sample(self.memory,batch_size)
        for experience in minibatch:
            state,action,reward,next_state,done = experience
            # X,Y : state,expected reward
            
            if not done:
                #game is not yet over ,Bellman eqn to approximate the target value of the reward
                target = reward + self.gamma*np.amax(self.model.predict(next_state)[0])
                
            else:
                target = reward
                
            target_f = self.model.predict(state)
            target_f[0][action] = target
            
            #X = state, Y = target_f
            self.model.fit(state,target_f,epochs=1,verbose=0)
            
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
                
                
    def load(self,name):
        self.model.load_weights(name)
        
    #def save(self,name):
     #self.model.save_weights(name)
                
                

## Training the DQN Agent(Deep Q-Learner)

In [9]:
n_episodes = 1000
output_dir = "carpole_model/"

In [10]:
agent = Agent(state_size=4,action_size=2)
done = False
state_size = 4
action_size = 2

In [11]:
for e in range(n_episodes):
    state = env.reset()
    state = np.reshape(state,[1,state_size])
    batch_size = 32
    
    for time in range(500):
        env.render()
        action = agent.act(state) #action is 0 or 1
        next_state,reward,done,other_info = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state,[1,state_size])
        agent.remember(state,action,reward,next_state,done) #Experience for the agent
        
        if done:
            print("Game Episode:{}/{} High Score :{} Exploration Rate:{:.2}".format(e,20,time,agent.epsilon))
            break
            
            
    if len(agent.memory)>batch_size:
        agent.train(batch_size)
        
    #if e%50==0:
        #agent.save(output_dir+"weights_"+'{:04d}'.format(e)+".hdf5")
            
            
print("Deep Q-Learner Model Trained")
env.close()

            

Game Episode:0/20 High Score :22 Exploration Rate:1.0
Game Episode:1/20 High Score :53 Exploration Rate:1.0
Game Episode:2/20 High Score :9 Exploration Rate:0.99
Game Episode:3/20 High Score :28 Exploration Rate:0.99
Game Episode:4/20 High Score :13 Exploration Rate:0.99
Game Episode:5/20 High Score :13 Exploration Rate:0.98
Game Episode:6/20 High Score :16 Exploration Rate:0.98
Game Episode:7/20 High Score :18 Exploration Rate:0.97
Game Episode:8/20 High Score :12 Exploration Rate:0.97
Game Episode:9/20 High Score :17 Exploration Rate:0.96
Game Episode:10/20 High Score :43 Exploration Rate:0.96
Game Episode:11/20 High Score :17 Exploration Rate:0.95
Game Episode:12/20 High Score :13 Exploration Rate:0.95
Game Episode:13/20 High Score :21 Exploration Rate:0.94
Game Episode:14/20 High Score :51 Exploration Rate:0.94
Game Episode:15/20 High Score :15 Exploration Rate:0.93
Game Episode:16/20 High Score :11 Exploration Rate:0.93
Game Episode:17/20 High Score :21 Exploration Rate:0.92
Game 

Game Episode:147/20 High Score :12 Exploration Rate:0.48
Game Episode:148/20 High Score :17 Exploration Rate:0.48
Game Episode:149/20 High Score :13 Exploration Rate:0.48
Game Episode:150/20 High Score :16 Exploration Rate:0.47
Game Episode:151/20 High Score :16 Exploration Rate:0.47
Game Episode:152/20 High Score :9 Exploration Rate:0.47
Game Episode:153/20 High Score :10 Exploration Rate:0.47
Game Episode:154/20 High Score :7 Exploration Rate:0.46
Game Episode:155/20 High Score :15 Exploration Rate:0.46
Game Episode:156/20 High Score :13 Exploration Rate:0.46
Game Episode:157/20 High Score :11 Exploration Rate:0.46
Game Episode:158/20 High Score :14 Exploration Rate:0.46
Game Episode:159/20 High Score :13 Exploration Rate:0.45
Game Episode:160/20 High Score :21 Exploration Rate:0.45
Game Episode:161/20 High Score :8 Exploration Rate:0.45
Game Episode:162/20 High Score :10 Exploration Rate:0.45
Game Episode:163/20 High Score :10 Exploration Rate:0.44
Game Episode:164/20 High Score :15

Game Episode:292/20 High Score :13 Exploration Rate:0.23
Game Episode:293/20 High Score :7 Exploration Rate:0.23
Game Episode:294/20 High Score :13 Exploration Rate:0.23
Game Episode:295/20 High Score :11 Exploration Rate:0.23
Game Episode:296/20 High Score :10 Exploration Rate:0.23
Game Episode:297/20 High Score :12 Exploration Rate:0.23
Game Episode:298/20 High Score :7 Exploration Rate:0.23
Game Episode:299/20 High Score :10 Exploration Rate:0.22
Game Episode:300/20 High Score :9 Exploration Rate:0.22
Game Episode:301/20 High Score :9 Exploration Rate:0.22
Game Episode:302/20 High Score :9 Exploration Rate:0.22
Game Episode:303/20 High Score :13 Exploration Rate:0.22
Game Episode:304/20 High Score :10 Exploration Rate:0.22
Game Episode:305/20 High Score :11 Exploration Rate:0.22
Game Episode:306/20 High Score :7 Exploration Rate:0.22
Game Episode:307/20 High Score :9 Exploration Rate:0.22
Game Episode:308/20 High Score :8 Exploration Rate:0.21
Game Episode:309/20 High Score :10 Expl

Game Episode:439/20 High Score :8 Exploration Rate:0.11
Game Episode:440/20 High Score :7 Exploration Rate:0.11
Game Episode:441/20 High Score :9 Exploration Rate:0.11
Game Episode:442/20 High Score :9 Exploration Rate:0.11
Game Episode:443/20 High Score :8 Exploration Rate:0.11
Game Episode:444/20 High Score :8 Exploration Rate:0.11
Game Episode:445/20 High Score :11 Exploration Rate:0.11
Game Episode:446/20 High Score :9 Exploration Rate:0.11
Game Episode:447/20 High Score :9 Exploration Rate:0.11
Game Episode:448/20 High Score :10 Exploration Rate:0.11
Game Episode:449/20 High Score :8 Exploration Rate:0.11
Game Episode:450/20 High Score :8 Exploration Rate:0.11
Game Episode:451/20 High Score :7 Exploration Rate:0.1
Game Episode:452/20 High Score :10 Exploration Rate:0.1
Game Episode:453/20 High Score :7 Exploration Rate:0.1
Game Episode:454/20 High Score :8 Exploration Rate:0.1
Game Episode:455/20 High Score :11 Exploration Rate:0.1
Game Episode:456/20 High Score :8 Exploration Rat

Game Episode:583/20 High Score :9 Exploration Rate:0.054
Game Episode:584/20 High Score :8 Exploration Rate:0.054
Game Episode:585/20 High Score :10 Exploration Rate:0.054
Game Episode:586/20 High Score :9 Exploration Rate:0.053
Game Episode:587/20 High Score :8 Exploration Rate:0.053
Game Episode:588/20 High Score :8 Exploration Rate:0.053
Game Episode:589/20 High Score :9 Exploration Rate:0.052
Game Episode:590/20 High Score :9 Exploration Rate:0.052
Game Episode:591/20 High Score :8 Exploration Rate:0.052
Game Episode:592/20 High Score :9 Exploration Rate:0.052
Game Episode:593/20 High Score :7 Exploration Rate:0.051
Game Episode:594/20 High Score :9 Exploration Rate:0.051
Game Episode:595/20 High Score :9 Exploration Rate:0.051
Game Episode:596/20 High Score :9 Exploration Rate:0.051
Game Episode:597/20 High Score :8 Exploration Rate:0.05
Game Episode:598/20 High Score :9 Exploration Rate:0.05
Game Episode:599/20 High Score :9 Exploration Rate:0.05
Game Episode:600/20 High Score :8

Game Episode:727/20 High Score :9 Exploration Rate:0.026
Game Episode:728/20 High Score :9 Exploration Rate:0.026
Game Episode:729/20 High Score :7 Exploration Rate:0.026
Game Episode:730/20 High Score :9 Exploration Rate:0.026
Game Episode:731/20 High Score :10 Exploration Rate:0.026
Game Episode:732/20 High Score :10 Exploration Rate:0.026
Game Episode:733/20 High Score :7 Exploration Rate:0.025
Game Episode:734/20 High Score :7 Exploration Rate:0.025
Game Episode:735/20 High Score :9 Exploration Rate:0.025
Game Episode:736/20 High Score :7 Exploration Rate:0.025
Game Episode:737/20 High Score :9 Exploration Rate:0.025
Game Episode:738/20 High Score :8 Exploration Rate:0.025
Game Episode:739/20 High Score :7 Exploration Rate:0.025
Game Episode:740/20 High Score :8 Exploration Rate:0.025
Game Episode:741/20 High Score :10 Exploration Rate:0.024
Game Episode:742/20 High Score :9 Exploration Rate:0.024
Game Episode:743/20 High Score :9 Exploration Rate:0.024
Game Episode:744/20 High Sco

Game Episode:873/20 High Score :8 Exploration Rate:0.013
Game Episode:874/20 High Score :9 Exploration Rate:0.013
Game Episode:875/20 High Score :8 Exploration Rate:0.013
Game Episode:876/20 High Score :8 Exploration Rate:0.012
Game Episode:877/20 High Score :9 Exploration Rate:0.012
Game Episode:878/20 High Score :9 Exploration Rate:0.012
Game Episode:879/20 High Score :9 Exploration Rate:0.012
Game Episode:880/20 High Score :9 Exploration Rate:0.012
Game Episode:881/20 High Score :8 Exploration Rate:0.012
Game Episode:882/20 High Score :7 Exploration Rate:0.012
Game Episode:883/20 High Score :8 Exploration Rate:0.012
Game Episode:884/20 High Score :9 Exploration Rate:0.012
Game Episode:885/20 High Score :10 Exploration Rate:0.012
Game Episode:886/20 High Score :9 Exploration Rate:0.012
Game Episode:887/20 High Score :9 Exploration Rate:0.012
Game Episode:888/20 High Score :8 Exploration Rate:0.012
Game Episode:889/20 High Score :7 Exploration Rate:0.012
Game Episode:890/20 High Score