In [1]:
!pip install gym

Collecting gym
  Downloading https://files.pythonhosted.org/packages/8f/1f/edf9611bc202069790e4d43a3bb70550fc1a9b22649b8666efe41b3601ec/gym-0.12.4.tar.gz (1.5MB)
Collecting pyglet>=1.2.0 (from gym)
  Downloading https://files.pythonhosted.org/packages/1c/fc/dad5eaaab68f0c21e2f906a94ddb98175662cc5a654eee404d59554ce0fa/pyglet-1.3.2-py2.py3-none-any.whl (1.0MB)
Collecting future (from pyglet>=1.2.0->gym)
  Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB)
Building wheels for collected packages: gym, future
  Building wheel for gym (setup.py): started
  Building wheel for gym (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Deepika\AppData\Local\pip\Cache\wheels\bb\53\4d\2b8728cbdd500a073178cd90e0221b2ccd7caee595ed612285
  Building wheel for future (setup.py): started
  Building wheel for future (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Deep

You are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


# Reinforcement learning

In [1]:
import gym
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Random Action Selection On a Game 

In [2]:
## creating a cartpole event
env = gym.make('CartPole-v0')

## e is the iterator over all the episodes
for e in range(20):
    observation = env.reset()
  
    for t in range(50):
        env.render()
        ## selecting the address from the address space
        action = env.action_space.sample()
    
        ## env.step function returns the observation, reward, done or not, other_info
    
        observation, reward, done, other_info = env.step(action)
    
        if done:
            print("Your highscore = {} and Game episode is {}".format(t, e))
            break
env.close()


Your highscore = 26 and Game episode is 0
Your highscore = 20 and Game episode is 1
Your highscore = 49 and Game episode is 2
Your highscore = 9 and Game episode is 3
Your highscore = 15 and Game episode is 4
Your highscore = 21 and Game episode is 5
Your highscore = 10 and Game episode is 6
Your highscore = 11 and Game episode is 7
Your highscore = 30 and Game episode is 8
Your highscore = 15 and Game episode is 9
Your highscore = 22 and Game episode is 10
Your highscore = 14 and Game episode is 11
Your highscore = 27 and Game episode is 12
Your highscore = 22 and Game episode is 13
Your highscore = 26 and Game episode is 14
Your highscore = 42 and Game episode is 15
Your highscore = 11 and Game episode is 16
Your highscore = 20 and Game episode is 17
Your highscore = 8 and Game episode is 18
Your highscore = 24 and Game episode is 19


# Creating an AI Agent based on Neural Network

In [3]:
from collections import deque

In [4]:
class Agent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen = 2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        
        self.model = self._create_model()
        
    def _create_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim = self.state_size, activation = 'relu'))
        model.add(Dense(24, activation = 'relu'))
        model.add(Dense(self.action_size, activation = 'linear'))
        model.compile(loss = 'mse', optimizer = Adam(lr = self.learning_rate))
        return model
    
    def remember(self, state, action, reward, next_state, done):
        
        self.memory.append((state, action, reward, next_state, done))
        
    def act(self, state):
        
        if np.random.rand() <= self.epsilon:
            
            return random.randrange(self.action_size)
        
        return np.argmax(self.model.predict(state)[0])
    
    def train(self, batch_size = 32):
        ## replay buffer training
        
        minibatch = random.sample(self.memory, batch_size)
        
        for experience in minibatch:
            state, action, reward, next_state, done = experience
            
            if not done:
                
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
                
            else:
                target = reward
                
            target_f = self.model.predict(state)
            
            target_f[0][action] = target
            
            self.model.fit(state, target_f, epochs = 1, verbose = 0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon = self.epsilon * self.epsilon_decay
            
    def load(self, name):
        self.model.load_weights(name)
        
    def save(self, name):
        self.model.save_weights(name)
        


In [5]:
n_episodes = 1000
state_size = 4
action_size = 2
done = False

In [6]:
agent = Agent(state_size = 4, action_size = 2)

In [None]:
for e in range(n_episodes):
    
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for t in range(500):
        env.render()
        action = agent.act(state)
        
        next_state, reward, done, other_info = env.step(action)
        
        reward = reward if not done else -10
        
        next_state = np.reshape(next_state, [1, state_size])
        
        agent.remember(state, action, reward, next_state, done)
        
        state = next_state
        
        if done :
            print("Your highscore = {} and Game episode is {}, exploration_rate is {}".format(t, e, agent.epsilon))
            break
            
    if len(agent.memory) > 32:
        agent.train(32)
env.close()

Your highscore = 56 and Game episode is 0, exploration_rate is 1.0
Your highscore = 12 and Game episode is 1, exploration_rate is 0.995
Your highscore = 23 and Game episode is 2, exploration_rate is 0.990025
Your highscore = 40 and Game episode is 3, exploration_rate is 0.985074875
Your highscore = 14 and Game episode is 4, exploration_rate is 0.9801495006250001
Your highscore = 13 and Game episode is 5, exploration_rate is 0.9752487531218751
Your highscore = 35 and Game episode is 6, exploration_rate is 0.9703725093562657
Your highscore = 22 and Game episode is 7, exploration_rate is 0.9655206468094844
Your highscore = 12 and Game episode is 8, exploration_rate is 0.960693043575437
Your highscore = 10 and Game episode is 9, exploration_rate is 0.9558895783575597
Your highscore = 46 and Game episode is 10, exploration_rate is 0.9511101304657719
Your highscore = 23 and Game episode is 11, exploration_rate is 0.946354579813443
Your highscore = 50 and Game episode is 12, exploration_rate 

Your highscore = 59 and Game episode is 100, exploration_rate is 0.6057704364907278
Your highscore = 37 and Game episode is 101, exploration_rate is 0.6027415843082742
Your highscore = 82 and Game episode is 102, exploration_rate is 0.5997278763867329
Your highscore = 154 and Game episode is 103, exploration_rate is 0.5967292370047992
Your highscore = 33 and Game episode is 104, exploration_rate is 0.5937455908197752
Your highscore = 19 and Game episode is 105, exploration_rate is 0.5907768628656763
Your highscore = 41 and Game episode is 106, exploration_rate is 0.5878229785513479
Your highscore = 54 and Game episode is 107, exploration_rate is 0.5848838636585911
Your highscore = 37 and Game episode is 108, exploration_rate is 0.5819594443402982
Your highscore = 57 and Game episode is 109, exploration_rate is 0.5790496471185967
Your highscore = 30 and Game episode is 110, exploration_rate is 0.5761543988830038
Your highscore = 47 and Game episode is 111, exploration_rate is 0.57327362

Your highscore = 57 and Game episode is 198, exploration_rate is 0.3706551064126331
Your highscore = 82 and Game episode is 199, exploration_rate is 0.36880183088056995
Your highscore = 106 and Game episode is 200, exploration_rate is 0.3669578217261671
Your highscore = 40 and Game episode is 201, exploration_rate is 0.36512303261753626
Your highscore = 37 and Game episode is 202, exploration_rate is 0.3632974174544486
Your highscore = 50 and Game episode is 203, exploration_rate is 0.3614809303671764
Your highscore = 21 and Game episode is 204, exploration_rate is 0.3596735257153405
Your highscore = 19 and Game episode is 205, exploration_rate is 0.3578751580867638
Your highscore = 28 and Game episode is 206, exploration_rate is 0.35608578229633
Your highscore = 23 and Game episode is 207, exploration_rate is 0.3543053533848483
Your highscore = 23 and Game episode is 208, exploration_rate is 0.35253382661792404
Your highscore = 53 and Game episode is 209, exploration_rate is 0.3507711

Your highscore = 53 and Game episode is 295, exploration_rate is 0.22793384675362674
Your highscore = 67 and Game episode is 296, exploration_rate is 0.22679417751985861
Your highscore = 100 and Game episode is 297, exploration_rate is 0.22566020663225933
Your highscore = 180 and Game episode is 298, exploration_rate is 0.22453190559909803
Your highscore = 114 and Game episode is 299, exploration_rate is 0.22340924607110255
Your highscore = 120 and Game episode is 300, exploration_rate is 0.22229219984074702
Your highscore = 84 and Game episode is 301, exploration_rate is 0.2211807388415433
Your highscore = 78 and Game episode is 302, exploration_rate is 0.22007483514733558
Your highscore = 147 and Game episode is 303, exploration_rate is 0.2189744609715989
Your highscore = 140 and Game episode is 304, exploration_rate is 0.2178795886667409
Your highscore = 194 and Game episode is 305, exploration_rate is 0.2167901907234072
Your highscore = 113 and Game episode is 306, exploration_rate

Your highscore = 199 and Game episode is 391, exploration_rate is 0.14087196468590776
Your highscore = 199 and Game episode is 392, exploration_rate is 0.14016760486247823
Your highscore = 199 and Game episode is 393, exploration_rate is 0.13946676683816583
Your highscore = 199 and Game episode is 394, exploration_rate is 0.138769433003975
Your highscore = 119 and Game episode is 395, exploration_rate is 0.13807558583895513
Your highscore = 199 and Game episode is 396, exploration_rate is 0.13738520790976036
Your highscore = 199 and Game episode is 397, exploration_rate is 0.13669828187021155
Your highscore = 199 and Game episode is 398, exploration_rate is 0.13601479046086049
Your highscore = 199 and Game episode is 399, exploration_rate is 0.1353347165085562
Your highscore = 199 and Game episode is 400, exploration_rate is 0.1346580429260134
Your highscore = 199 and Game episode is 401, exploration_rate is 0.13398475271138335
Your highscore = 199 and Game episode is 402, exploration_

Your highscore = 199 and Game episode is 487, exploration_rate is 0.08706434220766679
Your highscore = 199 and Game episode is 488, exploration_rate is 0.08662902049662846
Your highscore = 199 and Game episode is 489, exploration_rate is 0.08619587539414532
Your highscore = 199 and Game episode is 490, exploration_rate is 0.08576489601717459
Your highscore = 199 and Game episode is 491, exploration_rate is 0.08533607153708872
Your highscore = 199 and Game episode is 492, exploration_rate is 0.08490939117940327
Your highscore = 199 and Game episode is 493, exploration_rate is 0.08448484422350626
Your highscore = 199 and Game episode is 494, exploration_rate is 0.08406242000238873
Your highscore = 199 and Game episode is 495, exploration_rate is 0.08364210790237678
Your highscore = 199 and Game episode is 496, exploration_rate is 0.0832238973628649
Your highscore = 199 and Game episode is 497, exploration_rate is 0.08280777787605056
Your highscore = 199 and Game episode is 498, explorati

Your highscore = 130 and Game episode is 583, exploration_rate is 0.05380914294022049
Your highscore = 115 and Game episode is 584, exploration_rate is 0.05354009722551939
Your highscore = 124 and Game episode is 585, exploration_rate is 0.05327239673939179
Your highscore = 147 and Game episode is 586, exploration_rate is 0.053006034755694834
Your highscore = 145 and Game episode is 587, exploration_rate is 0.052741004581916356
Your highscore = 181 and Game episode is 588, exploration_rate is 0.052477299559006776
Your highscore = 165 and Game episode is 589, exploration_rate is 0.052214913061211746
Your highscore = 173 and Game episode is 590, exploration_rate is 0.05195383849590569
Your highscore = 184 and Game episode is 591, exploration_rate is 0.05169406930342616
Your highscore = 188 and Game episode is 592, exploration_rate is 0.05143559895690903
Your highscore = 199 and Game episode is 593, exploration_rate is 0.051178420962124486
Your highscore = 117 and Game episode is 594, exp

Your highscore = 199 and Game episode is 678, exploration_rate is 0.033423255248208356
Your highscore = 199 and Game episode is 679, exploration_rate is 0.03325613897196732
Your highscore = 199 and Game episode is 680, exploration_rate is 0.03308985827710748
Your highscore = 130 and Game episode is 681, exploration_rate is 0.032924408985721944
Your highscore = 199 and Game episode is 682, exploration_rate is 0.03275978694079333
Your highscore = 199 and Game episode is 683, exploration_rate is 0.032595988006089364
Your highscore = 199 and Game episode is 684, exploration_rate is 0.032433008066058915
Your highscore = 199 and Game episode is 685, exploration_rate is 0.03227084302572862
Your highscore = 199 and Game episode is 686, exploration_rate is 0.032109488810599975
Your highscore = 199 and Game episode is 687, exploration_rate is 0.031948941366546975
Your highscore = 199 and Game episode is 688, exploration_rate is 0.03178919665971424
Your highscore = 199 and Game episode is 689, ex

Your highscore = 199 and Game episode is 773, exploration_rate is 0.020760672449809284
Your highscore = 163 and Game episode is 774, exploration_rate is 0.020656869087560238
Your highscore = 159 and Game episode is 775, exploration_rate is 0.020553584742122436
Your highscore = 163 and Game episode is 776, exploration_rate is 0.020450816818411825
Your highscore = 177 and Game episode is 777, exploration_rate is 0.020348562734319765
Your highscore = 199 and Game episode is 778, exploration_rate is 0.020246819920648168
Your highscore = 199 and Game episode is 779, exploration_rate is 0.020145585821044927
Your highscore = 199 and Game episode is 780, exploration_rate is 0.020044857891939702
Your highscore = 199 and Game episode is 781, exploration_rate is 0.019944633602480003
Your highscore = 100 and Game episode is 782, exploration_rate is 0.019844910434467605
Your highscore = 155 and Game episode is 783, exploration_rate is 0.019745685882295267
Your highscore = 199 and Game episode is 78

Your highscore = 94 and Game episode is 868, exploration_rate is 0.012895378303744804
Your highscore = 27 and Game episode is 869, exploration_rate is 0.01283090141222608
Your highscore = 27 and Game episode is 870, exploration_rate is 0.012766746905164949
Your highscore = 94 and Game episode is 871, exploration_rate is 0.012702913170639124
Your highscore = 98 and Game episode is 872, exploration_rate is 0.012639398604785928
Your highscore = 26 and Game episode is 873, exploration_rate is 0.012576201611761997
Your highscore = 17 and Game episode is 874, exploration_rate is 0.012513320603703188
Your highscore = 20 and Game episode is 875, exploration_rate is 0.012450754000684672
Your highscore = 98 and Game episode is 876, exploration_rate is 0.012388500230681249
Your highscore = 100 and Game episode is 877, exploration_rate is 0.012326557729527843
Your highscore = 199 and Game episode is 878, exploration_rate is 0.012264924940880204
Your highscore = 103 and Game episode is 879, explora

Your highscore = 199 and Game episode is 964, exploration_rate is 0.00998645168764533
Your highscore = 195 and Game episode is 965, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 966, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 967, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 968, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 969, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 970, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 971, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 972, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 973, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 974, exploration_rate is 0.00998645168764533
Your highscore = 199 and Game episode is 975, explorat