In [1]:
import gym 
import numpy as np
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
import warnings
import matplotlib.pyplot as plt

In [2]:
warnings.filterwarnings("ignore")

In [3]:
env = gym.make("MountainCar-v0")

In [4]:
class Agent:
    def __init__(self,state_size,action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.95
        self.epsilon = 1.00
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate= 0.001
        self.memory = deque(maxlen = 2000)
        self.model = self.create_model()
        
    def create_model(self):
        model = Sequential()
        model.add(Dense(16,activation = "relu",input_dim = self.state_size))
        model.add(Dense(32,activation = "relu"))
        model.add(Dense(self.action_size))
        model.compile(loss = "mse",optimizer = Adam(lr = 0.001))
        return model
    
    def remember(self,state,action,reward,next_state,done):
        self.memory.append((state,action,reward,next_state,done))
    
    def act(self,state):
        if np.random.rand()<= self.epsilon:
            return random.randrange(self.action_size)
        else:
            return np.argmax(self.model.predict(state)[0])
        
    def train(self,batch_size):
        minibatch = random.sample(self.memory,batch_size)
        for experience in minibatch:
            state , action,reward,next_state,done = experience
            
            if not done:
                target = reward + self.gamma*np.amax(self.model.predict(next_state)[0])
            else:
                target = reward   
            target_f = self.model.predict(state)
            target_f[0][action] = target 
            
            self.model.fit(state,target_f,epochs = 1,verbose= 0)
        if self.epsilon >self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    def load(self,name):
        self.model.load_weights(name)
        
    def save(self,name):
        self.model.save_weights(name)
        
            

In [5]:
state_size = 2
action_size = 3
agent = Agent(state_size,action_size)
n_episodes = 500
done = False

In [None]:
score_list = []
for e in range(n_episodes):
    state = env.reset()
    state = np.reshape(state,[1,state_size])
    score = 0.0
    for time in range(200):
        
        env.render()
        
        action = agent.act(state) #action is 0 or 1

        next_state,reward,done,other_info = env.step(action) 
        
        if next_state[0]>state[0][0] and next_state[1]>0 and state[0][1]>0:
            reward= reward +20
        elif next_state[0]<state[0][0] and next_state[1]<= 0 and state[0][1]<= 0:
            reward = reward +20
            
        if done :
            reward += 3000
        else :
            reward -= 30
            
        next_state = np.reshape(next_state,[1,state_size])
        agent.remember(state,action,reward,next_state,done)
        state = next_state
        score += reward
       
        if done:
            score_list.append(score)
            print("Game Episode :{}/{}, High Score:{:.2f},Exploration Rate:{:.2}".format(e,n_episodes,score,agent.epsilon))
            agent.train(128)
            agent.save("mountaincar1.h5")
            break
env.close()       
plt.plot(score_list)
plt.show()

Game Episode :0/500, High Score:2690.00,Exploration Rate:1.0
Game Episode :1/500, High Score:2590.00,Exploration Rate:0.99
Game Episode :2/500, High Score:2570.00,Exploration Rate:0.99
Game Episode :3/500, High Score:2670.00,Exploration Rate:0.99
Game Episode :4/500, High Score:2630.00,Exploration Rate:0.98
Game Episode :5/500, High Score:2650.00,Exploration Rate:0.98
Game Episode :6/500, High Score:2550.00,Exploration Rate:0.97
Game Episode :7/500, High Score:2410.00,Exploration Rate:0.97
Game Episode :8/500, High Score:2550.00,Exploration Rate:0.96
Game Episode :9/500, High Score:2510.00,Exploration Rate:0.96
Game Episode :10/500, High Score:2430.00,Exploration Rate:0.95
Game Episode :11/500, High Score:2650.00,Exploration Rate:0.95
Game Episode :12/500, High Score:2530.00,Exploration Rate:0.94
Game Episode :13/500, High Score:2630.00,Exploration Rate:0.94
Game Episode :14/500, High Score:2630.00,Exploration Rate:0.93
Game Episode :15/500, High Score:2650.00,Exploration Rate:0.93
Gam