In [4]:
import gym
import matplotlib.pyplot as plt

In [5]:
# AI for Autonomous Vehicles - Build a Self-Driving Car

# Importing the libraries

import os
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Creating the architecture of the Neural Network

class Network(nn.Module):
    
    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 45)
        self.fc2 = nn.Linear(45, nb_action)
    
    def forward(self, state):
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

# Implementing Experience Replay

class ReplayMemory(object):
    
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
    
    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]
    
    def sample(self, batch_size):
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: Variable(torch.cat(x, 0)), samples)

# Implementing Deep Q-Learning

class Dqn(object):
    
    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(capacity = 100000)
        self.optimizer = optim.Adam(params = self.model.parameters())
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0
    
    def select_action(self, state):
        probs = F.softmax(self.model(Variable(state))*100,dim=-1)
        action = probs.multinomial(len(probs))
        return action.item()
    
    def learn(self, batch_states, batch_actions, batch_rewards, batch_next_states):
        batch_outputs = self.model(batch_states).gather(1, batch_actions.unsqueeze(1)).squeeze(1)
        batch_next_outputs = self.model(batch_next_states).detach().max(1)[0]
        batch_targets = batch_rewards + self.gamma * batch_next_outputs
        td_loss = F.smooth_l1_loss(batch_outputs, batch_targets)
        self.optimizer.zero_grad()
        td_loss.backward()
        self.optimizer.step()
    
    def update(self, new_state, new_reward):
        new_state = torch.Tensor(new_state).float().unsqueeze(0)
        self.memory.push((self.last_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward]), new_state))
        new_action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_states, batch_actions, batch_rewards, batch_next_states = self.memory.sample(100)
            self.learn(batch_states, batch_actions, batch_rewards, batch_next_states)
        self.last_state = new_state
        self.last_action = new_action
        self.last_reward = new_reward
        return new_action
    
    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')
    
    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")

# Constantes

In [6]:
args = {'gamma' : 0.9,
        'batch_size' : 32,
        'input_size' : 2,
        'n_actions'  : 3}

# Denição do cérebro

# Geração do jogo

In [7]:
env = gym.make('MountainCar-v0',new_step_api=True)

In [8]:
print('State space: ', env.observation_space)
print(env.observation_space.low)
print(env.observation_space.high)

State space:  Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
[-1.2  -0.07]
[0.6  0.07]


In [9]:
print('State space: ', env.action_space)

State space:  Discrete(3)


In [10]:
state = env.reset()
reward = 0

In [11]:
brain = Dqn(2,3,0.9)

In [12]:
action = brain.update(state,reward)
print(action)

2


In [13]:
class GameRunner:
    def __init__(self, model, env, render=False):
        self.env = env
        self.model = model
        self.render = render
        self.steps = 0
        self.list_steps = []
        self.reward_store = []
        self.max_x_store = []
    def run(self):
        state = self.env.reset()
        reward = 0
        action = self.model.update(state,reward)
        tot_reward = 0
        max_x = -100
        self.list_steps = []
        while True:
            if self.render:
                self.env.render()
            self.steps += 1
            next_state, reward, done, info, _ = env.step(action)
            if next_state[0] >= -0.25:
                reward += 1  
            elif next_state[0] >= 0.1:
                reward += 1
            elif next_state[0] >= 0.25:
                reward += 1  
            elif next_state[0] >= 0.5:
                reward += 200
            action = self.model.update(next_state,reward)
            
            if next_state[0] > max_x:
                max_x = next_state[0]
                #print(max_x)
            if max_x > 0.5:
                print("You Win")
            # is the game complete? If so, set the next state to
            # None for storage sake
            if done or self.steps > 1000:
                next_state = None

            # move the agent to the next state and accumulate the reward
            state = next_state
            tot_reward += reward

            # if the game is done, break the loop
            if done or self.steps > 1000:
                self.reward_store.append(tot_reward)
                self.max_x_store.append(max_x)
                self.list_steps.append(self.steps)
                print("Step {}, Total reward: {}, Max: ".format(self.steps, tot_reward,max_x))
                if self.steps < 180:
                    self.model.save()
                self.steps = 0
                break

In [14]:
steps = [];
gr = GameRunner(model = brain,env = env,render=False)
for i in range(20000):
    gr.run()

Step 1001, Total reward: -602.0, Max: 
Step 1001, Total reward: -908.0, Max: 
You Win
Step 786, Total reward: -549.0, Max: 
You Win
Step 831, Total reward: -805.0, Max: 
You Win
Step 172, Total reward: -129.0, Max: 
You Win
Step 101, Total reward: -69.0, Max: 
You Win
Step 179, Total reward: -124.0, Max: 
You Win
Step 188, Total reward: -145.0, Max: 
You Win
Step 173, Total reward: -111.0, Max: 
You Win
Step 165, Total reward: -118.0, Max: 
You Win
Step 181, Total reward: -112.0, Max: 
You Win
Step 172, Total reward: -124.0, Max: 
You Win
Step 202, Total reward: -108.0, Max: 
You Win
Step 142, Total reward: -102.0, Max: 
You Win
Step 180, Total reward: -124.0, Max: 
You Win
Step 177, Total reward: -129.0, Max: 
You Win
Step 170, Total reward: -109.0, Max: 
You Win
Step 105, Total reward: -72.0, Max: 
You Win
Step 173, Total reward: -152.0, Max: 
You Win
Step 142, Total reward: -105.0, Max: 
You Win
Step 170, Total reward: -118.0, Max: 
You Win
Step 159, Total reward: -115.0, Max: 
You 

KeyboardInterrupt: 

In [None]:
plt.plot(gr.max_x_store)

In [None]:
plt.plot(gr.list_steps)

In [15]:
brain.load()
gr = GameRunner(model = brain,env = env,render=True)

=> loading checkpoint... 
done !


In [25]:
gr.run()

You Win
Step 138, Total reward: -87.0, Max: 
