# Deep Q Networks

Code can be found at: https://deeplizard.com/learn/video/PyQNfsGUnQA

## 1. Setup

In [1]:
%matplotlib inline
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image   
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

In [2]:
# Detect whether matplotlib is drawing inline (i.e. inside a notebook); only
# then is IPython's display module imported.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

In [3]:
# Build the CartPole-v0 environment and reset it to an initial state.
# env.reset() is the last expression of the cell, so the value it returns
# is what the notebook displays as this cell's output.
env = gym.make('CartPole-v0')
env.reset()

array([-0.01246179, -0.0496933 ,  0.04495671,  0.00026702])

## 2. Testing the environment without any AI 

Make sure to click the popup window quickly!

In [4]:
# Run 100 randomly sampled actions and render each step so the environment
# can be watched without any learning involved.
env.reset()
try:
    for _ in range(100):
        env.render()
        time.sleep(.2)  # slow the animation down enough to follow by eye
        env.step(env.action_space.sample())
finally:
    # Close the environment exactly once, after the rollout (or if it is
    # interrupted). Closing inside the loop shut it down after every step.
    env.close()



## 3. Building the Network

#### DQN

In [5]:
class DQN(nn.Module):
    """Small fully connected Q-network operating on flattened RGB images.

    Args:
        img_height: Height of the input image in pixels.
        img_width: Width of the input image in pixels.
    """

    def __init__(self, img_height, img_width):
        super().__init__()

        # Every pixel contributes three values, one per RGB channel.
        flat_inputs = img_height * img_width * 3

        # PyTorch calls fully connected layers "linear layers".
        self.fc1 = nn.Linear(in_features=flat_inputs, out_features=24)
        self.fc2 = nn.Linear(in_features=24, out_features=32)
        # Two outputs, one per action the agent can take: right or left.
        self.out = nn.Linear(in_features=32, out_features=2)

    def forward(self, t):
        """Return one value per action for each image in the batch ``t``.

        Everything after the first (batch) dimension is flattened before the
        tensor is passed through the two ReLU-activated hidden layers and the
        un-activated output layer.
        """
        flat = t.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.out(hidden)
        

#### Experience

In [6]:
# A single transition record: the state, the action taken in it, the state
# that followed, and the reward received.
Experience = namedtuple('Experience', ['state', 'action', 'next_state', 'reward'])

#### Replay Memory

In [7]:
class ReplayMemory():
    """Fixed-capacity store of experiences with uniform random sampling.

    Experiences are appended until ``capacity`` is reached; after that each
    new experience overwrites the slot at ``push_count % capacity``, which is
    the oldest stored entry.

    Args:
        capacity: Maximum number of experiences kept at any time.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        # Total number of experiences ever pushed (not the current size).
        self.push_count = 0

    # These methods must sit at class level. Nested inside __init__ they
    # were only local functions and never became attributes of the class.
    def push(self, experience):
        """Store ``experience``, replacing the oldest entry once full."""
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            self.memory[self.push_count % self.capacity] = experience
        self.push_count += 1

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored experiences chosen at random.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                experiences (raised by ``random.sample``).
        """
        return random.sample(self.memory, batch_size)

    def can_provide_sample(self, batch_size):
        """Return True when at least ``batch_size`` experiences are stored."""
        return len(self.memory) >= batch_size

#### Strategy

In [8]:
class EpsilonGreedyStrategy():
    """Exponentially decaying exploration-rate schedule.

    Args:
        start: Exploration rate at step 0.
        end: Value the rate decays towards as the step count grows.
        decay: Decay constant applied per step inside the exponential.
    """

    def __init__(self, start, end, decay):
        self.start, self.end, self.decay = start, end, decay

    def get_exploration_rate(self, current_step):
        """Return ``end + (start - end) * exp(-current_step * decay)``."""
        decay_factor = math.exp(-1. * current_step * self.decay)
        return self.end + (self.start - self.end) * decay_factor

#### Reinforcement Learning Agent

In [9]:
class Agent():
    """Epsilon-greedy action selector.

    Args:
        strategy: Object exposing ``get_exploration_rate(current_step)``.
        num_actions: Number of discrete actions to choose from when exploring.
    """

    def __init__(self, strategy, num_actions):
        self.current_step = 0
        self.strategy = strategy
        self.num_actions = num_actions

    def select_action(self, state, policy_net):
        """Pick an action for ``state`` and advance the step counter.

        With probability equal to the current exploration rate a random
        action index in ``range(num_actions)`` is returned; otherwise the
        index of the largest output of ``policy_net(state)`` along dim 1.

        Args:
            state: Input passed to ``policy_net`` when exploiting. The
                ``.item()`` call requires the argmax result to hold a single
                element, so this presumably is a batch of one. TODO confirm
                against the caller.
            policy_net: Callable returning the per-action values for ``state``.

        Returns:
            The chosen action index as a Python number.
        """
        # Use the strategy owned by this agent, not a module-level global
        # that happens to share the name.
        rate = self.strategy.get_exploration_rate(self.current_step)
        self.current_step += 1

        if rate > random.random():  # explore if the rate exceeds a random draw
            return random.randrange(self.num_actions)

        # Exploit: gradients are not needed for action selection.
        with torch.no_grad():
            return policy_net(state).argmax(dim=1).item()