# Deep Q-Network (DQN)
---
In this notebook, you will load a DQN agent and watch it play the `SonicTheHedgehog-Genesis` environment from Gym Retro (OpenAI Retro Contest).

### 1. Import the Necessary Packages

In [1]:
import retro
from retro_contest.local import make
from sonic_util import make_env
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

### 2. Instantiate the Environment and Agent

Initialize the environment in the code cell below.

In [2]:
# Create the retro-contest Sonic environment (Labyrinth Zone, Act 1).
env = make(
    state='LabyrinthZone.Act1',
    game='SonicTheHedgehog-Genesis',
)
# Alternative constructions kept for reference:
# env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1', record=True)
# env = make_env(stack=False, scale_rew=False)

In [3]:
# env.seed(0)
# Record the observation shape (as a list) and the size reported by the
# environment's action space; both are passed to the agent later on.
state_space, action_space = list(env.observation_space.shape), env.action_space.n

print('State shape: ', state_space)
print('Number of actions: ', (1, action_space))

State shape:  [224, 320, 3]
Number of actions:  (1, 12)


Please refer to the instructions in `Deep_Q_Network.ipynb` if you would like to write your own DQN agent.  Otherwise, run the code cell below to load the solution files.

In [4]:
from dqn_agent import Agent

# Build the agent from the observation shape and action-space size read
# from the environment.
agent = Agent(
    state_size=state_space,
    action_size=action_space,
    seed=0,
    multi_action=True,
)

cuda:1
QNetwork(
  (conv1): Conv2d(3, 20, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(4, 4), stride=(1, 1))
  (fc1): Linear(in_features=1390400, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=12, bias=True)
)
QNetwork(
  (conv1): Conv2d(3, 20, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(4, 4), stride=(1, 1))
  (fc1): Linear(in_features=1390400, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=12, bias=True)
)


In [None]:
# Load saved weights from file into the local Q-network.
# map_location remaps the stored tensors onto the device the network currently
# lives on, so a checkpoint written from one GPU (e.g. cuda:1) still loads on a
# machine with a different GPU layout or with CPU only.
agent.qnetwork_local.load_state_dict(
    torch.load(
        'step_checkpoint.pth',
        map_location=next(agent.qnetwork_local.parameters()).device,
    )
)

In [None]:
# Watch the agent play: roll out several episodes, printing each episode's
# total reward and, at the end, the average over all episodes.
window = []        # total reward of every finished episode
n_episodes = 10
max_steps = 4500   # hard cap on environment steps per episode
for _ in range(n_episodes):
    state = env.reset()
    total_score = 0
    for _step in range(max_steps):
        # NOTE(review): reshape() only reinterprets the (H, W, C) buffer as
        # (C, H, W); it does not move the channel axis the way
        # np.transpose(state, (2, 0, 1)) would. Left unchanged because the
        # saved weights were presumably trained on this layout -- confirm
        # against the training code before switching.
        state = state.reshape(state_space[2], state_space[0], state_space[1])
        action = agent.act(state)
        # env.render()  # uncomment to display the game while it runs
        state, reward, done, _ = env.step(action)
        total_score += reward
        if done:
            break
    window.append(total_score)
    print('Total score for this episode {:.4f}'.format(total_score))
# '{:.4f}' (with the colon) is a format spec; the previous '{.4f}' was parsed
# as an attribute lookup and raised instead of printing the average.
print('Avg score {:.4f}'.format(np.mean(window)))

Total score for this episode 2321.8373


In [None]:
# Watch the agent play with some random exploration mixed in: on each step the
# agent's action is used with probability 0.9, otherwise an action is sampled
# from the environment's action space. Prints each episode's total reward and,
# at the end, the average over all episodes.
window = []        # total reward of every finished episode
n_episodes = 10
max_steps = 4500   # hard cap on environment steps per episode
for _ in range(n_episodes):
    state = env.reset()
    total_score = 0
    for _step in range(max_steps):
        # NOTE(review): reshape() only reinterprets the (H, W, C) buffer as
        # (C, H, W); it does not move the channel axis the way
        # np.transpose(state, (2, 0, 1)) would. Left unchanged because the
        # saved weights were presumably trained on this layout -- confirm
        # against the training code before switching.
        state = state.reshape(state_space[2], state_space[0], state_space[1])
        if np.random.uniform() < 0.9:
            action = agent.act(state)
        else:
            action = env.action_space.sample()
        # env.render()  # uncomment to display the game while it runs
        state, reward, done, _ = env.step(action)
        total_score += reward
        if done:
            break
    window.append(total_score)
    print('Total score for this episode {:.4f}'.format(total_score))
# '{:.4f}' (with the colon) is a format spec; the previous '{.4f}' was parsed
# as an attribute lookup and raised instead of printing the average.
print('Avg score {:.4f}'.format(np.mean(window)))