# model.py

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class QNetwork(nn.Module):
  """Actor (Policy) Model.

  A fully connected network with two ReLU hidden layers that maps a state
  vector to one estimated value per action.
  """

  def __init__(self, state_size, action_size, seed, fc1_units=64, fc2_units=64):
    """Initialize parameters and build model.
    Params
    ======
      state_size (int): Dimension of each state
      action_size (int): Dimension of each action
      seed (int): Random seed
      fc1_units (int): Number of units in the first hidden layer
      fc2_units (int): Number of units in the second hidden layer
    """
    super(QNetwork, self).__init__()
    # Seed before creating the layers so weight initialization is reproducible.
    self.seed = torch.manual_seed(seed)
    self.fc1 = nn.Linear(state_size, fc1_units)
    self.fc2 = nn.Linear(fc1_units, fc2_units)
    self.fc3 = nn.Linear(fc2_units, action_size)

  def forward(self, state):
    """Build a network that maps state -> action values.

    Params
    ======
      state (torch.Tensor): tensor whose last dimension is state_size

    Returns
    =======
      torch.Tensor with last dimension action_size (raw, un-normalized values)
    """
    hidden = F.relu(self.fc1(state))
    hidden = F.relu(self.fc2(hidden))
    # No activation on the output layer: action values are unbounded.
    return self.fc3(hidden)
  

# dqn_agent.py

In [6]:
import numpy as np
import random
from collections import namedtuple, deque

# from model import QNetwork

import torch
import torch.nn.functional as F
import torch.optim as optim

# Hyperparameters shared by Agent and ReplayBuffer below.

# Maximum number of experiences kept in the replay buffer (deque maxlen).
BUFFER_SIZE = int(1e5)
# Number of experiences drawn per ReplayBuffer.sample() call; Agent.step also
# waits until the buffer holds more than this many entries before learning.
BATCH_SIZE = 64
# Discount factor handed to Agent.learn().
GAMMA = 0.99
# Interpolation factor handed to Agent.soft_update() for the target network.
TAU = 1e-3
# Learning rate of the Adam optimizer on the local Q-network.
LR = 5e-4
# Agent.step() triggers a learning pass once every UPDATE_EVERY calls.
UPDATE_EVERY = 4

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class Agent():
  """Interacts with and learns from the environment.

  Holds a local Q-network (trained every UPDATE_EVERY steps), a target
  Q-network (soft-updated towards the local one) and a replay buffer.
  """

  def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.
    Params
    ======
      state_size (int): dimension of each state
      action_size (int): dimension of each action
      seed (int): random seed
    """
    self.state_size = state_size
    # Must be an attribute: act() reads self.action_size when exploring.
    self.action_size = action_size
    # random.seed() returns None; the call is kept for its seeding side effect.
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Step counter (modulo UPDATE_EVERY) used to schedule learning
    self.t_step = 0

  def step(self, state, action, reward, next_state, done):
    """Store one transition and, every UPDATE_EVERY calls, learn from a batch.

    Params
    ======
      state: state the action was taken in
      action: action that was taken
      reward: reward received
      next_state: state reached after the action
      done: whether the episode ended with this transition
    """
    # Save experience in replay memory
    self.memory.add(state, action, reward, next_state, done)

    # Learn every UPDATE_EVERY time steps
    self.t_step = (self.t_step + 1) % UPDATE_EVERY
    if self.t_step == 0:
      # If enough samples are available in memory, get random subset and learn
      if len(self.memory) > BATCH_SIZE:
        experiences = self.memory.sample()
        self.learn(experiences, GAMMA)

  def act(self, state, eps=0.):
    """Returns actions for given state as per current policy.

    Params
    ======
      state (array_like): current state
      eps (float): epsilon, for epsilon-greedy action selection
    """
    state = torch.from_numpy(state).float().unsqueeze(0).to(device)
    # Evaluate without tracking gradients, then restore training mode.
    self.qnetwork_local.eval()
    with torch.no_grad():
      action_values = self.qnetwork_local(state)
    self.qnetwork_local.train()

    # Epsilon-greedy action selection
    if random.random() > eps:
      return np.argmax(action_values.cpu().data.numpy())
    else:
      return random.choice(np.arange(self.action_size))

  def learn(self, experiences, gamma):
    """Update value parameters using given batch of experience tuples.

    Params
    ======
      experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tensors
      gamma (float): discount factor
    """
    states, actions, rewards, next_states, dones = experiences

    # Max predicted Q-value for each next state, taken from the target network;
    # detached so no gradient flows into the target parameters.
    q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
    # TD target: the bootstrap term is zeroed for terminal transitions.
    q_targets = rewards + gamma * q_targets_next * (1 - dones)

    # Q-value the local network assigns to the action actually taken.
    # ReplayBuffer.sample() returns actions as floats; gather() needs int64 indices.
    q_expected = self.qnetwork_local(states).gather(1, actions.long())

    # Minimize the mean squared TD error
    loss = F.mse_loss(q_expected, q_targets)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

    # -------------------- update target network ----------------------- #
    self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)

  def soft_update(self, local_model, target_model, tau):
    """Soft update model parameters.
    θ_target = τ*θ_local + (1 - τ)*θ_target

    Params
    ======
      local_model (PyTorch model): weights will be copied from
      target_model (PyTorch model): weights will be copied to
      tau (float): interpolation parameter
    """
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
      target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data)

      
class ReplayBuffer:
  """Fixed-size buffer to store experience tuples."""

  def __init__(self, action_size, buffer_size, batch_size, seed):
    """Initialize a ReplayBuffer object

    Params
    ======
      action_size (int): dimension of each action
      buffer_size (int): maximum size of buffer
      batch_size (int): size of each training batch
      seed (int): random seed
    """
    self.action_size = action_size
    # A bounded deque drops the oldest experience once buffer_size is reached.
    self.memory = deque(maxlen=buffer_size)
    self.batch_size = batch_size
    self.experience = namedtuple('Experience', field_names=['state', 'action', 'reward', 'next_state', 'done'])
    # random.seed() returns None; the call is kept for its seeding side effect.
    self.seed = random.seed(seed)

  def add(self, state, action, reward, next_state, done):
    """Add a new experience to memory."""
    e = self.experience(state, action, reward, next_state, done)
    self.memory.append(e)

  def sample(self):
    """Randomly sample a batch of experiences from memory.

    Returns
    =======
      tuple (states, actions, rewards, next_states, dones) of float tensors
      on `device`, each built by vertically stacking one field of the batch

    Raises
    ======
      ValueError: if memory holds fewer than batch_size experiences
    """
    experiences = random.sample(self.memory, k=self.batch_size)

    states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
    actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device)
    rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
    next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
    # Booleans go through uint8 so done flags become 0.0 / 1.0 floats.
    dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device)

    return (states, actions, rewards, next_states, dones)

  def __len__(self):
    """Return the current size of internal memory."""
    return len(self.memory)

# Deep Q-Network (DQN)
In this notebook, you will implement a DQN agent and train it on OpenAI Gym's LunarLander-v2 environment.

### 1. Import the Necessary Packages

In [11]:
import gym
# !pip3 install box2d
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

# !python -m pip install pyvirtualdisplay
from pyvirtualdisplay import Display
# Start a virtual display (pyvirtualdisplay) with a 1400x900 screen.
display = Display(visible=0, size=(1400, 900))
display.start()

# True when matplotlib is using an inline (notebook) backend.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
  # NOTE(review): this import rebinds the module-level name `display`, which
  # until here referred to the virtual Display started above; that instance
  # keeps running but can no longer be reached (e.g. to stop it) by name.
  from IPython import display

# Turn on matplotlib interactive mode.
plt.ion()

EasyProcessCheckInstalledError: cmd=['Xvfb', '-help']
OSError=[Errno 2] No such file or directory: 'Xvfb'
Program install error! 