# Navigation

This notebook contains the solution to the navigation problem in the Udacity Deep Reinforcement Learning program.

# 1. Importing dependencies and starting the environment

We will start by importing the required dependencies for this project. The project contains two Python classes: one for the model and one for the RL agent.

In [1]:
from unityagents import UnityEnvironment
import numpy as np
from collections import deque

from plot import Plot
import sys
print(sys.version)

3.6.6 |Anaconda, Inc.| (default, Jun 28 2018, 11:27:44) [MSC v.1900 64 bit (AMD64)]


Next we will load the Unity environment and get the default brain.

In [None]:
# Create the Unity environment from the local Banana build.
# NOTE(review): the file name ends in ".app" although the directory is the
# Windows x86_64 build — confirm this path resolves on the target machine.
env = UnityEnvironment(file_name="./Banana_Windows_x86_64/Banana.app")

In [None]:
# Use the first brain listed by the environment as the default brain and
# look up its brain object, which is read later for the action-space size.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

print("Using the default brain name {}".format(brain_name))

### 2. Examine the State and Action Spaces

The simulation contains a single agent that navigates a large environment.  At each time step, it has four actions at its disposal:
- `0` - walk forward 
- `1` - walk backward
- `2` - turn left
- `3` - turn right

The state space has `37` dimensions and contains the agent's velocity, along with ray-based perception of objects around the agent's forward direction.  A reward of `+1` is provided for collecting a yellow banana, and a reward of `-1` is provided for collecting a blue banana.


In [None]:
# Reset the environment in training mode and keep the info for the default brain.
env_info = env.reset(train_mode=True)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# examine the state space (observation vector of the first agent)
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

# Release this exploratory environment. Every training cell below launches its
# own instance through reset_env(), and a second Unity instance cannot be
# started on the default worker while this one is still running.
env.close()

### 3. Create and train the DQN agent

In [2]:
import torch
def dqn(agent, agent_type, env, brain_name, num_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Train ``agent`` on a Unity environment with an epsilon-greedy schedule.

    Args:
        agent: Object exposing ``act(state, eps)`` (whose result supports
            ``.astype(int)``), ``step(state, action, reward, next_state, done)``
            and a ``qnet_local`` module whose ``state_dict()`` is saved once the
            environment is solved.
        agent_type: Path handed to ``torch.save`` for the solved checkpoint.
        env: Environment whose ``reset(train_mode=True)`` and ``step(action)``
            return a mapping indexed by ``brain_name``.
        brain_name: Key of the brain to read observations and rewards from.
        num_episodes: Maximum number of training episodes.
        max_t: Maximum number of steps per episode.
        eps_start: Initial epsilon.
        eps_end: Lower bound for epsilon.
        eps_decay: Multiplicative decay applied to epsilon after every episode.

    Returns:
        List with the total reward of every episode that was played.
    """
    eps = eps_start
    score_window = deque(maxlen=100)   # most recent 100 episode scores
    scores = []

    for i_episode in range(1, num_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps).astype(int)

            env_info = env.step(action)[brain_name]        # send the action to the environment
            next_state = env_info.vector_observations[0]   # get the next state
            reward = env_info.rewards[0]                   # get the reward
            done = env_info.local_done[0]                  # check whether the episode ended

            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

        score_window.append(score)
        scores.append(score)

        eps = max(eps_end, eps_decay * eps)

        window_mean = np.mean(score_window)

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, window_mean))

        # Only declare the task solved once the window holds a full 100 episodes;
        # otherwise a single lucky early episode would stop training and the
        # reported episode count (i_episode-100) would be negative.
        window_full = len(score_window) == score_window.maxlen
        if window_full and window_mean >= 13.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, window_mean))
            torch.save(agent.qnet_local.state_dict(), agent_type)
            break

    return scores

In [3]:
def reset_env(file_name="./Banana_Windows_x86_64/Banana.app"):
    """Start a fresh Unity environment and report its basic dimensions.

    Args:
        file_name: Path of the Unity build to launch. Defaults to the Banana
            build used throughout this notebook.

    Returns:
        Tuple ``(env, env_info, state_size, num_actions, brain_name)`` where
        ``env_info`` is the result of the initial training-mode reset for the
        first brain, ``state_size`` is the length of the first agent's
        observation vector and ``num_actions`` is the brain's
        ``vector_action_space_size``.
    """
    env = UnityEnvironment(file_name=file_name)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    state_size = len(env_info.vector_observations[0])
    num_actions = brain.vector_action_space_size
    return env, env_info, state_size, num_actions, brain_name
    

In [6]:
from agent import Agent

# Start a dedicated environment for this training run.
env, env_info, state_size, num_actions, brain_name = reset_env()

try:
    vanilla_dqn = Agent(state_size,  num_actions, use_double_q=False, use_dueling_net=False)

    # Train the agent. dqn() resets the environment at the start of every
    # episode, so no extra reset is needed here.
    print("Training Vanilla DQN")
    # Stored under the name the plotting section reads. Plotting itself is left
    # to section 4, because the other three score lists do not exist yet.
    scores_vanilla_dqn = dqn(vanilla_dqn, "Vanilla", env, brain_name)
finally:
    # Always release the Unity process, even if training fails or is
    # interrupted, so the next training cell can start its own environment.
    env.close()

OSError: handle is closed

ERROR:root:Exception calling application: [WinError 232] The pipe is being closed
Traceback (most recent call last):
  File "c:\program files (x86)\microsoft visual studio\shared\anaconda3_64\envs\rl\lib\site-packages\grpc\_server.py", line 385, in _call_behavior
    return behavior(argument, context), True
  File "c:\program files (x86)\microsoft visual studio\shared\anaconda3_64\envs\rl\lib\site-packages\unityagents\rpc_communicator.py", line 25, in Exchange
    self.child_conn.send(request)
  File "c:\program files (x86)\microsoft visual studio\shared\anaconda3_64\envs\rl\lib\multiprocessing\connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "c:\program files (x86)\microsoft visual studio\shared\anaconda3_64\envs\rl\lib\multiprocessing\connection.py", line 280, in _send_bytes
    ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed


In [None]:
from agent import Agent

# Get the environment params
env, env_info, state_size, num_actions, brain_name = reset_env()

try:
    double_dqn = Agent(state_size,  num_actions, use_double_q=True, use_dueling_net=False)

    # dqn() resets the environment at the start of every episode, so no extra
    # reset is needed here.
    print("Training Double DQN")
    scores_double_dqn = dqn(double_dqn, "Double DQN", env, brain_name)
finally:
    # Release the Unity process so the next training cell can launch its own
    # environment; this also runs if training fails or is interrupted.
    env.close()

In [None]:
from agent import Agent

# Get the environment params
env, env_info, state_size, num_actions, brain_name = reset_env()

try:
    dueling_dqn = Agent(state_size,  num_actions, use_double_q=False, use_dueling_net=True)

    # dqn() resets the environment at the start of every episode, so no extra
    # reset is needed here.
    print("Training Dueling DQN")
    scores_dueling_dqn = dqn(dueling_dqn, "Dueling DQN", env, brain_name)
finally:
    # Release the Unity process so the next training cell can launch its own
    # environment; this also runs if training fails or is interrupted.
    env.close()

In [4]:
from agent import Agent

# Get the environment params
env, env_info, state_size, num_actions, brain_name = reset_env()

try:
    # Agent combining double Q-learning with the dueling network architecture.
    double_dualing_dqn = Agent(state_size,  num_actions, use_double_q=True, use_dueling_net=True)

    print("Training Double Dualing DQN")

    scores_double_dualing_dqn = dqn(double_dualing_dqn, "Double Dualing DQN", env, brain_name)
finally:
    # Close the environment even when training raises or is interrupted, so the
    # Unity process does not stay alive and block a later run.
    env.close()

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


THE DEVICE IS
cuda:0
Dueling_DQN(
  (h1): Linear(in_features=37, out_features=10, bias=True)
  (h2): Linear(in_features=10, out_features=10, bias=True)
  (adv): Linear(in_features=10, out_features=10, bias=True)
  (val): Linear(in_features=10, out_features=10, bias=True)
  (adv2): Linear(in_features=10, out_features=4, bias=True)
  (val2): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)
Training Double Dualing DQN
Episode 100	Average Score: 0.00
Episode 200	Average Score: 0.75
Episode 300	Average Score: 2.14
Episode 400	Average Score: 3.60
Episode 500	Average Score: 6.52
Episode 600	Average Score: 7.06
Episode 700	Average Score: 7.63
Episode 800	Average Score: 8.88
Episode 900	Average Score: 7.74
Episode 1000	Average Score: 8.32
Episode 1100	Average Score: 8.16
Episode 1200	Average Score: 9.76
Episode 1300	Average Score: 8.35
Episode 1400	Average Score: 7.85
Episode 1500	Average Score: 8.15
Episode 1600	Average Score: 8.84
Episode 1700	Average Score: 8.78
Episode 1

### 4. Plot the results from training



In [None]:
from plot import Plot

In [None]:
# Compare the per-episode scores of the four agents. The last argument uses the
# name the double dueling training cell actually binds
# (scores_double_dualing_dqn); the previous spelling contained a space and was
# a syntax error.
plt = Plot(scores_vanilla_dqn, scores_double_dqn, scores_dueling_dqn, scores_double_dualing_dqn)
plt.make_plot()

In [None]:
# Final clean-up of the Unity environment.
# NOTE(review): the double dueling training cell above already calls
# env.close() on this same object — confirm whether closing an already closed
# environment raises, and drop this cell if it does.
env.close()