# Navigation

---

In this notebook, you will learn how to use the Unity ML-Agents environment for the first project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893).

### 1. Start the Environment

We begin by importing some necessary packages.  If the code cell below returns an error, please revisit the project instructions to double-check that you have installed [Unity ML-Agents](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) and [NumPy](http://www.numpy.org/).

In [1]:
from unityagents import UnityEnvironment
import numpy as np

In [2]:
# Launch the Banana environment binary and keep the handle in the notebook-level `env`
# (the "NoVis" directory name suggests this is the build without visualization - TODO confirm).
# NOTE(review): the output saved with this cell is a UnityTimeOutException, i.e. the
# binary did not respond on the machine where the notebook was last executed.
env = UnityEnvironment(file_name="p1_navigation/Banana_Linux_NoVis/Banana.x86_64")

UnityTimeOutException: The Unity environment took too long to respond. Make sure that :
	 The environment does not need user interaction to launch
	 The Academy and the External Brain(s) are attached to objects in the Scene
	 The environment and the Python interface have compatible versions.

In [None]:
# Get the default brain: take the first brain name reported by the environment and
# look up the matching brain object. Both names are notebook globals used by later
# cells (`brain_name` to index reset/step results, `brain` for the action-space size).
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
import torch

# Show whether CUDA can be used, followed by the device that would be selected.
print(torch.cuda.is_available())
print(torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu"))

In [None]:
import torch 
from collections import deque

def train(agent, n_episodes, max_steps, eps_start, eps_end, eps_decay, success_reward):
    """Train `agent` with epsilon-greedy exploration until the task counts as solved.

    The function talks to the environment through the notebook-level globals
    `env` and `brain_name`.

    Args:
        agent: Object exposing `act(state, eps)`, `step(state, action, reward,
            next_state, done)` and a `qnetwork_local` with `state_dict()`.
        n_episodes: Maximum number of episodes to run.
        max_steps: Maximum number of environment steps per episode.
        eps_start: Epsilon used for the first episode.
        eps_end: Lower bound for epsilon.
        eps_decay: Factor epsilon is multiplied by after every episode.
        success_reward: Training stops once the mean score over a full window
            of 100 consecutive episodes reaches this value.

    Returns:
        list: The score of every episode that was run, in order.

    Side effects:
        Prints progress to stdout and, when the success criterion is met,
        writes the local Q-network weights to 'checkpoint.pth'.
    """
    # Per-episode scores, plus a sliding window of the most recent 100
    scores = []
    scores_window = deque(maxlen=100)

    # Initialize epsilon
    eps = eps_start

    # Training loop
    for i_episode in range(1, n_episodes+1):

        # Reset the environment and read the first agent's initial state
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]

        # Run one episode, step by step
        score = 0
        for _ in range(max_steps):

            # Epsilon-greedy action selection
            action = agent.act(state, eps)

            # Apply the action and unpack the transition for the first agent
            env_info = env.step(int(action))[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]

            # Hand the experience to the agent (store & learn)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay*eps)

        # Computed once per episode and reused by every message below
        mean_score = np.mean(scores_window)

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score), end="")

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score))

        # Solved only when the average is taken over a *full* window; without this
        # guard a lucky early episode would stop training and report a negative
        # episode count.
        window_full = len(scores_window) == scores_window.maxlen
        if window_full and mean_score >= success_reward:
            print('\nEnvironment solved in {:d} episodes!'.format(i_episode - scores_window.maxlen))
            print('Average Score: {:.2f}'.format(mean_score))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            break

    return scores

In [None]:
# Begin a fresh training-mode episode and keep the info entry for our brain
env_info = env.reset(train_mode=True)[brain_name]

# Size of the action space as reported by the brain
action_size = brain.vector_action_space_size

# Observation of the first agent and its length
state = env_info.vector_observations[0]
state_size = len(state)

# Report what was found
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
from dqn_agent import Agent

# Replay memory settings
BUFFER_SIZE = 10 ** 5   # Replay buffer size
BATCH_SIZE = 128        # Minibatch size

# Learning settings
GAMMA = 0.99            # Discount factor
TAU = 1e-3              # For soft update of target parameters
LR = 5e-4               # Learning rate
UPDATE_EVERY = 8        # How often to update the network

# Build the agent from the observed state/action sizes and the settings above
agent = Agent(
    state_size,
    action_size,
    GAMMA,
    TAU,
    LR,
    UPDATE_EVERY,
    BUFFER_SIZE,
    BATCH_SIZE,
)

In [None]:
# Training budget
N_EPISODES = 2000
MAX_STEPS = 2000

# Exploration schedule
EPSILON_START = 1.0
EPSILON_END = 0.01
EPSILON_DECAY = 0.99

# Average score at which training stops
SUCCESS_REWARD = 14

scores = train(
    agent,
    N_EPISODES,
    MAX_STEPS,
    EPSILON_START,
    EPSILON_END,
    EPSILON_DECAY,
    SUCCESS_REWARD,
)

# Plot the score of every episode
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
# plt.savefig('training_best.png')
plt.show()

In [None]:
def test(self, env, brain_name,filename):
    weights = torch.load(filename)
    self.qnetwork_local.load_state_dict(weights)
    self.qnetwork_target.load_state_dict(weights)
        
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]

    for t in range(2000):
        action = self.act(state, 0)
        env_info = env.step(action.astype(np.int32))[brain_name] 
        next_state = env_info.vector_observations[0]         
        done = env_info.local_done[0]      
        state = next_state

        if done:
            break 

In [None]:

# Close the environment opened earlier: a second UnityEnvironment cannot be
# started on the same (default) worker while the first one is still open.
env.close()

# Create environment (this time with visualization)
env = UnityEnvironment(file_name="p1_navigation/Banana_Linux/Banana.x86_64")

# Get brain name
brain_name = env.brain_names[0]

# `test` takes the agent as its first parameter; pass the agent built above so
# the checkpoint is loaded into its networks and it drives the episode.
test(agent, env, brain_name, "checkpoint.pth")


In [None]:

# Reset the environment for training
env_info = env.reset(train_mode=True)[brain_name]

# Re-read the brain from the current `env` so the action-space size below does
# not come from a brain object fetched from an earlier environment instance.
brain = env.brains[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# Number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# Examine the state space
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

# Create the agent with the same hyperparameter arguments used for `agent`
# earlier in the notebook, so both constructor calls agree.
# NOTE(review): `dqn_agent.Agent` is not visible here - confirm its signature.
agente_bananero = Agent(state_size, action_size, GAMMA, TAU, LR, UPDATE_EVERY, BUFFER_SIZE, BATCH_SIZE)

# Train the agent