# Navigation Project

---


### 1. Import Necessary Packages

In [1]:
import os
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import torch
from unityagents import UnityEnvironment

%matplotlib inline

### 2. Load and explore the environment

The simulation contains a single agent that navigates a large environment.  At each time step, it has four actions at its disposal:
- `0` - walk forward 
- `1` - walk backward
- `2` - turn left
- `3` - turn right

The state space has `37` dimensions and contains the agent's velocity, along with ray-based perception of objects around the agent's forward direction.  A reward of `+1` is provided for collecting a yellow banana, and a reward of `-1` is provided for collecting a blue banana.

In [None]:
# load the environment
# The location of the Unity build is machine-specific, so it can be overridden
# with the BANANA_ENV_PATH environment variable; when the variable is not set
# the original hard-coded path is used.
env_path = os.environ.get(
    "BANANA_ENV_PATH",
    "/home/taylor/Classes/deep_rl/projects/deep-reinforcement-learning/p1_navigation/Banana_Linux/Banana.x86_64",
)
env = UnityEnvironment(file_name=env_path)

# get the default brain (the first one the environment exposes)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment in training mode and keep the info for our brain
env_info = env.reset(train_mode=True)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# get the current state of the first (index 0) agent
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

### 3. Load the agent

In [None]:
from dqn_agent import Agent

# initialize the agent with the state/action sizes reported by the environment
# above (37 and 4 for this project) and a random seed of 2
agent = Agent(state_size=state_size, action_size=action_size, seed=2)

### 4. Train the agent

In [None]:
def dqn(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995,
        target_score=1.0):
    """Train the agent with Deep Q-Learning on the Navigation environment.

    Uses the notebook-level ``env``, ``brain_name`` and ``agent`` objects
    created in the earlier cells. Each episode starts from a fresh
    ``env.reset`` and runs until the environment reports ``done`` or
    ``max_t`` steps have been taken. When the average score of the most
    recent (up to 100) episodes reaches ``target_score``, the local
    Q-network weights are written to ``checkpoint.pth`` and training stops.

    Params:
        n_episodes (int): max number of training episodes
        max_t (int): max number of time steps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): min value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        target_score (float): average score over the score window at which
            training stops; the default keeps the threshold this notebook used.
            NOTE(review): the threshold that counts as "solved" for this task
            may be higher than 1.0 - confirm against the project requirements.

    Returns:
        list: the total reward collected in each episode that was played
    """
    scores = []                        # total reward of every episode
    scores_window = deque(maxlen=100)  # total reward of the last 100 episodes
    eps = eps_start                    # current exploration rate

    for i_episode in range(1, n_episodes + 1):
        # every episode starts from a fresh environment and a zero score
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0

        for _ in range(max_t):
            action = agent.act(state, eps)                 # choose an action
            env_info = env.step(action)[brain_name]        # send the action to the environment
            next_state = env_info.vector_observations[0]   # get the next state
            reward = env_info.rewards[0]                   # get the reward
            done = env_info.local_done[0]                  # see if episode has finished

            # hand the transition to the agent *before* checking `done`, so the
            # terminal transition of the episode is not dropped
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

        # per-episode bookkeeping
        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon once per episode

        average = np.mean(scores_window)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average), end="")
        if i_episode % 100 == 0:
            # same text without end="" so every 100th report stays on its own line
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, average))
        if average >= target_score:
            # the reported count is the start of the 100-episode window,
            # clamped so it cannot go negative early in training
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                max(i_episode - 100, 0), average))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            break

    return scores

# run training with the default hyper-parameters
scores = dqn()

# plot every recorded score against its index
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# close the Unity environment once training and plotting are done
env.close()

