# Navigation Project report

A number of different agents were created to solve the 'Banana Navigation' game. These were:

1. Basic Deep Q-Network (DQN) with experience replay, using a linear neural network model
2. Similar to 1, but including a 'Double' network
3. Building on 2, but also including a 'Dueling' network
4. Prioritised experience replay

All of these implementations 'solved' the environment insofar as they achieved an average score greater than 13 over 100 episodes. The results are shown below.


## Learning algorithm
Something about the learning algorithms

## Plot of rewards

In [1]:
# Notebook setup cell: start the Unity environment, import the project's
# agents and training loop, and print basic facts about the action and
# state spaces. Statement order matters here because the environment is
# launched as a side effect and the prints appear in the cell output.
from unityagents import UnityEnvironment
import numpy as np
# Launch the Banana environment from the Linux executable.
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64")
# Alternative executable path, kept disabled (presumably the macOS build — confirm).
#env = UnityEnvironment(file_name="Banana.app")

import torch
from lib.agents import AgentExerperienceReplay, AgentPrioritizedExperience

# Use the first brain exposed by the environment; its name is the key used
# to index the results of env.reset() below and is passed on to dqn().
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Number of actions available to the agent, as reported by the brain.
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

import matplotlib.pyplot as plt
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline

from lib.dqn import dqn

# Reset the environment with train_mode=False and keep the info object
# belonging to our brain.
env_info = env.reset(train_mode=False)[brain_name]

# Number of agents present in the environment.
print('Number of agents:', len(env_info.agents))

# Inspect the observation vector of the first agent (index 0); its length
# is used as the state size when constructing agents.
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)


# Registry of agents for this notebook. Every entry carries:
#   'agent' - the agent instance,
#   'name'  - the checkpoint name later handed to load_model(),
#   'test'  - whether the evaluation loop should run this agent.
agents = [
    # Plain experience-replay agent: registered, but not evaluated.
    {
        'agent': AgentExerperienceReplay(state_size=state_size,
                                         action_size=action_size,
                                         seed=0,
                                         train_mode=False),
        'name': 'qnetwork',
        'test': False,
    },
]

# NOTE(review): both prioritized-replay agents are constructed with exactly
# the same arguments; only the checkpoint name differs. If the dueling
# variant needs a different network, that is not visible here — confirm.
agents += [
    {
        'agent': AgentPrioritizedExperience(state_size=state_size,
                                            action_size=action_size,
                                            seed=0,
                                            prioritized_experience=True,
                                            compute_weights=True,
                                            train_mode=False),
        'name': checkpoint,
        'test': True,
    }
    for checkpoint in ('ddqn_with_prioritized_experiences_qnetwork',
                       'dueling_ddqn_with_prioritized_experiences_qnetwork')
]

# Keep `agent` bound to the most recently created agent.
agent = agents[-1]['agent']

# Evaluate each registered agent that is flagged for testing: load its saved
# model by name, run dqn() with train_mode=False for 100 episodes, and store
# the returned scores on the registry entry. Entries with 'test' set to False
# are skipped and therefore never receive a 'scores' key.
for info in agents:
    if not info['test']:
        continue

    print('Testing', info['name'])
    info['agent'].load_model(info['name'])
    info['scores'] = dqn(env, brain_name, info['agent'],
                         train_mode=False,
                         n_episodes=100,
                         eps_start=0.01,
                         checkpoint=13)




INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of actions: 4
Number of agents: 1
States look like: [1.         0.         0.         0.         0.84408134 0.
 0.         1.         0.         0.0748472  0.         1.
 0.         0.         0.25755    1.         0.         0.
 0.         0.74177343 0.         1.         0.         0.
 0.25854847 0.         0.         1.         0.         0.09355672
 0.         1.         0.         0.         0.31969345 0.
 0.        ]
States have length: 37
Testing ddqn_with_prioritized_experiences_qnetwork
Episode 5	Average Score: 16.00

KeyboardInterrupt: 

In [1]:
# Notebook setup cell (re-run): start the Unity environment, import the
# project's agents and training loop, and print basic facts about the action
# and state spaces. Statement order matters here because the environment is
# launched as a side effect and the prints appear in the cell output.
from unityagents import UnityEnvironment
import numpy as np
# Launch the Banana environment from the Linux executable.
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64")
# Alternative executable path, kept disabled (presumably the macOS build — confirm).
#env = UnityEnvironment(file_name="Banana.app")

import torch
from lib.agents import AgentExerperienceReplay, AgentPrioritizedExperience

# Use the first brain exposed by the environment; its name is the key used
# to index the results of env.reset() below and is passed on to dqn().
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Number of actions available to the agent, as reported by the brain.
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

import matplotlib.pyplot as plt
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline

from lib.dqn import dqn

# Reset the environment with train_mode=False and keep the info object
# belonging to our brain.
env_info = env.reset(train_mode=False)[brain_name]

# Number of agents present in the environment.
print('Number of agents:', len(env_info.agents))

# Inspect the observation vector of the first agent (index 0); its length
# is used as the state size when constructing agents.
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of actions: 4
Number of agents: 1
States look like: [1.         0.         0.         0.         0.84408134 0.
 0.         1.         0.         0.0748472  0.         1.
 0.         0.         0.25755    1.         0.         0.
 0.         0.74177343 0.         1.         0.         0.
 0.25854847 0.         0.         1.         0.         0.09355672
 0.         1.         0.         0.         0.31969345 0.
 0.        ]
States have length: 37


In [2]:
# Training cell: build one prioritized-experience-replay agent in training
# mode, register it as the only entry in `agents`, and run dqn() on it.
agent = AgentPrioritizedExperience(state_size=state_size,
                                   action_size=action_size,
                                   seed=0,
                                   prioritized_experience=True,
                                   compute_weights=True,
                                   train_mode=True)

# 'test' is False because this entry is trained here rather than evaluated.
agents = [{'agent': agent,
           'name': 'ddqn_with_prioritized_experiences_qnetwork',
           'test': False}]

info = agents[0]
# NOTE(review): eps_start=0.01 and n_episodes=100 are the same values used by
# the evaluation cell; for a training run they look unusually small — confirm
# they are intended.
#
# The scores are stored on the registry entry as well as in `scores`, so that
# the plotting cells, which read agents[i]['scores'], find them.
info['scores'] = scores = dqn(env,
                              brain_name,
                              info['agent'],
                              train_mode=True,
                              n_episodes=100,
                              eps_start=0.01,
                              checkpoint=13)

Episode 3	Average Score: 1.330

KeyboardInterrupt: 

### Basic Deep Q-Network results

In [None]:
# Plot the per-episode scores stored on the first registry entry (index 0).
# That entry must already carry a 'scores' key.
scores = agents[0]['scores']
fig, ax = plt.subplots()
episode_index = np.arange(len(scores))
ax.plot(episode_index, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()


### Double Deep Q-Network results

In [None]:
# Plot the per-episode scores stored on the second registry entry (index 1).
# That entry must already carry a 'scores' key.
scores = agents[1]['scores']
fig, ax = plt.subplots()
episode_index = np.arange(len(scores))
ax.plot(episode_index, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

### Dueling Double Deep Q-Network results

In [None]:
# Plot the per-episode scores stored on the third registry entry (index 2).
# That entry must already carry a 'scores' key.
scores = agents[2]['scores']
fig, ax = plt.subplots()
episode_index = np.arange(len(scores))
ax.plot(episode_index, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()