In [None]:
from unityagents import UnityEnvironment
import numpy as np

In [None]:
from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# Launch the Unity environment from the local Banana build.
# Port, worker id and seed are pinned to fixed values here.
env = UnityEnvironment(
    file_name="env/Banana_Linux_NoVis/Banana.x86",
    base_port=64738,
    worker_id=1,
    seed=1,
)

Environments contain **_brains_** which are responsible for deciding the actions of their associated agents. Here we check for the first brain available, and set it as the default brain we will be controlling from Python.

In [None]:
# Use the first brain name the environment reports as the default brain,
# then look up the matching brain object so its parameters
# (e.g. the action-space size) can be read by later cells.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Reset with train_mode=False and keep the info belonging to the default brain.
env_info = env.reset(train_mode=False)[brain_name]

# Gather the quantities of interest first ...
action_size = brain.vector_action_space_size   # number of actions
state = env_info.vector_observations[0]        # observation of the first agent
state_size = len(state)                        # length of that observation vector

# ... then report them.
print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
def plot(rewards, losses, action_takens):
    """Redraw the three-panel training dashboard in the current cell output.

    Args:
        rewards: sequence drawn as a line in the first panel.
        losses: sequence drawn as a line in the second panel.
        action_takens: sequence of action values; the relative frequency of
            each distinct value is drawn as a bar chart in the third panel.
    """
    # Replace the previous dashboard instead of stacking a new one below it.
    clear_output(wait=True)

    _, (reward_ax, loss_ax, action_ax) = plt.subplots(1, 3, figsize=(20, 5))

    reward_ax.set_title('rewards')
    reward_ax.plot(rewards)

    loss_ax.set_title('loss')
    loss_ax.plot(losses)

    # Normalise the per-action counts so the bars show frequencies.
    actions, occurrences = np.unique(action_takens, return_counts=True)
    frequencies = occurrences / occurrences.sum()
    action_ax.bar(actions, frequencies)
    action_ax.set_title("Action distribution")

    plt.show()

In [None]:
from agent import DqnAgent
from unityagents import UnityEnvironment
import numpy as np

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset once up front so the observation and action sizes can be read
# and used to build the agent
env_info = env.reset(train_mode=True)[brain_name]
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]

# training settings
n_episodes = 3000    # total number of training episodes
update_every = 10    # redraw the plots / print progress every N episodes
learn_every = 5      # run one learning step every N episodes

agent = DqnAgent(state_size=len(state), action_size=action_size)

# per-run history fed to plot()
scores = []          # summed reward of each episode
loses = []           # loss recorded for each episode (0 when no learning step ran)
action_takens = []   # every action chosen so far

try:
    for episode in range(n_episodes):
        score = 0
        loss = 0
        action_taken = []

        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]

        # roll out one full episode, storing every transition in the agent's memory
        while True:
            action = agent.act(state)                      # ask agent action selection
            env_info = env.step(action)[brain_name]        # send the action to the environment

            next_state = env_info.vector_observations[0]   # get the next state
            reward = env_info.rewards[0]                   # get the reward
            done = env_info.local_done[0]                  # see if episode has finished

            transition_info = (state, action, reward, next_state, done)
            agent.memory.add_recollection(transition_info)

            score += reward
            action_taken.append(action)

            if done:
                break

            state = next_state

        if episode % learn_every == 0:
            samples = agent.memory.sample()
            loss += agent.learn(samples)   # the value returned by learn() is what gets plotted as loss

        # Record the episode only AFTER the learning step. Appending `loss`
        # before it (as was done previously) always stored the initial 0,
        # which left the loss plot flat.
        scores.append(score)
        loses.append(loss)
        action_takens += action_taken

        if episode % update_every == 0:
            plot(scores, loses, action_takens)
            print('Episode: {} - '.format(episode), end="")
            print('Averaged Score of the last {} episodes : {}'.format(update_every, np.mean(scores[-update_every:]).round(2)))
finally:
    # Always shut the environment down, even when training is interrupted or
    # raises, so the Unity process is not left running.
    env.close()