# Continuous Control - Markus Bader's version

---

In this notebook, you will learn how to use the Unity ML-Agents environment for the second project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893) program.


### My Turn!

In [None]:
from unityagents import UnityEnvironment
import numpy as np
# Path of the Reacher build handed to UnityEnvironment.
REACHER_BUILD = './Reacher_Linux/Reacher.x86_64'
env = UnityEnvironment(file_name=REACHER_BUILD)

# The first brain reported by the environment is treated as the default one.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
from ddpg_agent import Agent



# Reset once (train_mode=True) so the observation layout can be inspected.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations      # current observations, one row per agent
num_agents = len(env_info.agents)
scores = np.zeros(num_agents)              # per-agent score, starting at zero

# Size the DDPG agent from what the environment and brain report.
action_size = brain.vector_action_space_size
state_size = states.shape[1]
agent = Agent(action_size=action_size, state_size=state_size, random_seed=10)


In [None]:
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
import torch

# Restore the saved checkpoint weights into the agent's local actor and critic.
# map_location='cpu' lets checkpoints written from CUDA tensors deserialize on a
# CPU-only machine; load_state_dict then copies the values into the networks'
# existing parameters, whichever device those parameters are on.
actor_state_dict = torch.load('checkpoint_actor_finished.pth', map_location='cpu')
critic_state_dict = torch.load('checkpoint_critic_finished.pth', map_location='cpu')
agent.actor_local.load_state_dict(actor_state_dict)
agent.critic_local.load_state_dict(critic_state_dict)

In [None]:

        
# Play one episode with train_mode=False and report the mean score.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations
scores = np.zeros(num_agents)              # per-agent score accumulated over the episode

episode_over = False
while not episode_over:
    # NOTE(review): only the first observation row is passed to the policy, so
    # this presumably targets the single-agent Reacher build -- confirm before
    # running against a multi-agent environment.
    action = agent.act(states[0])
    actions = np.clip(action, -1, 1)       # keep every action component within [-1, 1]
    env_info = env.step(actions)[brain_name]
    rewards = env_info.rewards
    dones = env_info.local_done
    next_states = env_info.vector_observations
    scores += rewards                      # add this step's rewards to the running score
    states = next_states                   # the new observations feed the next step
    episode_over = np.any(dones)           # stop as soon as any done flag is set

mean_score = np.mean(scores)
print('Total score (averaged over agents) this episode: {}'.format(mean_score))
