# Continuous Control

---


### Start the Environment

Run the next code cell to install a few packages. This might take a few minutes to run.

In [1]:
import numpy as np
import gym
import random
import torch
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

In [2]:

import numpy as np


# select this option to load version 1 (with a single agent) of the environment
# env = UnityEnvironment(file_name='/data/Reacher_One_Linux_NoVis/Reacher_One_Linux_NoVis.x86_64')

# select this option to load version 2 (with 20 agents) of the environment
env = gym.make("BipedalWalker-v3", render_mode='human')

state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
num_agents = 1



### Train a DDPG agent

Run the code cells below to train the agent.

In [3]:
agent = Agent(state_size=state_size, action_size=action_size, num_agents=num_agents, random_seed=np.random.randint(100))

In [6]:

def ddpg(n_episodes=10000, max_t=10000, print_every=30):
    scores_deque = deque(maxlen=print_every)
    scores = []
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        score = np.zeros(num_agents)
        for t in range(max_t):
            actual_state = state[0] if isinstance(state, tuple) else state
            state = actual_state
            action = agent.act(actual_state)
   
            next_state, reward, done, info,_ = env.step(action)
             

            
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break 
        scores_deque.append(np.mean(score))
        scores.append(np.mean(score))
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)), end="")
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
        print('\rEpisode {}\t Score: {:.2f}'.format(i_episode, np.mean(score)))
        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
            
    return scores

scores = ddpg()

fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(1, len(scores)+1), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()

Episode 1	 Score: -176.88-176.88
Episode 2	 Score: -114.24-145.56
Episode 3	 Score: -130.07-140.39
Episode 4	 Score: -162.61-145.95
Episode 5	 Score: -117.59-140.28
Episode 6	 Score: -161.52-143.82
Episode 7	 Score: -152.78-145.10
Episode 8	 Score: -130.06-143.22
Episode 9	 Score: -113.65-139.93
Episode 10	 Score: -197.49-145.69
Episode 11	 Score: -130.18-144.28
Episode 12	 Score: -159.54-145.55
Episode 13	 Score: -232.74-152.26
Episode 14	 Score: -108.84-149.16
Episode 15	 Score: -111.73-146.66
Episode 16	 Score: -119.38-144.96
Episode 17	 Score: -111.11-142.96
Episode 18	 Score: -121.00-141.74
Episode 19	 Score: -195.02-144.55
Episode 20	 Score: -163.12-145.48
Episode 21	 Score: -165.92-146.45
Episode 22	 Score: -172.48-147.63
Episode 23	 Score: -160.02-148.17
Episode 24	 Score: -657.08-169.38
Episode 25	 Score: -145.13-168.41
Episode 26	 Score: -163.43-168.22
Episode 27	 Score: -107.56-165.97
Episode 28	 Score: -169.51-166.10
Episode 29	 Score: -131.66-164.91
Episode 30	 Score: -117

When finished, you can close the environment.

In [12]:
env.close()