# Continuous Control


In [4]:
import numpy as np
import matplotlib.pyplot as plt
import torch

from unityagents import UnityEnvironment
from agent import Agent
from ddpg_learning import ddpg


In [5]:
# Start the Unity environment from the bundled Reacher build.
env = UnityEnvironment(file_name="Reacher_20.app")

# Use the first brain the environment exposes.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode and keep only the info that belongs to our brain.
env_info = env.reset(train_mode=True)[brain_name]

# Problem dimensions read back from the environment; they are needed to build the agent.
num_agents = len(env_info.agents)                  # number of agents reported after reset
state_size = len(env_info.vector_observations[0])  # length of the first agent's observation vector
action_size = brain.vector_action_space_size       # action vector size reported by the brain


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [6]:
# Hyperparameters for the DDPG agent. Everything tunable lives here so a run
# can be reproduced from this single cell.
BUFFER_SIZE = int(1e6)  # replay buffer size
BATCH_SIZE = 256        # minibatch size
GAMMA = 0.99            # discount factor
TAU = 1e-3              # for soft update of target parameters
LR_ACTOR = 1e-4         # learning rate of the actor
LR_CRITIC = 1e-3        # learning rate of the critic
UPDATE_EVERY = 2        # forwarded as update_every; presumably "learn every N timesteps" -- TODO confirm in agent.py
RANDOM_SEED = 42        # seed forwarded to the Agent (named here instead of an inline literal)

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the agent from the environment dimensions and the hyperparameters above.
agent = Agent(state_size=state_size,
              action_size=action_size,
              num_agents=num_agents,
              buffer_size=BUFFER_SIZE,
              batch_size=BATCH_SIZE,
              gamma=GAMMA,
              tau=TAU,
              learning_rate_actor=LR_ACTOR,
              learning_rate_critic=LR_CRITIC,
              device=device,
              update_every=UPDATE_EVERY,
              random_seed=RANDOM_SEED)


In [None]:
# Threshold forwarded to ddpg() as average_score_solved
# (presumably the average score at which the task counts as solved -- TODO confirm in ddpg_learning.py).
AVERAGE_SCORE_SOLVED = 30.0

# Run training; ddpg() returns two values, unpacked here as scores and num_episodes_solved.
scores, num_episodes_solved = ddpg(
    env=env,
    agent=agent,
    num_agents=num_agents,
    average_score_solved=AVERAGE_SCORE_SOLVED,
)


Episode 2	Average Score: 0.73	Score: 0.73

In [None]:
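# TODO - ideas to try:
# - skip learning on some timesteps (do not learn at every timestep)
# - change the batch size (increase it)
# - change the size of the networks (smallest possible)
# - change the noise: reduce sigma to 0.2 | use random noise for each set of actions
# - add dropout | BatchNorm1d
# - skip learning on some timesteps (do not learn at every timestep)
# - change the learning rates (LR)

# TODO - housekeeping:
# - comments
# - add report, README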

In [None]:
# Close the Unity environment created earlier in the notebook once it is no longer needed.
env.close()

In [None]:
#Network (256) (256, 256, 128)

#BUFFER_SIZE = int(1e6)  # replay buffer size
#BATCH_SIZE = 64        # minibatch size
#GAMMA = 0.99            # discount factor
#TAU = 1e-3              # for soft update of target parameters
#LR_ACTOR = 1e-4         # learning rate of the actor
#LR_CRITIC = 1e-3        # learning rate of the critic
#WEIGHT_DECAY = 0   # L2 weight decay

#ONE AGENT
#Episode 100	Average Score: 1.8539999585598708	Score: 1.92999995686113833
#Episode 200	Average Score: 4.243399905152619	Score: 7.33999983593821585
#Episode 300	Average Score: 6.629699852038175	Score: 8.46999981068074755
#Episode 323	Average Score: 7.2578998377732935	Score: 8.1299998182803445

#20 AGENTS
#Episode 100	Average Score: 6.294999859295785	Score: 12.4614997214637755
#Episode 101	Average Score: 6.44864985586144	Score: 15.647999650239944

In [None]:
#Network (128) (128, 64, 32)

#BUFFER_SIZE = int(1e6)  # replay buffer size
#BATCH_SIZE = 256        # minibatch size
#GAMMA = 0.99            # discount factor
#TAU = 1e-3              # for soft update of target parameters
#LR_ACTOR = 1e-4         # learning rate of the actor
#LR_CRITIC = 1e-3        # learning rate of the critic
#WEIGHT_DECAY = 0   # L2 weight decay

#Episode 100	Average Score: 12.235059726748619	Score: 25.371499432902784
#Episode 104	Average Score: 13.194844705072235	Score: 23.469499475415795

In [None]:
#Network (128) (128, 64, 32)

#BUFFER_SIZE = int(1e6)  # replay buffer size
#BATCH_SIZE = 256        # minibatch size
#GAMMA = 0.99            # discount factor
#TAU = 1e-3              # for soft update of target parameters
#LR_ACTOR = 1e-4         # learning rate of the actor
#LR_CRITIC = 1e-3        # learning rate of the critic
#WEIGHT_DECAY = 0   # L2 weight decay

#update_every=2
#different noise for each agent

#Episode 100	Average Score: 9.05	Score: 19.61
#Episode 101	Average Score: 9.24	Score: 19.74