# 3D Ball

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch

from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
import numpy as np

from agent import Agent
from ddpg_learning import ddpg

In [3]:
# Launch the Unity build and expose it through the Gym-style wrapper
unity_env = UnityEnvironment(file_name="3Dball_visual.app")
env = UnityToGymWrapper(unity_env)

# Problem dimensions handed to the agent and the training loop.
# These are hard-coded here rather than queried from `env`.
num_agents = 1                # number of agents
action_size = 2               # action size
state_shape = (84, 84, 12)    # state (observation) shape


2021-04-09 19:46:09 INFO [environment.py:113] Connected to Unity environment with package version 2.0.0-exp.1 and communication version 1.5.0
2021-04-09 19:46:09 INFO [environment.py:282] Connected new brain:
Visual3DBall?team=0


In [4]:
# Build the DDPG agent from the hyperparameters below

# --- Hyperparameters -------------------------------------------------------
BUFFER_SIZE = int(1e5)   # replay buffer size
BATCH_SIZE = 64          # batch size
GAMMA = 0.99             # discount factor
TAU = 5e-2               # for soft update of target parameters
LR_ACTOR = 5e-4          # learning rate of the actor
LR_CRITIC = 5e-3         # learning rate of the critic
UPDATE_EVERY = 20        # how often to update the network

# Torch device to use: first CUDA GPU when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Collect the constructor arguments in one place, then build the agent
agent_kwargs = dict(
    state_shape=state_shape,
    action_size=action_size,
    num_agents=num_agents,
    buffer_size=BUFFER_SIZE,
    batch_size=BATCH_SIZE,
    gamma=GAMMA,
    tau=TAU,
    learning_rate_actor=LR_ACTOR,
    learning_rate_critic=LR_CRITIC,
    device=device,
    update_every=UPDATE_EVERY,
    random_seed=42,
)
agent = Agent(**agent_kwargs)


In [None]:
# Run the DDPG training loop on the environment

# Average-score threshold passed to ddpg() as `average_score_solved`
AVERAGE_SCORE_SOLVED = 2000

# ddpg() returns two values, kept here as the score history and
# `num_episodes_solved`
scores, num_episodes_solved = ddpg(
    env=env,
    agent=agent,
    num_agents=num_agents,
    average_score_solved=AVERAGE_SCORE_SOLVED,
)


Episode 15	Average Score: 0.85	Score: 0.40

In [None]:
# Plot the training session (scores per episode averaged across all agents)

def plot_scores(scores):
    """Draw the per-episode scores as a line chart and show the figure.

    Args:
        scores: sequence of scores, one entry per episode.
    """
    ax = plt.gca()  # current axes (created on demand), as plt.plot would use
    ax.plot(scores, color='royalblue')
    ax.set(title='Scores per episode', ylabel='score', xlabel='episode #')
    plt.show()

plot_scores(scores)


In [None]:
# See the trained agent in action: play one episode without exploration noise.
#
# `env` is a UnityToGymWrapper, so the Gym-style API is used here:
# reset() returns the first observation and step() returns
# (observation, reward, done, info). The former brain_name / env_info
# calls belong to a different API and `brain_name` is not defined in
# this notebook.

#Uncomment these lines to use a saved checkpoint:
#agent = Agent(state_shape=state_shape,
#              action_size=action_size,
#              num_agents=num_agents,
#              buffer_size=BUFFER_SIZE,
#              batch_size=BATCH_SIZE,
#              gamma=GAMMA,
#              tau=TAU,
#              learning_rate_actor=LR_ACTOR,
#              learning_rate_critic=LR_CRITIC,
#              device=device,
#              update_every=UPDATE_EVERY,
#              random_seed=42)
#agent.actor_local.load_state_dict(torch.load('solved_checkpoint_actor.pth'))

state = env.reset()                                   # reset the environment, get the first state
scores = np.zeros(num_agents)
while True:
    # NOTE(review): assumes agent.act accepts the observation exactly as the
    # wrapper returns it (the same layout ddpg() feeds it) - confirm.
    actions = agent.act(state, add_noise=False)       # select an action
    next_state, reward, done, _ = env.step(actions)   # send the action to the environment
    state = next_state                                # roll over the state to next time step
    scores += reward                                  # update the score
    if done:                                          # exit loop if episode finished
        break

print("Average Score: {}".format(np.mean(scores)))


In [5]:
# Shut down the Unity environment. Every cell that calls env.reset() or
# env.step() has to be executed before this one.
env.close()

2021-04-08 18:22:08 INFO [environment.py:429] Environment shut down with return code 0.


In [None]:
#a = env._get_vis_obs_shape()
#obs = env.step(action)
#obs[0][:,:,3:6].shape
#show_image(obs[0][:,:,0:3])
#show_image(obs[0][:,:,3:6])
#show_image(obs[0][:,:,6:9])
#show_image(obs[0][:,:,9:12])

In [4]:
# Random baseline: play 10 episodes with random actions (the trained agent
# is not used here) and print the score of each episode.

for _ in range(10):
    env.reset()
    scores = np.zeros(num_agents)
    while True:
        # Gaussian sample per action dimension, clipped to [-1, 1]
        actions = np.clip(np.random.normal(size=(action_size,)), -1, 1)
        _, reward, done, _ = env.step(actions)        # send the action to the environment
        scores += reward                              # update the score
        if done:                                      # exit loop if episode finished
            break

    print("Average Score: {}".format(np.mean(scores)))


Average Score: 0.30000001937150955
Average Score: 1.3000000342726707
Average Score: 0.40000002086162567
Average Score: 0.20000001788139343
Average Score: 0.9000000283122063
Average Score: 0.40000002086162567
Average Score: 0.700000025331974
Average Score: 1.600000038743019
Average Score: 0.6000000238418579
Average Score: 0.5000000223517418


In [None]:

def show_image(img):
    """Display an image as a grayscale figure.

    The first three channels of ``img`` are combined into a single
    channel with the weights (0.299, 0.587, 0.114), the result is divided
    by 255, and it is drawn on a 12x8 figure with the axes hidden.

    Args:
        img: array whose last axis holds at least three channels.
    """
    channel_weights = [0.299, 0.587, 0.114]

    # Weighted sum over the first three channels, then rescale by 255
    gray = np.dot(img[..., :3], channel_weights)
    gray = gray / 255

    plt.figure(figsize=(12, 8))
    plt.imshow(gray, cmap=plt.get_cmap('gray'))
    plt.axis('off')
    plt.show()

