# Imports

In [1]:
# ! conda install -c conda-forge pytorch-model-summary 
# ! conda install -c conda-forge gym 
from torchsummary import summary
import gym
import torch.nn.functional as F
from A2C.a2c_agents import AgentA2C
from A2C.a2c_networks import Network1, Network2
from A2C.workers import Worker
import keyboard
import torch.utils.tensorboard


# Create environments and display environment properties

In [2]:
def _describe_env(env_name):
    """Create a Gym environment and collect the sizes the networks need.

    Args:
        env_name: Gym environment id, passed unchanged to ``gym.make``.

    Returns:
        dict with the keys ``name`` (the id), ``env`` (the created
        environment), ``actions_count`` and ``state_dim``.
    """
    env = gym.make(env_name)
    action_space = env.action_space
    if hasattr(action_space, "n"):
        # Discrete action space: one entry per available action.
        actions_count = action_space.n
    else:
        # Continuous (Box) action space: the action dimension is the first
        # entry of the shape. len(shape) would only give the rank of the
        # space, which is why the old code needed a manual "+ 1" for
        # LunarLanderContinuous.
        actions_count = action_space.shape[0]
    return {
        "name": env_name,
        "env": env,
        "actions_count": actions_count,
        # Read the size from the space instead of len(env.reset()): it does
        # not reset the environment as a side effect and does not depend on
        # whether reset() returns an observation or an (observation, info)
        # pair.
        "state_dim": env.observation_space.shape[0],
    }


mc_discrete_name = 'MountainCar-v0'
mc_continuous_name = 'MountainCarContinuous-v0'
cp_discrete_name = 'CartPole-v1'
ll_discrete_name = 'LunarLander-v2'
ll_continuous_name = 'LunarLanderContinuous-v2'

# Short key -> environment description; later cells look environments up by
# these keys (e.g. envs["cpd"]).
envs = {
    "mcd": _describe_env(mc_discrete_name),
    "mcc": _describe_env(mc_continuous_name),
    "cpd": _describe_env(cp_discrete_name),
    "lld": _describe_env(ll_discrete_name),
    "llc": _describe_env(ll_continuous_name),
}

# Keep the individual environment variables available under their old names.
mc_discrete = envs["mcd"]["env"]
mc_continuous = envs["mcc"]["env"]
cp_discrete = envs["cpd"]["env"]
ll_discrete = envs["lld"]["env"]
ll_continuous = envs["llc"]["env"]

# One line per environment: name, number of actions, state dimension.
for spec in envs.values():
    print(spec['name'], spec['actions_count'], spec['state_dim'])


MountainCar-v0 3 2
MountainCarContinuous-v0 1 2
CartPole-v1 2 4
LunarLander-v2 4 8
LunarLanderContinuous-v2 2 8


# Shared parameters

In [3]:
# Number of training episodes; handed to a2c_agent.train() as total_episodes.
EPISODES = 1_000

# Initialization of A2C

In [4]:
# Run configuration for this A2C experiment.
A2C_NAME = "a2c_cpd_n2"
A2C_ID = 0
LEN_WORKERS = 20
ENV_STEPS = 15

# Network sized from the "cpd" entry of envs, wrapped by the agent.
a2c_network = Network2(
    state_dim=envs["cpd"]["state_dim"],
    actions_count=envs["cpd"]["actions_count"],
)
a2c_agent = AgentA2C(model_name=A2C_NAME, id=A2C_ID, model=a2c_network)


def _spawn_worker(worker_id):
    """Build one Worker with its own environment, seeded with the worker id."""
    worker_env = gym.make(envs["cpd"]["name"])
    worker_env.seed(worker_id)
    return Worker(worker_id, worker_env, a2c_agent, print_score=False)


# One worker (and one separately seeded environment) per id in 0..LEN_WORKERS-1.
workers = [_spawn_worker(worker_id) for worker_id in range(LEN_WORKERS)]

# Pick up a previously saved model and progress, if training was interrupted.
a2c_agent.load_model()
a2c_agent.load_progress()


Device:  cuda:0
Model not found
No progress found


# Training

In [5]:
# Train the agent with all workers, then persist the training progress and
# the model.
a2c_agent.train(workers=workers, total_episodes=EPISODES, steps=ENV_STEPS)
try:
    a2c_agent.save_progress()
finally:
    # Always attempt to save the model, even if writing the progress file
    # fails (a recorded run of this cell ended with a PermissionError on the
    # progress JSON). The original error still propagates afterwards.
    a2c_agent.save_model()

Episodes:   0%|          | 0/1000 [00:00<?, ?it/s]

Saving model, best score is:  13.528571428571428
Saving model, best score is:  21.08
Saving model, best score is:  22.55
Saving model, best score is:  23.57


PermissionError: [Errno 13] Permission denied: 'progress/a2c_cpd_n2/0.json'

In [6]:
# Load (or reload) the TensorBoard notebook extension so %tensorboard is available.
%reload_ext tensorboard
# Open TensorBoard inside the notebook, reading logs from the "runs" directory.
# Presumably this is where the agent writes its summaries; TODO confirm.
# NOTE(review): 8888 is also Jupyter's default port; confirm the two do not clash.
%tensorboard --logdir runs --host localhost --port 8888

Reusing TensorBoard on port 8888 (pid 13188), started 4:53:40 ago. (Use '!kill 13188' to kill it.)

# Evaluation

In [7]:
# Roll out the agent in the rendered "cpd" environment until Tab is pressed.
# NOTE(review): the last recorded run of this cell failed with a tensor-shape
# RuntimeError. Its origin is not visible in this cell; presumably
# a2c_agent.act() expects a differently shaped observation. TODO confirm.
eval_env = envs["cpd"]["env"]
try:
    obs = eval_env.reset()
    while True:
        eval_env.render()
        # act() returns two values; only the first (actor output) is used.
        actor_prob, _ = a2c_agent.act(obs)
        # Turn the actor output into probabilities and sample one action.
        step_probs = F.softmax(actor_prob, dim=-1)
        step_actions = step_probs.multinomial(num_samples=1)
        obs, _, term, _ = eval_env.step(step_actions.item())

        # Start a new episode as soon as the current one terminates.
        if term:
            obs = eval_env.reset()

        if keyboard.is_pressed('Tab'):
            break
finally:
    # Close the environment (and its render window) even when the loop
    # raises, instead of only on a clean exit.
    eval_env.close()

RuntimeError: The expanded size of the tensor (512) must match the existing size (4) at non-singleton dimension 1.  Target sizes: [4, 512, 4].  Tensor sizes: [4, 1]