# Imports

In [1]:
# ! conda install -c conda-forge gym 
import gym
from A2C.a2c_discrete import DiscreteA2C
from A2C.a2c_continuos import ContinuousA2C
from A2C.a2c_networks import A2CContinuousNetwork1 , A2CDiscreteNetwork1


# Create environments and display environment properties

In [2]:
def _describe_env(name):
    """Create the gym environment `name` and collect its basic dimensions.

    Returns a dict with the keys used throughout this notebook:
      - "name":          the gym environment id that was passed in
      - "env":           the environment instance returned by `gym.make`
      - "actions_count": number of discrete actions (`Discrete.n`), or the
                         number of components of the action vector for a
                         continuous (Box) action space
      - "state_dim":     number of components of the observation vector

    Dimensions are read from the declared spaces instead of from a sample:
      * `len(action_space.shape)` is the *rank* of the action array (1 for any
        flat Box), not its size, so it only matched by accident and needed a
        manual `+ 1` for LunarLanderContinuous. `shape[0]` is the real size.
      * `len(env.reset())` depends on what `reset()` returns; newer gym
        releases return an `(observation, info)` tuple, which would make every
        state_dim equal to 2. `observation_space.shape[0]` is stable.
    Note: the environment is therefore NOT reset here; call `reset()` before
    stepping it.
    """
    env = gym.make(name)
    action_space = env.action_space
    if isinstance(action_space, gym.spaces.Discrete):
        actions_count = action_space.n
    else:
        # Continuous control: one entry per action component.
        actions_count = action_space.shape[0]
    return {
        "name": name,
        "env": env,
        "actions_count": actions_count,
        "state_dim": env.observation_space.shape[0],
    }


# Gym ids of the benchmark environments.
mc_discrete_name = 'MountainCar-v0'
mc_continuous_name = 'MountainCarContinuous-v0'
cp_discrete_name = 'CartPole-v1'
ll_discrete_name = 'LunarLander-v2'
ll_continuous_name = 'LunarLanderContinuous-v2'

# Catalogue of environments keyed by a short tag
# (mc = MountainCar, cp = CartPole, ll = LunarLander; d/c = discrete/continuous).
envs = {
    "mcd": _describe_env(mc_discrete_name),
    "mcc": _describe_env(mc_continuous_name),
    "cpd": _describe_env(cp_discrete_name),
    "lld": _describe_env(ll_discrete_name),
    "llc": _describe_env(ll_continuous_name),
}

# Keep the individual environment handles available under their original names.
mc_discrete = envs["mcd"]["env"]
mc_continuous = envs["mcc"]["env"]
cp_discrete = envs["cpd"]["env"]
ll_discrete = envs["lld"]["env"]
ll_continuous = envs["llc"]["env"]

# Summary line per environment: name, action count, state dimension.
for spec in envs.values():
    print(spec['name'], spec['actions_count'], spec['state_dim'])


MountainCar-v0 3 2
MountainCarContinuous-v0 1 2
CartPole-v1 2 4
LunarLander-v2 4 8
LunarLanderContinuous-v2 2 8


# Shared parameters

In [3]:
# Number of training episodes; handed to the agent's train() call as total_episodes.
EPISODES = 5_000

# Initialization of A2C

In [4]:
# Identity passed to the agent constructor (model_name / id).
A2C_NAME, A2C_ID = "a2c_benchmark", 0
# Number of parallel training environments, and the `steps` value given to train().
NUM_ENVS, ENV_STEPS = 20, 10

# Every setting below targets the discrete LunarLander entry of the catalogue.
lander_spec = envs["lld"]

a2c_network = A2CDiscreteNetwork1(
    state_dim=lander_spec["state_dim"],
    actions_count=lander_spec["actions_count"],
)
a2c_agent = DiscreteA2C(
    model_name=A2C_NAME,
    id=A2C_ID,
    model=a2c_network,
    lr=0.0001,
)

# Build NUM_ENVS independent copies of the environment, each seeded with its
# own index so that the copies do not share a random stream.
train_envs = []
for seed in range(NUM_ENVS):
    worker_env = gym.make(lander_spec["name"])
    worker_env.seed(seed)
    train_envs.append(worker_env)

# Resume from a previously saved model and progress record, if any exist
# (useful when training was interrupted).
a2c_agent.load_model()
a2c_agent.load_progress()


Device:  cuda 
Tensor:  <class 'torch.cuda.FloatTensor'>
Model not found
No progress found


# Training

In [5]:
# Train the agent on the seeded environment copies for EPISODES episodes.
# NOTE(review): `steps` is presumably the rollout length per environment
# between updates - confirm against DiscreteA2C.train.
a2c_agent.train(
    envs=train_envs,
    total_episodes=EPISODES,
    steps=ENV_STEPS,
)

Episodes:   0%|          | 0/5000 [00:00<?, ?it/s]

Saving model, best score is:  4.5842640557254795
Saving model, best score is:  7.8379401437326415
Saving model, best score is:  10.70561188688619
Saving model, best score is:  19.671288193483313
Saving model, best score is:  31.761136468688473
Saving model, best score is:  37.84549413280158
Saving model, best score is:  52.58604924618801
Saving model, best score is:  57.85871184451114
Saving model, best score is:  65.309704211724
Saving model, best score is:  70.62880467790833
Saving model, best score is:  72.87544707947643
Saving model, best score is:  84.65071208093792
Saving model, best score is:  105.1396254279201
Saving model, best score is:  126.67028908126098
Saving model, best score is:  132.00819561082957


In [6]:
# (Re)load the TensorBoard IPython extension so the %tensorboard magic is available.
%reload_ext tensorboard
# Start TensorBoard inline, reading event files from the local `runs` directory
# and serving on localhost:9000.
# NOTE(review): `runs` is presumably where the A2C agent writes its logs - confirm.
%tensorboard --logdir runs --host localhost --port 9000