# Imports

In [1]:
# ! conda install -c conda-forge gym 
import copy

import gym

from A2C.a2c_discrete import DiscreteA2C
from A2C.a2c_continuos import ContinuousA2C
from A2C.a2c_networks import A2CDB, A2CCB, A2CDRBF1NA, A2CDRBF1A, A2CCRBF1NA, A2CCRBF1A, A2CCRBF, A2CDRBF


# Create environments

In [2]:
# One probe instance per environment; they are only used here to read the
# sizes of the observation and action spaces.
mc_discrete_name = 'MountainCar-v0'
mc_discrete = gym.make(mc_discrete_name)

mc_continuous_name = 'MountainCarContinuous-v0'
mc_continuous = gym.make(mc_continuous_name)

cp_discrete_name = 'CartPole-v1'
cp_discrete = gym.make(cp_discrete_name)

ll_discrete_name = 'LunarLander-v2'
ll_discrete = gym.make(ll_discrete_name)

ll_continuous_name = 'LunarLanderContinuous-v2'
ll_continuous = gym.make(ll_continuous_name)


def describe_env(name, env, discrete):
    """Build the description dict of one environment.

    Args:
        name: Gym environment id, stored under ``"name"``.
        env: Environment instance whose spaces are inspected.
        discrete: True to read the number of actions from ``action_space.n``,
            False to read the number of action components from
            ``action_space.shape[0]``.

    Returns:
        dict with the keys ``"name"``, ``"actions_count"`` and ``"state_dim"``.
    """
    if discrete:
        actions_count = env.action_space.n
    else:
        # shape[0] is the number of action components. len(shape) is only the
        # rank of the space (1 for every flat Box), which is why the previous
        # code needed a manual "+ 1" for LunarLanderContinuous.
        actions_count = env.action_space.shape[0]

    return {
        "name": name,
        "actions_count": actions_count,
        # Read the size from the declared space instead of len(env.reset()):
        # no reset side effect, and it does not depend on whether reset()
        # returns the observation alone or an (observation, info) tuple.
        "state_dim": env.observation_space.shape[0],
    }


envs_discrete = [
    describe_env(env_name, env, discrete=True)
    for env_name, env in (
        (mc_discrete_name, mc_discrete),
        (cp_discrete_name, cp_discrete),
        (ll_discrete_name, ll_discrete),
    )
]

envs_continuous = [
    describe_env(env_name, env, discrete=False)
    for env_name, env in (
        (mc_continuous_name, mc_continuous),
        (ll_continuous_name, ll_continuous),
    )
]


# Define tests

In [3]:
# Hyper-parameters shared by every training run in this notebook.
test_params = {
    "episodes": 5000,               # passed to agent.train as total_episodes
    "number_of_parallel_envs": 20,  # environments created per training run
    "steps_in_env": 10,             # passed to agent.train as steps
    "learning_rate": 0.0001,
    "number_of_models": 2           # versions trained per test entry
}


def make_tests(label, envs, network_cls, discrete, **network_kwargs):
    """Create one test entry per environment description.

    Args:
        label: Human-readable experiment name; the environment name is
            appended in parentheses.
        envs: Iterable of environment description dicts with the keys
            ``"name"``, ``"state_dim"`` and ``"actions_count"``.
        network_cls: Network class, called as
            ``network_cls(state_dim, actions_count, **network_kwargs)``.
        discrete: Value stored under ``"discrete"`` in every entry.
        **network_kwargs: Extra keyword arguments for ``network_cls``.

    Returns:
        list of dicts with the keys ``"name"``, ``"env"``, ``"network"`` and
        ``"discrete"``. The network is instantiated here, once per entry.
    """
    return [
        {
            "name": label + "(" + env["name"] + ")",
            "env": env,
            "network": network_cls(env["state_dim"], env["actions_count"], **network_kwargs),
            "discrete": discrete
        }
        for env in envs
    ]


tests = [
    # Disabled experiments -- uncomment a line to add it to the run.
    # *make_tests("Benchmark Discrete A2C ", envs_discrete, A2CDB, True),
    # *make_tests("Discrete A2C with 1 RBF without activation function", envs_discrete, A2CDRBF1NA, True),
    # *make_tests("Discrete A2C with 1 RBF with activation function", envs_discrete, A2CDRBF1A, True),

    # *make_tests("Benchmark Continuous A2C ", envs_continuous, A2CCB, False),
    # *make_tests("Continuous A2C with 1 RBF without activation function", envs_continuous, A2CCRBF1NA, False),
    # *make_tests("Continuous A2C with 1 RBF with activation function", envs_continuous, A2CCRBF1A, False),

    # *make_tests("Discrete A2C with small and pure RBF network", envs_discrete, A2CDRBF, True),
    # *make_tests("Continuous A2C with small and pure RBF network", envs_continuous, A2CCRBF, False),

    # Active experiment: first continuous environment only
    # (pass envs_continuous to cover all of them).
    *make_tests(
        "Continuous A2C with small and Linear network",
        envs_continuous[:1],
        A2CCB,
        False,
        shared_layers=2,
    ),
]


# A2C training

In [4]:
# Reload the TensorBoard notebook extension, then start TensorBoard on the
# `runs` log directory, served at localhost:17000.
%reload_ext tensorboard
%tensorboard --logdir runs --host localhost --port 17000

In [5]:
# Train `number_of_models` versions of every configured test.
for test in tests:
    for version in range(test_params["number_of_models"]):
        # test["network"] is a single instance shared by all versions of this
        # test. Train on a private copy so each version starts from the same
        # initial weights instead of continuing from the previous version's
        # training.
        model = copy.deepcopy(test["network"])

        agent_cls = DiscreteA2C if test["discrete"] else ContinuousA2C
        agent = agent_cls(model_name=test["name"], id=version, model=model, lr=test_params["learning_rate"])

        train_envs = []
        try:
            # Create environments, each seeded with its own index.
            for id_e in range(test_params["number_of_parallel_envs"]):
                env = gym.make(test["env"]["name"])
                # Register first so the env is closed even if seeding fails.
                train_envs.append(env)
                env.seed(id_e)

            print("Training: " + test["name"] + " | version: " + str(version))
            agent.train(envs=train_envs, total_episodes=test_params["episodes"], steps=test_params["steps_in_env"])
        finally:
            # Release the environments even when training fails or is
            # interrupted from the keyboard.
            for env in train_envs:
                env.close()


Device:  cuda 
Tensor:  <class 'torch.cuda.FloatTensor'>
Training: Continuous A2C with small and Linear network(MountainCarContinuous-v0) | version: 0


Episodes:   0%|          | 0/5000 [00:00<?, ?it/s]

Saving model, best score is:  -47.61904761904762
Saving model, best score is:  -40.35952805677625
Saving model, best score is:  -30.420474134130238
Saving model, best score is:  -23.432939932726846
Saving model, best score is:  -9.088141914824023
Saving model, best score is:  -2.647852122498813
Saving model, best score is:  -0.6803567085871174
Saving model, best score is:  -0.29429977922044154
Saving model, best score is:  -0.21736473427664688
Saving model, best score is:  -0.1932078273476366
Saving model, best score is:  -0.17787818987291473
Saving model, best score is:  -0.16011330952949276
Saving model, best score is:  -0.15109215133037066
Saving model, best score is:  -0.14422605039158226
Saving model, best score is:  -0.13968603024363277
Saving model, best score is:  -0.13909775370758654
Saving model, best score is:  -0.13668548971543343
Saving model, best score is:  -0.12654734509175425
Saving model, best score is:  -0.12539994163203158
Saving model, best score is:  -0.1221971378

Episodes:   0%|          | 0/5000 [00:00<?, ?it/s]

Saving model, best score is:  -47.61904761904762
Saving model, best score is:  -24.445693812772454
Saving model, best score is:  -16.475691995588257
Saving model, best score is:  -12.442378115320306
Saving model, best score is:  -0.10310640042348615


KeyboardInterrupt: 