<a href="https://colab.research.google.com/github/mirklys/little-projects/blob/main/thesis/training_full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Show which GPU (if any) this runtime has been assigned, along with driver/CUDA versions.
!nvidia-smi

Sun Apr 24 08:22:47 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    12W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Install the Box2D physics bindings and gym with its optional environment extras.
# NOTE(review): both `Box2D` and `box2d-py` are installed here — presumably only one is needed; confirm.
# NOTE(review): versions are not pinned, so a fresh runtime may resolve different releases.
!pip3 install Box2D
!pip3 install box2d-py
!pip3 install gym[all]
# NOTE(review): `Box_2D` does not look like a gym extra name (the usual spelling is `box2d`);
# confirm pip is not just ignoring this extra.
!pip3 install gym[Box_2D]

Collecting Box2D
  Downloading Box2D-2.3.10-cp37-cp37m-manylinux1_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 32.4 MB/s 
[?25hInstalling collected packages: Box2D
Successfully installed Box2D-2.3.10
Collecting box2d-py
  Downloading box2d_py-2.3.8-cp37-cp37m-manylinux1_x86_64.whl (448 kB)
[K     |████████████████████████████████| 448 kB 31.4 MB/s 
[?25hInstalling collected packages: box2d-py
Successfully installed box2d-py-2.3.8
Collecting mujoco-py<2.0,>=1.50
  Downloading mujoco-py-1.50.1.68.tar.gz (120 kB)
[K     |████████████████████████████████| 120 kB 33.1 MB/s 
Collecting glfw>=1.4.0
  Downloading glfw-2.5.3-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38-none-manylinux2014_x86_64.whl (206 kB)
[K     |████████████████████████████████| 206 kB 59.3 MB/s 
Collecting lockfile>=0.12.2
  Downloading lockfile-0.12.2-py2.py3-none-any.whl (13 kB)
Building wheels for collected packages: mujoco-py
  Building wheel for mujoco-py (setup.py) ... [?25l

In [4]:
# Install both the legacy `stable_baselines` package and `stable_baselines3`.
# NOTE(review): the import cell only takes `set_global_seeds` and `make_vec_env` from the legacy
# package; confirm it is still required, and consider pinning versions.
!pip install stable_baselines
!pip install stable_baselines3

In [5]:
# NOTE(review): nothing in the visible cells imports tensorflow directly — presumably it is here
# as a dependency of the legacy `stable_baselines` package; confirm and pin a version if so.
!pip install tensorflow

In [6]:
import gym
import os
import numpy as np
import torch as th
from torch import nn
from torch.distributions.bernoulli import Bernoulli
import matplotlib.pyplot as plt
from collections import defaultdict
import time

from stable_baselines3 import PPO, A2C, SAC
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.utils import get_device
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines.common import set_global_seeds, make_vec_env
from stable_baselines3.common.monitor import Monitor

In [7]:
# Mount Google Drive at /content/gdrive; the project paths configured below live under this mount.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [8]:
# Root of the project on the mounted Drive; every artefact directory sits beneath it.
PATH_BASE = '/content/gdrive/MyDrive/Thesis Project'

# One directory per artefact type. The trailing slash of each sub-directory is kept in the path.
PATH_DATA, PATH_NETWORKS, PATH_PLOTS, PATH_RESULTS, PATH_LOGS = (
    os.path.join(PATH_BASE, subdir)
    for subdir in ('data/', 'networks/', 'plots/', 'results/', 'logs/')
)

# Work from the project root from here on.
os.chdir(PATH_BASE)

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
dev = th.device('cuda') if th.cuda.is_available() else th.device('cpu')
dev

device(type='cuda')

In [10]:
class MaskedMLP(BaseFeaturesExtractor):
    """Two-layer ELU feature extractor with optional dropout and a binary unit mask.

    The network is ``Linear -> ELU -> Linear -> [Dropout] -> ELU``, with both
    linear layers ``size`` units wide. A 0/1 mask over the second layer's units
    is sampled once at construction (and again whenever ``mask_units`` is
    called); each unit is kept with probability ``1 - percent_to_mask``.

    Args:
        observation_space: Observation space; ``shape[0]`` is used as the input width.
        dropout_param: Drop probability handed to ``nn.Dropout``.
        percent_to_mask: Fraction of second-layer units to zero out, on average.
        size: Width of both hidden layers.
        job: ``'train'`` (dropout is part of the forward pass, mask applied only
            outside training mode) or ``'train_masked'`` (no dropout, mask
            always applied).

    Raises:
        AssertionError: If ``job`` is neither ``'train'`` nor ``'train_masked'``.
    """

    def __init__(self, observation_space, dropout_param=0.0, percent_to_mask=0.0, size=32, job='train'):
        assert job in ('train', 'train_masked'),\
            "This class can only be initialized for jobs: train, train_masked"

        # Both layers share one width; l2_size is also what the base class
        # constructor receives as its second argument.
        self.l1_size = self.l2_size = size
        self.job = job
        super().__init__(observation_space, self.l2_size)

        self.dropout_param = dropout_param
        self.percent_to_mask = percent_to_mask

        n_inputs = observation_space.shape[0]

        # Layers are created in the same order as they are applied in forward().
        self.linear1 = nn.Linear(n_inputs, self.l1_size)
        self.linear2 = nn.Linear(self.l1_size, self.l2_size)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(p=self.dropout_param)

        # Draw the initial unit mask.
        self.mask_units(self.percent_to_mask)

    def mask_units(self, percent_to_mask):
        """Sample a fresh 0/1 mask over the second layer's units.

        Each of the ``l2_size`` entries is 1 with probability
        ``1 - percent_to_mask``. The distribution is stored as
        ``mask_distribution`` and the sampled mask as ``mask``.
        """
        keep_probability = 1.0 - percent_to_mask
        self.mask_distribution = Bernoulli(th.tensor([keep_probability] * self.l2_size))
        self.mask = self.mask_distribution.sample()

    def forward(self, observations):
        """Map observations to second-layer activations, masked when applicable."""
        hidden = self.elu(self.linear1(observations))
        pre_activation = self.linear2(hidden)
        # Dropout sits before the final ELU and is only used for the plain 'train' job.
        if self.job == 'train':
            pre_activation = self.dropout(pre_activation)
        features = self.elu(pre_activation)

        # The mask is applied on every pass for 'train_masked', and otherwise
        # only when the module is not in training mode.
        apply_mask = self.job == 'train_masked' or not self.training
        if apply_mask:
            # The mask is a plain attribute, so it is moved to the activations' device here.
            self.mask = self.mask.to(features.device)
            features = features * self.mask

        return features


In [None]:
# Train one SAC agent per (hidden size, dropout) combination on `game`.
# Each agent is trained in rounds: learn -> save checkpoint -> evaluate, until the
# mean evaluation reward reaches `max_rew` or MAX_TRAINING_ROUNDS is exhausted.
# Progress is printed and mirrored to PATH_LOGS/<game>.txt.

# The import cell binds `make_vec_env` twice and the later `stable_baselines.common`
# import wins. Re-bind the stable_baselines3 version explicitly so the vectorised
# environment comes from the same library as the `SAC` and `DummyVecEnv` used below.
from stable_baselines3.common.env_util import make_vec_env

# Mean-episode-reward target per environment. Pendulum-v1 rewards are never positive,
# so a positive target there (the previous value was 500) can never be reached and the
# training loop would spin forever. These targets are tunable choices, not constants
# of the environments.
TARGET_REWARDS = {
    'LunarLander-v2': 200,
    'Pendulum-v1': -200,
}
# Upper bound on learn/evaluate rounds per model so one stubborn configuration
# (e.g. very high dropout) cannot block the whole sweep.
MAX_TRAINING_ROUNDS = 10

#game = 'LunarLander-v2'
game = 'Pendulum-v1'
max_rew = TARGET_REWARDS[game]

# Make sure the log and checkpoint directories exist before anything is written.
os.makedirs(PATH_LOGS, exist_ok=True)
os.makedirs(os.path.join(PATH_NETWORKS, game), exist_ok=True)

with open(os.path.join(PATH_LOGS, "{}.txt".format(game)), "w") as f:

    def log_line(message):
        """Print `message` and append it to the log file as its own line.

        A newline is added when the message does not already end with one, and
        the file is flushed so progress survives an interrupted runtime.
        """
        f.write(message if message.endswith("\n") else message + "\n")
        f.flush()
        print(message)

    log_line("Started training {} models for different number of steps \n".format(game))
    for size in [128, 256, 512, 1024]:
        # NOTE: np.arange yields floats such as 0.30000000000000004, which end up verbatim
        # in checkpoint names; left unchanged so existing checkpoints keep their names.
        for dropout in np.arange(0, 1, 0.1):
            num_training_steps = int(3e5)
            # Start below any possible score so the first round always runs,
            # including for negative reward targets.
            rew = float("-inf")
            policy_kwargs = dict(
                features_extractor_class=MaskedMLP,
                features_extractor_kwargs=dict(dropout_param=dropout, size=size, job='train')
            )
            # MaskedMLP uses `size` for both hidden layers, so the checkpoint name is built
            # from `size` directly instead of reaching into the policy object.
            model_save_title = "{}.{}x{}.dropout_{}".format(game, size, size, dropout)
            model_save_path = os.path.join(PATH_NETWORKS, game, model_save_title)

            env = make_vec_env(game, n_envs=10, seed=0, vec_env_cls=DummyVecEnv)
            try:
                model = SAC('MlpPolicy', env, verbose=0,
                            policy_kwargs = policy_kwargs, device=dev)
                print("device", model.device)
                log_line("Training {} {} model for {} steps \n".format(dropout, size, num_training_steps))

                # Wall-clock time: this is what "it took N min" is meant to report.
                started_at = time.perf_counter()
                rounds_done = 0
                while rew < max_rew and rounds_done < MAX_TRAINING_ROUNDS:
                    model.learn(num_training_steps)
                    rounds_done += 1
                    # Checkpoint after every round so progress is never lost.
                    model.save(model_save_path)
                    log_line("we saved it nevertheless")
                    rew, std = evaluate_policy(model, env, n_eval_episodes=int(1e2))
                    log_line("we reached {} +-{} reward score".format(rew, std))
                    # Still more than 10% short of the target, with a stable evaluation
                    # (std below 10% of the mean's magnitude): continue in shorter rounds.
                    # Written with abs() so it also behaves for negative rewards and
                    # never divides by zero.
                    if rew < max_rew - 0.1*abs(max_rew) and std < 0.1*abs(rew):
                        num_training_steps = int(1e5)
                        log_line("we need additional {} steps to try to reach around {} cumulative reward score\n".format(num_training_steps, max_rew))

                if rew < max_rew:
                    log_line("stopped after {} rounds without reaching {} reward score".format(rounds_done, max_rew))

                elapsed_min = round((time.perf_counter() - started_at)/60, 2)
                log_line("it took {} min to train the {}x{} model with {}% dropout\n".format(elapsed_min, size, size, dropout*100))
            finally:
                # Release this configuration's environments before building the next ones.
                env.close()

Started training Pendulum-v1 models for different number of steps 

device cuda
Training 0.0 128 model for 300000 steps 

