<a href="https://colab.research.google.com/github/mirklys/little-projects/blob/main/thesis/testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip3 install Box2D
!pip3 install box2d-py
!pip3 install gym[all]
!pip3 install gym[Box_2D]

In [None]:
!pip install stable_baselines
!pip install stable_baselines3

In [None]:
!pip install tensorflow

In [None]:
import gym
import os
import numpy as np
import torch as th
from torch import nn
from torch.distributions.bernoulli import Bernoulli
import matplotlib.pyplot as plt
from collections import defaultdict
import time

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.utils import get_device
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines.common import set_global_seeds, make_vec_env
from stable_baselines3.common.monitor import Monitor

In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive; the
# project paths defined further down in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if th.cuda.is_available():
    device = th.device('cuda')
else:
    device = th.device('cpu')
device

device(type='cuda')

In [None]:
# Root of the thesis project on the mounted Drive; every other location
# used by the notebook is a sub-folder of it.
PATH_BASE = '/content/gdrive/MyDrive/Thesis Project'
PATH_DATA, PATH_NETWORKS, PATH_PLOTS, PATH_RESULTS, PATH_LOGS = (
    os.path.join(PATH_BASE, subfolder)
    for subfolder in ('data/', 'networks/', 'plots/', 'results/', 'logs/')
)
# Make the project root the working directory for the rest of the session.
os.chdir(PATH_BASE)

In [None]:
def test_network(game:str, size:int, dropout:float) -> str:
    """Evaluate one saved PPO network and summarise its reward.

    The network file is looked up under ``PATH_NETWORKS/<game>/`` using the
    title ``"<game>.<size>x<size>.dropout_<dropout>"``.

    Args:
        game: Gym environment id; also the name of the sub-folder that
            holds the saved network.
        size: Layer size used in the network title.
        dropout: Dropout value used in the network title.

    Returns:
        A sentence with the mean reward and its standard deviation over
        10 evaluation episodes, both formatted with two decimals.
    """
    title = "{}.{}x{}.dropout_{}".format(game, size, size, dropout)
    env = make_vec_env(game, n_envs=10, seed=0, vec_env_cls=DummyVecEnv)
    try:
        model = PPO.load(os.path.join(PATH_NETWORKS, game, title))
        rew, std = evaluate_policy(model, env, n_eval_episodes=10)
    finally:
        # Release the 10 sub-environments even if loading or evaluation
        # fails; a fresh vectorized env is built on every call.
        env.close()

    return "Networks cumulative reward {:.2f} ±{:.2f}".format(rew, std)

In [None]:
# Sanity check: evaluate the saved CartPole-v1 network whose title carries
# size 128 and dropout 0.8.
test_network('CartPole-v1', 128, 0.8)



'Networks cumulative reward 500.00 ±0.00'

In [None]:
def parse_title(title: str) -> tuple:
    """Split a saved-network title into its game, size and dropout parts.

    Titles follow ``"<game>.<size>x<size>.dropout_<dropout>"``. Anything
    after the dropout number (for example a ``.zip`` extension or a
    ``.mask_<m>.overtrained`` suffix) is ignored, and underscores inside
    the game id do not disturb the parsing.

    Args:
        title: Network title or file name.

    Returns:
        ``(game, size, dropout)`` where ``game`` and ``size`` are strings
        (``size`` is the part before the ``x``) and ``dropout`` is a float.

    Raises:
        ValueError: If the title has no ``.<size>x<size>`` part or no
            ``dropout_<number>`` part.
    """
    import re  # local import: only this helper needs it

    splitted = title.split(".")
    if len(splitted) < 2:
        raise ValueError(
            "not a network title (expected '<game>.<size>x<size>.dropout_<p>'): {!r}".format(title)
        )
    game = splitted[0]
    size = splitted[1].split("x")[0]

    # Anchor on the literal 'dropout_' marker instead of the first '_' in
    # the title, and read only the number that follows it.
    found = re.search(r"dropout_(\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)", title)
    if found is None:
        raise ValueError("no 'dropout_<number>' part in network title: {!r}".format(title))
    dropout = float(found.group(1))
    return game, size, dropout

In [None]:
def eval_with_mask(model, percent_to_mask, env, *, n_masks: int = 20,
                   n_eval_episodes: int = 10) -> tuple:
    """Average a model's reward over several freshly drawn unit masks.

    For each repetition the features extractor's ``mask_units`` is called
    with ``percent_to_mask`` and the model is then evaluated on ``env``.

    Args:
        model: Loaded model whose ``policy.features_extractor`` provides
            ``mask_units(percent_to_mask=...)``.
        percent_to_mask: Value forwarded to ``mask_units``.
        env: Environment handed to ``evaluate_policy``.
        n_masks: Number of mask/evaluate repetitions (default 20).
        n_eval_episodes: Episodes per evaluation (default 10).

    Returns:
        ``(mean, std)`` of the per-repetition mean rewards, each rounded to
        three decimals. The std is the spread across repetitions, not
        across individual episodes.

    Raises:
        ValueError: If ``n_masks`` is smaller than 1.
    """
    if n_masks < 1:
        raise ValueError("n_masks must be at least 1, got {}".format(n_masks))

    rewards = []
    for _ in range(n_masks):  # repeat over multiple random masks
        model.policy.features_extractor.mask_units(percent_to_mask=percent_to_mask)
        # Only the mean of each evaluation is kept; its per-episode std is unused.
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
        rewards.append(mean_reward)
    return np.round(np.mean(rewards), 3), np.round(np.std(rewards), 3)

In [None]:
import time
import warnings
import pickle

# Silence library warnings so the per-mask progress output stays readable.
warnings.filterwarnings("ignore")

# Value passed to every PPO.learn() call, and the unit in which further
# training is counted below.
# NOTE(review): PPO collects whole rollouts, so one learn() call may run more
# environment steps than this value; the recorded step totals would then
# understate the real amount of training - confirm.
num_training_steps = 10
overtrained_evals = []  # rows of [name, reward, std, recorded further-training steps]
inc = 0.05  # spacing between consecutive mask fractions
environment = 'LunarLander-v2'

network_dir = os.path.join(PATH_NETWORKS, environment)
# Keep regular files only: the 'further_trained' sub-folder written by the
# saves below lives in this same directory and is not a parsable network title.
nets = [
    entry for entry in os.listdir(network_dir)
    if os.path.isfile(os.path.join(network_dir, entry))
]

evals = []  # rows of [game, size, dropout, mean rewards per mask, std per mask]
env = make_vec_env(environment, n_envs=10, seed=0, vec_env_cls=DummyVecEnv)  # shared by all networks
# Target reward; lowered to 180 so retraining does not get stuck trying to
# reach 200.
max_rew = 180

# Result files are named after the environment being processed.
overtrained_path = os.path.join(PATH_RESULTS, "overtrained_evals_{}.pickle".format(environment))
evals_path = os.path.join(PATH_RESULTS, "evals_{}.pickle".format(environment))

for net in nets:
    game, size, dropout = parse_title(net)
    print(size, dropout)
    model = PPO.load(os.path.join(PATH_NETWORKS, game, net), device=device)
    model.set_env(env)
    # A second copy is retrained so that masking and retraining never touch
    # the model used for the initial testing.
    # NOTE(review): this copy is loaded once per network, so retraining for a
    # later mask continues from weights already retrained for earlier masks -
    # confirm this is intended.
    model_msk = PPO.load(os.path.join(PATH_NETWORKS, game, net), device=device)
    model_msk.set_env(env)
    model.policy.features_extractor.training = False  # disabling training
    print("Masking size: ")
    mean_rews, std_rews = [], []
    for mask in np.arange(0., 1, inc):
        mask_fraction = float(mask)
        print(mask, end=" ")
        # Initial testing of the untouched model under this mask fraction.
        mask_eval = eval_with_mask(model, percent_to_mask=mask_fraction, env=env)
        print(mask_eval, end='\n')
        rew, std = mask_eval
        mean_rews.append(rew)
        std_rews.append(std)
        if rew < max_rew:
            print("we need some more training for it!")
            name = net
            extractor = model_msk.policy.features_extractor
            # Enable masked training; per the original author this mode does
            # not pass through the dropout layer.
            extractor.job = 'train_masked'
            extractor.percent_to_mask = mask_fraction
            extractor.mask_units(percent_to_mask=mask_fraction)

            total_further_train_steps = 0

            print(name, mask)
            print("steps: ", total_further_train_steps, "rew: ", rew, "+-", std)
            # Leaving the loop on success is handled by the loop condition.
            while rew < max_rew:
                model_msk.learn(num_training_steps)
                rew, std = evaluate_policy(model_msk, env, n_eval_episodes=10)
                total_further_train_steps += num_training_steps
                print("steps: ", total_further_train_steps, ". rew: ", rew, "+- ", std)
                # Give up when the reward is consistently low (below 30 with
                # a relative spread under 10%).
                if np.abs(std/rew) < 0.1 and rew < 30:
                    break

            name += '.mask_{}.overtrained'.format(mask)
            model_msk.save(os.path.join(PATH_NETWORKS, game, 'further_trained', name))
            print('saved freshly further trained model \n')
            overtrained_evals.append([name, rew, std, total_further_train_steps])
            # Results are written after every retraining because Google Colab
            # stops running after around 6 hours of inactivity; the run is
            # then continued from where the printed output stopped.
            with open(overtrained_path, "wb") as f:
                pickle.dump(overtrained_evals, f)
                print("we have overwritten {}".format(os.path.basename(overtrained_path)))
    print('\n')

    evals.append([game, size, dropout, mean_rews, std_rews])
    with open(evals_path, "wb") as f:
        pickle.dump(evals, f)
        print("we have overwritten {}".format(os.path.basename(evals_path)))
