In [None]:
pip install scipy==1.6.3
pip install stable-baselines3[extra]
apt-get install -y xvfb python-opengl > /dev/null 2>&1
pip install gym pyvirtualdisplay > /dev/null 2>&1

pip install -U kora
pip install tensorboard
pip install seaborn

In [None]:
from torch import nn as nn
from stable_baselines3 import PPO, A2C, SAC, DQN, DDPG
import gym
import csv
import json
import os
import time
from glob import glob
from typing import List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display #as ipythondisplay
from pyvirtualdisplay import Display
import tensorflow as tf
from google.colab import files
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.type_aliases import GymStepReturn
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.callbacks import BaseCallback
import torch as th
import seaborn as sb
import itertools
from itertools import product
from matplotlib import colors
import scipy
from sklearn.preprocessing import StandardScaler
import matplotlib.gridspec as GridSpec
from statsmodels.tsa.stattools import adfuller
from scipy import signal
from scipy import stats

In [None]:
# Names exported by a star-import of this code.
# NOTE(review): `get_monitor_files` is neither defined nor imported in the visible code — confirm this list is still wanted.
__all__ = ["Monitor", "get_monitor_files", "load_results"]

def create_model(env: gym.Env, algo: str, hyperparams:dict):
    """Build a fresh Stable-Baselines3 model for `env`.

    Args:
        env: environment the model is attached to.
        algo: one of "ppo", "a2c", "dqn", "sac", "ddpg"; any other value
            raises KeyError.
        hyperparams: keyword arguments forwarded to the algorithm constructor.

    Returns:
        The model instance, created with the 'MlpPolicy' policy.
    """
    algo_classes = {"ppo": PPO, "a2c": A2C, "dqn": DQN, "sac": SAC, "ddpg": DDPG}
    model_cls = algo_classes[algo]
    return model_cls(policy='MlpPolicy', env=env, **hyperparams)

#%%

def load_model(algo: str, net_file:str):
    """Load a saved Stable-Baselines3 model.

    Args:
        algo: one of "ppo", "a2c", "dqn", "sac", "ddpg"; any other value
            raises KeyError.
        net_file: path handed to the algorithm class's `load` method.

    Returns:
        Whatever `<AlgoClass>.load(net_file)` returns.
    """
    algo_classes = {"ppo": PPO, "a2c": A2C, "dqn": DQN, "sac": SAC, "ddpg": DDPG}
    model_cls = algo_classes[algo]
    return model_cls.load(net_file)

#%%

def load_hyperparams(env_id:str, algo:str, json_file:str):
    """Read hyperparameters from a JSON file and convert them to model kwargs.

    Args:
        env_id: environment id, forwarded to `get_hyperparams`.
        algo: algorithm name, forwarded to `get_hyperparams`.
        json_file: path of the JSON file read with `load_dict`.

    Returns:
        The dictionary produced by `get_hyperparams`.
    """
    raw_hyperparams = load_dict(filename=json_file)
    return get_hyperparams(env_id=env_id, algo=algo, hyperparams=raw_hyperparams)

#%%


def get_hyperparams(env_id:str, algo:str, hyperparams:dict)->dict:
    """Translate a flat hyperparameter dictionary into model constructor kwargs.

    Keys consumed here (and therefore not forwarded as-is):
      * everything handled by `get_extractor_hyperparams` (feature extractor setup);
      * 'n_neurons' + 'n_layers' -> `policy_kwargs['net_arch']`
        (separate `pi`/`vf` lists for "ppo"/"a2c", a plain list otherwise);
      * 'activation_fn' ("tanh", "relu", "elu", "leaky_relu") -> `policy_kwargs['activation_fn']`;
      * 'action_noise' ("normal" or "ornstein-uhlenbeck") + 'noise_std' -> an
        action-noise object stored under 'action_noise'. Any other
        'action_noise' value is dropped without adding noise.

    Args:
        env_id: environment id; an environment is created temporarily only when
            'action_noise' is present, to read the action-space shape.
        algo: algorithm name; "ppo" and "a2c" are treated as actor-critic and
            get `ortho_init=False`.
        hyperparams: raw hyperparameters. The caller's dictionary is left untouched.

    Returns:
        A dict with a 'policy_kwargs' entry plus every remaining hyperparameter.

    Raises:
        KeyError: unknown 'activation_fn', or 'action_noise' given without 'noise_std'.
    """
    # Work on a shallow copy so keys consumed below do not disappear from the
    # caller's dictionary (get_extractor_hyperparams also deletes keys in place).
    hyperparams = dict(hyperparams)

    policy_kwargs = get_extractor_hyperparams(env_id=env_id, hyperparams=hyperparams)
    is_actor_critic = algo in ('ppo', 'a2c')

    if 'n_neurons' in hyperparams and 'n_layers' in hyperparams:
        layer_sizes = [int(hyperparams.pop('n_neurons'))] * int(hyperparams.pop('n_layers'))
        if is_actor_critic:
            policy_kwargs['net_arch'] = [dict(pi=list(layer_sizes), vf=list(layer_sizes))]
        else:
            policy_kwargs['net_arch'] = list(layer_sizes)

    if 'activation_fn' in hyperparams:
        activations = {"tanh": nn.Tanh, "relu": nn.ReLU, "elu": nn.ELU, "leaky_relu": nn.LeakyReLU}
        policy_kwargs['activation_fn'] = activations[hyperparams.pop('activation_fn')]

    if is_actor_critic:
        policy_kwargs['ortho_init'] = False

    if 'action_noise' in hyperparams:
        # Imported here because the noise classes are not part of the notebook's import cell.
        from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

        noise_kind = hyperparams.pop('action_noise')
        noise_std = hyperparams.pop('noise_std')

        # A throw-away environment is only needed to read the action dimension.
        env = gym.make(env_id)
        try:
            action_shape = env.action_space.shape
            n_actions = action_shape[-1] if action_shape != () else 0
        finally:
            env.close()

        noise_classes = {"normal": NormalActionNoise,
                         "ornstein-uhlenbeck": OrnsteinUhlenbeckActionNoise}
        if noise_kind in noise_classes:
            # Keep the built noise object so it is actually forwarded to the model.
            hyperparams['action_noise'] = noise_classes[noise_kind](
                mean=np.zeros(n_actions), sigma=noise_std * np.ones(n_actions))

    kwargs = dict(policy_kwargs=policy_kwargs)
    kwargs.update(hyperparams)
    return kwargs



def load_dict(filename:str)->dict:
    """Parse a JSON file and return its content.

    Args:
        filename: path of the JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: the file cannot be opened.
        json.JSONDecodeError: the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default encoding.
    with open(filename, encoding="utf-8") as json_file:
        return json.load(json_file)

#%%

def get_extractor_hyperparams(env_id:str, hyperparams:dict)->dict:
    """Build the feature-extractor part of `policy_kwargs`.

    Nothing happens (an empty dict is returned) unless `hyperparams` contains at
    least one of 'order', 'n_tilings', 'normalize', 'octa' or 'fourier_nn'.
    Otherwise the extractor class is chosen in this priority order:
    'order' (sub-selected by 'gaussian_std', 'poly', 'chebyshev', else Fourier
    basis), 'n_tilings', 'octa', 'fourier_nn', and finally the plain normalizer.

    Every key consumed here is deleted from `hyperparams` in place.

    Args:
        env_id: environment id, passed to the extractor and matched against the
            keys of UNBOUNDED_NORMALIZER_DEFAULT when 'order' is present.
        hyperparams: raw hyperparameters (mutated).

    Returns:
        {} or a dict with 'features_extractor_class' and 'features_extractor_kwargs'.
    """
    trigger_keys = ('order', 'n_tilings', 'normalize', 'octa', 'fourier_nn')
    if not any(key in hyperparams for key in trigger_keys):
        return {}

    extractor_kwargs = dict(env_id=env_id, device=get_device())

    # Options shared by every extractor.
    if 'normalize' in hyperparams:
        del hyperparams['normalize']
    if 'unbounded_normalizer' in hyperparams:
        normalizers = {"tanh": Unbounded_Normalizer.tanh, "arctan": Unbounded_Normalizer.arctan}
        extractor_kwargs['unbounded_normalizer'] = normalizers[hyperparams['unbounded_normalizer']]
        del hyperparams['unbounded_normalizer']
    if 'unbounded_normalizer_scale' in hyperparams:
        extractor_kwargs['unbounded_normalizer_scale'] = hyperparams.pop('unbounded_normalizer_scale')

    if 'order' in hyperparams:
        extractor_kwargs['order'] = hyperparams.pop('order')
        # NOTE(review): a matching default overrides any normalizer settings read
        # above — confirm this precedence is intended.
        for env_name in UNBOUNDED_NORMALIZER_DEFAULT.keys():
            if env_name in env_id:
                extractor_kwargs['unbounded_normalizer'] = Unbounded_Normalizer.arctan
                extractor_kwargs['unbounded_normalizer_scale'] = UNBOUNDED_NORMALIZER_DEFAULT[env_name]

        if 'gaussian_std' in hyperparams:
            extractor_kwargs['gaussian_std'] = hyperparams.pop('gaussian_std')
            extractor_cls = Random_Fourier_Feature_Extractor
        elif 'poly' in hyperparams:
            del hyperparams['poly']
            extractor_cls = Poly_Basis_Extractor
        elif 'chebyshev' in hyperparams:
            del hyperparams['chebyshev']
            extractor_cls = Chebyshev_Basis_Extractor
        else:
            extractor_cls = Fourier_Basis_Extractor
    elif 'n_tilings' in hyperparams:
        # Read both values before deleting either key.
        n_tilings, n_tiles_dim = hyperparams['n_tilings'], hyperparams['n_tiles_dim']
        extractor_kwargs['n_tilings'] = n_tilings
        extractor_kwargs['n_tiles_dim'] = n_tiles_dim
        del hyperparams['n_tilings'], hyperparams['n_tiles_dim']
        extractor_cls = Tile_Coding_Extractor
    elif 'octa' in hyperparams:
        del hyperparams['octa']
        extractor_cls = Fourier_Octagonal_Extractor
    elif 'fourier_nn' in hyperparams:
        del hyperparams['fourier_nn']
        extractor_cls = Fourier_NN_Extractor
    else:
        extractor_cls = Normalizer_Extractor

    return dict(features_extractor_class=extractor_cls, features_extractor_kwargs=extractor_kwargs)

#################### GAUSSIAN NOISE INJECTED DURING TRAINING ###########################################
# Per-dimension noise standard deviation, keyed by a substring of the environment id.
GAUSSIAN_NOISE_STD=dict(MountainCar=np.array([0.018,0.0014])) #CartPole=np.array([0.0025, 0.01, 0.013, 0.01]),

class Gaussian_Noise:
    """Additive Gaussian observation noise for a given environment.

    The noise has zero mean. Its per-dimension standard deviation is 1 unless
    `env_id` contains one of the keys of GAUSSIAN_NOISE_STD, in which case the
    matching array is used.
    """

    def __init__(self, env_id:str, device:th.device=th.device("cuda:0")):
        """
        Args:
            env_id: environment id; an environment is created temporarily to
                read the observation dimension.
            device: device holding the noise parameters. A CUDA device falls
                back to the CPU when CUDA is not available.
        """
        env = gym.make(env_id)
        try:
            self._n_dim = len(env.observation_space.low)
        finally:
            env.close()

        # Creating tensors on a CUDA device fails on CPU-only machines.
        device = th.device(device)
        if device.type == "cuda" and not th.cuda.is_available():
            device = th.device("cpu")
        self._device = device

        noise_mean, noise_std = np.zeros((self._n_dim)), np.ones((self._n_dim))
        for env_name in GAUSSIAN_NOISE_STD.keys():
            if env_id.rfind(env_name) != -1:
                noise_std = GAUSSIAN_NOISE_STD[env_name]

        self._mean = th.tensor(noise_mean, device=self._device, dtype=th.float32)
        self._std = th.tensor(noise_std, device=self._device, dtype=th.float32)

    def get_feature(self, obs) -> th.Tensor:
        """Return `obs` plus noise drawn from N(mean, std).

        A separate sample is drawn for every element of `obs`, so observations
        in the same batch do not share one noise vector.
        """
        unit_noise = th.randn(tuple(obs.shape), device=self._device, dtype=th.float32)
        return unit_noise * self._std + self._mean + obs

    @property
    def n_dim(self):
        """Number of observation dimensions read from the environment."""
        return self._n_dim


class Gaussian_Noise_Extractor(BaseFeaturesExtractor):
    """Feature extractor that perturbs observations with `Gaussian_Noise` and flattens them."""

    def __init__(self, observation_space:gym.Space, env_id:str, device:th.device=th.device("cuda:0")):
        # The noise helper is built first because its dimension is the
        # features_dim required by the base-class constructor.
        noise_source = Gaussian_Noise(env_id=env_id, device=device)
        self._gaussian_noise = noise_source
        super().__init__(observation_space, noise_source.n_dim)
        self._flatten = nn.Flatten()

    def forward(self, obs: th.Tensor) -> th.Tensor:
        """Add noise to `obs`, then flatten the result."""
        noisy_obs = self._gaussian_noise.get_feature(obs)
        return self._flatten(noisy_obs)


############## SAVING THE NEURAL NETWORKS #########################
class Save_Net_Callback(BaseCallback):
    """Save the model every `check_freq` callback calls.

    Each checkpoint path is `log_dir` immediately followed by the current call
    count. The directory is not created here.
    """

    def __init__(self, log_dir:str, check_freq:int, verbose=0):
        super().__init__(verbose)
        self._log_dir = log_dir
        self._check_freq = check_freq

    def _on_step(self) -> bool:
        """Write a checkpoint when the call count is a multiple of `check_freq`; always continue training."""
        is_due = (self.n_calls % self._check_freq == 0)
        if is_due:
            checkpoint_path = self._log_dir + str(self.n_calls)
            self.model.save(checkpoint_path)
        return True

class Save_Net_Callback_By_Episode(BaseCallback):
    """Save the model on every step where an episode ends.

    Checkpoints are written to `log_dir` immediately followed by a counter that
    starts at 1 and grows by one per saved checkpoint. The directory is not
    created here.
    """

    def __init__(self, log_dir:str, verbose=0):
        super().__init__(verbose)
        self.count = 1
        self._log_dir = log_dir

    def _on_step(self) -> bool:
        """Checkpoint when at least one done flag is set on this step; always continue training."""
        # The training loop exposes the flag as "done" or "dones" depending on the algorithm.
        done_flags = self.locals.get("done")
        if done_flags is None:
            done_flags = self.locals.get("dones")

        # Number of environments whose episode finished on this step.
        n_finished = np.sum(np.array(done_flags)).item()
        if n_finished >= 1:
            checkpoint_path = self._log_dir + str(self.count)
            self.model.save(checkpoint_path)
            self.count += 1
        return True


In [None]:
def Train_And_SaveNN_hyp(env_id:str, algo:str, json_file:str, nb_of_trainings:int, n_timesteps:int, noise:bool):
    """Run several independent trainings and save networks and episode rewards.

    For training number i (starting at 1) a directory "log<i>/" receives the
    Monitor log and one model checkpoint per finished episode (written by
    `Save_Net_Callback_By_Episode`). After all trainings, the per-episode
    rewards are truncated to the shortest training, stored as one row per
    training in "<env_id>_<algo>.txt" and offered for download through Colab.

    Args:
        env_id: environment id.
        algo: algorithm name ("dqn", "ppo", "a2c", ...).
        json_file: path of the JSON file holding the hyperparameters.
        nb_of_trainings: number of trainings to run (at least 1).
        n_timesteps: number of time steps per training.
        noise: when True, the policy's feature extractor is replaced by
            `Gaussian_Noise_Extractor` so Gaussian noise is added during training.

    Raises:
        ValueError: `nb_of_trainings` is smaller than 1.
    """
    if nb_of_trainings < 1:
        raise ValueError("nb_of_trainings must be at least 1, got {}".format(nb_of_trainings))

    hyperparam = load_hyperparams(env_id, algo, json_file)
    if noise:
        noise_policy_kwargs = dict(
            features_extractor_class=Gaussian_Noise_Extractor,
            features_extractor_kwargs=dict(env_id=env_id),
        )
        hyperparam['policy_kwargs'] = {**hyperparam['policy_kwargs'], **noise_policy_kwargs}

    list_of_trainings = []  # one list of episode rewards per training
    for i in range(1, nb_of_trainings + 1):
        log_dir = "log" + str(i) + "/"  # one log directory per training
        os.makedirs(log_dir, exist_ok=True)
        env = Monitor(gym.make(env_id), log_dir, allow_early_resets=True)
        try:
            model = create_model(env, algo, hyperparam)
            callback = Save_Net_Callback_By_Episode(log_dir=log_dir, verbose=0)
            model.learn(total_timesteps=n_timesteps, callback=callback)

            # The first episode is dropped; per the original note its reward
            # depends on the initialisation.
            list_of_trainings.append(env.get_episode_rewards()[1:])
        finally:
            # Release the environment and the Monitor log file before the next training.
            env.close()

    # Trainings can contain different numbers of episodes: keep the common prefix
    # so the result is a rectangular matrix (rows: trainings, columns: episodes).
    t_min = min(len(rewards) for rewards in list_of_trainings)
    matrice_rewards = [rewards[:t_min] for rewards in list_of_trainings]

    rewards_file = env_id + "_" + algo + ".txt"
    np.savetxt(rewards_file, matrice_rewards)
    files.download(rewards_file)



  
  

In [None]:
def get_rollouts(path:str, session_id:int, env_id:str, algo:str, net:str, n_rollouts:int, init_states_file:str, verbose:bool=False)->None:
    """Roll out a saved policy from fixed initial states and save the returns.

    The training logs (saved networks) must already exist. Rollout i starts
    from row i of `init_states_file`, runs the deterministic policy until the
    environment reports `done`, and contributes its cumulated reward to the
    vector written to `path` with `np.savetxt`.

    Args:
        path: output text file.
        session_id: not used by this function.
        env_id: environment id.
        algo: algorithm name understood by `load_model`.
        net: path of the saved network.
        n_rollouts: number of rollouts; must not exceed the number of initial states.
        init_states_file: text file with one initial state per row.
        verbose: print a progress counter.

    Raises:
        IndexError: fewer initial states than `n_rollouts`.
    """
    # ndmin=2 keeps one row per state even when the file contains a single state.
    init_states = np.loadtxt(init_states_file, ndmin=2)
    if n_rollouts > len(init_states):
        raise IndexError("n_rollouts={} but '{}' only holds {} initial states".format(
            n_rollouts, init_states_file, len(init_states)))

    model = load_model(algo, net)
    env = gym.make(env_id)
    rollout_returns = []
    try:
        for i in range(n_rollouts):
            env.reset()
            env._elapsed_steps = 0
            # Overwrite the state chosen by reset() with the imposed initial state.
            obs = init_states[i]
            env.unwrapped.state = obs

            done = False
            cumul_reward = 0.0
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
                cumul_reward += reward

            rollout_returns.append(cumul_reward)
            if verbose:
                print("\rRun {}/{}".format(i + 1, n_rollouts), end="")
        if verbose:
            print("")
    finally:
        env.close()

    # Save the vector of cumulated rewards.
    np.savetxt(path, rollout_returns)

In [None]:
def get_noisy_rollouts(path:str, session_id:int, env_id:str, algo:str, net:str, n_rollouts:int, init_states_file:str, var_position:float,  var_velocity:float, var_percent:float, verbose:bool=False)->None:
    """Roll out a saved policy with Gaussian noise added to the observations.

    Zero-mean noise with standard deviation
    `[var_position * var_percent, var_velocity * var_percent]` is added to the
    initial state (which is also written into the environment) and to every
    observation returned by `env.step`. The original note cites
    var_position=1.8 and var_velocity=0.14. The cumulated reward of each
    rollout is written to `path` with `np.savetxt`.

    Args:
        path: output text file.
        session_id: not used by this function.
        env_id: environment id.
        algo: algorithm name understood by `load_model`.
        net: path of the saved network.
        n_rollouts: number of rollouts; must not exceed the number of initial states.
        init_states_file: text file with one initial state per row.
        var_position: noise scale of the first observation dimension.
        var_velocity: noise scale of the second observation dimension.
        var_percent: common multiplier applied to both scales.
        verbose: print a progress counter.

    Raises:
        IndexError: fewer initial states than `n_rollouts`.
    """
    # ndmin=2 keeps one row per state even when the file contains a single state.
    init_states = np.loadtxt(init_states_file, ndmin=2)
    if n_rollouts > len(init_states):
        raise IndexError("n_rollouts={} but '{}' only holds {} initial states".format(
            n_rollouts, init_states_file, len(init_states)))

    model = load_model(algo, net)
    env = gym.make(env_id)
    rollout_returns = []
    try:
        # The noise parameters do not change between rollouts: build them once.
        n_dim = len(env.observation_space.low)
        mu_noise = np.zeros(n_dim)
        sigma_noise = np.ones(n_dim) * [var_position * var_percent, var_velocity * var_percent]

        for i in range(n_rollouts):
            env.reset()
            env._elapsed_steps = 0
            obs = init_states[i] + np.random.normal(mu_noise, sigma_noise)
            env.unwrapped.state = obs

            done = False
            cumul_reward = 0.0
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
                obs = obs + np.random.normal(mu_noise, sigma_noise)
                cumul_reward += reward

            rollout_returns.append(cumul_reward)
            if verbose:
                print("\rRun {}/{}".format(i + 1, n_rollouts), end="")
        if verbose:
            print("")
    finally:
        env.close()

    np.savetxt(path, rollout_returns)

In [None]:
def rollouts_on_fixed_states(env_id:str, session_id:int, algo:str,  n_rollouts:int,  nb_of_trainings:int, init_states:str):
    """Run `get_rollouts` for every saved network of every training.

    For each training i in 1..nb_of_trainings, every ".zip" file found in
    "log<i>" is rolled out and its returns are written to
    "rollouts/ent<i>_ep<name>_<algo>_hyp_rollout.txt".

    Args:
        env_id: environment id.
        session_id: forwarded to `get_rollouts`.
        algo: algorithm name.
        n_rollouts: number of rollouts per network.
        nb_of_trainings: number of "log<i>" directories to scan.
        init_states: path of the initial-states file.
    """
    suffix = algo+"_hyp_rollout.txt"
    # A single output directory collects the rollouts of every log directory.
    os.makedirs("rollouts", exist_ok=True)
    for training in range(1, nb_of_trainings+1):
        log_name = "log"+str(training)
        net_names = [os.path.splitext(entry)[0]
                     for entry in os.listdir(log_name)
                     if entry.endswith(".zip")]
        for net_name in net_names:
            out_path = "rollouts/ent"+str(training)+"_ep"+net_name+"_"+suffix
            get_rollouts(path=out_path, session_id=session_id, env_id=env_id, algo=algo,
                         net=log_name+"/"+net_name, n_rollouts=n_rollouts,
                         init_states_file=init_states)

 

In [None]:
def rollouts_on_fixed_states_optimalPol( env_id:str, session_id:int, algo:str,  n_rollouts:int,  nb_of_trainings:int, init_states:str):
    """Roll out the final policy of each training.

    For each training i in 1..nb_of_trainings, the ".zip" checkpoint of "log<i>"
    with the highest episode number is taken as the final policy, and its
    returns are written to
    "rollouts/ent<i>_ep<episode>_<algo>_hyp_opt_rollout.txt".

    NOTE(review): a later cell defines another function with this same name;
    when both cells are run, the later definition replaces this one.

    Args:
        env_id: environment id.
        session_id: forwarded to `get_rollouts`.
        algo: algorithm name.
        n_rollouts: number of rollouts per network.
        nb_of_trainings: number of "log<i>" directories to scan.
        init_states: path of the initial-states file.

    Raises:
        FileNotFoundError: a log directory contains no ".zip" checkpoint.
        ValueError: a ".zip" file name is not an integer.
    """
    path_init = algo+"_hyp_opt_rollout.txt"
    # A single output directory collects the rollouts of every log directory.
    os.makedirs("rollouts/", exist_ok=True)
    for i in range(1, nb_of_trainings+1):
        log_dir = "log"+str(i)
        episodes = [int(os.path.splitext(name)[0])
                    for name in os.listdir(log_dir)
                    if name.endswith(".zip")]
        if not episodes:
            # Fail with a clear message instead of trying to load "<log_dir>/0".
            raise FileNotFoundError("no .zip checkpoint found in '{}'".format(log_dir))
        # Checkpoints are named by episode number: the largest one is the last saved policy.
        max_ep = max(episodes)

        path = "rollouts/ent"+str(i)+"_ep"+str(max_ep)+"_"+path_init
        get_rollouts(path=path, session_id=session_id, env_id=env_id, algo=algo,
                     net=log_dir+"/"+str(max_ep), n_rollouts=n_rollouts,
                     init_states_file=init_states)

 


In [None]:
def rollouts_on_fixed_states_optimalPol( env_id:str, session_id:int, algo:str,  n_rollouts:int,  nb_of_trainings:int, init_states:str, var_position:Optional[float]=None,  var_velocity:Optional[float]=None, var_percent:Optional[float]=None):
    """Roll out the final policy of each training, with or without observation noise.

    This name is defined twice in the notebook and this definition is the one
    that survives a full run, so it accepts both call forms:

      * without the three noise arguments, rollouts are noise-free
        (`get_rollouts`) and written to
        "rollouts/ent<i>_ep<episode>_<algo>_hyp_opt_rollout.txt";
      * with all three noise arguments, rollouts are noisy
        (`get_noisy_rollouts`) and written to
        "noisy_rollouts/ent<i>_ep<episode>_<algo>_hyp_noisy_rollout.txt".

    For each training i in 1..nb_of_trainings, the ".zip" checkpoint of "log<i>"
    with the highest episode number is taken as the final policy.

    Args:
        env_id: environment id.
        session_id: forwarded to the rollout function.
        algo: algorithm name.
        n_rollouts: number of rollouts per network.
        nb_of_trainings: number of "log<i>" directories to scan.
        init_states: path of the initial-states file.
        var_position, var_velocity, var_percent: noise settings forwarded to
            `get_noisy_rollouts`; give all three or none.

    Raises:
        TypeError: only some of the noise arguments were given.
        FileNotFoundError: a log directory contains no ".zip" checkpoint.
        ValueError: a ".zip" file name is not an integer.
    """
    noise_args = (var_position, var_velocity, var_percent)
    n_given = sum(arg is not None for arg in noise_args)
    if n_given not in (0, 3):
        raise TypeError("var_position, var_velocity and var_percent must be given together")
    noisy = (n_given == 3)

    if noisy:
        out_dir, path_init = "noisy_rollouts/", algo+"_hyp_noisy_rollout.txt"
    else:
        out_dir, path_init = "rollouts/", algo+"_hyp_opt_rollout.txt"

    # A single output directory collects the rollouts of every log directory.
    os.makedirs(out_dir, exist_ok=True)
    for i in range(1, nb_of_trainings+1):
        log_dir = "log"+str(i)
        episodes = [int(os.path.splitext(name)[0])
                    for name in os.listdir(log_dir)
                    if name.endswith(".zip")]
        if not episodes:
            # Fail with a clear message instead of trying to load "<log_dir>/0".
            raise FileNotFoundError("no .zip checkpoint found in '{}'".format(log_dir))
        # Checkpoints are named by episode number: the largest one is the last saved policy.
        max_ep = max(episodes)

        path = out_dir+"ent"+str(i)+"_ep"+str(max_ep)+"_"+path_init
        net = log_dir+"/"+str(max_ep)
        if noisy:
            get_noisy_rollouts(path=path, session_id=session_id, env_id=env_id, algo=algo, net=net,
                               n_rollouts=n_rollouts, init_states_file=init_states,
                               var_position=var_position, var_velocity=var_velocity,
                               var_percent=var_percent)
        else:
            get_rollouts(path=path, session_id=session_id, env_id=env_id, algo=algo, net=net,
                         n_rollouts=n_rollouts, init_states_file=init_states)

 
 

In [None]:
def get_rollouts_details(path:str, session_id:int, env_id:str, algo:str, net:str, n_rollouts:int, init_states_file:str, verbose:bool=False)->None:
    """Roll out a saved policy and record every step (written for MountainCar).

    Rollout i starts from row i of `init_states_file` and runs the
    deterministic policy until the environment reports `done`. For every step
    one line "<observation>,<reward>,<rollout index>" is written to `path`,
    where the observation is the one the action was computed from. Lines are
    separated by "\\n" with no trailing newline.

    Args:
        path: output text file.
        session_id: not used by this function.
        env_id: environment id.
        algo: algorithm name understood by `load_model`.
        net: path of the saved network.
        n_rollouts: number of rollouts; must not exceed the number of initial states.
        init_states_file: text file with one initial state per row.
        verbose: print a progress counter.

    Raises:
        IndexError: fewer initial states than `n_rollouts`.
    """
    # ndmin=2 keeps one row per state even when the file contains a single state.
    init_states = np.loadtxt(init_states_file, ndmin=2)
    if n_rollouts > len(init_states):
        raise IndexError("n_rollouts={} but '{}' only holds {} initial states".format(
            n_rollouts, init_states_file, len(init_states)))

    model = load_model(algo, net)
    env = gym.make(env_id)
    # Parallel per-step records: observation, reward, index of the rollout.
    obs_i = []
    reward_i = []
    rollout_listNum = []
    try:
        for i in range(n_rollouts):
            env.reset()
            env._elapsed_steps = 0
            # Overwrite the state chosen by reset() with the imposed initial state.
            obs = init_states[i]
            env.unwrapped.state = obs

            done = False
            while not done:
                obs_i.append(obs)
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
                reward_i.append(reward)
                rollout_listNum.append(i)

            if verbose:
                print("\rRun {}/{}".format(i + 1, n_rollouts), end="")
        if verbose:
            print("")
    finally:
        env.close()

    records = ["{0},{1},{2}".format(obs, rew, rollout)
               for (obs, rew, rollout) in zip(obs_i, reward_i, rollout_listNum)]
    with open(path, "w") as f:
        f.write("\n".join(records))
    
def rollouts_details(env_id:str, session_id:int, algo:str,  n_rollouts:int,  nb_of_trainings:int, init_states:str):
    """Record detailed rollouts of the final policy of each training.

    For each training i in 1..nb_of_trainings, the ".zip" checkpoint of "log<i>"
    with the highest episode number is rolled out with `get_rollouts_details`
    and the result is written to
    "rollouts/ent<i>_ep<episode>_<algo>_hyp_rollout_details.txt".

    Args:
        env_id: environment id.
        session_id: forwarded to `get_rollouts_details`.
        algo: algorithm name.
        n_rollouts: number of rollouts per network.
        nb_of_trainings: number of "log<i>" directories to scan.
        init_states: path of the initial-states file.

    Raises:
        FileNotFoundError: a log directory contains no ".zip" checkpoint.
        ValueError: a ".zip" file name is not an integer.
    """
    path_init = algo+"_hyp_rollout_details.txt"
    # A single output directory collects the rollouts of every log directory.
    os.makedirs("rollouts", exist_ok=True)
    for i in range(1, nb_of_trainings+1):
        log_dir = "log"+str(i)
        episodes = [int(os.path.splitext(name)[0])
                    for name in os.listdir(log_dir)
                    if name.endswith(".zip")]
        if not episodes:
            # Fail with a clear message instead of trying to load "<log_dir>/0".
            raise FileNotFoundError("no .zip checkpoint found in '{}'".format(log_dir))
        # Checkpoints are named by episode number: the largest one is the last saved policy.
        max_ep = max(episodes)

        path = "rollouts/ent"+str(i)+"_ep"+str(max_ep)+"_"+path_init
        get_rollouts_details(path=path, session_id=session_id, env_id=env_id, algo=algo,
                             net=log_dir+"/"+str(max_ep), n_rollouts=n_rollouts,
                             init_states_file=init_states)
 