In [None]:
import os
import pybullet_envs
import numpy as np
import matplotlib.pyplot as plt
import time
# import panda_gym # Not in requirements; required for the panda envs. Get the latest from https://github.com/qgallouedec/panda-gym , don't use the version from PyPI.

from seagul.zoo3_utils import load_zoo_agent, ALGOS, do_rollout_stable

# Location of a local rl-baselines3-zoo checkout
# (git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo).
# Set the RL_ZOO_PATH environment variable to point at your own checkout; the
# fallback is the original author's path, so existing setups keep working.
path_to_zoo = os.environ.get("RL_ZOO_PATH", "/home/sgillen/work/external/rl-baselines3-zoo/")

# Root of the saved agents; the loading cell walks it as <algo>/<env_name>/<saved agent file>.
model_dir = "./keep_agents/train_all/"

In [None]:
# Walk the save directory (laid out as <algo>/<env_name>/<saved agent file>) and load everything.
#
# Result:
#   env_dict[algo][env_name]              -> environment returned by load_zoo_agent
#   model_dict[algo][env_name][post_name] -> agent; 'original' is the model returned by
#                                            load_zoo_agent, every other key is a file found
#                                            in the env folder (file name up to the first ".")
#
# NOTE: os.scandir yields entries in arbitrary (filesystem) order, and the lists built
# from these dicts in the following cells inherit that order.

model_dict = {}
env_dict = {}

for algo_entry in os.scandir(model_dir):
    if not algo_entry.is_dir():
        continue  # stray files (e.g. .DS_Store) are not algorithm folders
    # DirEntry.name is the last path component regardless of the OS path separator.
    algo = algo_entry.name
    model_dict[algo] = {}
    env_dict[algo] = {}
    for env_entry in os.scandir(algo_entry.path):
        if not env_entry.is_dir():
            continue  # only sub-folders name environments
        env_name = env_entry.name
        env, original_model = load_zoo_agent(env_name, algo, zoo_path = path_to_zoo)
        env_dict[algo][env_name] = env
        model_dict[algo][env_name] = {'original': original_model}
        for pkl_file in os.scandir(env_entry.path):
            post_name = pkl_file.name.split(".")[0]
            model_dict[algo][env_name][post_name] = ALGOS[algo].load(pkl_file.path, env=env, device='cpu')

In [None]:
# Key lists used by the later cells. Environments are read from the first algorithm only,
# and post-processors from that algorithm's first environment only.
algo_list = [*model_dict]                             # Original Algorithm
env_list = [*model_dict[algo_list[0]]]                # Environment
post_list = [*model_dict[algo_list[0]][env_list[0]]]  # Postprocessor

# Postprocessor names are autogenerated. "original" means the original agent from the zoo
# without additional training; "postprocess_default" means just ARS with no extra reward.
# Any other name is some new reward function.
print(algo_list, env_list, post_list, sep="\n")

In [None]:
# Roll out Walker with no noise for n_trial episodes per (post-processor, algorithm) pair.
#
# data_no_noise maps str(row) -> one table row per algorithm:
#   [env_id, algo, reward_mean, reward_std, fail_percentage, l_mean]   from post_list[0]
#   + [reward_mean, reward_std, fail_percentage, l_mean]               appended for post_list[1]
env_name = 'Walker2DBulletEnv-v0'
env_id = 'Walker'
post_name = ['original', 'refined']  # not read in this cell; kept in case other cells use it
n_trial = 300  # Trial number
data_no_noise = {}

for post_idx, post in enumerate(post_list):
    for row, algo in enumerate(algo_list):
        env = env_dict[algo][env_name]
        model = model_dict[algo][env_name][post]

        fail = 0
        rewards_list = []
        l_list = []
        for _ in range(n_trial):
            obs, act, rew, info = do_rollout_stable(env, model)
            episode = info[0]['episode']
            rewards_list.append(episode['r'])
            l_list.append(episode['l'])
            # Episodes that end before 1000 steps count as failures
            # (presumably the walker fell before the time limit -- TODO confirm).
            if episode['l'] < 1000:
                fail += 1

        # Stack once after the loop; only the complete arrays are used below.
        rewards_arr = np.stack(rewards_list).squeeze()
        l_arr = np.stack(l_list).squeeze()

        fail_percentage = 100*fail/n_trial
        reward_mean = np.mean(rewards_arr)
        reward_std = np.std(rewards_arr)
        l_mean = np.mean(l_arr)

        stats = [reward_mean, reward_std, fail_percentage, l_mean]
        if post_idx == 1:
            # Second post-processor: extend the row started by the first one.
            data_no_noise[str(row)] = data_no_noise[str(row)] + stats
        else:
            data_no_noise[str(row)] = [env_id, algo] + stats

        print(row)  # progress indicator

In [None]:
# Tabulate the no-noise Walker results: one row per algorithm, with the statistics of the
# first post-processor followed by the "_refined"/"after" statistics of the second one.
import pandas as pd

df_no_noise = pd.DataFrame.from_dict(
    data_no_noise,
    orient='index',
    columns=['env', 'algo', 'r_mean', 'r_std', 'fall/100ep before', 'l_mean',
             'r_mean_refined', 'r_std_refined', 'fail/100ep after', 'l_mean_refined'],
)
df_no_noise['algo'] = df_no_noise['algo'].str.upper()

# Save all as csv
#path = '~/Documents/GitHub/policy_refinement/Ty_files/Ty_csv/walker_no_noise.csv'
#df_no_noise.to_csv(path,index = False)

# Save just the fall per 100 episode as csv
# NOTE: the "after" column is spelled 'fail/...' while the "before" column is spelled
# 'fall/...'. The names are left untouched so existing consumers keep working, but the
# rounding keys must match them exactly: DataFrame.round silently ignores unknown keys,
# which previously left the "after" column unrounded.
fall_cols = ['fall/100ep before', 'fail/100ep after']
df_no_noise2 = df_no_noise[['env', 'algo'] + fall_cols].round(dict.fromkeys(fall_cols, 2))

# Hand-picked presentation order for the first six rows: new position i shows the values
# of old position row_order[i]. Any further rows stay where they are, and the index
# labels stay in place (only the row contents move).
row_order = [2, 5, 4, 0, 3, 1]
row_order.extend(range(len(row_order), len(df_no_noise2)))
reordered = df_no_noise2.iloc[row_order].copy()
reordered.index = df_no_noise2.index
df_no_noise2 = reordered

#path = '~/Documents/GitHub/policy_refinement/Ty_files/Ty_csv/walker_fall_only_no_noise.csv'
#df_no_noise2.to_csv(path,index = False)

df_no_noise2