In [73]:
from ast import mod
from telnetlib import DM
from turtle import mode
from unicodedata import name
import gym
from copy import deepcopy
import os
import os.path as osp
import torch
from scipy import stats
from statistics import mean 
import numpy as np
from torch.optim import Adam
import itertools
import random
import torch.nn as nn
import argparse
import pickle

def get_env_name(name):
    if ('humanoid' in name) or ('Humanoid' in name):
        return 'Humanoid-v3'
    if ('halfcheetah' in name) or ('HalfCheetah' in name):
        return 'HalfCheetah-v3'
    if ('ant' in name) or ('Ant' in name):
        return 'Ant-v3'
    if ('hopper' in name) or  ('Hopper' in name) :
        return 'Hopper-v3'
    if ('walker' in name) or ('Walker' in name) :
        return 'Walker2d-v3'
    return 'unknown'

def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)

class PPO_Actor():
    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        self.pi = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation)
        self.obs_mean = np.ones(obs_dim)
        self.obs_std = np.ones(obs_dim)
        self.clip = 10.0
        # print(type(self.pi))
    
    def normalize_o(self, o):
        o = o - self.obs_mean
        o = o / (self.obs_std + 1e-8)
        o = np.clip(o, -self.clip, self.clip)
        return o
    
    def act(self, o):
        o = self.normalize_o(o)
        o = torch.as_tensor(o, dtype=torch.float32)
        return self.pi(o).detach().numpy()
    
    def copy_model(self, md):
        self.pi.load_state_dict(md['pi'])
        self.obs_mean = md['obs_mean']
        self.obs_std = md['obs_std']
        self.clip = md['clip']
        
    def load(self, name):
        md = torch.load(name)
        self.copy_model(md)


def get_ppo_models(path, name):
    fpath = osp.join(path, name)
    models = []
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    env = gym.make(get_env_name(name))
    obs_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    for file_name in file_names:   
        if ".pt" not in file_name:
            continue
        fname = osp.join(fpath, file_name)
        print(file_name)
        model = PPO_Actor(obs_dim, action_dim, (64, 64), nn.Tanh)
        model.load(fname)
        models.append((name, file_name, model))
    return models

def get_models(path, name):
    print("get models ", path, name)
    if 'ppo' in name:
        return get_ppo_models(path, name)
    fpath = osp.join(path, name)
    models = {}
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    for file_name in file_names:   
        # fname = osp.join(fpath, file_name ,'pyt_save', 'model0.pt')
        fname = osp.join(fpath, file_name ,'pyt_save', 'model.pt')
        print(fname)
        model = torch.load(fname)
        models[file_name] = model
        # models.append((name, file_name, model))
    return models

def save_state(env):
    return env.sim.get_state()

def restore_state(env, old_state):
    env.reset()
    env.sim.set_state(old_state)
    env.sim.forward()
    return env.get_obs()

def get_ppo_action(o, md):
    return md.act(o)

def get_action(o, md, name):
    if 'ppo' in name:
        return get_ppo_action(o, md)
    if 'train' not in name:
        o = torch.as_tensor(o, dtype=torch.float32)
        return md.act(o)
    o = torch.as_tensor(o, dtype=torch.float32)
    return md.act(o, deterministic=False)

def get_q(o, a, md):
    o = torch.as_tensor(o, dtype=torch.float32)
    a = torch.as_tensor(a, dtype=torch.float32)
    q1 = md.q1(o, a)
    q2 = md.q2(o, a)
    return torch.min(q1, q2)

In [2]:
path  = '/home/lclan/spinningup/data/'
trajs_path = 'trajs'
algo_name = 'Humanoid-v3_sac_base'
algo_names = {}
algo_names['Humanoid-v3'] = ['Humanoid-v3_sac_base', 'Humanoid-v3_td3_base', 'vanilla_ppo_humanoid',  'sgld_ppo_humanoid']
algo_names['Ant-v3'] = ['Ant-v3_sac_base' , 'Ant-v3_td3_base', 'vanilla_ppo_ant', 'atla_ppo_ant']
algo_names['Walker2d-v3'] = ['Walker2d-v3_sac_base', 'Walker2d-v3_td3_base', 'vanilla_ppo_walker', 'atla_ppo_walker']
algo_names['HalfCheetah-v3'] = ['HalfCheetah-v3_sac_base', 'HalfCheetah-v3_td3_base',  'vanilla_ppo_halfcheetah', 'atla_ppo_halfcheetah']
algo_names['Hopper-v3'] = ['Hopper-v3_sac_base', 'Hopper-v3_td3_base', 'vanilla_ppo_hopper',  'atla_ppo_hopper']

def get_all_traj_names_with_same_env(path, trajs_path, name):
    all_trajs_names = []
    fpath = osp.join(path, trajs_path)
    print(fpath)
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    env_name = get_env_name(name)
    for file_name in file_names:
        if "trajs.pkl" not in file_name:
            continue
        tmp = get_env_name(file_name)
        if tmp == env_name:
            all_trajs_names.append(file_name)
    return all_trajs_names

all_names = get_all_traj_names_with_same_env(path, trajs_path, algo_name)
print(all_names)

/home/lclan/spinningup/data/trajs
['Humanoid-v3_td3_base_400_trajs.pkl', 'Humanoid-v3_sac_base_400_trajs.pkl', 'sgld_ppo_humanoid_400_trajs.pkl', 'vanilla_ppo_humanoid_400_trajs.pkl']


In [3]:

def load_all_same_env_results(fpath, env_name):
    print(fpath)
    file_names = os.listdir(fpath)
    rets = []
    for file_name in file_names:
        if ".pkl" in file_name and get_env_name(file_name) == env_name:
            print(file_name)
            file_name = osp.join(fpath, file_name)
            with open(file_name, 'rb') as f: 
                ret = pickle.load(f)
            rets.append(ret)
    return rets


/home/lclan/spinningup/data/trajs/test_continue/
Humanoid-v3_td3_base_s500_tr50.pkl
sgld_ppo_humanoid_s500_tr50.pkl
vanilla_ppo_humanoid_s500_tr50.pkl
Humanoid-v3_sac_base_s500_tr50.pkl


In [4]:
def is_fail(data):
    if data[3][0]:
        return 1.0
    # if data[3][1] < 100:
    #     return 1.0
    return 0.0

def get_fail_rate(result): # result of one agent to one agent's trajs
    ret = 0
    for data in result:
        ret += is_fail(data)
    return ret/len(result)

def get_result_mean(result):
    ret = 0
    for data in result: # data = (traj_id, midpoint_id, old_ret, (d, total_r))
        ret += data[3][1]
    return ret/len(result)

def get_results_tuple(results):
    fr_stats = {}
    ret_stats = {}
    for result in results:
        if (result[0], result[2]) not in fr_stats:
            fr_stats[(result[0], result[2])] = []
            ret_stats[(result[0], result[2])] = []
        fr = get_fail_rate(result[4])
        m = get_result_mean(result[4])
        
        print(result[0], result[1], result[2], result[3], fr, m,  len(result[4]))
        
        if result[1] == result[3]:
            if (result[0], "self") not in fr_stats:
                fr_stats[(result[0], "self")] = []
                ret_stats[(result[0], "self")] = []
            fr_stats[(result[0], "self")].append(fr)
            ret_stats[(result[0], "self")].append(m)
        else:
            fr_stats[(result[0], result[2])].append(fr)
            ret_stats[(result[0], result[2])].append(m)
    ret = []
    for key in fr_stats.keys():
        np_arr = np.array(fr_stats[key])
        np_arr2 = np.array(ret_stats[key])
        ret.append((key, np.mean(np_arr), np.std(np_arr), np.min(np_arr), np.max(np_arr), len(np_arr),
                    np.mean(np_arr2), np.std(np_arr2)
                    ))
        print(key, np.mean(np_arr), np.std(np_arr), np.min(np_arr), np.max(np_arr), len(np_arr),
              np.mean(np_arr2), np.std(np_arr2), np.min(np_arr), np.max(np_arr))
    return ret



In [6]:
continue_path = '/home/lclan/spinningup/data/tmp/'
results = load_all_same_env_results(continue_path, get_env_name(algo_name))



/home/lclan/spinningup/data/tmp/
Humanoid-v3_sac_base_s500_tr50_tn200.pkl
sgld_ppo_humanoid_s500_tr50_tn200.pkl
vanilla_ppo_humanoid_s500_tr50_tn200.pkl
Humanoid-v3_td3_base_s500_tr50_tn200.pkl


In [16]:
print(all_names)
path  = '/home/lclan/spinningup/data/'
trajs_path = 'trajs'
env_name = get_env_name(algo_name)
trajs_d = {}
for aname in algo_names[env_name]:
    for trajs_name in all_names:
        if aname in trajs_name:
            tname = osp.join(path, trajs_path, trajs_name)
            print(tname)
            with open(tname, 'rb') as f: 
                trajs = pickle.load(f)
            trajs_d[aname] = trajs 
            break


            




        

['Humanoid-v3_td3_base_400_trajs.pkl', 'Humanoid-v3_sac_base_400_trajs.pkl', 'sgld_ppo_humanoid_400_trajs.pkl', 'vanilla_ppo_humanoid_400_trajs.pkl']
/home/lclan/spinningup/data/trajs/Humanoid-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_humanoid_400_trajs.pkl
/home/lclan/spinningup/data/trajs/sgld_ppo_humanoid_400_trajs.pkl


In [28]:
print(len(results))
print(len(results[0]))
print(len(results[0][0]))
print(results[0][2][4][0])
print(results[0][2][4][1])
print(results[0][2][4][2])

4
432
5
(275, 70, 3212.3167572052607, (True, 193.50235377982935))
(392, 86, 3230.3920735404918, (True, 206.48765871153807))
(360, 51, 3214.7904224530043, (True, 170.18539794115583))


In [50]:
trajs_d['Humanoid-v3_sac_base'][2][1]

'Humanoid-v3_sac_base_s1211'

In [52]:
def get_state(trajs_d, algo_name, agent_name, traj_id, midpoint_id):
    for data in trajs_d[algo_name]:
        if data[1] == agent_name:
            return data[-1][traj_id][midpoint_id][1]

def get_self_ret(cresults, trajs_d):
    self_ret = {}
    for algo_results in cresults:
        for results in algo_results:
            if results[0] == results[2] and results[1] == results[3]:
                x = {}
                for data in results[4]:
                    if not data[3][0]:
                        s = get_state(trajs_d, results[0], results[1], data[0], data[1])
                        x[(data[0], data[1])] = (data[3][1],s)
                        
                if results[0] not in self_ret.keys():
                    self_ret[results[0]] = {}
                    print(results[0])
                self_ret[results[0]][results[1]] = x
    return self_ret

self_results = get_self_ret(results, trajs_d)    
    

Humanoid-v3_sac_base
sgld_ppo_humanoid
vanilla_ppo_humanoid
Humanoid-v3_td3_base


In [37]:
# print(self_results['Humanoid-v3_sac_base']['Humanoid-v3_sac_base_s1206'].keys())

dict_keys([(275, 70), (392, 86), (360, 51), (5, 80), (274, 42), (339, 20), (256, 92), (78, 56), (352, 78), (8, 4), (4, 92), (267, 26), (269, 16), (110, 68), (314, 57), (93, 96), (379, 60), (136, 10), (16, 97), (142, 40), (120, 94), (256, 59), (93, 12), (238, 64), (149, 57), (78, 89), (257, 41), (61, 72), (112, 32), (325, 71), (361, 96), (221, 23), (53, 30), (230, 68), (121, 94), (49, 89), (27, 37), (184, 5), (37, 6), (29, 71), (50, 66), (158, 94), (312, 50), (267, 35), (33, 66), (93, 92), (61, 0), (14, 22), (73, 84), (275, 31), (46, 4), (114, 89), (196, 33), (20, 66), (8, 72), (24, 74), (317, 91), (264, 51), (5, 48), (294, 76), (78, 84), (5, 85), (267, 64), (195, 51), (325, 92), (133, 32), (24, 62), (360, 65), (386, 67), (70, 94), (0, 8), (377, 3), (24, 88), (84, 97), (275, 62), (100, 33), (53, 81), (352, 53), (265, 1), (154, 16), (146, 51), (182, 79), (367, 19), (8, 42), (132, 15), (213, 3), (385, 55), (133, 94), (3, 44), (16, 88), (8, 12), (229, 73), (64, 94), (289, 23), (246, 7), (2

In [53]:
self_stats = {}
for key in self_results.keys():
    l = []
    for k2 in self_results[key].keys():
        for k3 in self_results[key][k2].keys():
            l.append(self_results[key][k2][k3][0])
    print(len(l))
    l = np.array(l)
    self_stats[key] = l
    print(key, np.mean(l), np.std(l), np.min(l), np.max(l))

            

1778
Humanoid-v3_sac_base 2776.4121531506867 124.2920999307913 2388.1548404664586 3093.100924667205
1979
sgld_ppo_humanoid 3406.2032325661435 173.27815478336356 3094.381862309973 3856.063893779141
1783
vanilla_ppo_humanoid 2879.8733473428324 184.83582019665175 2541.899453902074 3312.8166238739977
1973
Humanoid-v3_td3_base 2691.905658985236 115.22790954950138 2370.6746570321907 2868.903780588149


In [81]:
path  = '/home/lclan/spinningup/data/'


def get_q_results(cresults, self_results, path, algo_name):
    print("get q result ", algo_name) 
    models = get_models(path, algo_name)
    print(models.keys())
    success_results = {}
    fail_results = {}
    env_name = get_env_name(algo_name)
    env = gym.make(env_name)
    for algo_results in cresults:
        for results in algo_results:
            if results[1] == results[3]:
                continue
            if results[0] not in success_results.keys():
                success_results[results[0]] = []
                fail_results[results[0]] = []
            if results[2] == algo_name:
                for data in results[4]:
                    try:
                        oa_ret, s = self_results[results[0]][results[1]][(data[0], data[1])]
                    except:
                        print(results[0], results[1], (data[0], data[1]))
                        continue
                    md = models[results[3]]
                    o = restore_state(env, s)
                    a = get_action(o, md, algo_name)
                    v = get_q(o, a, md)
                    tmp = (v, oa_ret, data[3][1])
                    if data[3][0]:
                        fail_results[results[0]].append(tmp)
                    else:
                        success_results[results[0]].append(tmp)
    
    return fail_results, success_results

                    
                    
# q_result = get_q_results(results, self_results, path, 'Humanoid-v3_sac_base')



    

In [66]:
def get_mean_std(l):
    la = np.array(l)
    return str(int(np.mean(l))) + " $\pm$ "  + str(int(np.std(l)))

def print_q_results(q_results):
    for key in q_results[0].keys():
        oa_ret = []
        a_ret  = []
        v = []
        vor = []
        s = key
        s += ' & '
        for x in q_results[0][key]:
            oa_ret.append(x[1])
            a_ret.append(x[2])
            vv = x[0].detach().numpy()
            v.append(vv)
            vor.append(vv/x[1])
        
        s += get_mean_std(oa_ret)
        s += ' & '
        s += get_mean_std(a_ret)
        s += ' & '
        s += get_mean_std(v)
        s += ' & '
        # print(key, "fail data: ")
        # print('oa_ret', get_mean_std(oa_ret))
        # print('a_ret', get_mean_std(a_ret))
        # print('v', get_mean_std(v))
        # print('vor', get_mean_std(vor))
        oa_ret = []
        a_ret  = []
        v = []
        vor = []
        for x in q_results[1][key]:
            oa_ret.append(x[1])
            a_ret.append(x[2])
            vv = x[0].detach().numpy()
            v.append(vv)
            vor.append(vv/x[1])
        s += get_mean_std(oa_ret)
        s += ' & '
        s += get_mean_std(a_ret)
        s += ' & '
        s += get_mean_std(v)
        s += ' \\\\'
        print(s)
        # print(key, "sucess data: ")
        # print('oa_ret', get_mean_std(oa_ret))
        # print('a_ret', get_mean_std(a_ret))
        # print('v', get_mean_std(v))
        # print('vor', get_mean_std(vor))
            
            
print_q_results(q_result)



Humanoid-v3_sac_base & 2829 $\pm$ 143 & 319 $\pm$ 294 & 103 $\pm$ 136 & 2743 $\pm$ 96 & 2733 $\pm$ 104 & 305 $\pm$ 182 \\
sgld_ppo_humanoid & 3428 $\pm$ 175 & 226 $\pm$ 127 & 50 $\pm$ 59 & 3288 $\pm$ 92 & 2758 $\pm$ 118 & 140 $\pm$ 161 \\
vanilla_ppo_humanoid & 2939 $\pm$ 179 & 302 $\pm$ 233 & 81 $\pm$ 109 & 2822 $\pm$ 171 & 2734 $\pm$ 110 & 270 $\pm$ 191 \\
Humanoid-v3_td3_base & 2698 $\pm$ 114 & 388 $\pm$ 306 & 145 $\pm$ 144 & 2688 $\pm$ 115 & 2746 $\pm$ 110 & 261 $\pm$ 166 \\


In [82]:
path  = '/home/lclan/spinningup/data/'
trajs_path = 'trajs'
env_names = list(algo_names.keys())
algo_name = algo_names[env_names[0]][0]
continue_path = '/home/lclan/spinningup/data/tmp/'


def print_all_results(algo_name, path, trajs_path, continue_path, algo_names):
    all_names = get_all_traj_names_with_same_env(path, trajs_path, algo_name)
    print(all_names)
    cresults = load_all_same_env_results(continue_path, get_env_name(algo_name))
    env_name = get_env_name(algo_name)
    # load all trajs
    trajs_d = {}
    for aname in algo_names[env_name]:
        for trajs_name in all_names:
            if aname in trajs_name:
                tname = osp.join(path, trajs_path, trajs_name)
                print(tname)
                with open(tname, 'rb') as f: 
                    trajs = pickle.load(f)
                trajs_d[aname] = trajs 
                break
    # end
    self_results = get_self_ret(cresults, trajs_d)   
    
    q_result = get_q_results(cresults, self_results, path, algo_name)
    print_q_results(q_result)
    return q_result, trajs_d, cresults
    


x = print_all_results(algo_name, path, trajs_path, continue_path, algo_names)

/home/lclan/spinningup/data/trajs
['Humanoid-v3_td3_base_400_trajs.pkl', 'Humanoid-v3_sac_base_400_trajs.pkl', 'sgld_ppo_humanoid_400_trajs.pkl', 'vanilla_ppo_humanoid_400_trajs.pkl']
/home/lclan/spinningup/data/tmp/
Humanoid-v3_sac_base_s500_tr50_tn200.pkl
sgld_ppo_humanoid_s500_tr50_tn200.pkl
vanilla_ppo_humanoid_s500_tr50_tn200.pkl
Humanoid-v3_td3_base_s500_tr50_tn200.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_humanoid_400_trajs.pkl


KeyboardInterrupt: 

In [83]:
algo_name = algo_names[env_names[2]][0]
print(algo_name)
x = print_all_results(algo_name, path, trajs_path, continue_path, algo_names)


Walker2d-v3_sac_base
/home/lclan/spinningup/data/trajs
['Walker2d-v3_td3_base_400_trajs.pkl', 'atla_ppo_walker_400_trajs.pkl', 'Walker2d-v3_sac_base_400_trajs.pkl', 'vanilla_ppo_walker_400_trajs.pkl']
/home/lclan/spinningup/data/tmp/
Walker2d-v3_td3_base_s500_tr50_tn200.pkl
Walker2d-v3_sac_base_s500_tr50_tn200.pkl
atla_ppo_walker_s500_tr50_tn200.pkl
vanilla_ppo_walker_s500_tr50_tn200.pkl
/home/lclan/spinningup/data/trajs/Walker2d-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Walker2d-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_walker_400_trajs.pkl
/home/lclan/spinningup/data/trajs/atla_ppo_walker_400_trajs.pkl
Walker2d-v3_td3_base
Walker2d-v3_sac_base
atla_ppo_walker
vanilla_ppo_walker
get q result  Walker2d-v3_sac_base
get models  /home/lclan/spinningup/data/ Walker2d-v3_sac_base
/home/lclan/spinningup/data/Walker2d-v3_sac_base/Walker2d-v3_sac_base_s1207/pyt_save/model.pt
/home/lclan/spinningup/data/Walker2d-v3_sac_base/Walker2d-v3_sac_base_s1



atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
atla_ppo_walker atla_ppo_walker_20.pt (383, 33)
Walker2d-v3_td3_base & 3118 $\pm$ 350 & 404 $\pm$ 362 & 316 $\pm$ 133 & 2974 $\pm$ 365 & 2835 $\pm$ 259 & 411 $\pm$ 109 \\
Walker2d-v3_sac_base & 3139 $\pm$ 292 & 386 $\pm$ 368 & 298 $\pm$ 135 & 2979 $\pm$ 213 & 2834 $\pm$ 257 & 394 $\pm$ 112 \\
atla_ppo_walker & 2673 $\pm$ 375 & 275 $\pm$ 301 & 195 $\pm$ 117 & 2511 $\pm$ 321 & 2721 $\pm$ 236 & 318 $\pm$ 114 \\
vanilla_ppo_walker & 2558 $\pm$ 250 & 312 $\pm$ 354 & 236 $\

In [85]:
algo_name = algo_names[env_names[4]][0]
print(algo_name)
x = print_all_results(algo_name, path, trajs_path, continue_path, algo_names)

Hopper-v3_sac_base
/home/lclan/spinningup/data/trajs
['Hopper-v3_sac_base_400_trajs.pkl', 'vanilla_ppo_hopper_400_trajs.pkl', 'Hopper-v3_td3_base_400_trajs.pkl', 'atla_ppo_hopper_400_trajs.pkl']
/home/lclan/spinningup/data/tmp/
vanilla_ppo_hopper_s500_tr50_tn200.pkl
atla_ppo_hopper_s500_tr50_tn200.pkl
Hopper-v3_td3_base_s500_tr50_tn200.pkl
Hopper-v3_sac_base_s500_tr50_tn200.pkl
/home/lclan/spinningup/data/trajs/Hopper-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Hopper-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_hopper_400_trajs.pkl
/home/lclan/spinningup/data/trajs/atla_ppo_hopper_400_trajs.pkl
vanilla_ppo_hopper
atla_ppo_hopper
Hopper-v3_td3_base
Hopper-v3_sac_base
get q result  Hopper-v3_sac_base
get models  /home/lclan/spinningup/data/ Hopper-v3_sac_base
/home/lclan/spinningup/data/Hopper-v3_sac_base/Hopper-v3_sac_base_s1207/pyt_save/model.pt
/home/lclan/spinningup/data/Hopper-v3_sac_base/Hopper-v3_sac_base_s1200/pyt_save/model.pt
/home/lc

In [86]:
algo_name = algo_names[env_names[1]][0]
print(algo_name)
x = print_all_results(algo_name, path, trajs_path, continue_path, algo_names)

Ant-v3_sac_base
/home/lclan/spinningup/data/trajs
['Ant-v3_td3_base_400_trajs.pkl', 'vanilla_ppo_ant_400_trajs.pkl', 'atla_ppo_ant_400_trajs.pkl', 'Ant-v3_sac_base_400_trajs.pkl']
/home/lclan/spinningup/data/tmp/
atla_ppo_ant_s500_tr50_tn200.pkl
Ant-v3_sac_base_s500_tr50_tn200.pkl
Ant-v3_td3_base_s500_tr50_tn200.pkl
vanilla_ppo_ant_s500_tr50_tn200.pkl
/home/lclan/spinningup/data/trajs/Ant-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Ant-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_ant_400_trajs.pkl
/home/lclan/spinningup/data/trajs/atla_ppo_ant_400_trajs.pkl
atla_ppo_ant
Ant-v3_sac_base
Ant-v3_td3_base
vanilla_ppo_ant
get q result  Ant-v3_sac_base
get models  /home/lclan/spinningup/data/ Ant-v3_sac_base
/home/lclan/spinningup/data/Ant-v3_sac_base/Ant-v3_sac_base_s1203/pyt_save/model.pt
/home/lclan/spinningup/data/Ant-v3_sac_base/Ant-v3_sac_base_s1201/pyt_save/model.pt
/home/lclan/spinningup/data/Ant-v3_sac_base/Ant-v3_sac_base_s1202/pyt_save/mo

In [92]:
def get_self_results2(algo_name, path, trajs_path, continue_path, algo_names):
    all_names = get_all_traj_names_with_same_env(path, trajs_path, algo_name)
    print(all_names)
    cresults = load_all_same_env_results(continue_path, get_env_name(algo_name))
    env_name = get_env_name(algo_name)
    # load all trajs
    trajs_d = {}

    for trajs_name in all_names:
        if algo_name in trajs_name:
            tname = osp.join(path, trajs_path, trajs_name)
            print(tname)
            with open(tname, 'rb') as f: 
                trajs = pickle.load(f)
            trajs_d[algo_name] = trajs 
            break
    # end
    self_results = get_self_ret(cresults, trajs_d) 
    models = get_models(path, algo_name)
    env_name = get_env_name(algo_name)
    env = gym.make(env_name)
    V = []
    for agent_name in self_results[algo_name].keys():
        md = models[agent_name]
        for k in self_results[algo_name][agent_name].keys():
            s = self_results[algo_name][agent_name][k][1]
            o = restore_state(env, s)
            a = get_action(o, md, algo_name)
            v = get_q(o, a, md)
            V.append(v.detach().numpy())
    V = np.array(V)
    print(np.mean(V), np.std(V))
    return self_results

algo_name = algo_names[env_names[0]][0]
self_results = get_self_results2(algo_name, path, trajs_path, continue_path, algo_names)

/home/lclan/spinningup/data/trajs
['Humanoid-v3_td3_base_400_trajs.pkl', 'Humanoid-v3_sac_base_400_trajs.pkl', 'sgld_ppo_humanoid_400_trajs.pkl', 'vanilla_ppo_humanoid_400_trajs.pkl']
/home/lclan/spinningup/data/tmp/
Humanoid-v3_sac_base_s500_tr50_tn200.pkl
sgld_ppo_humanoid_s500_tr50_tn200.pkl
vanilla_ppo_humanoid_s500_tr50_tn200.pkl
Humanoid-v3_td3_base_s500_tr50_tn200.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_humanoid_400_trajs.pkl
/home/lclan/spinningup/data/trajs/sgld_ppo_humanoid_400_trajs.pkl
Humanoid-v3_sac_base
sgld_ppo_humanoid
vanilla_ppo_humanoid
Humanoid-v3_td3_base
get models  /home/lclan/spinningup/data/ Humanoid-v3_sac_base
/home/lclan/spinningup/data/Humanoid-v3_sac_base/Humanoid-v3_sac_base_s1206/pyt_save/model.pt
/home/lclan/spinningup/data/Humanoid-v3_sac_base/Humanoid-v3_sac_base_s1209/pyt_save/model.pt
/home/lclan/spinni



590.4916 21.224783
