In [1]:
from ast import mod
from telnetlib import DM
from turtle import mode
from unicodedata import name
import gym
from copy import deepcopy
import os
import os.path as osp
import torch
from scipy import stats
from statistics import mean 
import numpy as np
from torch.optim import Adam
import itertools
import random
import torch.nn as nn
import argparse
import pickle

def get_env_name(name):
    if ('humanoid' in name) or ('Humanoid' in name):
        return 'Humanoid-v3'
    if ('halfcheetah' in name) or ('HalfCheetah' in name):
        return 'HalfCheetah-v3'
    if ('ant' in name) or ('Ant' in name):
        return 'Ant-v3'
    if ('hopper' in name) or  ('Hopper' in name) :
        return 'Hopper-v3'
    if ('walker' in name) or ('Walker' in name) :
        return 'Walker2d-v3'
    return 'unknown'

def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)

class PPO_Actor():
    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        self.pi = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation)
        self.obs_mean = np.ones(obs_dim)
        self.obs_std = np.ones(obs_dim)
        self.clip = 10.0
        # print(type(self.pi))
    
    def normalize_o(self, o):
        o = o - self.obs_mean
        o = o / (self.obs_std + 1e-8)
        o = np.clip(o, -self.clip, self.clip)
        return o
    
    def act(self, o):
        o = self.normalize_o(o)
        o = torch.as_tensor(o, dtype=torch.float32)
        return self.pi(o).detach().numpy()
    
    def copy_model(self, md):
        self.pi.load_state_dict(md['pi'])
        self.obs_mean = md['obs_mean']
        self.obs_std = md['obs_std']
        self.clip = md['clip']
        
    def load(self, name):
        md = torch.load(name)
        self.copy_model(md)


def get_ppo_models(path, name):
    fpath = osp.join(path, name)
    models = []
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    env = gym.make(get_env_name(name))
    obs_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    for file_name in file_names:   
        if ".pt" not in file_name:
            continue
        fname = osp.join(fpath, file_name)
        print(file_name)
        model = PPO_Actor(obs_dim, action_dim, (64, 64), nn.Tanh)
        model.load(fname)
        models.append((name, file_name, model))
    return models

def get_models(path, name):
    print("get models ", path, name)
    if 'ppo' in name:
        return get_ppo_models(path, name)
    fpath = osp.join(path, name)
    models = {}
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    for file_name in file_names:   
        # fname = osp.join(fpath, file_name ,'pyt_save', 'model0.pt')
        fname = osp.join(fpath, file_name ,'pyt_save', 'model.pt')
        print(fname)
        model = torch.load(fname)
        models[file_name] = model
        # models.append((name, file_name, model))
    return models

def save_state(env):
    return env.sim.get_state()

def restore_state(env, old_state):
    env.reset()
    env.sim.set_state(old_state)
    env.sim.forward()
    return env.get_obs()

def get_ppo_action(o, md):
    return md.act(o)

def get_action(o, md, name):
    if 'ppo' in name:
        return get_ppo_action(o, md)
    if 'train' not in name:
        o = torch.as_tensor(o, dtype=torch.float32)
        return md.act(o)
    o = torch.as_tensor(o, dtype=torch.float32)
    return md.act(o, deterministic=False)

def get_q(o, a, md):
    o = torch.as_tensor(o, dtype=torch.float32)
    a = torch.as_tensor(a, dtype=torch.float32)
    q1 = md.q1(o, a)
    q2 = md.q2(o, a)
    return torch.min(q1, q2)

In [12]:
def get_all_traj_names_with_same_env(path, trajs_path, name):
    all_trajs_names = []
    fpath = osp.join(path, trajs_path)
    print(fpath)
    file_names = os.listdir(fpath)
    if len(file_names) == 0:
        return []
    env_name = get_env_name(name)
    for file_name in file_names:
        if "trajs.pkl" not in file_name:
            continue
        tmp = get_env_name(file_name)
        if tmp == env_name:
            all_trajs_names.append(file_name)
    return all_trajs_names


def load_all_same_env_results(cpath, env_name):
    print('load continue results: ', cpath)
    file_names = os.listdir(cpath)
    rets = []
    for file_name in file_names:
        if ".pkl" in file_name and get_env_name(file_name) == env_name:
            print(file_name)
            file_name = osp.join(cpath, file_name)
            with open(file_name, 'rb') as f: 
                ret = pickle.load(f)
            rets.append(ret)
    return rets

def is_fail(data):
    if data[3][0]:
        return 1.0
    # if data[3][1] < 100:
    #     return 1.0
    return 0.0

def get_fail_rate(result): # result of one agent to one agent's trajs
    ret = 0
    for data in result:
        ret += is_fail(data)
    return ret/len(result)

def get_result_mean(result):
    ret = 0
    for data in result: # data = (traj_id, midpoint_id, old_ret, (d, total_r))
        ret += data[3][1]
    return ret/len(result)

def get_traj_d(path, trajs_path, all_traj_names, algo_names, test_algo_name):
    env_name = get_env_name(test_algo_name)
    trajs_d = {}
    for aname in algo_names[env_name]:
        for trajs_name in all_traj_names:
            if aname in trajs_name:
                tname = osp.join(path, trajs_path, trajs_name)
                print(tname)
                with open(tname, 'rb') as f: 
                    trajs = pickle.load(f)
                trajs_d[aname] = trajs 
                break
    return trajs_d

def get_state(trajs_d, algo_name, agent_name, traj_id, midpoint_id):
    for data in trajs_d[algo_name]:
        if data[1] == agent_name:
            return data[-1][traj_id][midpoint_id]

def get_self_ret(cresults, trajs_d):
    self_ret = {}
    for algo_results in cresults:
        for results in algo_results:
            if results[0] == results[2] and results[1] == results[3]:
                x = {}
                for data in results[4]:
                    if not data[3][0]:
                        eplen, s = get_state(trajs_d, results[0], results[1], data[0], data[1])
                        x[(data[0], data[1])] = (data[3][1],s, eplen)
                        
                if results[0] not in self_ret.keys():
                    self_ret[results[0]] = {}
                    print(results[0])
                self_ret[results[0]][results[1]] = x
    return self_ret


In [4]:
path  = '/home/lclan/spinningup/data/'
trajs_path = 'trajs'
continue_path = '/home/lclan/spinningup/data/tmp/'
algo_names = {}
algo_names['Humanoid-v3'] = ['Humanoid-v3_sac_base', 'Humanoid-v3_td3_base', 'vanilla_ppo_humanoid',  'sgld_ppo_humanoid']
algo_names['Ant-v3'] = ['Ant-v3_sac_base' , 'Ant-v3_td3_base', 'vanilla_ppo_ant', 'atla_ppo_ant']
algo_names['Walker2d-v3'] = ['Walker2d-v3_sac_base', 'Walker2d-v3_td3_base', 'vanilla_ppo_walker', 'atla_ppo_walker']
algo_names['HalfCheetah-v3'] = ['HalfCheetah-v3_sac_base', 'HalfCheetah-v3_td3_base',  'vanilla_ppo_halfcheetah', 'atla_ppo_halfcheetah']
algo_names['Hopper-v3'] = ['Hopper-v3_sac_base', 'Hopper-v3_td3_base', 'vanilla_ppo_hopper',  'atla_ppo_hopper']
env_names = list(algo_names.keys())
test_algo_name = 'Humanoid-v3_gsac_base'


In [5]:
all_traj_names = get_all_traj_names_with_same_env(path, trajs_path, test_algo_name)
print(all_traj_names)

/home/lclan/spinningup/data/trajs
['Humanoid-v3_td3_base_400_trajs.pkl', 'Humanoid-v3_sac_base_400_trajs.pkl', 'sgld_ppo_humanoid_400_trajs.pkl', 'vanilla_ppo_humanoid_400_trajs.pkl']


In [6]:
cresults = load_all_same_env_results(continue_path, get_env_name(test_algo_name))

load continue results:  /home/lclan/spinningup/data/tmp/
Humanoid-v3_sac_base_s500_tr50_tn200.pkl
sgld_ppo_humanoid_s500_tr50_tn200.pkl
vanilla_ppo_humanoid_s500_tr50_tn200.pkl
Humanoid-v3_td3_base_s500_tr50_tn200.pkl


In [7]:
trajs_d = get_traj_d(path, trajs_path, all_traj_names, algo_names, test_algo_name)

/home/lclan/spinningup/data/trajs/Humanoid-v3_sac_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/Humanoid-v3_td3_base_400_trajs.pkl
/home/lclan/spinningup/data/trajs/vanilla_ppo_humanoid_400_trajs.pkl
/home/lclan/spinningup/data/trajs/sgld_ppo_humanoid_400_trajs.pkl


In [13]:
self_ret = get_self_ret(cresults, trajs_d)

Humanoid-v3_sac_base
sgld_ppo_humanoid
vanilla_ppo_humanoid
Humanoid-v3_td3_base


In [11]:
test_models = get_models(path, test_algo_name)

get models  /home/lclan/spinningup/data/ Humanoid-v3_gsac_base
/home/lclan/spinningup/data/Humanoid-v3_gsac_base/Humanoid-v3_gsac_base_s1124/pyt_save/model.pt
/home/lclan/spinningup/data/Humanoid-v3_gsac_base/Humanoid-v3_gsac_base_s1125/pyt_save/model.pt


In [17]:
print(self_ret['sgld_ppo_humanoid'].keys())

dict_keys(['sgld_ppo_humanoid_1.pt', 'sgld_ppo_humanoid_2.pt', 'sgld_ppo_humanoid_11.pt', 'sgld_ppo_humanoid_10.pt', 'sgld_ppo_humanoid_4.pt', 'sgld_ppo_humanoid_7.pt', 'sgld_ppo_humanoid_8.pt', 'sgld_ppo_humanoid_6.pt', 'sgld_ppo_humanoid_5.pt', 'sgld_ppo_humanoid_9.pt'])


In [16]:
def run_extra_steps(env, ep_len, md, md_name, step_num=50):
    max_ep_len = 1000
    total_r = 0
    o = env.get_obs()
    for i in range(step_num):
        a = get_action(o, md, md_name)
        o, r, d, _ = env.step(a)
        total_r += r
        ep_len += 1
        if d or (ep_len == max_ep_len):
            return (d, total_r)
    return (d, total_r)

def test_models(self_ret, test_models, test_algo_name, test_num = 200, step_num = 500):
    ret = {}
    env_name = get_env_name(test_algo_name)
    env = gym.make(env_name)
    for gen_algo in self_ret.keys():
        score = []
        fail = []
        print("start testing on ", gen_algo)
        for k in self_ret[gen_algo].keys():
            cnt = 0
            for k2 in self_ret[gen_algo][k].keys():
                orig_ret, s, eplen = self_ret[gen_algo][k][k2]
                for md in test_models:
                    restore_state(env, s)
                    d, r = run_extra_steps(env, eplen, md, test_algo_name, step_num)
                    fail.append(int(d))
                    score.append(r)
                cnt += 1
                if cnt > test_num:
                    break
        ret[gen_algo] = (score, fail)
    return ret

score, fail = test_models(self_ret, test_models, test_algo_name)



start testing on  Humanoid-v3_sac_base


ValueError: too many values to unpack (expected 3)