In [None]:
import os
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases dropped the old names; use whichever this install provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

In [None]:
# Root directory of the results tree; each sub-directory is treated as one environment.
DATA_ROOT = '../../results'

# Maps environment name -> {training name -> DataFrame}; the inner dicts start empty.
# Only directories are kept: a stray file in DATA_ROOT (e.g. .DS_Store, a README)
# would otherwise be treated as an environment and make os.listdir() fail on it.
# Sorting makes the iteration (and therefore plotting) order reproducible,
# since os.listdir() returns entries in arbitrary order.
data = {
    env: {}
    for env in sorted(os.listdir(DATA_ROOT))
    if os.path.isdir(os.path.join(DATA_ROOT, env))
}

In [None]:
# Read every '*.csv' file of each environment directory into `data`.
for env in data:
    env_dir = f'{DATA_ROOT}/{env}'
    for training in os.listdir(env_dir):
        # Anything that is not a CSV file is ignored.
        if not training.endswith('.csv'):
            continue
        print(env, training)
        df = pd.read_csv(f'{env_dir}/{training}')
        # This column holds JSON text (presumably the list of per-episode
        # rewards -- TODO confirm); decode it into Python objects.
        rewards = df['hist_stats/episode_reward'].apply(json.loads)
        df['hist_stats/episode_reward'] = rewards
        # Derive 'std' from the decoded rewards only when the file lacks it.
        if 'std' not in df.columns:
            df['std'] = rewards.apply(np.std)
        # Key the frame by the file name without its '.csv' suffix.
        data[env][training[:-len('.csv')]] = df

In [None]:
def plot_reward(df, title='', color='blue', save=False):
    """Plot mean episode reward against timesteps with a +/- one 'std' band.

    The input frame is only read, never modified: scaled values and the band
    limits are kept in local variables. Calling this repeatedly on the same
    frame (e.g. re-running a cell) therefore always draws the same figure,
    and statistics computed from ``df`` afterwards reflect the stored data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'timesteps_total', 'episode_reward_mean'
        and 'std'.
    title : str
        Figure title; also used as the file stem when ``save`` is true.
    color : matplotlib colour
        Colour of both the mean line and the shaded band.
    save : bool
        When true, the figure is written to './plots/<title>.png'; the
        directory is created if it does not exist yet.
    """
    timesteps = df['timesteps_total']
    reward_mean = df['episode_reward_mean']
    reward_std = df['std']

    # NOTE(review): this one run is rescaled by a hard-coded factor whose
    # origin is not documented anywhere in the notebook -- confirm it is intended.
    if title == 'humanoid-td3':
        reward_mean = reward_mean * 0.67
        reward_std = reward_std * 0.67

    plt.plot(timesteps, reward_mean, c=color)
    plt.fill_between(
        timesteps,
        reward_mean - reward_std,
        reward_mean + reward_std,
        alpha=.5,
        color=color,
    )
    plt.xlabel('timestep')
    plt.ylabel('reward')
    plt.title(title)
    if save:
        # savefig() does not create missing directories.
        os.makedirs('./plots', exist_ok=True)
        fig = plt.gcf()
        fig.savefig('./plots/' + title + '.png')
    plt.show()


def plot_time(df, title='', color='blue', save=False):
    """Plot total elapsed time against timesteps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'timesteps_total' and 'time_total_s'.
    title : str
        Figure title; also used as the file stem when ``save`` is true.
    color : matplotlib colour
        Colour of the line.
    save : bool
        When true, the figure is written to '<title>.png' in the current
        working directory, or 'untitled.png' when ``title`` is empty.
    """
    plt.plot(df['timesteps_total'], df['time_total_s'], c=color)
    plt.xlabel('timestep')
    plt.ylabel('time (s)')
    plt.title(title)
    if save:
        fig = plt.gcf()
        # Parenthesised on purpose: '+' binds tighter than 'or', so without
        # the parentheses the extension was only added to the fallback name.
        fig.savefig((title or 'untitled') + '.png')
    plt.show()
    
# Colours are taken from seaborn's current palette.
PALETTE = sns.color_palette()


def algorithm_color(algorithm):
    """Return the palette colour assigned to an algorithm name.

    'dqn' and 'ppo' must match exactly; 'sac' and 'td3' match any name that
    contains them. TD3 has its own entry because this notebook plots TD3 runs
    and they would otherwise receive no assigned colour.

    Returns ``None`` for any other name, which matplotlib interprets as
    "use the default colour".
    """
    if algorithm == 'dqn':
        return PALETTE[0]
    if algorithm == 'ppo':
        return PALETTE[1]
    if 'sac' in algorithm:
        return PALETTE[4]
    if 'td3' in algorithm:
        return PALETTE[2]
    # Unknown algorithm: explicit rather than implicit None.
    return None


In [None]:
# For every loaded run: draw and save its reward curve, then print the run's
# title, its highest mean episode reward and its highest timestep count.
for env, trainings in data.items():
    for algorithm, training in trainings.items():
        title = f'{env}-{algorithm}'
        color = algorithm_color(algorithm)
        plot_reward(training, title, color, save=True)
        best_reward = training['episode_reward_mean'].max()
        total_timesteps = training['timesteps_total'].max()
        print(title)
        print(best_reward)
        print(total_timesteps)
        # Timing plot is currently disabled:
        # plot_time(training, algorithm, save=True)

In [None]:
import os
from utils import Training
import matplotlib.pyplot as plt

# One figure per entry of data_path: the logged
# 'info/learner/default_policy/kl' column against total timesteps.
data_path = '../../results/humanoid/ppo-hyp'
for training_dir in os.listdir(data_path):
    training = Training(f'{data_path}/{training_dir}')
    progress_df = training.progress.df
    progress_df.plot(
        x='timesteps_total',
        y='info/learner/default_policy/kl',
        title=training_dir,
    )
    plt.show()

In [None]:
import pandas as pd
from io import StringIO

# Print, for each algorithm, a LaTeX table of its timing runs with a
# 'speedup' column: slowest iteration time divided by each row's time.
root = '../../results/humanoid/'
for alg in ('td3', 'sac', 'ppo'):
    filename = root + alg + '-time.csv'
    df = pd.read_csv(filename)
    # .copy() gives an independent frame, so adding a column below does not
    # trigger pandas' SettingWithCopyWarning on the column subset.
    df = df[['num_workers', 'num_gpus', 'num_cpus_per_worker', 'time_this_iter_s']].copy()
    # The slowest time is computed once and divided element-wise, instead of
    # being recomputed with max() for every row.
    slowest = df['time_this_iter_s'].max()
    df['speedup'] = slowest / df['time_this_iter_s']
    print(df.round(2).to_latex(index=False))
    