In [None]:
from pathlib import Path
import os
from typing import List, Dict, Tuple
import tensorboard as tb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.python.summary.summary_iterator import summary_iterator

In [None]:
def extract_value_name(summary):
    """Return the tag of the first value stored in a summary event."""
    return summary.summary.value[0].tag

def extract_env_step(summary):
    """Return the step recorded on a summary event."""
    return summary.step

def extract_scalar_value(summary):
    """Return the ``simple_value`` of the first value stored in a summary event."""
    return summary.summary.value[0].simple_value

def extract_tb_results(run_dir: "str | os.PathLike[str]",
                       run_list: List,
                       stat_name1: str,
                       stat_name2: str) -> \
                           Dict[str, Tuple[List, List, List]]:
    """Collect two scalar series from the event file of every run.

    Args:
        run_dir: Directory holding one sub-directory per run. Strings and
            ``Path`` objects are both accepted.
        run_list: Names of the run sub-directories inside ``run_dir``.
        stat_name1: Tag of the first scalar to collect. The step of each
            matching event is recorded alongside it.
        stat_name2: Tag of the second scalar to collect.

    Returns:
        A dict mapping each run name to ``(steps, stat1, stat2)``, where
        ``steps`` and ``stat1`` are appended together and ``stat2`` is
        appended independently.

    Raises:
        AssertionError: If a run directory does not contain exactly one entry.
    """
    # Coerce so that a plain string works with the "/" joins below.
    run_dir = Path(run_dir)

    runs = {}
    for run in run_list:    # extract for every seed
        full_run_dir = run_dir / run
        event_files = os.listdir(full_run_dir)
        assert len(event_files) == 1, (
            f"expected exactly one event file in {full_run_dir}, "
            f"found {len(event_files)}"
        )
        full_run_path = full_run_dir / event_files[0]

        steps = []
        stat1, stat2 = [], []   # reward, success
        for summary in summary_iterator(str(full_run_path)):
            if len(summary.summary.value) == 0:
                # has no summary value
                continue

            value_name = extract_value_name(summary)

            # extract stat such as rollout/ep_rew_mean
            if value_name == stat_name1:
                stat1.append(extract_scalar_value(summary))
                steps.append(extract_env_step(summary))

            # Deliberately not `elif`: both tags are checked independently.
            if value_name == stat_name2:
                stat2.append(extract_scalar_value(summary))

        runs[run] = (steps, stat1, stat2)

    return runs

def trunc_runs(runs):
    """Cut every run down to the shortest number of steps found across seeds.

    Most of the time all seeds have the same length, but a run that was
    stopped early is shorter than the others.
    """
    limit = get_min_steps_across_seeds(runs)
    return {
        name: (steps[:limit], stat1[:limit], stat2[:limit])
        for name, (steps, stat1, stat2) in runs.items()
    }

def adjust_steps_stat(steps, stat1, stat2):
    """Prepend a step-0 point to a run so that its graph starts from zero.

    The first value of each statistic is duplicated as the value at step 0.

    Args:
        steps: Sequence (list, tuple or ndarray) of steps.
        stat1: Sequence of values of the first statistic.
        stat2: Sequence of values of the second statistic.

    Returns:
        Three new lists ``(steps, stat1, stat2)``, each one element longer
        than its input.

    Raises:
        IndexError: If ``stat1`` or ``stat2`` is empty.
    """
    # Convert each input on its own: with an ndarray, ``[0] + steps`` would be
    # a broadcast addition rather than a concatenation.
    steps, stat1, stat2 = list(steps), list(stat1), list(stat2)
    return [0] + steps, [stat1[0]] + stat1, [stat2[0]] + stat2

def get_min_steps_across_seeds(runs):
    """Return the smallest number of recorded steps over all runs.

    Prints the step count of every run. With no runs at all, ``np.inf`` is
    returned.
    """
    lengths = []
    for run_name, (steps, _, _) in runs.items():
        print(f"num steps for {run_name}: {len(steps)}")
        lengths.append(len(steps))

    if not lengths:
        return np.inf
    return np.min(lengths).astype(int)

# compute mean / std across seeds
def get_means_std_over_seeds(runs):
    """Compute the per-step mean and standard deviation across runs.

    Args:
        runs: Dict mapping run name to ``(steps, stat1, stat2)``.

    Returns:
        ``((mean1, std1), (mean2, std2))`` as numpy arrays, where the mean and
        ``np.std`` are taken over the runs (axis 0) for each statistic.
    """
    def _mean_std_per_step(rows):
        # Stack one row per run and reduce over the run axis.
        stacked = np.array(rows)
        mean_per_step = np.mean(stacked, axis=0).tolist()
        std_per_step = np.std(stacked, axis=0).tolist()
        return np.array(mean_per_step), np.array(std_per_step)

    stat1_rows = [stat1 for _, stat1, _ in runs.values()]
    stat2_rows = [stat2 for _, _, stat2 in runs.values()]

    return _mean_std_per_step(stat1_rows), _mean_std_per_step(stat2_rows)

In [None]:
# Task registry: environment suite -> task -> list of specific task variants.
# The nesting mirrors the directory layout used below PARENT_LOG_DIR.
all_tasks = {
    "mini_behavior": {
        "installing_printer": [
            "install_printer",
        ],
        "thawing": [
            "thaw_fish",
            "thaw_date",
            "thaw_olive",
            "thaw_any_two",
            "thaw_all",
        ],
        "cleaning_car": [
            "soak_rag",
            "clean_car",
            "clean_rag",
        ],
    },
    "igibson": {
        "igibson": [
            "fruit",
            "fruit_sink",
            "knife",
            "knife_sink",
        ],
    },
}

# Root directory of the TensorBoard logs.
PARENT_LOG_DIR = Path('baselines/ppo/logs/neurips/tb/')

# Scalar tags to extract: first statistic (reward), second statistic (success).
stat_name1, stat_name2 = 'eval/mean_reward', 'eval/success_rate'

In [None]:

# Export one CSV per run to results/<env_suite>/<task>/<specific_task>/trial<seed>.csv.

# Tasks and individual runs that are left out of the export.
SKIPPED_TASKS = {"thawing", "installing_printer"}
SKIPPED_RUNS = {'run-seed1-20240519-21:07_1'}
SEED_PREFIX = "seed"

# Add "igibson" to this list to process that suite as well.
for env_suite in ["mini_behavior"]:
    print(env_suite)

    for task in all_tasks[env_suite]:
        print("processing task:", task)
        if task in SKIPPED_TASKS:
            continue

        print(task)
        task_names = all_tasks[env_suite][task]
        RUN_DIR = PARENT_LOG_DIR / env_suite / task

        for specific_task in task_names:
            print(specific_task)

            SPEC_RUN_DIR = RUN_DIR / specific_task
            RUN_LIST = os.listdir(SPEC_RUN_DIR)      # each dir in this list is a run per seed
            print(f'RUNS FOUND in {SPEC_RUN_DIR}:')
            print(RUN_LIST)
            # dict: run name -> (steps, stat1 values, stat2 values)
            runs = extract_tb_results(SPEC_RUN_DIR, RUN_LIST, stat_name1, stat_name2)
            print(f"Runs found for {specific_task}:", runs.keys())

            # Prepend a step-0 point to every run.
            runs = {run_name: adjust_steps_stat(steps, stat1, stat2)
                    for run_name, (steps, stat1, stat2) in runs.items()}

            # Truncation (trunc_runs) is not applied here: every run is
            # exported at its full length.
            truncate_runs = runs
            # NOTE(review): these aggregates are not used by the CSV export
            # below, and stacking the runs requires equal lengths.
            (rew_means, rew_stds), (suc_means, suc_stds) = get_means_std_over_seeds(truncate_runs)

            output_dir = Path("results") / env_suite / task / specific_task

            # create output csv
            for run in runs:
                print(f"processing run:{run}")
                if run in SKIPPED_RUNS:
                    continue

                steps, rew, suc = runs[run]
                # Build the frame in one step; joining series on the index
                # would duplicate rows whenever a timestep value repeats.
                output = pd.DataFrame(
                    {"average_reward": rew, "average_success": suc},
                    index=pd.Index(steps, name="timestep"),
                )

                # Run names look like "run-seed<N>-<date>-<time>_<k>"; take
                # everything after the prefix so multi-digit seeds work too.
                seed_name = run.split("-")[1]
                seed = seed_name[len(SEED_PREFIX):]
                assert seed_name.startswith(SEED_PREFIX) and seed, \
                    f"cannot parse seed from run name {run!r}"
                fname = f"trial{seed}.csv"
                output_dir.mkdir(parents=True, exist_ok=True)
                output.to_csv(output_dir / fname)

        print("-----------")

In [None]:
# Display the DataFrame built for the last run written by the export loop above.
output

# CLEANING CAR

In [None]:
# SETUP: select which suite / task / specific task the cells below work on.
env_suite, task, specific_task = 'mini_behavior', 'cleaning_car', 'clean_car'


In [None]:
# extract
# Derive the run directory and run list from the SETUP variables instead of
# reusing the RUN_LIST left behind by the export loop, which belongs to
# whichever task that loop processed last.
SPEC_RUN_DIR = PARENT_LOG_DIR / env_suite / task / specific_task
RUN_LIST = os.listdir(SPEC_RUN_DIR)      # each dir in this list is a run per seed

# dict: run name -> (steps, stat1 values, stat2 values)
runs = extract_tb_results(SPEC_RUN_DIR, RUN_LIST, stat_name1, stat_name2)
print("Runs found:", runs.keys())

# Report the length of the first run found rather than a hard-coded run name.
if runs:
    first_run = next(iter(runs))
    print("Number of (steps, rew, success) to plot:", len(runs[first_run][0]))
else:
    print(f"No runs found in {SPEC_RUN_DIR}")

In [None]:
# Prepend a step-0 point to every run. The result gets its own name so that
# re-running this cell does not prepend another point to `runs`.
adjusted_runs = {run_name: adjust_steps_stat(steps, stat1, stat2)
                 for run_name, (steps, stat1, stat2) in runs.items()}

# `trunc_runs` is the helper function; keep its result under a different name
# so the function is not overwritten.
truncated_runs = trunc_runs(adjusted_runs)

# steps should be the same across all runs, take the first
steps = next(iter(truncated_runs.values()))[0]
(rew_means, rew_stds), (suc_means, suc_stds) = get_means_std_over_seeds(truncated_runs)

In [None]:
# Mean of the first statistic across seeds, with a +/- one std band.
fig, ax = plt.subplots()
ax.plot(steps, rew_means)
ax.fill_between(steps, rew_means - rew_stds, rew_means + rew_stds, color='b', alpha=.1)
ax.set(xlabel="timestep", ylabel=stat_name1, title=f"{specific_task}: {stat_name1}");

In [None]:
# Mean of the second statistic across seeds, with a +/- one std band.
fig, ax = plt.subplots()
ax.plot(steps, suc_means)
ax.fill_between(steps, suc_means - suc_stds, suc_means + suc_stds, color='b', alpha=.1)
ax.set(xlabel="timestep", ylabel=stat_name2, title=f"{specific_task}: {stat_name2}");

# CLEAN RAG

# INSTALL PRINTER

# THAW FISH

# IGIBSON KNIFE

# IGIBSON FRUIT

# SOAK RAG