In [25]:
import os
import json
import numpy as np
import pandas as pd
from IPython.display import display, HTML


def display_results_table(data):
  """
  Display the results data as an HTML table sorted by best training reward.

  Args:
    data: Anything ``pd.DataFrame`` accepts; in this file, the list of
      per-run dicts returned by ``load_results``. An empty list renders an
      empty table.

  Rows are sorted in descending order of the "Best Episode Reward Training"
  column when that column exists. Floats are rendered with thousands
  separators and two decimals instead of scientific notation. Cell contents
  are emitted without HTML escaping (``escape=False``).
  """
  # Convert data to a DataFrame and sort it by Best Episode Reward
  df = pd.DataFrame(data)
  if "Best Episode Reward Training" in df.columns:
    df = df.sort_values(by="Best Episode Reward Training", ascending=False)

  # Scope the float format to this rendering only: option_context restores
  # whatever format was configured before (even if rendering raises), rather
  # than resetting the global option to the pandas default.
  with pd.option_context("display.float_format", "{:,.2f}".format):
    table_html = df.to_html(index=False, escape=False, border=1)

  # Display the table in Jupyter Notebook
  display(HTML(table_html))


def _load_run_record(folder_path):
  """Build one results-table row from config.json/results.json in folder_path."""
  with open(os.path.join(folder_path, "config.json"), "r", encoding="utf-8") as f:
    config = json.load(f)
  with open(os.path.join(folder_path, "results.json"), "r", encoding="utf-8") as f:
    results = json.load(f)

  # np.max raises ValueError on an empty sequence, so an empty reward list is
  # treated exactly like a missing one instead of discarding the whole run.
  training_rewards = results.get("all_rewards") or [0]
  eval_rewards = results.get("eval_rewards")

  return {
      "Gen": results.get("gen", ""),
      "Model": config["model"].upper(),
      "Params": config.get("model_params", ""),
      "Environment": config.get("environment", "N/A"),
      "Episodes": config["episodes"],
      "Average Episode Time": round(results.get("average_time", 0), 2),
      "Reward Strategy": config.get("reward_strategy", "default"),
      "Average Reward": results.get("average_reward", 0),
      "Reward Variance": results.get("variance_in_rewards", 0),
      "Best Episode Reward Training": np.max(training_rewards),
      "Best Episode Reward Evaluation": np.max(eval_rewards) if eval_rewards else "",
      "Eval Landing Success Rate": results.get("eval_success_rate", ""),
      "Results Folder": folder_path,
  }


def load_results(results_dir="results"):
  """
  Load results from all subdirectories in the specified results directory.

  Each subdirectory is expected to hold a ``config.json`` (with at least the
  "model" and "episodes" keys) and a ``results.json``. Subdirectories that
  cannot be read or parsed are skipped with a ``RuntimeWarning`` so that one
  broken run does not hide the others.

  Args:
    results_dir: Directory whose immediate subdirectories are scanned.

  Returns:
    A list of dicts, one per successfully loaded run, in sorted folder-name
    order. Missing or empty reward lists yield 0 for the best training reward
    and "" for the best evaluation reward.

  Raises:
    OSError: If ``results_dir`` itself cannot be listed (e.g. it does not exist).
  """
  import warnings

  data = []
  # Sorted so the returned order does not depend on the filesystem.
  for folder in sorted(os.listdir(results_dir)):
    folder_path = os.path.join(results_dir, folder)
    if not os.path.isdir(folder_path):
      continue
    try:
      data.append(_load_run_record(folder_path))
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
      # Best-effort loading: report the skipped folder instead of silently
      # dropping it. json.JSONDecodeError is a ValueError subclass.
      warnings.warn(
          f"Skipping {folder_path}: {type(exc).__name__}: {exc}",
          RuntimeWarning,
          stacklevel=2,
      )

  return data


# Collect every run stored under "results" and render the summary table
results_data = load_results(results_dir="results")
display_results_table(data=results_data)

Gen,Model,Params,Environment,Episodes,Average Episode Time,Reward Strategy,Average Reward,Reward Variance,Best Episode Reward Training,Best Episode Reward Evaluation,Eval Landing Success Rate,Results Folder
V3,TOTD,"{'alpha': 0.1, 'gamma': 0.96, 'lambda_a': 0.85}",LunarLanderContinuous-v3,100,0.04,default,-220.82,15153.59,87.67,40.77,0.03,results/TOTD-20241125-143216
V3,TRPO,"{'gamma': 0.99, 'gae_lambda': 0.95, 'target_kl': 0.01, 'net_arch': [64, 64]}",LunarLanderContinuous-v3,20,2.33,default,-127.3,19582.31,61.72,-7.24,0.01,results/TRPO-20241125-143552
V3,TRPO,"{'gamma': 0.98, 'gae_lambda': 0.9, 'target_kl': 0.02, 'net_arch': [64, 64]}",LunarLanderContinuous-v3,10,1.19,default,-204.86,38534.25,50.92,46.1,0.0,results/TRPO-20241125-143418
V3,TOTD,"{'alpha': 0.1, 'gamma': 0.96, 'lambda_a': 0.85}",LunarLanderContinuous-v3,20,0.02,default,-187.79,13995.15,48.2,-8.55,0.04,results/TOTD-20241125-143549
V3,TOTD,"{'alpha': 0.1, 'gamma': 0.96, 'lambda_a': 0.85}",LunarLanderContinuous-v3,100,0.05,default,-189.5,15922.33,37.25,8.21,0.04,results/TOTD-20241125-143214
V3,TRPO,"{'gamma': 0.99, 'gae_lambda': 0.95, 'target_kl': 0.01, 'net_arch': [64, 64]}",LunarLanderContinuous-v3,20,2.22,default,-86.26,8594.58,27.51,21.54,0.04,results/TRPO-20241125-143936
V3,TRPO,"{'gamma': 0.96, 'gae_lambda': 0.85, 'target_kl': 0.03, 'net_arch': [64, 64]}",LunarLanderContinuous-v3,10,1.33,default,-157.89,17410.2,22.63,65.98,0.01,results/TRPO-20241125-143821
V3,TRPO,"{'gamma': 0.98, 'gae_lambda': 0.9, 'target_kl': 0.02, 'net_arch': [64, 64]}",LunarLanderContinuous-v3,10,1.19,default,-209.99,27964.81,8.97,-36.27,0.0,results/TRPO-20241125-143751
V3,TOTD,"{'alpha': 0.1, 'gamma': 0.97, 'lambda_a': 0.8}",LunarLanderContinuous-v3,100,0.03,default,-193.78,15789.89,5.2,-18.22,0.04,results/TOTD-20241125-143211
V3,TOTD,"{'alpha': 0.1, 'gamma': 0.97, 'lambda_a': 0.8}",LunarLanderContinuous-v3,10,0.02,default,-231.71,19493.77,-0.51,-19.14,0.07,results/TOTD-20241125-143356
