In [5]:
import os
import json
import numpy as np
import pandas as pd
from IPython.display import display, HTML


def display_results_table(data):
  """
  Display the results data as an HTML table sorted by Landing Success Rate.

  Args:
    data: Anything ``pd.DataFrame`` accepts; the caller in this file passes a
      list of dicts, one per run.

  Rows are ordered by the "Landing Success Rate" column, highest first, when
  that column exists (an empty ``data`` produces no such column, in which
  case the frame is rendered unsorted). Floats are shown with three decimals
  and thousands separators instead of scientific notation.

  Returns:
    None. The table is rendered through IPython's ``display``.
  """
  df = pd.DataFrame(data)
  if "Landing Success Rate" in df.columns:
    df = df.sort_values(by="Landing Success Rate", ascending=False)

  # Pass the float format straight to to_html rather than setting and then
  # resetting the global pandas option: resetting would discard any format
  # the user had configured, and an exception while rendering would leave
  # the override in place.
  table_html = df.to_html(
      index=False,
      escape=False,  # cell text is emitted as-is, not HTML-escaped
      border=1,
      float_format="{:,.3f}".format,
  )

  # Display the table in Jupyter Notebook
  display(HTML(table_html))


def load_results(results_dir="results"):
  """
  Load results from all subdirectories in the specified results directory.

  Every immediate subdirectory of ``results_dir`` is expected to contain a
  ``config.json`` and a ``results.json``. Subdirectories where either file is
  missing, unreadable, not valid JSON, or lacks the required ``"model"`` key
  are skipped with a warning instead of being dropped silently.

  Args:
    results_dir: Directory whose immediate subdirectories are scanned.

  Returns:
    A list of dicts (one per successfully loaded run), ordered by folder
    name, ready to be passed to ``pd.DataFrame``.

  Raises:
    OSError: If ``results_dir`` itself cannot be listed (e.g. it does not
      exist).
  """
  # Local import keeps this block self-contained without touching the
  # file-level import list.
  import warnings

  data = []
  # os.listdir returns entries in arbitrary order; sort for reproducible rows.
  for folder in sorted(os.listdir(results_dir)):
    folder_path = os.path.join(results_dir, folder)
    if not os.path.isdir(folder_path):
      continue

    try:
      with open(os.path.join(folder_path, "config.json"), "r", encoding="utf-8") as f:
        config = json.load(f)
      with open(os.path.join(folder_path, "results.json"), "r", encoding="utf-8") as f:
        results = json.load(f)

      row = {
          "Model": config["model"].upper(),
          "Params": config.get("model_params", ""),
          #"Environment": config.get("environment", "N/A"),
          "Timesteps": config.get("timesteps", ""),
          "Average Step Time": results.get("average_time", 0),
          "Reward Strategy": config.get("reward_strategy", "default"),
          "Average Reward": results.get("average_reward", 0),
          "Reward Variance": results.get("variance_reward", 0),
          "Landing Success Rate": results.get("success_rate", 0.0),
          "Results Folder": folder_path,
      }
    except (OSError, ValueError, KeyError, AttributeError, TypeError) as exc:
      # OSError: missing/unreadable file; ValueError: invalid JSON or bad
      # encoding; KeyError/AttributeError/TypeError: unexpected JSON shape.
      # Loading stays best-effort, but the skipped run is now reported.
      warnings.warn(
          f"Skipping {folder_path}: {type(exc).__name__}: {exc}",
          stacklevel=2,
      )
      continue

    data.append(row)

  return data


# Collect every run found under "results/" and render the summary table
results_data = load_results(results_dir="results")
display_results_table(data=results_data)

Model,Params,Timesteps,Average Step Time,Reward Strategy,Average Reward,Reward Variance,Landing Success Rate,Results Folder
TD3,{},5000000.0,0.001,energy,179.3,9961.648,0.402,results/td3-20241128-125450
TD3,{},5000000.0,0.002,energy,127.892,11850.872,0.255,results/td3-20241128-170459
SAC,{},5000000.0,0.001,default,143.948,7254.058,0.226,results/sac-20241127-113706
TD3,{},5000000.0,0.001,proximity,142.001,7086.431,0.217,results/td3-20241128-101951
DDPG,{},5000000.0,0.001,proximity,91.425,12800.751,0.199,results/ddpg-20241128-101951
TD3,{},5000000.0,0.001,default,134.802,6428.97,0.178,results/td3-20241127-130132
TD3,"{'buffer_size': 500000, 'batch_size': 128, 'gamma': 0.99, 'tau': 0.01}",5000000.0,0.0,default,132.1,6259.712,0.174,results/TD3-20241127-180235
DDPG,{},5000000.0,0.002,energy,97.0,8896.485,0.14,results/ddpg-20241128-170459
TRPO,{},5000000.0,0.001,proximity,117.001,5310.871,0.12,results/trpo-20241128-101951
DDPG,{},1000000.0,0.001,energy,117.0,5610.954,0.12,results/ddpg-20241127-020518
