In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# Executive Summary

This notebook aims to recover the meaning of the logged data from `04_26_2024` to `04_28_2024`, as well as `11_01_2024`.

We start with:
- `04_26_2024/exp_0.py`: tunes PDA on GridWorld and LunarLander with RKHS and NN, then also runs PPO and DQN (10 runs)
- `04_26_2024/exp_1.py`: tunes PDA on InvertedPendulum, then also runs PPO and PPDG (26 runs)
- `04_27_2024/exp_0.py`: tunes the PDA stepsize on LQR (14 runs)

In [None]:
# Root directory of the experiment logs. Defaults to the original author's
# local checkout; set the RL_LOGS_DIR environment variable to point the
# notebook at a different location without editing this cell.
path = os.environ.get("RL_LOGS_DIR", "/Users/calebju/Code/RL-general-action-state/logs")


def load_final_rewards(exp_dir, n_runs, log_root=path):
    """Collect the last logged episode reward of every run of one experiment.

    For each run index ``i`` in ``range(n_runs)`` this reads
    ``<log_root>/<exp_dir>/run_<i>/seed=0.csv`` and takes the final entry of
    its ``'episode rewards'`` column.

    Parameters
    ----------
    exp_dir : str
        Experiment directory relative to ``log_root``, e.g. ``"04_26_2024/exp_0"``.
    n_runs : int
        Number of ``run_<i>`` sub-directories to read (indices 0..n_runs-1).
    log_root : str, optional
        Root directory of the logs; defaults to the module-level ``path``.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_runs``; entry ``i`` is the last value of the
        ``'episode rewards'`` column for run ``i``.

    Raises
    ------
    FileNotFoundError
        If a run's CSV file does not exist.
    KeyError
        If a CSV has no ``'episode rewards'`` column.
    IndexError
        If a CSV has no data rows.
    """
    final_rewards = np.zeros(n_runs)
    for run_id in range(n_runs):
        csv_path = os.path.join(log_root, exp_dir, "run_%d" % run_id, "seed=0.csv")
        run_log = pd.read_csv(csv_path)
        final_rewards[run_id] = run_log['episode rewards'].iloc[-1]
    return final_rewards


# 04_26_2024/exp_0.py
data_1 = load_final_rewards("04_26_2024/exp_0", 10)

# 04_26_2024/exp_1.py
data_2 = load_final_rewards("04_26_2024/exp_1", 26)

# 04_27_2024/exp_0.py
data_3 = load_final_rewards("04_27_2024/exp_0", 14)

In [None]:
# Two-column text table for 04_26_2024/exp_0: run id and the score stored in
# data_1 for that run, printed in scientific notation.
print("Experiment 04_26_2024/exp_0.py\n")
print("id|    score\n------------")
for run_id, score in enumerate(data_1):
    print("%d |%.2e" % (run_id, score))

Note that we do not tune stepsizes here; we only try different Bregman divergences and function approximators.

In [None]:
# Text table for 04_26_2024/exp_1: run id and the score stored in data_2.
# A "*" is appended to every row whose score equals the minimum over the first
# NUM_LEADING_RUNS_EXP_1 runs.
# NOTE(review): presumably the first 24 runs are the PDA tuning runs and the
# last two of the 26 are the PPO/PPDG runs — confirm against exp_1.py.
NUM_LEADING_RUNS_EXP_1 = 24

# Loop-invariant reference value, computed once instead of on every row.
min_leading_score_exp_1 = np.min(data_2[:NUM_LEADING_RUNS_EXP_1])

print("Experiment 04_26_2024/exp_1.py\n")
print("id |    score\n------------")
for run_id, score in enumerate(data_2):
    # The comparison runs over all rows, so a later run with an identical
    # score would be starred as well (same as the original behaviour).
    marker = "*" if score == min_leading_score_exp_1 else ""
    print("%d |%.2e %s" % (run_id, score, marker))

The best-performing PDA for solving Inverted Pendulum has **no policy noise, 10 PE epochs, PO eta = 1, and `pda_2` stepsize**.

In [None]:
# Text table for 04_27_2024/exp_0: run id and the score stored in data_3.
# A "*" is appended to every row whose score equals the minimum over the first
# NUM_LEADING_RUNS_EXP_3 runs.
# NOTE(review): the reason the last two of the 14 runs are excluded from the
# minimum is not visible here — presumably they are not PDA tuning runs;
# confirm against 04_27_2024/exp_0.py.
NUM_LEADING_RUNS_EXP_3 = 12

# Loop-invariant reference value, computed once instead of on every row.
min_leading_score_exp_3 = np.min(data_3[:NUM_LEADING_RUNS_EXP_3])

print("Experiment 04_27_2024/exp_0.py\n")
print("id |    score\n------------")
for run_id, score in enumerate(data_3):
    # The comparison runs over all rows, so a later run with an identical
    # score would be starred as well (same as the original behaviour).
    marker = "*" if score == min_leading_score_exp_3 else ""
    print("%d |%.2e %s" % (run_id, score, marker))

The best-performing PDA for solving LQR has **PDA noise of 1 (large), PO eta = 0.001 (medium), and `pda_2` stepsize**.