In [1]:
import pandas as pd
import numpy as np  
import os
from pathlib import Path
from collections import defaultdict
notebook_dir = Path(os.getcwd())    

# Experiment progress table

## Single transition: NSMDP consists of only two stationary MDPs

### With Notification

| Gym Environment | DDQN | Vanilla MCTS | PAMCTS | AlphaZero | ADA-MCTS | RATs | Pro-OLS | Pro-WLS |
|-----------------|------|--------------|--------|-----------|----------|------|---------|---------|
| CartPole - varying mass        | N/A     |   DONE   |        |           |          |      |         |         |
| CartPole - varying gravity      |    N/A  |   DONE   |        |           |          |      |         |         |
| MountainCar     |    N/A  |              |        |           |          |      |         |         |
| Pendulum        |     N/A |             |        |           |          |      |         |         |
| Acrobot         |   N/A   |             |        |           |          |      |         |         |
| FrozenLake      |   N/A  |       DONE       |        |           |          |      |         |         |
| CliffWalker     |      |              |        |           |          |      |         |         |
| Bridge          |      |   ****           |        |           |          |      |         |         |

### Without Notification


| Gym Environment | DDQN | Vanilla MCTS | PAMCTS | AlphaZero | ADA-MCTS | RATs | Pro-OLS | Pro-WLS |
|-----------------|------|--------------|--------|-----------|----------|------|---------|---------|
| CartPole - varying mass        |      |   DONE   |        |           |          |      |         |         |
| CartPole - varying gravity      |      |   DONE   |        |           |          |      |         |         |
| MountainCar     |      |              |        |           |          |      |         |         |
| Pendulum        |      |              |        |           |          |      |         |         |
| Acrobot         |      |              |        |           |          |      |         |         |
| FrozenLake      |   DONE   |       DONE       |        |           |          |      |         |         |
| CliffWalker     |      |              |        |           |          |      |         |         |
| Bridge          |      |              |        |           |          |      |         |         |


## Continious changes: At each MDP step the NSMDP is updtaded to a new stationary MDP (RATs assumption)

### With Notification

| Gym Environment | DDQN | Vanilla MCTS | PAMCTS | AlphaZero | ADA-MCTS | RATs | Pro-OLS | Pro-WLS |
|-----------------|------|--------------|--------|-----------|----------|------|---------|---------|
| CartPole        |      |              |        |           |          |      |         |         |
| MountainCar     |      |              |        |           |          |      |         |         |
| Pendulum        |      |              |        |           |          |      |         |         |
| Acrobot         |      |              |        |           |          |      |         |         |
| FrozenLake      | DONE     |  DONE            |        |           |          |      |         |         |
| CliffWalker     |      |              |        |           |          |      |         |         |
| Bridge          |      |              |        |           |          |      |         |         |


### Without Notification


| Gym Environment | DDQN | Vanilla MCTS | PAMCTS | AlphaZero | ADA-MCTS | RATs | Pro-OLS | Pro-WLS |
|-----------------|------|--------------|--------|-----------|----------|------|---------|---------|
| CartPole        |      |              |        |           |          |      |         |         |
| MountainCar     |      |              |        |           |          |      |         |         |
| Pendulum        |      |              |        |           |          |      |         |         |
| Acrobot         |      |              |        |           |          |      |         |         |
| FrozenLake      | DONE     |  DONE            |        |           |          |      |         |         |
| CliffWalker     |      |              |        |           |          |      |         |         |
| Bridge          |      |              |        |           |          |      |         |         |




# Vanilla MCTS

## Frozen Lake

### MCTS single transition probability update **without** notification 

Experiment code: experiments/NS_FrozenLake/single_discrete_transition.py

MCTS on NS_FrozenLake. Initial transition prob is [0.7,0.15,0.15], where the first element of the list is the intended direction and the other two are the perpendicular directions. At MDP step t = 0 the simulator updates the transition prob to [p, (1-p)/2,(1-p)/2]. The MCTS planning agent only has access to MDP_0 (transition probs =[0.7,0.15,0.15]) while the "realworld" envirment continues according to [p, (1-p)/2,(1-p)/2].


### Vanilla MCTS single transiton with Negative rewards **without** notification

This trial should be in working order. I use my own reimplementation of MCTS with chance nodes and modify the rewards so that holes have an imediate reward of -1. Ran on only 3000 iterations inst

In [2]:
exp_path = notebook_dir / 'NS_FrozenLake'/ 'results' / "VanillaMCTS_withoutChangNotif_withNegative_Rewards_2024-05-14_02:44:06.csv"
frozen_lake_single_transition_withoutNotification_withNegativeRewars = pd.read_csv(exp_path)
results = frozen_lake_single_transition_withoutNotification_withNegativeRewars.groupby("p")['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = frozen_lake_single_transition_withoutNotification_withNegativeRewars.groupby("p")['total_time'].agg(['mean']) / 60
results["num MCTS iterations"] = frozen_lake_single_transition_withoutNotification_withNegativeRewars.groupby("p")['num_iter'].agg(['mean'])

In [3]:
results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes,num MCTS iterations
p,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.4,-0.76,0.653197,-76.0,100,0.06532,1.99852,3000.0
0.5,-0.64,0.772246,-64.0,100,0.077225,1.700917,3000.0
0.6,-0.24,0.975663,-24.0,100,0.097566,2.324237,3000.0
0.8,0.34,0.945163,34.0,100,0.094516,2.498202,3000.0
0.9,0.72,0.69747,72.0,100,0.069747,2.549106,3000.0
1.0,1.0,0.0,100.0,100,0.0,1.974305,3000.0


### Vanilla MCTS single transiton with Negative rewards **with** notification

In [4]:
exp_path = notebook_dir / 'NS_FrozenLake'/ 'results' / "VanillaMCTS_withChangNotif_withNegative_Rewards_2024-05-14_12:14:23.csv"
frozen_lake_single_transition_withNotification_withNegativeRewars = pd.read_csv(exp_path)
results = frozen_lake_single_transition_withNotification_withNegativeRewars.groupby("p")['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = frozen_lake_single_transition_withNotification_withNegativeRewars.groupby("p")['total_time'].agg(['mean']) / 60
results["num MCTS iterations"] = frozen_lake_single_transition_withNotification_withNegativeRewars.groupby("p")['num_iter'].agg(['mean'])

In [5]:
results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes,num MCTS iterations
p,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.4,-0.74,0.675995,-74.0,100,0.0676,1.982085,3000.0
0.5,-0.76,0.653197,-76.0,100,0.06532,2.025658,3000.0
0.6,-0.18,0.988622,-18.0,100,0.098862,2.247237,3000.0
0.8,0.44,0.902522,44.0,100,0.090252,2.459791,3000.0
0.9,0.68,0.736906,68.0,100,0.073691,2.472721,3000.0
1.0,1.0,0.0,100.0,100,0.0,1.976788,3000.0


### Vanilla MCTS continuous transitions - negative rewards - without notification

In [6]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "MCTS_ContinuousUpdate_withoutChangNotif_2024-05-21_16:11:11.csv"
frozen_lake_continuous_update_withoutNotification = pd.read_csv(exp_path)
frozen_lake_continuous_update_withoutNotification.columns



Index(['sample_id', 'reward', 'experiment_name', 'gamma', 'c', 'num_iter',
       'total_time', 'seed'],
      dtype='object')

In [7]:
results = frozen_lake_continuous_update_withoutNotification.groupby("num_iter")['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = frozen_lake_continuous_update_withoutNotification.groupby("num_iter")['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25,-0.84,0.54532,-84.0,100,0.054532,0.02129
100,-0.86,0.512865,-86.0,100,0.051286,0.098224
1000,-0.88,0.477367,-88.0,100,0.047737,3.435035
3000,-0.9,0.438086,-90.0,100,0.043809,24.214279


### Vanilla MCTS continuous transitions - positive rewards - with notification

In [8]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "MCTS_ContinuousUpdate_with_ChangNotif_2024-05-17_22:43:11.csv"
MCTS_frozen_lake_continuous_update_with_Notification = pd.read_csv(exp_path)

results = MCTS_frozen_lake_continuous_update_with_Notification.groupby("num_iter")['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = MCTS_frozen_lake_continuous_update_with_Notification.groupby("num_iter")['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25,0.1,0.301511,10.0,100,0.030151,0.028349
50,0.12,0.326599,12.0,100,0.03266,0.045069
100,0.17,0.377525,17.0,100,0.037753,0.07181
500,0.18,0.386123,18.0,100,0.038612,0.285879
1000,0.22,0.416333,22.0,100,0.041633,0.55585
3000,0.27,0.446196,27.0,100,0.04462,1.333811


### DDQN Single discrete change - without notification

In [9]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "DDQN_singe_discrete_change_withoutChangNotif_2024-05-17_22:54:55.csv"

DDQN_frozen_lake_single_update_with_Notification = pd.read_csv(exp_path)

results = DDQN_frozen_lake_single_update_with_Notification.groupby("p")["reward"].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = DDQN_frozen_lake_single_update_with_Notification.groupby("p")['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
p,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.4,0.22,0.416333,22.0,100,0.041633,0.011522
0.5,0.47,0.501614,47.0,100,0.050161,0.005588
0.6,0.66,0.476095,66.0,100,0.04761,0.000817
0.8,0.91,0.287623,91.0,100,0.028762,0.001661
0.9,0.93,0.256432,93.0,100,0.025643,0.002758
1.0,0.0,0.0,0.0,100,0.0,0.028574


### DDQN Continuos discrete change - without notification

In [10]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "DDQN_ContinuousUpdate_withoutChangNotif_2024-05-17_22:55:12.csv"

DDQN_frozen_lake_continuous_update_with_Notification = pd.read_csv(exp_path)
results = DDQN_frozen_lake_continuous_update_with_Notification["reward"].agg(['mean', 'std','sum','count'])


results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = DDQN_frozen_lake_continuous_update_with_Notification["total_time"].agg(['mean']) / 60

results = results.to_frame().T
results

Unnamed: 0,mean,std,sum,count,std error,Mean Episode Time in Minutes
reward,0.27,0.446196,27.0,100.0,0.04462,"mean 0.015483 Name: total_time, dtype: float64"


### AlphaZero Single update with change notification -- these aer sus..



In [11]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "AlphaZero_singe_discrete_change_withoutChangNotif_2024-05-24_01:14:09.csv"

AlphaZero_frozen_lake_single_update_with_Notification = pd.read_csv(exp_path)
AlphaZero_frozen_lake_single_update_with_Notification
results = AlphaZero_frozen_lake_single_update_with_Notification.groupby(["p","num_iter"])["reward"].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])

results["Mean Episode Time in Minutes"] = AlphaZero_frozen_lake_single_update_with_Notification.groupby(["p","num_iter"])['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
p,num_iter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.4,25,0.0,0.0,0.0,100,0.0,6.393576
0.4,100,0.0,0.0,0.0,100,0.0,25.588154
0.4,1000,-0.12,0.383498,-12.0,100,0.03835,237.566912
0.4,3000,-0.92,0.393893,-92.0,100,0.039389,23.771151
0.5,25,0.0,0.0,0.0,100,0.0,6.396075
0.5,100,0.0,0.0,0.0,100,0.0,25.58568
0.5,1000,-0.12,0.326599,-12.0,100,0.03266,238.178359
0.5,3000,-0.64,0.772246,-64.0,100,0.077225,38.120951
0.6,25,0.0,0.0,0.0,100,0.0,6.450283
0.6,100,0.0,0.0,0.0,100,0.0,25.687305


### AlphaZero Continous update with change nofication

In [12]:
exp_path = notebook_dir / "NS_FrozenLake"/ "results" / "alphazero_frozenlake_continous_update_with_change_notif_config_2024-05-29.csv"
AlphaZero_frozen_lake_continous_update_with_Notification = pd.read_csv(exp_path)
results = AlphaZero_frozen_lake_continous_update_with_Notification.groupby(["num_iter"])["reward"].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = AlphaZero_frozen_lake_continous_update_with_Notification.groupby(["num_iter"])['total_time'].agg(['mean']) / 60

results


Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25,0.0,0.0,0.0,100,0.0,0.800562
100,0.0,0.0,0.0,100,0.0,2.925588
1000,-0.97,0.171447,-97.0,100,0.017145,9.348276
3000,-0.99,0.1,-99.0,100,0.01,18.358612


### PAMCTS continous update without change notification 



In [13]:
exp_path = notebook_dir /"NS_FrozenLake"/ "results" / "FrozenLake_PAMCTS_continous_update_without_change_notif_for_real_2024-05-28.csv"
FrozenLake_PAMCTS_continous_update_without_change_notif_for_real = pd.read_csv(exp_path)
# FrozenLake_PAMCTS_continous_update_without_change_notif_for_real
results = FrozenLake_PAMCTS_continous_update_without_change_notif_for_real.groupby(["num_iter","alpha"])["reward"].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = FrozenLake_PAMCTS_continous_update_without_change_notif_for_real.groupby(["num_iter","alpha"])['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
25,0.25,-0.98,0.2,-98.0,100,0.02,0.027678
25,0.5,-0.94,0.342893,-94.0,100,0.034289,0.026253
25,75.0,-0.96,0.281411,-96.0,100,0.028141,0.024763
100,0.25,-1.0,0.0,-100.0,100,0.0,0.102335
100,0.5,-0.98,0.2,-98.0,100,0.02,0.086687
100,75.0,-1.0,0.0,-100.0,100,0.0,0.077567
1000,0.25,-0.98,0.2,-98.0,100,0.02,1.192151
1000,0.5,-0.9,0.438086,-90.0,100,0.043809,0.684687
1000,75.0,-1.0,0.0,-100.0,100,0.0,0.280815
3000,0.25,-0.98,0.2,-98.0,100,0.02,2.078647


## Cartpole

In [14]:
# hyper params
exp_path = "/Users/--/Documents/--/Research/ns_gym/experiments/NS_Cartpole/results/CartpoleVanillaMCTS_withChangNotif_2024-05-21_20:55:29.csv"

cartpole_hyperparams = pd.read_csv(exp_path)

params = cartpole_hyperparams.groupby(["gamma","c","num_iter"])['reward'].agg(['mean', 'std','sum','count'])

max_reward = params["mean"].max()

params

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,sum,count
gamma,c,num_iter,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.1,0.01,100,464.0,132.158995,4640.0,10
0.1,0.1,100,352.6,136.668455,3526.0,10
0.1,0.5,100,500.5,146.513746,5005.0,10
0.1,1.0,100,359.0,150.766191,3590.0,10
0.1,1.414214,100,392.6,133.230127,3926.0,10
0.1,2.0,100,602.5,556.098363,6025.0,10
0.25,0.01,100,450.2,129.959652,4502.0,10
0.25,0.1,100,694.5,476.428903,6945.0,10
0.25,0.5,100,631.8,456.64399,6318.0,10
0.25,1.0,100,547.3,164.643757,5473.0,10


In [15]:

max_reward_params = params[params["mean"] == max_reward]

max_reward_params



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,sum,count
gamma,c,num_iter,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,1.414214,100,750.2,349.191065,7502.0,10


### Single transition **without** notification.

The after the first MDP decsion epoch the pole mass changes from 0.1 kg to a mass from [0.1, 1.0, 1.2, 1.3, 1.5]. The MCTS agent has access to a the MDP where pole mass == 0.1 but in the true env. the pole mass has changed. Results were generated using this file: "experiments/NS_Cartpole/NSCartpole_MCTS_single_discrete_transition_without_change_notification.py"



In [16]:
exp_path = notebook_dir / 'NS_Cartpole' / 'results' / 'CartpoleVanillaMCTS_withoutChangNotif_2024-05-21_21:50:47.csv'
cartpole_single_transition_withoutNotification = pd.read_csv(exp_path)
results = cartpole_single_transition_withoutNotification.groupby(["polemass","num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = cartpole_single_transition_withoutNotification.groupby(["polemass","num_iter"])['total_episode_time'].agg(['mean']) / 60


In [17]:
results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
polemass,num_iter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,50,708.08,522.140838,35404.0,50,73.841865,1.47242
0.1,100,592.36,264.702818,29618.0,50,37.434632,2.44228
0.1,300,660.86,352.724961,33043.0,50,49.882842,8.151678
1.0,50,667.9,393.794531,33395.0,50,55.690957,1.386213
1.0,100,718.64,480.583649,35932.0,50,67.964791,2.913864
1.0,300,600.9,337.124046,30045.0,50,47.67654,7.446955
1.2,50,717.7,447.498136,35885.0,50,63.285793,1.420852
1.2,100,629.4,337.100845,31470.0,50,47.673259,2.625504
1.2,300,595.64,374.513876,29782.0,50,52.96426,7.026169
1.3,50,552.68,271.529713,27634.0,50,38.4001,1.124383


### Single transition with notification

After the first decision epoch the pole mass is updated. The MCTS agent then hass access to the most up to date MDP. 

In [18]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartpoleVanillaMCTS_withChangNotif_2024-05-21_21:34:20.csv"
cartpole_single_transition_with_notification = pd.read_csv(exp_path)
results = cartpole_single_transition_with_notification.groupby(["polemass","num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = cartpole_single_transition_with_notification.groupby(["polemass","num_iter"])['total_episode_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
polemass,num_iter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,50,745.06,430.080268,37253.0,50,60.822535,1.531159
0.1,100,608.96,410.88148,30448.0,50,58.107416,2.466585
0.1,300,611.78,239.51254,30589.0,50,33.872188,6.460949


### MCTS Single transition with notification - changing gravity among [9.8, 20,50,500 ]

In [19]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartpoleVanillaMCTS_Changing_Gravity_withoutChangNotif_2024-05-16_22:43:15.csv"
cartpole_single_transition_with_notification = pd.read_csv(exp_path)
results = cartpole_single_transition_with_notification.groupby(["gravity","num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = cartpole_single_transition_with_notification.groupby(["gravity","num_iter"])['total_episode_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
gravity,num_iter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9.8,25,725.38,368.907924,36269.0,50,52.171459,0.694684
9.8,50,980.98,517.912667,49049.0,50,73.243912,1.939144
9.8,75,1096.56,734.1029,54828.0,50,103.817828,3.283458
9.8,100,1380.5,1107.26409,69025.0,50,156.590789,5.542224
9.8,200,1300.5,907.372125,65025.0,50,128.321797,10.031157
9.8,300,2248.9,1561.463537,112445.0,50,220.824291,18.94741
20.0,25,427.1,260.631826,21355.0,50,36.858906,0.419422
20.0,50,469.06,328.299229,23453.0,50,46.428522,0.932704
20.0,75,517.84,361.52028,25892.0,50,51.126688,1.556276
20.0,100,573.82,396.921762,28691.0,50,56.133214,2.312814


### MCTS Continuos transition with notification - increasing pole mass by 0.1 each time step. 

In [20]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_MCTS_IncreasingMass_trail_1_2024-05-27.csv"
cartpole_continuous_update_with_notification = pd.read_csv(exp_path)

results = cartpole_continuous_update_with_notification.groupby(["num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])

results["Mean Episode Time in Minutes"] = cartpole_continuous_update_with_notification.groupby(["num_iter"])['total_time'].agg(['mean']) / 60
results


Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
50,757.7,466.24533,113655.0,150,38.068772,2.065447
100,693.926667,488.168046,104089.0,150,39.858754,3.893496
300,656.5,438.856931,98475.0,150,35.832518,9.777991


### MCTS Continous transition without notification - increasing pole mass by 0.1 each time step

In [21]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_MCTS_IncreasingMass_trial_1_without_notif_2024-05-28.csv"
cartpole_continuous_update_without_notification = pd.read_csv(exp_path)
results = cartpole_continuous_update_without_notification.groupby(["num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])

results["Mean Episode Time in Minutes"] = cartpole_continuous_update_without_notification.groupby(["num_iter"])['total_time'].agg(['mean']) / 60

results

Unnamed: 0_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
50,147.946667,42.515498,22192.0,150,3.471376,0.308773
100,150.02,35.025297,22503.0,150,2.859804,0.631876
300,149.033333,36.349241,22355.0,150,2.967903,1.78467


### DDQN Continous transition without notification - increasing pole mass by 0.1 each time step

In [22]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_DDQN_IncreasingMass_trial_1_2024-05-28.csv"
DDQN_cartpole_continuous_update_without_notification = pd.read_csv(exp_path)
results = DDQN_cartpole_continuous_update_without_notification["reward"].agg(['mean', 'std','sum','count'])

results["std error"] = results["std"] / np.sqrt(results["count"])

results["Mean Episode Time in Minutes"] = DDQN_cartpole_continuous_update_without_notification["total_time"].mean() / 60    

results.to_frame().T

Unnamed: 0,mean,std,sum,count,std error,Mean Episode Time in Minutes
reward,101.48,33.449094,15222.0,150.0,2.731107,0.017351


### PAMCTS continous transition without notification -- increasing pole mase by 0.1 each time step 

In [33]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_PAMCTS_IncreasingMass_trial_1_without_change_notif_2024-05-28.csv"
PAMCTS_cartpole_continuous_update_without_notification = pd.read_csv(exp_path)
results = PAMCTS_cartpole_continuous_update_without_notification.groupby(["num_iter","alpha"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = PAMCTS_cartpole_continuous_update_without_notification.groupby(["num_iter","alpha"])['total_time'].agg(['mean']) / 60

results

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
50,0.25,101.213333,32.548897,15182.0,150,2.657606,0.20963
50,0.5,101.7,31.482551,15255.0,150,2.57054,0.210568
50,75.0,98.026667,33.285413,14704.0,150,2.717743,0.199809
100,0.25,100.32,33.612741,15048.0,150,2.744469,0.408065
100,0.5,99.8,31.413672,14970.0,150,2.564916,0.406928
100,75.0,101.933333,31.541528,15290.0,150,2.575355,0.423163
300,0.25,98.46,33.17548,14769.0,150,2.708767,1.23585
300,0.5,101.126667,33.110816,15169.0,150,2.703487,1.243384
300,75.0,100.36,31.670341,15054.0,150,2.585872,0.93721


### PAMCTS continous transition with notification -- increasing pole mase by 0.1 each time step 

In [35]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_PAMCTS_IncreasingMass_trial_1_with_change_notif_2024-05-28.csv"
PAMCTS_cartpole_continuous_update_without_notification = pd.read_csv(exp_path)
results = PAMCTS_cartpole_continuous_update_without_notification.groupby(["num_iter","alpha"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = PAMCTS_cartpole_continuous_update_without_notification.groupby(["num_iter","alpha"])['total_time'].agg(['mean']) / 60

results


Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
num_iter,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
50,0.25,99.966667,32.549274,14995.0,150,2.657637,0.196147
50,0.5,104.526667,29.899765,15679.0,150,2.441306,0.206402
50,75.0,96.273333,33.880345,14441.0,150,2.766319,0.184935
100,0.25,101.873333,32.155724,15281.0,150,2.625504,0.393646
100,0.5,95.553333,32.399465,14333.0,150,2.645405,0.363645
100,75.0,97.746667,33.255203,14662.0,150,2.715276,0.383951
300,0.25,101.066667,31.948504,15160.0,150,2.608584,1.204882
300,0.5,99.593333,32.60051,14939.0,150,2.66182,1.155824
300,75.0,99.533333,32.046876,14930.0,150,2.616616,0.849455


### DDQN single transition without notif. 

In [25]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_DDQN_single_transition_increasingMass_trial_1_2024-05-30.csv"
DDQN_cartpole_single_transition_without_notification = pd.read_csv(exp_path)
results = DDQN_cartpole_single_transition_without_notification.groupby(["mass"])['reward'].agg(['mean', 'std','sum','count'])
results['std error'] = results['std'] / np.sqrt(results['count'])
results


Unnamed: 0_level_0,mean,std,sum,count,std error
mass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.1,136.606667,3.856306,20491.0,150,0.314866
1.0,135.533333,3.414668,20330.0,150,0.278806
1.2,135.853333,3.328516,20378.0,150,0.271772
1.3,136.026667,3.172759,20404.0,150,0.259055
1.5,135.193333,3.272207,20279.0,150,0.267175


### PAMCTS single transition without notifc

In [30]:
exp_path = notebook_dir / "NS_Cartpole" / "results" / "CartPole_PAMCTS_IncreasingMass_trial_without_change_notif_2024-05-30.csv"
PAMCTS_cartpole_single_transition_without_notification = pd.read_csv(exp_path)
print(PAMCTS_cartpole_single_transition_without_notification.columns)
results = PAMCTS_cartpole_single_transition_without_notification.groupby(["mass","alpha","num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results['std error'] = results['std'] / np.sqrt(results['count'])
results

Index(['mass', 'sample_id', 'reward', 'experiment_name', 'num_iter',
       'total_time', 'seed', 'c', 'gamma', 'alpha'],
      dtype='object')


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,sum,count,std error
mass,alpha,num_iter,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,0.25,50,136.146667,3.60534,20422.0,150,0.294375
0.1,0.25,100,136.153333,3.87166,20423.0,150,0.31612
0.1,0.25,300,136.36,3.722379,20454.0,150,0.303931
0.1,0.5,50,136.706667,3.627906,20506.0,150,0.296217
0.1,0.5,100,136.22,3.654307,20433.0,150,0.298373
0.1,0.5,300,136.553333,3.63897,20483.0,150,0.297121
0.1,75.0,50,136.073333,3.789949,20411.0,150,0.309448
0.1,75.0,100,136.406667,3.57864,20461.0,150,0.292195
0.1,75.0,300,136.52,3.282494,20478.0,150,0.268014
1.0,0.25,50,135.953333,3.336471,20393.0,150,0.272422


# Mountain Car

## Hyper param search



In [27]:
exp_path = notebook_dir / "NS_Mountain_Car"/ "results" / "MCTS_MountainCar_hyperparameter_search_2024-05-23.csv"

mountain_car_hyperparams = pd.read_csv(exp_path)
mountain_car_hyperparams


Unnamed: 0,sample_id,reward,experiment_name,num_iter,total_time,seed,c,gamma
0,70,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,126.618293,42509,0.8,0.80
1,37,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,126.916794,48713,0.8,0.50
2,47,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,126.931805,56092,0.5,0.99
3,9,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,127.028807,58505,0.5,0.99
4,32,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,127.081158,32573,0.8,0.80
...,...,...,...,...,...,...,...,...
2395,22,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,107.468034,68046,5.0,0.99
2396,93,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,112.441218,63411,5.0,1.00
2397,55,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,109.112278,12933,5.0,1.00
2398,41,-200.0,MCTS_MountainCar_hyperparameter_search_2024-05-23,100,110.174428,79533,5.0,0.99


In [28]:
results = mountain_car_hyperparams.groupby(["gamma","c","num_iter"])['reward'].agg(['mean', 'std','sum','count'])
results["std error"] = results["std"] / np.sqrt(results["count"])
results["Mean Episode Time in Minutes"] = mountain_car_hyperparams.groupby(["gamma","c","num_iter"])['total_time'].agg(['mean']) / 60
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,sum,count,std error,Mean Episode Time in Minutes
gamma,c,num_iter,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.5,0.5,100,-200.0,0.0,-20000.0,100,0.0,2.269702
0.5,0.8,100,-200.0,0.0,-20000.0,100,0.0,2.254777
0.5,1.0,100,-200.0,0.0,-20000.0,100,0.0,2.293076
0.5,1.414214,100,-200.0,0.0,-20000.0,100,0.0,2.293202
0.5,2.0,100,-200.0,0.0,-20000.0,100,0.0,2.293294
0.5,5.0,100,-200.0,0.0,-20000.0,100,0.0,2.224888
0.8,0.5,100,-200.0,0.0,-20000.0,100,0.0,2.271982
0.8,0.8,100,-200.0,0.0,-20000.0,100,0.0,2.252528
0.8,1.0,100,-200.0,0.0,-20000.0,100,0.0,2.300976
0.8,1.414214,100,-200.0,0.0,-20000.0,100,0.0,2.292331
