In [2]:
# Load the rpy2 IPython extension; the `%%R` cells further down in this
# notebook need it to be loaded before they can run.

%load_ext rpy2.ipython

In [10]:
import pandas as pd

# Results table used by the Python cells below. The preview further down shows
# the columns env, algo, lr, beta, epsilon, avg_return, total_time and
# asymptotic_return.
datatable = pd.read_csv(filepath_or_buffer="./results/data_10m.csv")


In [16]:
# Sum of the `total_time` column over every row, followed by a preview of the
# BeamRider rows (the bare last expression is rendered as the cell output).
print("Total Time(s): {}".format(datatable["total_time"].sum()))
datatable.loc[datatable["env"] == "BeamRider"]

Total Time(s): 4152156.933662653


Unnamed: 0.1,Unnamed: 0,env,algo,lr,beta,epsilon,avg_return,total_time,asymptotic_return
0,0,BeamRider,sgd,0.094111,0.598665,0.000866,1207.733598,22355.746607,1532.60
4,4,BeamRider,adam,0.000023,0.598665,0.000866,464.627258,6517.251286,492.72
8,8,BeamRider,tdprop,0.000023,0.598665,0.000866,464.670897,10202.082766,474.96
12,12,BeamRider,sgd,0.034937,0.152042,0.000003,899.728917,6194.165495,1208.52
16,16,BeamRider,adam,0.000017,0.152042,0.000003,445.497342,6312.489528,432.56
...,...,...,...,...,...,...,...,...,...
580,580,BeamRider,adam,0.000018,0.795901,0.000275,545.715374,8864.705158,621.68
584,584,BeamRider,tdprop,0.000018,0.795901,0.000275,546.185736,8185.452545,579.24
588,588,BeamRider,sgd,0.023404,0.244338,0.002878,838.942029,6177.847796,1200.32
592,592,BeamRider,adam,0.001483,0.244338,0.002878,1057.248774,5031.267909,1407.92


In [4]:
# !pip install bootstrapped

import bootstrapped.compare_functions as bs_compare
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats
def bootstrap_test(data1, data2, alpha=0.05):
    """Bootstrap A/B comparison of the means of two samples.

    Runs ``bs.bootstrap_ab`` with ``bs_stats.mean`` and
    ``bs_compare.difference`` for 5000 iterations and reports whether the
    resulting interval excludes zero.

    Parameters
    ----------
    data1, data2 : objects exposing ``.squeeze()`` (the notebook passes numpy
        arrays). Singleton dimensions are squeezed away before resampling.
    alpha : float, default 0.05
        Forwarded unchanged to ``bs.bootstrap_ab``.

    Returns
    -------
    rejection : bool
        True when the lower and upper bound are both strictly positive or
        both strictly negative. An interval that touches zero, including the
        degenerate ``(0, 0)`` interval, is not a rejection.
    res : object returned by ``bs.bootstrap_ab``
        Read here through ``.lower_bound`` / ``.upper_bound``. In this
        notebook's printed output the value is mean(data1) - mean(data2).
    """
    # Imported locally so the function does not depend on a later cell having
    # already put `np` into the notebook namespace.
    import numpy as np

    data1 = data1.squeeze()
    data2 = data2.squeeze()

    res = bs.bootstrap_ab(data1, data2, bs_stats.mean, bs_compare.difference, alpha=alpha, num_iterations=5000)

    lower_sign = np.sign(res.lower_bound)
    upper_sign = np.sign(res.upper_bound)
    # Comparing signs alone would treat (0, 0) as "same sign" and reject;
    # require a non-zero sign so that only intervals excluding zero reject.
    rejection = bool(lower_sign != 0 and lower_sign == upper_sign)
    return rejection, res


In [5]:
import numpy as np

# Column of `datatable` that every table and test in this cell summarises.
# return_type = "avg_return"
return_type = "asymptotic_return"

GAMES = ["BeamRider", "Breakout", "SpaceInvaders", "Qbert"]
ALGOS = ["sgd", "adam", "tdprop"]  # left-to-right column order of the LaTeX rows


def _returns_by_algo(game):
    """Return ``{algo: Series}`` of the `return_type` column for one game's rows."""
    game_rows = datatable[datatable["env"] == game]
    return {
        algo: game_rows.loc[game_rows["algo"] == algo, return_type]
        for algo in ALGOS
    }


def _top_quartile(returns):
    """Return, as an array, the values strictly above the sample's 75th percentile."""
    values = returns.to_numpy()
    return values[values > np.percentile(values, 75)]


def _latex_cells(samples):
    """Format one ``mean (lower, upper)`` bootstrap cell per sample, joined by `` & ``."""
    cells = []
    for sample in samples:
        ci = bs.bootstrap(sample, stat_func=bs_stats.mean)
        cells.append(f"{round(ci.value, 1)} ({round(ci.lower_bound, 1)}, {round(ci.upper_bound, 1)})")
    return " & ".join(cells)


# LaTeX rows: bootstrap mean and interval over all runs of each algorithm.
for game in GAMES:
    returns = _returns_by_algo(game)
    cells = _latex_cells(returns[algo].to_numpy() for algo in ALGOS)
    print(f"{game}&{cells}" + "\\\\")

# LaTeX rows: same statistic restricted to each algorithm's top quartile.
print("::::::top::::::")
for game in GAMES:
    returns = _returns_by_algo(game)
    cells = _latex_cells(_top_quartile(returns[algo]) for algo in ALGOS)
    print(f"{game}&{cells} " + "\\\\")

# Pairwise bootstrap tests. Every call passes (baseline, candidate), matching
# the "<candidate> Better than <baseline>" wording of the line printed above it.
for game in GAMES:
    print(game)
    print("--------------------------------------------")
    returns = _returns_by_algo(game)
    sgd, adam, tdprop = (returns[algo] for algo in ALGOS)
    top_returns_sgd, top_returns_adam, top_returns_tdprop = (
        _top_quartile(returns[algo]) for algo in ALGOS
    )

    print(f"On Average TDProp Better than adam:  {tdprop.mean()} v. {adam.mean()}")
    print(bootstrap_test(adam.to_numpy(), tdprop.to_numpy(), alpha=0.05))
    print(f"On Average SGD Better than adam:  {sgd.mean()} v. {adam.mean()}")
    print(bootstrap_test(adam.to_numpy(), sgd.to_numpy(), alpha=0.05))
    print(f"On Average tdprop Better than sgd:  {tdprop.mean()} v. {sgd.mean()}")
    print(bootstrap_test(sgd.to_numpy(), tdprop.to_numpy(), alpha=0.05))

    print(f"Top 25% TDProp Better than adam:  {top_returns_tdprop.mean()} v. {top_returns_adam.mean()}")
    print(bootstrap_test(top_returns_adam, top_returns_tdprop, alpha=0.05))
    print(f"Top 25% sgd Better than adam:  {top_returns_sgd.mean()} v. {top_returns_adam.mean()}")
    print(bootstrap_test(top_returns_adam, top_returns_sgd, alpha=0.05))
    print(f"Top 25% tdprop Better than sgd:  {top_returns_tdprop.mean()} v. {top_returns_sgd.mean()}")
    # Previously called with (tdprop, sgd), which flipped the sign of the
    # reported difference relative to every other comparison in this loop.
    print(bootstrap_test(top_returns_sgd, top_returns_tdprop, alpha=0.05))



BeamRider&963.7 (837.9, 1087.1) & 765.6 (643.1, 881.3) & 1091.5 (949.5, 1229.7)\\
Breakout&39.4 (23.1, 53.9) & 40.3 (23.6, 54.6) & 39.9 (24.8, 53.1)\\
SpaceInvaders&394.1 (363.2, 424.2) & 336.6 (299.4, 372.4) & 420.8 (386.8, 454.3)\\
Qbert&1144.4 (807.7, 1451.0) & 930.4 (619.8, 1187.6) & 792.9 (557.1, 1002.8)\\
::::::top::::::
BeamRider&1559.9 (1502.3, 1613.2) & 1402.9 (1316.9, 1487.8) & 1753.7 (1618.7, 1862.5) \\
Breakout&123.8 (98.8, 148.3) & 119.0 (87.9, 143.2) & 112.2 (83.6, 138.2) \\
SpaceInvaders&543.8 (527.1, 560.9) & 530.2 (487.8, 565.1) & 586.2 (562.7, 610.4) \\
Qbert&2968.4 (2674.4, 3285.0) & 2284.4 (1578.0, 2904.2) & 1869.4 (1372.9, 2333.4) \\
BeamRider
--------------------------------------------
On Average TDProp Better than adam:  1091.5004410256408 v. 765.5512000000001
(True, -325.94924102564073    (-513.4563120512814, -147.6890110256404))
On Average SGD Better than adam:  963.6915999999999 v. 765.5512000000001
(True, -198.14039999999977    (-365.5282599999992, -20.81044

In [8]:
%%R
# Per-game linear regression of avg_return on lr, beta and epsilon for the
# tdprop runs, followed by one stargazer table with the four fits.
# install.packages("dplyr", dependencies=TRUE, repo = "https://cloud.r-project.org")
# install.packages("stargazer")

library(dplyr)
library(stargazer)

datatable <- read.csv(file = "./results/data_10m.csv")
datatable$algo <- as.factor(datatable$algo)
# Numeric code of the algo factor; it only appears in the printed head() below.
datatable$treatment <- as.numeric(datatable$algo)

# Fit and print the regression for the tdprop rows of one game; returns the lm
# object. The local frame is named `data` so the printed Call line reads the
# same as when the fit was written inline.
fit_game <- function(game) {
  data <- datatable %>% filter(env == game) %>% filter(algo == "tdprop")
  print(head(data))
  # + scale(lr*epsilon) + scale(lr*beta) + scale(epsilon*beta)
  model <- lm(avg_return ~ lr + beta + epsilon, data = data)
  print(summary(model)) # show results
  model
}

fit_beamrider <- fit_game("BeamRider")
fit_breakout <- fit_game("Breakout")
fit_spaceinvaders <- fit_game("SpaceInvaders")
fit_qbert <- fit_game("Qbert")

stargazer(fit_beamrider, fit_breakout, fit_spaceinvaders, fit_qbert,
          title = "Results", align = TRUE)

   X       env   algo           lr      beta      epsilon avg_return total_time
1  8 BeamRider tdprop 2.323623e-05 0.5986653 8.657892e-04   464.6709  10202.083
2 20 BeamRider tdprop 1.741722e-05 0.1520423 2.895583e-06   712.5343   7373.531
3 32 BeamRider tdprop 4.724685e-03 0.7925278 6.051871e-04  1333.3258   7399.517
4 44 BeamRider tdprop 2.953280e-04 0.2898943 1.101302e-05  1182.5597   7082.545
5 56 BeamRider tdprop 7.698553e-03 0.2614602 4.533994e-07   516.0948   7919.662
6 68 BeamRider tdprop 3.200471e-04 0.9786490 7.102862e-06  1249.0812   6707.074
  asymptotic_return treatment
1            474.96         3
2            954.48         3
3           1627.76         3
4           1633.14         3
5            105.64         3
6           1500.28         3

Call:
lm(formula = avg_return ~ lr + beta + epsilon, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-696.33 -254.17  -84.62  280.85  732.67 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(

In [7]:
%%R
# Per-game linear regression of avg_return on lr, beta and epsilon for the
# adam runs, followed by one stargazer table with the four fits.
# install.packages("dplyr", dependencies=TRUE, repo = "https://cloud.r-project.org")
# install.packages("stargazer")

library(dplyr)
library(stargazer)

datatable <- read.csv(file = "./results/data_10m.csv")
datatable$algo <- as.factor(datatable$algo)
# Numeric code of the algo factor; it only appears in the printed head() below.
datatable$treatment <- as.numeric(datatable$algo)

# Fit and print the regression for the adam rows of one game; returns the lm
# object. The formula is written without the redundant parentheses so it reads
# the same as the tdprop regressions elsewhere in this notebook.
fit_game <- function(game) {
  data <- datatable %>% filter(env == game) %>% filter(algo == "adam")
  print(head(data))
  model <- lm(avg_return ~ lr + beta + epsilon, data = data)
  print(summary(model)) # show results
  model
}

fit_beamrider <- fit_game("BeamRider")
fit_breakout <- fit_game("Breakout")
fit_spaceinvaders <- fit_game("SpaceInvaders")
fit_qbert <- fit_game("Qbert")

stargazer(fit_beamrider, fit_breakout, fit_spaceinvaders, fit_qbert,
          title = "Results", align = TRUE)

   X       env algo           lr      beta      epsilon avg_return total_time
1  4 BeamRider adam 2.323623e-05 0.5986653 8.657892e-04   464.6273   6517.251
2 16 BeamRider adam 1.741722e-05 0.1520423 2.895583e-06   445.4973   6312.490
3 28 BeamRider adam 4.724685e-03 0.7925278 6.051871e-04   511.1256   6948.144
4 40 BeamRider adam 2.953280e-04 0.2898943 1.101302e-05   351.3268   4809.971
5 52 BeamRider adam 7.698553e-03 0.2614602 4.533994e-07   455.1337   5563.863
6 64 BeamRider adam 3.200471e-04 0.9786490 7.102862e-06  1306.0068   6006.449
  asymptotic_return treatment
1            492.72         1
2            432.56         1
3            533.64         1
4            381.28         1
5            472.28         1
6           1556.20         1

Call:
lm(formula = (avg_return) ~ (lr) + (beta) + (epsilon), data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-385.57 -208.14  -79.58  195.94  495.89 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Inter

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation

# Violin plot of the top-quartile runs of every (game, algorithm) pair, using
# the average return normalised by the best value in the plotted data per game.
# Reads the same results file as the other cells instead of a Desktop copy.
datatable = pd.read_csv("./results/data_10m.csv")
return_type = "avg_return"
new_y = "Normalized Average Return"
hue_order = ["adam", "tdprop", "sgd"]

# Keep, for each game and algorithm, the runs strictly above that group's
# 75th percentile of `return_type`.
all_top_returns = []
for game in ["BeamRider", "Breakout", "SpaceInvaders", "Qbert"]:
    game_rows = datatable[datatable["env"] == game]
    for algo in ["adam", "sgd", "tdprop"]:
        algo_rows = game_rows[game_rows["algo"] == algo]
        cutoff = np.percentile(algo_rows[return_type].to_numpy(), 75)
        all_top_returns.append(algo_rows[algo_rows[return_type] > cutoff])

df = pd.concat(all_top_returns)
# Divide each return by the maximum over the selected rows of the same game;
# one grouped pass replaces re-filtering the frame for every row.
df[new_y] = df[return_type] / df.groupby("env")[return_type].transform("max")

# Within-game algorithm pairs to annotate, as ((game, algo), (game, algo)).
pairings = [("sgd", "adam"), ("adam", "tdprop"), ("sgd", "tdprop")]
box_pairs = [
    ((game, first), (game, second))
    for game in ["Breakout", "BeamRider", "SpaceInvaders", "Qbert"]
    for first, second in pairings
]
print(box_pairs)

sns.set(font_scale=1.3)
ax = sns.violinplot(data=df, x="env", y=new_y, hue="algo", hue_order=hue_order)
add_stat_annotation(ax, data=df, x="env", y=new_y, hue="algo",
                    box_pairs=box_pairs,
                    hue_order=hue_order,
                    comparisons_correction=None,
                    test='t-test_welch', loc='outside', verbose=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
plt.savefig('./plots/top_25_avg_plot.pdf', dpi=400, bbox_inches='tight')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation

# Violin plot of the top-quartile runs of every (game, algorithm) pair, using
# the asymptotic return normalised by the best value in the plotted data per game.
datatable = pd.read_csv("./results/data_10m.csv")
return_type = "asymptotic_return"
new_y = "Normalized Asymptotic Return"
hue_order = ["adam", "tdprop", "sgd"]

# Keep, for each game and algorithm, the runs strictly above that group's
# 75th percentile of `return_type`.
all_top_returns = []
for game in ["BeamRider", "Breakout", "SpaceInvaders", "Qbert"]:
    game_rows = datatable[datatable["env"] == game]
    for algo in ["adam", "sgd", "tdprop"]:
        algo_rows = game_rows[game_rows["algo"] == algo]
        cutoff = np.percentile(algo_rows[return_type].to_numpy(), 75)
        all_top_returns.append(algo_rows[algo_rows[return_type] > cutoff])

df = pd.concat(all_top_returns)
# Divide each return by the maximum over the selected rows of the same game;
# one grouped pass replaces re-filtering the frame for every row.
df[new_y] = df[return_type] / df.groupby("env")[return_type].transform("max")

# Within-game algorithm pairs to annotate, as ((game, algo), (game, algo)).
pairings = [("sgd", "adam"), ("adam", "tdprop"), ("sgd", "tdprop")]
box_pairs = [
    ((game, first), (game, second))
    for game in ["Breakout", "BeamRider", "SpaceInvaders", "Qbert"]
    for first, second in pairings
]
print(box_pairs)

sns.set(font_scale=1.3)
ax = sns.violinplot(data=df, x="env", y=new_y, hue="algo", hue_order=hue_order)
add_stat_annotation(ax, data=df, x="env", y=new_y, hue="algo",
                    box_pairs=box_pairs,
                    hue_order=hue_order,
                    comparisons_correction=None,
                    test='t-test_welch', loc='outside', verbose=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
plt.savefig('./plots/top_25_asymptotic_plot.pdf', dpi=400, bbox_inches='tight')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation

# Violin plot over all runs: average return normalised by the best value
# observed for the same game.
datatable = pd.read_csv("./results/data_10m.csv")
return_type = "avg_return"
new_y = "Normalized Average Return"
hue_order = ["adam", "tdprop", "sgd"]

# Build the plotting frame as a new object so the loaded table is not mutated;
# one grouped pass replaces re-filtering the frame for every row.
df = datatable.assign(
    **{new_y: datatable[return_type] / datatable.groupby("env")[return_type].transform("max")}
)

# Within-game algorithm pairs to annotate, as ((game, algo), (game, algo)).
pairings = [("sgd", "adam"), ("adam", "tdprop"), ("sgd", "tdprop")]
box_pairs = [
    ((game, first), (game, second))
    for game in ["Breakout", "BeamRider", "SpaceInvaders", "Qbert"]
    for first, second in pairings
]
print(box_pairs)

sns.set(font_scale=1.3)
ax = sns.violinplot(data=df, x="env", y=new_y, hue="algo", hue_order=hue_order)
add_stat_annotation(ax, data=df, x="env", y=new_y, hue="algo",
                    box_pairs=box_pairs,
                    hue_order=hue_order,
                    comparisons_correction=None,
                    test='t-test_welch', loc='outside', verbose=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
plt.savefig('./plots/all_ave_plot.pdf', dpi=400, bbox_inches='tight')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation

# Violin plot over all runs: asymptotic return normalised by the best value
# observed for the same game.
# NOTE(review): this cell used to read "./data/data_10m.csv" while the other
# non-Desktop loads in the notebook read "./results/data_10m.csv"; aligned
# here, confirm that the two were never meant to be different files.
datatable = pd.read_csv("./results/data_10m.csv")
return_type = "asymptotic_return"
new_y = "Normalized Asymptotic Return"
hue_order = ["adam", "tdprop", "sgd"]

# Build the plotting frame as a new object so the loaded table is not mutated;
# one grouped pass replaces re-filtering the frame for every row.
df = datatable.assign(
    **{new_y: datatable[return_type] / datatable.groupby("env")[return_type].transform("max")}
)

# Within-game algorithm pairs to annotate, as ((game, algo), (game, algo)).
pairings = [("sgd", "adam"), ("adam", "tdprop"), ("sgd", "tdprop")]
box_pairs = [
    ((game, first), (game, second))
    for game in ["Breakout", "BeamRider", "SpaceInvaders", "Qbert"]
    for first, second in pairings
]
print(box_pairs)

sns.set(font_scale=1.3)
ax = sns.violinplot(data=df, x="env", y=new_y, hue="algo", hue_order=hue_order)
add_stat_annotation(ax, data=df, x="env", y=new_y, hue="algo",
                    box_pairs=box_pairs,
                    hue_order=hue_order,
                    comparisons_correction=None,
                    test='t-test_welch', loc='outside', verbose=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
plt.savefig('./plots/all_asymptotic_plot.pdf', dpi=400, bbox_inches='tight')