In [50]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle
from glob import glob
import os
import subprocess

In [158]:
def get_all_file_paths(root_dir):
    to_return = []
    current_level_dfs = glob(f"{root_dir}/*Df.csv")
    if len(current_level_dfs) > 0:
        to_return += [df_path for df_path in current_level_dfs]
    else:
        for subdir in os.listdir(root_dir):
            full_dir = f"{root_dir}/{subdir}" 
            if os.path.isdir(full_dir):
                to_return += get_all_file_paths(full_dir)
    return to_return

In [11]:
os.chdir("../simulation_scripts")
os.system("./RunEffectSizeSimsSameArms_fixedbs.sh")
os.chdir("../simulation_analysis_scripts")

In [27]:
save_dirs = !ls ../simulation_saves
save_dirs = ['../simulation_saves/' + d for d in save_dirs]

In [40]:
!ls ../simulation_analysis_scripts

ChiSquareCutoff.ipynb
README.txt
TS_BS
__pycache__
checkcsv.py
checkpickle.py
checkpickle_EFFECT.py
checkpickle_EFFECT_Reward.py
checkpickle_IsExplore.py
checkpickle_arghavan.py
checkpickle_bytrial.py
checkpickle_simbased_bsprop.py
checkpickle_simbased_bsprop_armprob.py
checkpickle_simbased_bsprop_armprob_EFFECT.py
checkpickle_simbased_bsprop_armprob_EFFECT_Reward.py
checkpickle_simbased_bsprop_armprob_by_IsExplore.py
checkpickle_simbased_bsprop_armprob_bytrial.py
get_assistments_rewards.py
hist_functions.py
read_config.py
rectify_vars_and_wald_functions.py
scatter_plot_functions.py
table_functions.py


In [62]:
num_sims = 500
arm_prob = 0.5
means = {}
cutoffs = {}
for save_dir in save_dirs:
    for n in (32, 88, 785):
        file_path = glob(save_dir + f"/num_sims={num_sims}armProb={arm_prob}/n={n}/*Df.csv")[0]
        df_sims = pd.read_csv(file_path)
        plt.hist(df_sims['stat'])
        plt.title(f"Chi-Square Statistic: n={n}")
        plt.xlabel("Chi-Square Statistic")
        plt.ylabel("# Sims")
        plt.savefig(f'../simulation_analysis_saves/chi_square_histogram_{n}.png')
        plt.clf()
        cutoff = df_sims['stat'].sort_values().reset_index()['stat'][475]
        print(f"cutoff: {cutoff}")
        cutoffs[f'{n}'] = cutoff
        print(f"chi square mean: {df_sims['stat'].mean()}")
        means[f'{n}'] = df_sims['stat'].mean()

cutoff: 3.344465058750773
chi square mean: 1.0413016455021298
cutoff: 4.194049237909725
chi square mean: 1.2740821600166727
cutoff: 6.139242472615505
chi square mean: 1.6842397960101994


<Figure size 432x288 with 0 Axes>

In [161]:
# delete all the simulation saves
for save_dir in save_dirs:
    os.system(f"rm -rf {save_dir}")

In [164]:
# re-run the simulations
test_scripts = ["RunEffectSizeSimsSameArms_fixedbs.sh", "RunEffectSizeSims.sh"]
os.chdir("../simulation_scripts")
for test_script in test_scripts:
    os.system(f"./{test_script}")
os.chdir("../simulation_analysis_scripts")

In [165]:
save_dirs = !ls ../simulation_saves
save_dirs = ['../simulation_saves/' + d for d in save_dirs]

In [167]:
get_all_file_paths("../simulation_saves")

['../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=22-22BB0.3Df.csv',
 '../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=4-4BB0.3Df.csv',
 '../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=8-8BB0.3Df.csv',
 '../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansUniformburn_in_size-batch_size=22-22BU0.3Df.csv',
 '../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansUniformburn_in_size-batch_size=4-4BU0.3Df.csv',
 '../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansUniformburn_in_size-batch_size=8-8BU0.3Df.csv',
 '../simulation_saves/NoEffect_fixedbs_TEST/num_sims=500armProb=0.5/n=32/bbEqualMeansEqualPriorburn_in_size-batch_size=1-1BB0N32Df.csv',
 '../simulation_saves/NoEffect_fixedbs_TEST/num_sims=500ar

In [169]:
num_sims = 500
arm_prob = 0.5
for save_dir in save_dirs:
    for n in (32, 88, 785):
        for file_path in get_all_file_paths("../simulation_saves"):
            print(file_path)
            df_sims = pd.read_csv(file_path)
            cutoff = cutoffs[f'{n}']
            df_positives = df_sims[df_sims['stat'] > cutoff]
            print(f"# above chi-square_cutoff: {len(df_positives)}")
            print(f"% of sims positive: {len(df_positives)/num_sims}")

../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=22-22BB0.3Df.csv
# above chi-square_cutoff: 365
% of sims positive: 0.73
../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=4-4BB0.3Df.csv
# above chi-square_cutoff: 328
% of sims positive: 0.656
../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansEqualPriorburn_in_size-batch_size=8-8BB0.3Df.csv
# above chi-square_cutoff: 347
% of sims positive: 0.694
../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansUniformburn_in_size-batch_size=22-22BU0.3Df.csv
# above chi-square_cutoff: 411
% of sims positive: 0.822
../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es=0.3/bbUnEqualMeansUniformburn_in_size-batch_size=4-4BU0.3Df.csv
# above chi-square_cutoff: 424
% of sims positive: 0.848
../simulation_saves/IsEffect_bs_TEST/num_sims=500armProb=0.5/es