In [None]:
import copy
from collections.abc import Iterable
import functools
import itertools
import operator
from matplotlib import pyplot as plt
import matplotlib as mpl

# Switch to the pgf backend; figures below are written as .pdf and .pgf files.
mpl.use('pgf')
plt.rcParams.update({
    "font.family": "serif",  # use serif/main font for text elements
    "text.usetex": True,     # use inline math for ticks
    "pgf.rcfonts": False,    # don't setup fonts from rc parameters
    # The preamble must be ONE string: passing a list was deprecated in
    # Matplotlib 3.3, and newer releases expect a plain string here.
    "text.latex.preamble": "\n".join([
        r"\usepackage{amssymb}",
        r"\usepackage{amsmath}",
    ]),
})
# mpl.verbose.level = 'debug-annoying'


import pandas as pd
from pandas.api.types import is_numeric_dtype
import numpy as np
import numpy_ext as npe
import math
import random
from pprint import pprint
from scipy.optimize import curve_fit
from scipy.stats import poisson
from scipy.sparse import hstack, vstack, csr_matrix
import scipy

from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.preprocessing import Normalizer, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
import joblib

import seaborn as sns
import utils
import sys

from config import demographics, vital_sign_vars, lab_vars, treatment_vars, vent_vars, guideline_vars, ffill_windows_clinical, SAMPLE_TIME_H
from config import fio2_bins, peep_bins, tv_bins

In [None]:
# Input CSVs: the off-policy-evaluation results and the safety results.
file = 'data/ope_results.csv'
safety_file = 'data/safety_results.csv'
ope_wis = pd.read_csv(file)
safety = pd.read_csv(safety_file)

# Inner join on the experiment identifiers. Non-key columns present in both
# frames keep their name for `ope_wis` and get "_left" appended for `safety`.
ope_wis = ope_wis.merge(
    safety,
    on=['seed', 'algorithm', 'unsafety_prob_train', 'shaping', 'safety', 'scalar'],
    suffixes=['', '_left'],
)

REWARD_RANGE = (-100, 100)
RR_SIZE = max(REWARD_RANGE) - min(REWARD_RANGE)  # width of the reward range
N_BOOT = 2000  # bootstrap resamples passed to seaborn

# TEXTWIDTH=390.0 # AI in MEDICINE
TEXTWIDTH = 341.43289  # Dissertation
rng = np.random.default_rng()

In [None]:
# Short display labels for the algorithm names. Each pair is applied as a
# substring replacement, in this order.
ALGORITHM_LABELS = (
    ('behavior', 'IL'),
    ('observed', 'O'),
    ('softmax', 'QL$_S$'),
    ('greedy', 'QL$_D$'),
    ('mixed', 'M'),
)
for raw_name, label in ALGORITHM_LABELS:
    ope_wis.loc[:, 'algorithm'] = ope_wis.algorithm.str.replace(raw_name, label)

# `scalar` expressed as a fraction of the reward-range width.
ope_wis['norm_scalar'] = ope_wis['scalar'] / RR_SIZE

In [None]:
# ope_wis = ope_wis[ope_wis.algorithm != 'G']
# ope_wis = ope_wis[ope_wis.seed < 10]
# ope_wis = ope_wis[ope_wis.unsafety_prob_train == 1.0]
# ope_wis = ope_wis[ope_wis.norm_scalar == 0.02]

In [None]:
# Number of rows per distinct value of the `shaping` column.
ope_wis.shaping.value_counts()

In [None]:
# seeds = ope_wis.seed.unique()
# algorithms = ope_wis.algorithm.unique()
# settings = ope_wis.train_test.unique()
# shaping = ope_wis.shaping.unique()

# experiments = itertools.product(seeds, algorithms, settings)
# Build one row per experiment with the phwis estimate and a confidence
# interval derived from its variance, split into train and test tables.
CI_COLUMNS = ['seed', 'algorithm', 'unsafety_prob', 'norm_scalar', 'phwis', 'ci_l', 'ci_u', 'hcope5', 'am']
SETUP_COLUMNS = ['algorithm', 'norm_scalar', 'unsafety_prob']

cis_train = []
cis_test = []
for index, experiment in ope_wis.iterrows():
    mean = experiment['phwis']
    if experiment['train_test'] == 'train':
        n = experiment['n_train']
        cis = cis_train
    elif experiment['train_test'] == 'test':
        n = experiment['n_test']
        cis = cis_test
    else:
        raise ValueError('Only train and test results supported for now')
    # utils.var_to_ci_cheb returns a (lower, upper) pair
    # (presumably Chebyshev-based, going by its name -- confirm in utils).
    ci_low, ci_up = utils.var_to_ci_cheb(experiment['var'], mean, n)
    # Clamp to the reward range instead of repeating the +/-100 literals.
    ci_low = max(ci_low, min(REWARD_RANGE))
    ci_up = min(ci_up, max(REWARD_RANGE))
    cis.append([experiment['seed'], experiment['algorithm'], experiment['unsafety_prob'],
                experiment['norm_scalar'], mean, ci_low, ci_up, experiment['hcope5'], experiment['am']])

cis_test = pd.DataFrame(cis_test, columns=CI_COLUMNS)
cis_train = pd.DataFrame(cis_train, columns=CI_COLUMNS)

# Human-readable setup id: "<algorithm>-<norm_scalar>-<unsafety_prob>".
cis_test['setup'] = cis_test[SETUP_COLUMNS].apply(lambda x: '-'.join(map(str, x)), axis=1)
cis_train['setup'] = cis_train[SETUP_COLUMNS].apply(lambda x: '-'.join(map(str, x)), axis=1)

In [None]:
# For every setup in the test table: mean and standard deviation of phwis
# over its rows, plus a normal-approximation 95% interval for the mean.
for setup, means in cis_test.groupby('setup')['phwis']:
    mean, std = means.mean(), means.std()
    standard_error = std / math.sqrt(len(means))
    print(setup, mean, std)
    print(scipy.stats.norm.interval(.95, loc=mean, scale=standard_error))

In [None]:
def to_setup_scalar(row):
    """Build the plot label for one row, including the scalar where relevant.

    Parameters
    ----------
    row : pandas.Series
        Three values in order: algorithm label, safety scalar, mixing scalar.

    Returns
    -------
    str or None
        The bare label for 'O' and 'IL'; ``'<label>\\nc=<safety scalar>'`` for
        the two QL variants; ``'M\\nc=<mixing scalar>'`` for 'M'; ``None`` for
        any other algorithm value.
    """
    rnp = row.to_numpy()
    alg, safe_scalar, mixing_scalar = rnp[0], rnp[1], rnp[2]
    if alg in {'O', 'IL'}:
        return alg
    if alg in {'QL$_D$', 'QL$_S$'}:
        return '{}\nc={}'.format(alg, safe_scalar)
    # Equality, not `alg in 'M'`: the substring test also accepted the empty
    # string and raised TypeError for non-string values (e.g. None).
    if alg == 'M':
        return '{}\nc={}'.format(alg, mixing_scalar)
    return None

def to_setup(row):
    """Return the algorithm label of a row, or ``None`` if it is not a known one.

    Parameters
    ----------
    row : pandas.Series
        Three values in order: algorithm label, safety scalar, mixing scalar.
        Only the algorithm label is used.

    Returns
    -------
    str or None
        The label itself when it is one of 'O', 'IL', 'QL$_D$', 'QL$_S$', 'M';
        otherwise ``None``.
    """
    rnp = row.to_numpy()
    alg, safe_scalar, mixing_scalar = rnp[0], rnp[1], rnp[2]
    if alg in {'O', 'IL'}:
        return alg
    if alg in {'QL$_D$', 'QL$_S$'}:
        return '{}'.format(alg)
    # Equality, not `alg in 'M'`: the substring test also accepted the empty
    # string and raised TypeError for non-string values (e.g. None).
    if alg == 'M':
        return '{}'.format(alg)
    return None

def setup_key(setup):
    """Sort key for setup labels, usable as ``sort_values(key=setup_key)``.

    Parameters
    ----------
    setup : pandas.Series of str
        Setup labels such as 'O', 'IL', 'QL$_D$\\nc=0.5' or 'M\\nc=0.25'.

    Returns
    -------
    pandas.Series of float
        One key per label, on the same index as ``setup``: 0 for 'O', 1 for
        'IL', ``scalar * 200 + 1`` for labels containing 'QL$_D$',
        ``scalar * 200 + 2`` for labels containing 'QL$_S$' or 'M', and
        ``-inf`` for anything else. ``scalar`` is the number after the first
        '='; a matching label without '=' gets a NaN key.
    """
    # Share the input's index so the boolean masks below always align.
    order = pd.Series(float('-inf'), index=setup.index)
    scalar = setup.str.split('=').str[1].astype('float')

    order[setup == 'O'] = 0
    order[setup == 'IL'] = 1
    # regex=False: the labels contain '$', which a regex would treat as an
    # end-of-string anchor, so the literal label would never match.
    order[setup.str.contains('QL$_D$', regex=False, na=False)] = scalar * 200 + 1
    order[setup.str.contains('QL$_S$', regex=False, na=False)] = scalar * 200 + 2
    order[setup.str.contains('M', regex=False, na=False)] = scalar * 200 + 2
    return order

def algorithm_key(algorithm):
    """Sort key for algorithm labels, usable as ``sort_values(key=algorithm_key)``.

    Parameters
    ----------
    algorithm : pandas.Series of str
        Algorithm labels.

    Returns
    -------
    pandas.Series of float
        On the same index as ``algorithm``: 1 for 'O', 2 for 'IL', 3 for
        'QL$_D$', 4 for 'QL$_S$', and ``inf`` for every other label.
    """
    # Sized and indexed from the argument itself; the previous version used
    # `len(setup)`, a name that is not a parameter of this function.
    order = pd.Series(float('inf'), index=algorithm.index)
    order[algorithm == 'O'] = 1
    order[algorithm == 'IL'] = 2
    order[algorithm == 'QL$_D$'] = 3
    order[algorithm == 'QL$_S$'] = 4
    return order

def safety_setup(row):
    """Map a (train unsafety, final unsafety) pair to a safety label.

    Parameters
    ----------
    row : pandas.Series
        Two values in order: unsafety probability used in training, and the
        final unsafety probability.

    Returns
    -------
    str
        'Unsafe' when the final value is 1.0; with a final value of 0.0,
        'Q-function' when the training value is 0.0 and 'Policy' when it is 1.0.

    Raises
    ------
    ValueError
        For any other combination of the two values.
    """
    values = row.to_numpy()
    trained_with, evaluated_with = values[0], values[1]

    if evaluated_with == 1.0:
        return 'Unsafe'
    if evaluated_with == 0.0:
        if trained_with == 0.0:
            return 'Q-function'
        if trained_with == 1.0:
            return 'Policy'
    raise ValueError('Unknown safety combination {}'.format(values))

# Derive the label columns used by the plots and tests below.
setup_inputs = ope_wis[['algorithm', 'norm_scalar', 'mixing_prob']]
ope_wis['setup'] = setup_inputs.apply(to_setup, axis=1)
ope_wis['setup_scalar'] = setup_inputs.apply(to_setup_scalar, axis=1)

# Note: this overwrites the existing `safety` column with the text label.
safety_inputs = ope_wis[['unsafety_prob_train', 'unsafety_prob']]
ope_wis['safety'] = safety_inputs.apply(safety_setup, axis=1)

# `compliance` is the safety label with 'Unsafe' shown as an abbreviation.
ope_wis['compliance'] = ope_wis['safety'].str.replace('Unsafe', 'Unconstr\'nd')

In [None]:
# Display the `compliance` column.
ope_wis.compliance

In [None]:
def set_size(width_pt, fraction=1, times=1, subplots=(1, 1), buffer=0.0):
    """Compute a figure size in inches from a document width in points.

    Parameters
    ----------
    width_pt: float
            Document width in points
    fraction: float, optional
            Fraction of the document width the figure should occupy
    times: optional
            Accepted for compatibility; not used in the computation
    subplots: array-like, optional
            (rows, columns) of the subplot grid; height scales with rows/columns
    buffer: float, optional
            Relative extra height; the height is multiplied by (1 + buffer)

    Returns
    -------
    fig_dim: tuple
            (width, height) of the figure in inches
    """
    inches_per_pt = 1 / 72.27
    golden_ratio = (5**.5 - 1) / 2  # height/width ratio of a single panel

    n_rows, n_cols = subplots[0], subplots[1]

    width_in = (width_pt * fraction) * inches_per_pt
    height_in = width_in * (1 + buffer) * golden_ratio * (n_rows / n_cols)
    return (width_in, height_in)

# Keep only the rows whose normalised scalar is exactly 0.
to_plot = ope_wis[ope_wis.norm_scalar == 0.0]
plot_algs = list(to_plot.algorithm.unique())
# plot_algs.remove('QL')

# Build the mask from `to_plot` itself. A mask built from `ope_wis` has a
# different index and only works through pandas' implicit reindexing of
# boolean keys, which emits a warning.
plot_mask = to_plot.algorithm.isin(plot_algs) & to_plot.unsafety_prob.isin({0, 1})
plot_data = to_plot[plot_mask].sort_values('setup', key=setup_key).copy()
# From here on the `algorithm` column of plot_data holds the setup label.
plot_data.loc[:, 'algorithm'] = plot_data.setup
# plot_data = plot_data.sort_values('algorithm', key=algorithm_key)

# Shared styling for the point plots.
hue_order = sorted(plot_data.compliance.unique(), reverse=True)
MARKERS = ['^', 'o', 'v', 's']
LINESTYLES = ['solid', 'dashed', 'dotted']

# 3x2 grid, slightly wider than the text width, with 40% extra height.
subplots = (3, 2)
fig = plt.figure(figsize=set_size(TEXTWIDTH, fraction=1.1, subplots=subplots, buffer=0.4))


# fig, ax = plt.subplots(subplots[0], subplots[1], figsize=set_size(TEXTWIDTH, fraction=1.0, subplots=subplots), sharex='col')
# ax[-1][-1].axis('off')
# sns.pointplot(data=plot_data[plot_data.train_test == 'train'], x='setup', y='phwis', hue='unsafety_prob', hue_order=hue_order, join=False, dodge=True, ax=ax[0], n_boot=N_BOOT)
dodge = 0.5
errwidth = 1.0


def _draw_panel(ax, data, x, title, xlabel):
    """Draw one horizontal point plot of column `x` per algorithm, hued by compliance.

    The seaborn legend is removed and the y label is cleared; the caller adds
    any panel-specific extras (limits, scale, legend) afterwards.
    """
    sns.pointplot(
        data=data,
        x=x,
        y='algorithm',
        hue='compliance',
        hue_order=hue_order,
        markers=MARKERS,
        linestyle="none",
        err_kws={"color": "black"},
        dodge=dodge,
        n_boot=N_BOOT,
        errwidth=errwidth,
        scale=0.8,
        ax=ax,
    )
    ax.get_legend().remove()
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('')


test_rows = plot_data[plot_data.train_test == 'test']

# Left column: the three return estimates, sharing one x axis.
ax_1 = fig.add_subplot(3, 2, 1)
ax_i = ax_1
_draw_panel(ax_i, test_rows, 'fqe', 'Model-based', 'Expected Return')

ax_i = fig.add_subplot(3, 2, 3, sharex=ax_1)
# The phwis panel only uses test rows with a positive effective sample size.
plot_data_i = plot_data[(plot_data.train_test == 'test') & (plot_data.ess > 0.0)]
_draw_panel(ax_i, plot_data_i, 'phwis', 'Inverse Propensity Scoring', 'Expected Return')

ax_i = fig.add_subplot(3, 2, 5, sharex=ax_1)
_draw_panel(ax_i, test_rows, 'phwdr', 'Hybrid', 'Expected Return')

# Right column: compliance and effective sample size.
ax_i = fig.add_subplot(3, 2, 2)
_draw_panel(ax_i, test_rows, 'safety_policy', 'Compliance', r'$P(a \in A_{\mathcal{C}})$')
ax_i.set_xlim(0, 1.1)

ax_i = fig.add_subplot(3, 2, 4)
_draw_panel(ax_i, test_rows, 'ess', 'Effective sample size', 'ESS')
# This is the only panel that keeps a legend, rebuilt without a title.
ax_i.legend(loc='upper right', title='', handletextpad=.1)
ax_i.set_xscale('symlog')
ax_i.set_xlim(-1.0, 10e3)

fig.tight_layout()
plt.savefig('/tmp/all_ope.pdf', dpi=1200)
plt.savefig('/tmp/all_ope.pgf')
plt.show()

In [None]:
# fqe of the QL$_D$ rows on the test split, as a function of `scalar`.
q_d = ope_wis[ope_wis['algorithm'] == 'QL$_D$']
hue_order = sorted(q_d['compliance'].unique(), reverse=True)

fig, ax = plt.subplots(figsize=set_size(TEXTWIDTH, fraction=0.7))

q_d_test = q_d[q_d['train_test'] == 'test']
ax = sns.pointplot(
    data=q_d_test,
    x='scalar',
    y='fqe',
    hue='compliance',
    hue_order=hue_order,
    markers=MARKERS,
    linestyles="none",
    err_kws={"color": "black"},
    dodge=0.4,
    n_boot=N_BOOT,
    errwidth=errwidth,
    scale=0.8,
    ax=ax,
)
ax.legend(loc='best', title='', handletextpad=.1)
ax.set_ylabel('Expected Return')
ax.set_xlabel('$c$')
ax.set_title('QL$_D$ with reward shaping')

plt.tight_layout()
plt.savefig('/tmp/shaping.pdf')
plt.show()

In [None]:
# Rows of q_d with shaped == False, split by safety label and ordered by seed
# so the two frames can be compared pairwise.
shared_filters = (q_d['shaped'] == False) & (q_d['algorithm'] == 'QL$_D$')
q_d_qsafe = q_d.loc[shared_filters & (q_d['safety'] == 'Q-function')].sort_values(by='seed')
q_d_psafe = q_d.loc[shared_filters & (q_d['safety'] == 'Policy')].sort_values(by='seed')

In [None]:
# Paired one-sided test on fqe: Q-function safety (first sample) against
# policy safety (second sample), alternative 'greater'.
print('Compare safety Q-function to Policy safety (nonparametric Wilcoxon signed-rank test)')
fqe_result = scipy.stats.wilcoxon(q_d_qsafe.fqe, q_d_psafe.fqe, alternative='greater')
print('fqe: {}'.format(fqe_result))

In [None]:
def _policy_rows(algorithm, safety=None, unshaped=False):
    """Rows of `plot_data` for one algorithm label, ordered by seed.

    `safety` optionally restricts to one safety label; `unshaped=True`
    additionally keeps only rows with shaped == False.
    """
    mask = plot_data.algorithm == algorithm
    if safety is not None:
        mask = mask & (plot_data.safety == safety)
    if unshaped:
        mask = mask & (plot_data.shaped == False)
    return plot_data[mask].sort_values('seed')


# NOTE(review): plot_data is not filtered on train_test here -- confirm that
# each seed contributes exactly one row per subset before pairing them.
o = _policy_rows('O')
il_u = _policy_rows('IL', 'Unsafe')
il_psafe = _policy_rows('IL', 'Policy')

q_d_qsafe = _policy_rows('QL$_D$', 'Q-function', unshaped=True)
q_d_psafe = _policy_rows('QL$_D$', 'Policy', unshaped=True)
q_d_u = _policy_rows('QL$_D$', 'Unsafe', unshaped=True)

q_s_qsafe = _policy_rows('QL$_S$', 'Q-function', unshaped=True)
q_s_psafe = _policy_rows('QL$_S$', 'Policy', unshaped=True)
q_s_u = _policy_rows('QL$_S$', 'Unsafe', unshaped=True)

In [None]:
# Paired one-sided Wilcoxon tests (alternative='less') of each policy's phwis
# against the observed policy `o`. The previous header was copied from the
# Q-function-vs-policy cell and did not describe these tests.
print('Compare each policy to the observed policy on phwis (nonparametric Wilcoxon signed-rank test, alternative=less)')
print('il_u: {}'.format(scipy.stats.wilcoxon(il_u.phwis, o.phwis, alternative='less')))
print('il_psafe: {}'.format(scipy.stats.wilcoxon(il_psafe.phwis, o.phwis, alternative='less')))
# No test is run for these two subsets.
print('q_d_u: -')
print('q_d_qsafe: -')
# Only the seeds where q_d_psafe has a non-zero phwis are compared, and the
# observed rows are restricted to the same seeds.
nonzero_seeds = q_d_psafe[q_d_psafe.phwis !=0].seed
print('q_d_psafe: {}'.format(scipy.stats.wilcoxon(q_d_psafe.phwis[q_d_psafe.seed.isin(nonzero_seeds)], o[o.seed.isin(nonzero_seeds)].phwis, alternative='less')))
print('q_s_u: {}'.format(scipy.stats.wilcoxon(q_s_u.phwis, o.phwis, alternative='less')))
print('q_s_qsafe: {}'.format(scipy.stats.wilcoxon(q_s_qsafe.phwis, o.phwis, alternative='less')))
print('q_s_psafe: {}'.format(scipy.stats.wilcoxon(q_s_psafe.phwis, o.phwis, alternative='less')))

In [None]:
# Paired one-sided Wilcoxon tests (alternative='less') of each policy's phwdr
# against the observed policy `o`. The previous header was copied from the
# Q-function-vs-policy cell and did not describe these tests.
print('Compare each policy to the observed policy on phwdr (nonparametric Wilcoxon signed-rank test, alternative=less)')
print('il_u: {}'.format(scipy.stats.wilcoxon(il_u.phwdr, o.phwdr, alternative='less')))
print('il_psafe: {}'.format(scipy.stats.wilcoxon(il_psafe.phwdr, o.phwdr, alternative='less')))
print('q_d_u: {}'.format(scipy.stats.wilcoxon(q_d_u.phwdr, o.phwdr, alternative='less')))
print('q_d_qsafe: {}'.format(scipy.stats.wilcoxon(q_d_qsafe.phwdr, o.phwdr, alternative='less')))
print('q_d_psafe: {}'.format(scipy.stats.wilcoxon(q_d_psafe.phwdr, o.phwdr, alternative='less')))
print('q_s_u: {}'.format(scipy.stats.wilcoxon(q_s_u.phwdr, o.phwdr, alternative='less')))
print('q_s_qsafe: {}'.format(scipy.stats.wilcoxon(q_s_qsafe.phwdr, o.phwdr, alternative='less')))
print('q_s_psafe: {}'.format(scipy.stats.wilcoxon(q_s_psafe.phwdr, o.phwdr, alternative='less')))

In [None]:
# Display the seeds for which q_d_psafe has a non-zero phwis.
nonzero_seeds

In [None]:
# Compare whether the safety criterion should be included in the Q function definition or not
# Prints only the p-values of the one-sided (alternative='less') paired tests.
# NOTE(review): `ql_qsafe` and `ql_psafe` are not defined in any cell shown
# here; on a fresh kernel this cell raises NameError unless they are created
# elsewhere.
print('Compare safety Q-function to Policy safety (nonparametric Wilcoxon signed-rank test)')
print('phwdr: {}'.format(scipy.stats.wilcoxon(ql_qsafe.phwdr, ql_psafe.phwdr, alternative='less').pvalue))
print('phwis: {}'.format(scipy.stats.wilcoxon(ql_qsafe.phwis, ql_psafe.phwis, alternative='less').pvalue))

In [None]:
def print_statistic_row(datarow, statistic=np.mean, cols = ['phwis', 'fqe', 'phwdr', 'ess'], prec=0.2):
    """Print one table row of statistics, cells separated by '&'.

    Parameters
    ----------
    datarow : mapping or DataFrame
        Indexed by column name; each entry holds the values to summarise.
    statistic : callable, optional
        Statistic to compute. It is passed to ``utils.bootstrap_ci`` as
        ``stat`` and called directly for the 'ess' column.
    cols : list of str, optional
        Columns to print, in order. Every column except 'ess' prints
        ``value & low-high &`` from a 95% ``utils.bootstrap_ci``; 'ess' prints
        the statistic only and ends the line with a double backslash.
    prec : float, optional
        Inserted into the format spec, e.g. 0.2 gives ``{:0.2f}``.
    """
    number = "{:" + str(prec) + "f}"
    interval_cell = " " + number + " & " + number + "-" + number + " &"
    closing_cell = " " + number + " "

    for column in cols:
        if column == 'ess':
            print(closing_cell.format(statistic(datarow[column])), end="\\\\\n")
            continue
        loc, (ci_l, ci_u) = utils.bootstrap_ci(datarow[column], stat=statistic, conf=0.95)
        print(interval_cell.format(loc, ci_l, ci_u), end=" ")

            
# Print one row of mean statistics per policy subset.
# NOTE(review): `il_unsafe`, `ql_unsafe`, `ql_qsafe` and `ql_psafe` are not
# defined in any cell shown here (only `il_psafe` is); on a fresh kernel this
# loop raises NameError unless they are created elsewhere.
for stat in [
    ope_wis[ope_wis.algorithm == 'O'],
    il_unsafe,
    il_psafe,
    ql_unsafe,
    ql_qsafe,
    ql_psafe
]:
    print_statistic_row(stat, np.mean)

In [None]:

# Bootstrap confidence intervals of phwdr / phwis / fqe per policy subset.
# NOTE(review): `statistic` is assigned but never passed to the
# utils.bootstrap_ci calls in this cell, so they use that helper's default.
# NOTE(review): `il_unsafe`, `ql_unsafe`, `ql_qsafe` and `ql_psafe` are not
# defined in any cell shown here.
statistic = np.mean 
# statistic = np.median
print('Observed')
print(utils.bootstrap_ci(ope_wis[ope_wis.algorithm == 'O'].phwdr))
print('IL-unsafe')
print(utils.bootstrap_ci(il_unsafe.phwdr))
print(utils.bootstrap_ci(il_unsafe.phwis))
print(utils.bootstrap_ci(il_unsafe.fqe))

print('IL-psafe')
print(utils.bootstrap_ci(il_psafe.phwdr))
print(utils.bootstrap_ci(il_psafe.phwis))
print(utils.bootstrap_ci(il_psafe.fqe))
# The QL subsets are restricted to rows with scalar == 0.0.
print('QL-unsafe')
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].phwdr))
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].phwis))
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].fqe))
print('QL-qsafe')
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].phwdr))
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].phwis))
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].fqe))

print('QL-psafe')
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].phwdr))
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].phwis))
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].fqe))

In [None]:
# Show the documentation of utils.bootstrap_ci. `help()` is plain Python, so
# the cell also works when the notebook is exported to a script; the IPython
# `?name` syntax does not.
help(utils.bootstrap_ci)

In [None]:
# Same intervals as the previous cell, with `statistic` (np.median here)
# passed as the second positional argument of utils.bootstrap_ci.
# NOTE(review): `median` is assigned but not read in any cell shown here.
# NOTE(review): `il_unsafe`, `ql_unsafe`, `ql_qsafe` and `ql_psafe` are not
# defined in any cell shown here.
median = False
# statistic = np.mean 
statistic = np.median
print('Observed')
print(utils.bootstrap_ci(ope_wis[ope_wis.algorithm == 'O'].phwdr, statistic))
print(utils.bootstrap_ci(ope_wis[ope_wis.algorithm == 'O'].phwis, statistic))
print(utils.bootstrap_ci(ope_wis[ope_wis.algorithm == 'O'].fqe, statistic))
print('IL-unsafe')
print(utils.bootstrap_ci(il_unsafe.phwdr, statistic))
print(utils.bootstrap_ci(il_unsafe.phwis, statistic))
print(utils.bootstrap_ci(il_unsafe.fqe, statistic))

print('IL-psafe')
print(utils.bootstrap_ci(il_psafe.phwdr, statistic))
print(utils.bootstrap_ci(il_psafe.phwis, statistic))
print(utils.bootstrap_ci(il_psafe.fqe, statistic))

# The QL subsets are restricted to rows with scalar == 0.0.
print('QL-unsafe')
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].phwdr, statistic))
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].phwis, statistic))
print(utils.bootstrap_ci(ql_unsafe[ql_unsafe.scalar == 0.0].fqe, statistic))

print('QL-qsafe')
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].phwdr, statistic))
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].phwis, statistic))
print(utils.bootstrap_ci(ql_qsafe[ql_qsafe.scalar == 0.0].fqe, statistic))

print('QL-psafe')
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].phwdr, statistic))
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].phwis, statistic))
print(utils.bootstrap_ci(ql_psafe[ql_psafe.scalar == 0.0].fqe, statistic))



In [None]:
# SciPy bootstrap (95% confidence level) of the mean phwdr over the
# ql_unsafe rows with scalar == 0.0.
# NOTE(review): `ql_unsafe` is not defined in any cell shown here, and `bs`
# is not read in any later cell shown here.
bs = scipy.stats.bootstrap([ql_unsafe[ql_unsafe.scalar == 0.0].phwdr,], np.mean, confidence_level=.95)

In [None]:
# Pass the variance and the row count of phwdr (ql_unsafe, scalar == 0.0) to
# utils.var_to_sem (presumably a standard error, per its name -- confirm).
# NOTE(review): `ql_unsafe` is not defined in any cell shown here.
utils.var_to_sem(ql_unsafe[ql_unsafe.scalar == 0.0].phwdr.var(), len(ql_unsafe[ql_unsafe.scalar == 0.0].phwdr))

In [None]:
# Standard deviation of phwdr divided by sqrt(number of rows), for the
# ql_unsafe rows with scalar == 0.0.
# NOTE(review): `ql_unsafe` is not defined in any cell shown here.
ql_unsafe[ql_unsafe.scalar == 0.0].phwdr.std() / math.sqrt(len(ql_unsafe[ql_unsafe.scalar == 0.0].phwdr))

In [None]:
# Wilcoxon signed-rank test with SciPy's default alternative: phwdr of
# ql_qsafe vs ql_unsafe, both restricted to scalar == 0.0.
# NOTE(review): `ql_qsafe` and `ql_unsafe` are not defined in any cell shown here.
scipy.stats.wilcoxon(ql_qsafe[ql_qsafe.scalar == 0.0].phwdr, ql_unsafe[ql_unsafe.scalar == 0.0].phwdr)

In [None]:
# Paired t-test on the same two phwdr samples as the Wilcoxon test above.
# NOTE(review): `ql_qsafe` and `ql_unsafe` are not defined in any cell shown here.
scipy.stats.ttest_rel(ql_qsafe[ql_qsafe.scalar == 0.0].phwdr, ql_unsafe[ql_unsafe.scalar == 0.0].phwdr)

In [None]:
# Display seed and phwdr of the ql_unsafe rows with scalar == 0.0.
# NOTE(review): `ql_unsafe` is not defined in any cell shown here.
ql_unsafe[ql_unsafe.scalar == 0.0][['seed','phwdr']]

In [None]:
# NOTE(review): exact duplicate of an earlier cell (same Wilcoxon test on
# phwdr of ql_qsafe vs ql_unsafe); both names are not defined in any cell
# shown here.
scipy.stats.wilcoxon(ql_qsafe[ql_qsafe.scalar == 0.0].phwdr, ql_unsafe[ql_unsafe.scalar == 0.0].phwdr)

In [None]:
# One-sided (alternative='greater') Wilcoxon test of ql_unsafe against ql_safe.
# NOTE(review): neither name is defined in any cell shown here. Other cells
# index `ql_unsafe` like a DataFrame, while here the whole object is passed,
# so this cell presumably dates from when these names held a single column --
# confirm.
scipy.stats.wilcoxon(ql_unsafe, ql_safe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of il_unsafe against ql_unsafe.
# NOTE(review): neither name is defined in any cell shown here; the whole
# objects are passed rather than one column.
scipy.stats.wilcoxon(il_unsafe, ql_unsafe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of il_safe against ql_safe.
# NOTE(review): neither name is defined in any cell shown here.
scipy.stats.wilcoxon(il_safe, ql_safe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of il_unsafe against il_safe.
# NOTE(review): neither name is defined in any cell shown here.
scipy.stats.wilcoxon(il_unsafe, il_safe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of o against ql_safe.
# NOTE(review): `ql_safe` is not defined in any cell shown here, and the `o`
# built in an earlier cell is a multi-column DataFrame slice, not one column.
scipy.stats.wilcoxon(o, ql_safe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of o against il_safe.
# NOTE(review): `il_safe` is not defined in any cell shown here, and the `o`
# built in an earlier cell is a multi-column DataFrame slice, not one column.
scipy.stats.wilcoxon(o, il_safe, alternative='greater')

In [None]:
# One-sided (alternative='greater') Wilcoxon test of o against il_unsafe.
# NOTE(review): `il_unsafe` is not defined in any cell shown here, and the `o`
# built in an earlier cell is a multi-column DataFrame slice, not one column.
scipy.stats.wilcoxon(o, il_unsafe, alternative='greater')

In [None]:
# Spot check: test-split rows of seed 5 for algorithm 'QL', pivoted so that
# rows are unsafety_prob values and columns are (seed, norm_scalar) pairs.
# NOTE(review): the relabelling cell shown earlier produces 'QL$_D$' and
# 'QL$_S$', not 'QL'; unless the CSV contains a literal 'QL' algorithm, this
# filter selects no rows -- confirm.
check = ope_wis[
    (ope_wis.train_test == 'test') & 
    (ope_wis.algorithm == 'QL') &
#     ((ope_wis.unsafety_prob == 0.0) |
#      (ope_wis.unsafety_prob == 1.0))
#     ((ope_wis.norm_scalar == 0.0) | 
#      (ope_wis.norm_scalar == .05))
    (ope_wis.seed == 5)
]

# check[['norm_scalar', 'seed', 'mean', 'unsafety_prob']].sort_values(['seed', 'norm_scalar', 'unsafety_prob']).set_index('norm_scalar').pivot(columns=['seed', 'unsafety_prob'])
check[['norm_scalar', 'seed', 'phwis', 'unsafety_prob']].sort_values(['seed', 'norm_scalar', 'unsafety_prob']).set_index('unsafety_prob').pivot(columns=['seed', 'norm_scalar'])

In [None]:
# Distinct values of the `scalar` column.
ope_wis.scalar.unique()

In [None]:
# One-sided (alternative='greater') paired t-test of ql_safe against ql_unsafe.
# NOTE(review): neither name is defined in any cell shown here.
scipy.stats.ttest_rel(ql_safe, ql_unsafe, alternative='greater')

In [None]:
# One-sided (alternative='greater') independent-samples t-test of o against
# ql_unsafe.
# NOTE(review): `ql_unsafe` is not defined in any cell shown here, and the `o`
# built in an earlier cell is a multi-column DataFrame slice, not one column.
scipy.stats.ttest_ind(o, ql_unsafe, alternative='greater')

In [None]:
# Distinct values of the `scalar` column (same expression as an earlier cell).
ope_wis.scalar.unique()

In [None]:
# Print the result of utils.ci and the mean for each policy subset.
# NOTE(review): `il_unsafe`, `il_safe`, `ql_unsafe` and `ql_safe` are not
# defined in any cell shown here, and the `o` built in an earlier cell is a
# multi-column DataFrame slice, so `o.mean()` would be per-column -- confirm
# what these names are expected to hold.
print('O:',utils.ci(o), o.mean())
print('IL-unsafe:', utils.ci(il_unsafe), il_unsafe.mean())
print('IL-safe:', utils.ci(il_safe), il_safe.mean())
print('QL-unsafe:', utils.ci(ql_unsafe), ql_unsafe.mean())
print('QL-safe:', utils.ci(ql_safe), ql_safe.mean())

In [None]:
# Re-import the local `utils` module to pick up edits made since it was
# first imported. `importlib` is imported here because no other cell does so.
import importlib

importlib.reload(utils)