In [72]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import seaborn as sns
import matplotlib
import warnings
warnings.filterwarnings("ignore")

## Warfarin Experiments

In [147]:
def transform_model(row):
    """Build the 'model' label shown in the results table for one result row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'method'; 'ml' and/or 'prop_pred' are read depending on
        the method.

    Returns
    -------
    The outcome-model label (`row['ml']`) for 'DM', the propensity-model label
    (`row['prop_pred']`) for 'IPW', and the string "<prop_pred>, <ml>" for any
    other method.
    """
    method = row['method']
    if method == 'DM':
        return row['ml']
    if method == 'IPW':
        return row['prop_pred']
    # Every remaining method is labelled with both nuisance models.
    return f'{row["prop_pred"]}, {row["ml"]}'

In [148]:
# Accumulator that each method's per-run results are appended to below.
df = pd.DataFrame()

# our method
df_buffer = pd.read_csv('Results_Warfarin_prob/compiled_results/unconstrained_agg.csv')
# Replace the raw method names with the short labels used in the table.
df_buffer['method'] = df_buffer['method'].map({'Direct': 'DM', 'Robust': 'DR', 'IPW': 'IPW'})
# Only 'tree' has a label; Series.map turns any other value into NaN.
df_buffer['prop_pred'] = df_buffer['prop_pred'].map({'tree': 'DT'})
df_buffer['ml'] = 'RF/Log'

# Per-row 'model' label derived from method / prop_pred / ml.
df_buffer['model'] = df_buffer.apply(transform_model, axis=1)
df = pd.concat(
    [df, df_buffer[['depth', 'method', 'model', 'gap',
                    'solve_time', 'regret_test', 'best_found_test']]],
    ignore_index=True,
)

In [149]:
# kallus bertsimas
df_buffer = pd.read_csv('Results_Warfarin_prob/compiled_results/KB.csv')
# Short table labels for the two baselines.
df_buffer['method'] = df_buffer['method'].map({'Kallus': 'K-PT', 'Bertsimas': 'B-PT'})
# These rows get '-' as their model label.
df_buffer['model'] = '-'
df = pd.concat(
    [df, df_buffer[['depth', 'method', 'model', 'gap',
                    'solve_time', 'regret_test', 'best_found_test']]],
    ignore_index=True,
)

In [150]:
# PT
df_buffer = pd.read_csv('other_methods/results/policytree/warfarin/raw_proba.csv')
# One pass per column prefix: '<prefix>_time', '<prefix>_oos_regret' and the
# bare '<prefix>' column are reshaped into the common long format.
for oosp in ['random', 'r0.06', 'r0.11']:
    col = f'{oosp}_time'
    regret = f'{oosp}_oos_regret'
    h = pd.DataFrame({'solve_time': df_buffer[col].tolist(),
                      'regret_test': df_buffer[regret].tolist(),
                      'best_found_test': df_buffer[oosp].tolist()})
    # Scale by 100 (presumably fraction -> percent, to match the other rows; confirm).
    h['best_found_test'] *= 100
    h = h.assign(method='PT', gap=0, depth=2, model='DT, Mixed')
    df = pd.concat([df, h], ignore_index=False)

In [151]:
# CF, CT
for m, m_name in [('cf', 'CF'), ('cf_untuned', 'CF (untuned)'), ('ct', 'CT')]:
    df_buffer = pd.read_csv(f'other_methods/results/CF/warfarin/{m}_baseline_raw.csv')
    # One pass per column prefix: 'time_<prefix>', '<prefix>_oos_regret' and
    # the bare '<prefix>' column are reshaped into the common long format.
    for oosp in ['random', 'r0.06', 'r0.11']:
        col = f'time_{oosp}'
        regret = f'{oosp}_oos_regret'
        h = pd.DataFrame({'solve_time': df_buffer[col].tolist(),
                          'regret_test': df_buffer[regret].tolist(),
                          'best_found_test': df_buffer[oosp].tolist()})
        # Scale by 100 (presumably fraction -> percent; confirm).
        h['best_found_test'] *= 100
        # These rows carry '-' for both depth and model.
        h = h.assign(method=m_name, depth='-', gap=0, model='-')
        df = pd.concat([df, h], ignore_index=False)

In [152]:
#RC
df_buffer = pd.read_csv('other_methods/results/RC/warfarin/rc_raw.csv')

# Rows for the '0.33' randomization setting.
# NOTE(review): the comparison is against the *string* '0.33'; if pandas parses
# this column as float the mask is always False -- confirm the column dtype.
df_buffer_random = df_buffer[df_buffer['randomization'] == '0.33']
# For this setting the 'Best' rows are a relabelled duplicate of the
# balanced_rf rows. `.copy()` makes the filtered frame independent, so the
# assignment below is unambiguous (no SettingWithCopy pattern).
df_buffer_random1 = df_buffer_random[df_buffer_random['model'] == 'balanced_rf'].copy()
df_buffer_random1['model'] = 'best'
# Drop the original 'lrrf' rows and append the relabelled duplicates.
df_buffer_random = pd.concat([df_buffer_random[df_buffer_random['model'] != 'lrrf'], df_buffer_random1], ignore_index=True)
df_buffer_random['model'] = df_buffer_random['model'].map({'balanced_rf': 'RF', 'best': 'Best',
                                                          'balanced_lr': 'Log'})

# Every other randomization setting: here 'lrrf' itself is reported as 'Best'.
df_buffer_other = df_buffer[df_buffer['randomization'] != '0.33'].copy()
df_buffer_other['model'] = df_buffer_other['model'].map({'balanced_rf': 'RF', 'lrrf': 'Best',
                                                          'balanced_lr': 'Log'})

df_buffer = pd.concat([df_buffer_random, df_buffer_other], ignore_index=True).rename(columns={'oos_regret': 'regret_test',
                                                                                              'oosp': 'best_found_test'})
df_buffer['method'] = 'R&C'
df_buffer['gap'] = 0
df_buffer['depth'] = '-'
# Scale by 100 (presumably fraction -> percent, as for the other methods; confirm).
df_buffer['best_found_test'] *= 100

# All remaining columns are appended to df as-is.
df_buffer = df_buffer.drop(columns=['randomization', 'dataset', 'seed'])
df = pd.concat([df, df_buffer], ignore_index=False)

In [153]:
# Mean of every remaining column per (depth, method, model) group, rounded to 2 decimals.
mean_df = df.groupby(['depth', 'method', 'model']).agg('mean').reset_index().round(2)

In [154]:
# Standard deviation of every remaining column per (depth, method, model) group, rounded to 2 decimals.
std_df = df.groupby(['depth', 'method', 'model']).agg('std').reset_index().round(2)

In [160]:
# Join means and stds on the group keys; overlapping metric columns get pandas'
# default suffixes: `_x` for the left frame (mean_df) and `_y` for the right (std_df).
combined = mean_df.merge(std_df, on=['depth', 'method', 'model'])

In [161]:
# Collapse each metric's `<metric>_x` / `<metric>_y` pair into one
# "x ± y" string column named after the metric, then drop the pair.
for col in ['gap', 'solve_time', 'regret_test', 'best_found_test']:
    combined[col] = combined.apply(
        lambda row: '{:.2f} ± {:.2f}'.format(row[f'{col}_x'], row[f'{col}_y']),
        axis=1,
    )
    combined = combined.drop(columns=[f'{col}_x', f'{col}_y'])

In [162]:
# Sort rank per method label; several labels share rank 0.
mapping = {
    'IPW': 1,
    'DM': 2,
    'DR': 3,
    'K-PT': 4,
    'B-PT': 5,
    'PT': 6,
    'CF': 0,
    'CF (untuned)': 0,
    'CT': 0,
    'R&C': 0,
}

In [163]:
# Attach the sort rank for each row's method; a label missing from `mapping` raises KeyError.
combined['method_map'] = combined['method'].apply(lambda x: mapping[x])

In [164]:
# Order rows by depth, then by method rank, drop the helper rank column and emit the table as LaTeX.
print(combined.sort_values(by=['depth', 'method_map']).drop(columns=['method_map']).to_latex(index=False))

\begin{tabular}{lllllll}
\toprule
depth &       method &      model &           gap &        solve\_time &     regret\_test & best\_found\_test \\
\midrule
    1 &          IPW &         DT &   0.00 ± 0.00 &       7.45 ± 1.69 & 399.35 ± 139.25 &   71.19 ± 10.05 \\
    1 &           DM &     RF/Log &   0.00 ± 0.00 &       5.25 ± 2.12 & 283.12 ± 104.79 &    79.57 ± 7.56 \\
    1 &           DR & DT, RF/Log &   0.00 ± 0.00 &       7.82 ± 1.80 & 335.47 ± 120.29 &    75.80 ± 8.68 \\
    2 &          IPW &         DT &   0.00 ± 0.00 &    255.45 ± 65.13 &  317.39 ± 82.90 &    77.10 ± 5.98 \\
    2 &           DM &     RF/Log &   0.00 ± 0.00 &    192.03 ± 64.13 &  288.79 ± 88.46 &    79.16 ± 6.38 \\
    2 &           DR & DT, RF/Log &   0.00 ± 0.00 &    284.58 ± 83.83 &  279.32 ± 87.00 &    79.85 ± 6.28 \\
    2 &         K-PT &          - &   0.00 ± 0.03 & 5435.72 ± 2792.71 & 522.12 ± 264.43 &   62.33 ± 19.08 \\
    2 &         B-PT &          - & 52.30 ± 43.78 & 14333.53 ± 674.39 & 601.39 ± 

## Synthetic Experiments

In [165]:
def transform_model(row):
    """Return the 'model' label for one result row.

    'DM' rows are labelled with `row['ml']`, 'IPW' rows with
    `row['prop_pred']`, and every other method with "<prop_pred>, <ml>".

    NOTE: this is the same helper that is defined in the Warfarin section.
    """
    if row['method'] == 'DM':
        label = row['ml']
    elif row['method'] == 'IPW':
        label = row['prop_pred']
    else:
        label = f'{row["prop_pred"]}, {row["ml"]}'
    return label

In [166]:
# Accumulator that each method's per-run results on the synthetic data are appended to.
df = pd.DataFrame()

# our method
df_buffer = pd.read_csv('Results_Atheyv1/compiled_results/our_method.csv')
df_buffer['method'] = df_buffer['method'].map({'Direct': 'DM', 'Robust': 'DR', 'IPW': 'IPW'})
# Keep depth-1 runs whose budget is missing or exactly 1.0. `.copy()` makes the
# filtered frame independent so the column assignments below are unambiguous
# (no SettingWithCopy pattern).
df_buffer = df_buffer[((df_buffer['budget'].isna()) | (df_buffer['budget'] == 1.0)) & (df_buffer['depth'] == 1)].copy()

# Short table labels; values missing from these dicts become NaN.
df_buffer['prop_pred'] = df_buffer['prop_pred'].map({'tree': 'DT', 'log': 'Log'})
df_buffer['ml'] = df_buffer['ml'].map({'linear': 'LR', 'lasso': 'Lasso'})

# `transform_model` is defined in the cell directly above, so it is not
# redefined here.
df_buffer['model'] = df_buffer.apply(transform_model, axis=1)

df = pd.concat([df, df_buffer[['depth', 'method', 'model', 'gap',
                               'solve_time', 'regret_test', 'best_found_test']]], ignore_index=True)


# K-PT/B-PT
df_buffer = pd.read_csv('Results_Atheyv1/compiled_results/KB.csv')
df_buffer['method'] = df_buffer['method'].map({'Kallus': 'K-PT', 'Bertsimas': 'B-PT'})
df_buffer = df_buffer[df_buffer['depth'] == 1].copy()
df_buffer['model'] = '-'
df = pd.concat([df, df_buffer[['depth', 'method', 'model', 'gap',
                               'solve_time', 'regret_test', 'best_found_test']]], ignore_index=True)

# Column-name suffixes shared by the policytree and CF/CT result files
# ('time_<s>', 'p<s>', 'oosr_<s>').
suffixes = ['0.1', '0.25', '0.5', '0.75', '0.9']

# policytree
df_buffer = pd.read_csv('other_methods/results/policytree/synthetic/raw.csv')
frames = []
for col, oosp, regret in zip([f'time_{i}' for i in suffixes],
                             [f'p{i}' for i in suffixes],
                             [f'oosr_{i}' for i in suffixes]):
    h = pd.DataFrame({'solve_time': df_buffer[col].tolist(),
                      'regret_test': df_buffer[regret].tolist(),
                      'best_found_test': df_buffer[oosp].tolist()})
    h['method'] = 'PT'
    # Scale by 100 (presumably fraction -> percent; confirm).
    h['best_found_test'] *= 100
    h['gap'] = 0
    h['depth'] = 1
    h['model'] = 'DT, LR'
    frames.append(h)
# Single concat instead of growing df inside the loop; rows, order and index
# labels are the same as appending one frame at a time.
df = pd.concat([df] + frames, ignore_index=False)


# CF, CT
frames = []
for m, m_name in zip(['cf', 'ct'], ['CF', 'CT']):
    df_buffer = pd.read_csv(f'other_methods/results/CF/synthetic/{m}_raw.csv')
    for col, oosp, regret in zip([f'time_{i}' for i in suffixes],
                                 [f'p{i}' for i in suffixes],
                                 [f'oosr_{i}' for i in suffixes]):
        h = pd.DataFrame({'solve_time': df_buffer[col].tolist(),
                          'regret_test': df_buffer[regret].tolist(),
                          'best_found_test': df_buffer[oosp].tolist()})
        h['method'] = m_name
        h['gap'] = 0
        h['best_found_test'] *= 100
        h['depth'] = '-'
        h['model'] = '-'
        frames.append(h)
df = pd.concat([df] + frames, ignore_index=False)


# RC
fp = 'other_methods/results/RC/synthetic/'
df_buffer = pd.read_csv(os.path.join(fp, 'raw.csv'))
# Only the 'lr' rows are reported; they are relabelled as R&C below.
df_buffer = df_buffer[df_buffer['method'] == 'lr'].copy()
df_buffer['method'] = 'R&C'
df_buffer = df_buffer.rename(columns={'time_elapsed': 'solve_time', 'oosp': 'best_found_test',
                                      'oos_regret': 'regret_test'})
df_buffer['gap'] = 0
df_buffer['depth'] = '-'
df_buffer['model'] = 'LR'
df_buffer['best_found_test'] *= 100
df = pd.concat([df, df_buffer[['depth', 'method', 'model', 'gap',
                               'solve_time', 'regret_test', 'best_found_test']]], ignore_index=False)


In [167]:
# Mean of every remaining column per (depth, method, model) group, rounded to 2 decimals.
mean_df = df.groupby(['depth', 'method', 'model']).agg('mean').reset_index().round(2)

In [168]:
# Standard deviation of every remaining column per (depth, method, model) group, rounded to 2 decimals.
std_df = df.groupby(['depth', 'method', 'model']).agg('std').reset_index().round(2)

In [172]:
# Join means (suffix `_x`) and stds (suffix `_y`) on the group keys, then fold
# each metric's pair of columns into a single "mean ± std" string column.
combined = mean_df.merge(std_df, on=['depth', 'method', 'model'])
for col in ['gap', 'solve_time', 'regret_test', 'best_found_test']:
    combined[col] = combined.apply(
        lambda row: '{:.2f} ± {:.2f}'.format(row[f'{col}_x'], row[f'{col}_y']),
        axis=1,
    )
    combined = combined.drop(columns=[f'{col}_x', f'{col}_y'])

In [173]:
# Sort rank per method label; several labels share rank 0.
mapping = {
    'IPW': 1,
    'DM': 2,
    'DR': 3,
    'K-PT': 4,
    'B-PT': 5,
    'PT': 6,
    'CF': 0,
    'R&C': 0,
    'CT': 0,
}

In [174]:
# Attach the sort rank for each row's method; a label missing from `mapping` raises KeyError.
combined['method_map'] = combined['method'].apply(lambda x: mapping[x])

In [175]:
# Order rows by depth, then by method rank, drop the helper rank column and emit the table as LaTeX.
print(combined.sort_values(by=['depth', 'method_map']).drop(columns=['method_map']).to_latex(index=False))

\begin{tabular}{lllllll}
\toprule
depth & method &      model &         gap &  solve\_time &     regret\_test & best\_found\_test \\
\midrule
    1 &    IPW &         DT & 0.00 ± 0.00 & 1.34 ± 0.22 &  123.18 ± 95.63 &   66.25 ± 15.84 \\
    1 &    IPW &        Log & 0.00 ± 0.00 & 1.31 ± 0.31 & 138.12 ± 114.65 &   63.89 ± 18.00 \\
    1 &     DM &         LR & 0.00 ± 0.00 & 0.76 ± 0.40 &   64.22 ± 38.06 &    75.28 ± 9.87 \\
    1 &     DM &      Lasso & 0.00 ± 0.00 & 0.57 ± 0.09 &  200.68 ± 72.26 &   53.44 ± 11.00 \\
    1 &     DR &     DT, LR & 0.00 ± 0.00 & 0.86 ± 0.46 &   65.76 ± 40.68 &   75.01 ± 10.13 \\
    1 &     DR &  DT, Lasso & 0.00 ± 0.00 & 2.08 ± 0.82 &   77.04 ± 70.23 &   73.70 ± 12.83 \\
    1 &     DR &    Log, LR & 0.00 ± 0.00 & 0.95 ± 0.56 &   78.78 ± 55.88 &   72.98 ± 11.56 \\
    1 &     DR & Log, Lasso & 0.00 ± 0.00 & 1.79 ± 1.01 &  111.47 ± 68.44 &   67.16 ± 11.88 \\
    1 &   K-PT &          - & 0.00 ± 0.00 & 1.97 ± 0.76 & 161.83 ± 103.62 &   60.10 ± 17.46 \\
   

## Additional Experiments

In [228]:
# Column names applied to header-less result CSVs (passed as `names=` to read_csv).
cols = [
    'method', 'file_name', 'num_rows',
    'depth', 'branching_limit', 'time_limit',
    'status', 'obj_value', 'gap', 'solve_time',
    'regret_train', 'best_found_train', 'treatment_acc_train',
    'regret_test', 'best_found_test', 'treatment_acc_test',
    'prop_pred', 'ml', 'protected_col', 'fairness_bound',
    'treatment_budget', 'budget',
]

In [241]:
# The file has no header row, so column names are supplied from `cols`.
df_buffer = pd.read_csv(
    'Results_Warfarin_prob/compiled_results/dr_synthetic.csv',
    header=None,
    names=cols,
)

In [242]:
# Compiled DR results; this file is read with its own header row.
df_buffer1 = pd.read_csv('Results_Atheyv1/compiled_results/DR.csv')

In [243]:
# file_name ends in '..._<randomization>_<split>': the second-to-last
# underscore-separated token is parsed as a float, the last one as an int.
df_buffer['randomization'] = df_buffer['file_name'].apply(lambda name: float(name.rsplit('_', 2)[-2]))
df_buffer['split'] = df_buffer['file_name'].apply(lambda name: int(name.rsplit('_', 1)[-1]))
# Rename prop_pred to 'propensity_score_pred', then keep only these six columns.
df_buffer = df_buffer.rename(columns={'prop_pred': 'propensity_score_pred'})
df_buffer = df_buffer[['split', 'randomization', 'budget',
                       'propensity_score_pred', 'ml',
                       'solve_time']]

In [244]:
# Inner join with no explicit `on`: pandas joins on every column name the two frames share.
# NOTE(review): the shared key set depends on df_buffer1's columns, which are not visible here -- confirm.
df_buffer = df_buffer1.merge(df_buffer)

In [245]:
# Constant columns for this table.
df_buffer['gap'] = 0
df_buffer['method'] = 'DR'
# Short table labels; values missing from these dicts become NaN.
df_buffer['ml'] = df_buffer['ml'].map({'linear': 'LR', 'lasso': 'Lasso'})
df_buffer['propensity_score_pred'] = df_buffer['propensity_score_pred'].map(
    {'tree': 'DT', 'log': 'Log'}
)
# NOTE(review): the label order here is "<ml>, <propensity>", the reverse of
# transform_model's "<prop_pred>, <ml>" -- confirm this is intended.
df_buffer['model'] = df_buffer.apply(
    lambda r: f'{r["ml"]}, {r["propensity_score_pred"]}',
    axis=1,
)
# Scale by 100 (presumably fraction -> percent; confirm).
df_buffer['oos_optimal_treatment'] = df_buffer['oos_optimal_treatment'] * 100

In [246]:
def budget_mapping(x):
    """Map a numeric budget to its bin label, e.g. 0.12 -> '0.10-0.14'.

    Bins are contiguous: [0.05, 0.10), [0.10, 0.15), ..., [0.30, 0.35) and
    [0.35, 0.40]. Every value the labels name explicitly (0.05-0.09, ...)
    keeps its label. Values that fall between two labelled ranges (0.095, or
    0.09000000000000001 produced by float arithmetic) are assigned to the bin
    whose lower edge they have reached, instead of returning None and being
    dropped silently by a later groupby.

    Parameters
    ----------
    x : float
        Budget value.

    Returns
    -------
    str or None
        The bin label, or None when x is NaN or outside [0.05, 0.40].
    """
    # (lower edge, label), in increasing order of lower edge.
    bins = (
        (0.05, '0.05-0.09'),
        (0.10, '0.10-0.14'),
        (0.15, '0.15-0.19'),
        (0.20, '0.20-0.24'),
        (0.25, '0.25-0.29'),
        (0.30, '0.30-0.34'),
        (0.35, '0.35-0.40'),
    )
    upper = 0.40

    # Strip float noise so values such as 0.09999999999 land on the intended edge.
    x = round(x, 6)
    # NaN fails both comparisons and therefore returns None as well.
    if not (bins[0][0] <= x <= upper):
        return None

    label = None
    for lower, name in bins:
        if x < lower:
            break
        label = name
    return label

# Bin label for each row's budget.
df_buffer['budget1'] = df_buffer['budget'].apply(budget_mapping)

In [247]:
# Keep only the grouping keys and the metrics that go into the table.
df_buffer = df_buffer[[
    'tree_depth', 'method', 'model', 'budget1',
    'gap', 'solve_time', 'oos_regret', 'oos_optimal_treatment',
]]

In [250]:
# Per-group mean and standard deviation, each rounded to 2 decimals.
mean_df = (
    df_buffer.groupby(['tree_depth', 'method', 'model', 'budget1'])
    .agg('mean')
    .reset_index()
    .round(2)
)
std_df = (
    df_buffer.groupby(['tree_depth', 'method', 'model', 'budget1'])
    .agg('std')
    .reset_index()
    .round(2)
)

# After the merge, means carry the `_x` suffix and stds the `_y` suffix; fold
# each pair into a single "mean ± std" string column.
combined = mean_df.merge(std_df, on=['tree_depth', 'method', 'model', 'budget1'])
for col in ['gap', 'solve_time', 'oos_regret', 'oos_optimal_treatment']:
    combined[col] = combined.apply(
        lambda row: '{:.2f} ± {:.2f}'.format(row[f'{col}_x'], row[f'{col}_y']),
        axis=1,
    )
    combined = combined.drop(columns=[f'{col}_x', f'{col}_y'])

In [252]:
# Order rows by model label, then by budget bin, and emit the table as LaTeX.
print(combined.sort_values(by=['model', 'budget1']).to_latex(index=False))

\begin{tabular}{rlllllll}
\toprule
 tree\_depth & method &      model &   budget1 &         gap &  solve\_time &     oos\_regret & oos\_optimal\_treatment \\
\midrule
          1 &     DR &     LR, DT & 0.05-0.09 & 0.00 ± 0.00 & 0.16 ± 0.03 & 158.32 ± 49.39 &          58.35 ± 9.03 \\
          1 &     DR &     LR, DT & 0.10-0.14 & 0.00 ± 0.00 & 0.22 ± 0.13 & 132.80 ± 43.61 &          61.54 ± 9.04 \\
          1 &     DR &     LR, DT & 0.15-0.19 & 0.00 ± 0.00 & 0.23 ± 0.10 & 113.91 ± 44.06 &          65.44 ± 9.48 \\
          1 &     DR &     LR, DT & 0.20-0.24 & 0.00 ± 0.00 & 0.20 ± 0.11 &  85.58 ± 32.88 &          70.01 ± 8.02 \\
          1 &     DR &     LR, DT & 0.25-0.29 & 0.00 ± 0.00 & 0.23 ± 0.13 &  77.34 ± 32.68 &          72.25 ± 8.12 \\
          1 &     DR &     LR, DT & 0.30-0.34 & 0.00 ± 0.00 & 0.23 ± 0.13 &  69.97 ± 34.71 &          73.71 ± 8.56 \\
          1 &     DR &     LR, DT & 0.35-0.40 & 0.00 ± 0.00 & 0.23 ± 0.14 &  66.32 ± 36.18 &          75.04 ± 8.86 \\
       

### Warfarin

In [253]:
# Compiled DM results; this file is read with its own header row.
df_buffer = pd.read_csv('Results_Warfarin_prob/compiled_results/DM.csv')

In [254]:
# Column names applied to the header-less dm_warfarin.csv (passed as `names=`).
# Same list as the `cols` defined earlier in this section.
cols = [
    'method', 'file_name', 'num_rows',
    'depth', 'branching_limit', 'time_limit',
    'status', 'obj_value', 'gap', 'solve_time',
    'regret_train', 'best_found_train', 'treatment_acc_train',
    'regret_test', 'best_found_test', 'treatment_acc_test',
    'prop_pred', 'ml', 'protected_col', 'fairness_bound',
    'treatment_budget', 'budget',
]

In [255]:
# The file has no header row, so column names are supplied from `cols`.
df_buffer1 = pd.read_csv(
    'Results_Warfarin_prob/compiled_results/dm_warfarin.csv',
    names=cols,
    header=None,
)

In [256]:
# Keep only the rows that have a protected_col value.
has_protected_col = df_buffer1['protected_col'].notna()
df_buffer1 = df_buffer1.loc[has_protected_col]

In [261]:
# file_name ends in '..._<randomization>_<split>'; randomization is kept as a
# string here, split is parsed as an int.
df_buffer1['randomization'] = df_buffer1['file_name'].apply(lambda name: name.rsplit('_', 2)[-2])
df_buffer1['split'] = df_buffer1['file_name'].apply(lambda name: int(name.rsplit('_', 1)[-1]))
# Seeds 1..5, 120 consecutive rows each.
# NOTE(review): this hard-codes exactly 600 rows ordered by seed; any other
# row count raises on assignment -- confirm against the file layout.
df_buffer1['seed'] = [seed for seed in range(1, 6) for _ in range(120)]

In [262]:
# Keep only the rows with a fairness value strictly below 0.09.
below_threshold = df_buffer['fairness'] < 0.09
df_buffer = df_buffer.loc[below_threshold]

In [264]:
# Rename to 'fairness' so the column name matches df_buffer's 'fairness' column for the merge.
df_buffer1 = df_buffer1.rename(columns={'fairness_bound': 'fairness'})

In [265]:
# Inner join with no explicit `on`: pandas joins on every column name shared by df_buffer and the
# five-column slice of df_buffer1.
# NOTE(review): if df_buffer also has a 'solve_time' column it becomes a join key too -- confirm.
merged = df_buffer.merge(df_buffer1[['randomization', 'split', 'seed', 'fairness', 'solve_time']])

In [271]:
# Regret derived from the realized out-of-sample outcome.
# NOTE(review): 1386 is a hard-coded constant (presumably the number of test samples) -- confirm and name it.
merged['oos_regret'] = 1386 - merged['realized_outcome_oos'] * 1386

In [272]:
# Constant gap of 0 for every row.
# NOTE(review): hard-coded rather than taken from df_buffer1's 'gap' column -- confirm this is intended.
merged['gap'] = 0

In [275]:
# Constant model label for every row of this table.
merged['model'] = 'RF/Log'

In [276]:
# Keep only the grouping keys and the metrics that go into the table.
merged = merged[[
    'tree_depth', 'method', 'model', 'fairness',
    'gap', 'solve_time', 'dr_disparity', 'realized_disparity',
    'oos_regret', 'realized_outcome_oos',
]]

In [281]:
# Per-group mean and standard deviation, each rounded to 2 decimals.
mean_df = (
    merged.groupby(['tree_depth', 'method', 'model', 'fairness'])
    .agg('mean')
    .reset_index()
    .round(2)
)
std_df = (
    merged.groupby(['tree_depth', 'method', 'model', 'fairness'])
    .agg('std')
    .reset_index()
    .round(2)
)

# After the merge, means carry the `_x` suffix and stds the `_y` suffix; fold
# each pair into a single "mean ± std" string column.
combined = mean_df.merge(std_df, on=['tree_depth', 'method', 'model', 'fairness'])
for col in ['gap', 'solve_time', 'dr_disparity', 'realized_disparity',
            'oos_regret', 'realized_outcome_oos']:
    combined[col] = combined.apply(
        lambda row: '{:.2f} ± {:.2f}'.format(row[f'{col}_x'], row[f'{col}_y']),
        axis=1,
    )
    combined = combined.drop(columns=[f'{col}_x', f'{col}_y'])

In [282]:
# Order rows by the fairness value and emit the table as LaTeX.
print(combined.sort_values(by=['fairness']).to_latex(index=False))

\begin{tabular}{rllrllllll}
\toprule
 tree\_depth & method &  model &  fairness &         gap &      solve\_time & dr\_disparity & realized\_disparity &      oos\_regret & realized\_outcome\_oos \\
\midrule
          2 &     DM & RF/Log &      0.01 & 0.00 ± 0.00 & 316.31 ± 112.04 & -0.01 ± 0.01 &        0.01 ± 0.10 & 326.02 ± 106.02 &          0.76 ± 0.08 \\
          2 &     DM & RF/Log &      0.02 & 0.00 ± 0.00 & 284.67 ± 125.32 & -0.02 ± 0.01 &       -0.00 ± 0.10 & 321.20 ± 107.86 &          0.77 ± 0.08 \\
          2 &     DM & RF/Log &      0.03 & 0.00 ± 0.00 & 290.46 ± 130.17 & -0.02 ± 0.02 &       -0.01 ± 0.10 & 312.77 ± 105.23 &          0.77 ± 0.08 \\
          2 &     DM & RF/Log &      0.04 & 0.00 ± 0.00 & 280.67 ± 116.49 & -0.03 ± 0.02 &       -0.02 ± 0.09 &  303.54 ± 99.35 &          0.78 ± 0.07 \\
          2 &     DM & RF/Log &      0.05 & 0.00 ± 0.00 & 271.45 ± 123.14 & -0.03 ± 0.03 &       -0.03 ± 0.09 &  297.82 ± 96.61 &          0.79 ± 0.07 \\
          2 &     DM & 