# Description

Notebook for loading simulation results from updated neff experiments, with
sample splitting per the TMLR review request.

# 1. Imports

In [5]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import multiprocessing
import numpy as np
import os
import pandas as pd
import pingouin as pg
import pickle
import seaborn as sns
import sys

from tqdm import tqdm

# user imports
sys.path.append("../../")

from utils.pwr import *
from utils.sim import *
from utils.rddd import *

# 2. Load simulated results data

## Simulation constants

In [6]:
n_trials = 500
fuzzy_gaps = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
alpha = 0.05

low_cutoff = 0.25
hi_cutoff = 0.75

seeds = range(0, 401, 100)

In [7]:
DATA_DIR = "/data/tliu/rdsgd/"
RESULTS_DIR = "../../results/tmlr_rebuttal/"

## Baseline discovery

In [8]:
baseline_result_dict = {}

for fuzzy_gap in tqdm(fuzzy_gaps):
    error_dict = {
        'x': {
            'fp': 0,
            'lower_fn': 0,
            'upper_fn': 0
        },

        'covar': {
            'fp': 0
        },
    }
    pvals = {}
    for seed in seeds:
        with open(os.path.join(DATA_DIR, "baseline", f"seed{seed}/blended_rdd_fixed_bw_{fuzzy_gap}.pkl"), "rb") as f:
            results = pickle.load(f)
            for result in results:
                x_thresholds = [np.round(c, decimals=2) for c in result['x'].keys()]
                if low_cutoff not in x_thresholds:
                    error_dict['x']['lower_fn'] += 1
                else: 
                    x_thresholds.remove(0.25)

                if hi_cutoff not in x_thresholds:
                    error_dict['x']['upper_fn'] += 1
                else:
                    x_thresholds.remove(0.75)

                error_dict['x']['fp'] += len(x_thresholds)
                error_dict['covar']['fp'] += len(result['covar'].keys())

            
    baseline_result_dict[fuzzy_gap] = error_dict       


100%|██████████| 6/6 [00:10<00:00,  1.73s/it]


In [9]:
pickle.dump(baseline_result_dict, open(os.path.join(RESULTS_DIR, "blend_baseline_results.dict"), "wb"))

## Herlands et al. results

In [15]:
#HERLANDS_RESULT_PATH = "../../../herlands-lord3/results/"

herlands_dict = {}

herlands_trial_dict = {}

for gap in fuzzy_gaps:
    gap_dict = {
        'upper_tp': 0,
        'lower_tp': 0,
        'fp': 0,
        'tot_tests': 0
    }
    trial_gap_dict = {
        'lower': [],
        'upper': []
    }
    for seed in range(0, 500, 100):
        results = pickle.load(open(os.path.join(DATA_DIR, "herlands-lord3", "seed{}/herlands_results_gap{}.pkl".format(seed, gap)), "rb"))
        gap_dict['lower_tp'] += sum([x[0] for x in results])
        gap_dict['upper_tp'] += sum([x[1] for x in results])
        gap_dict['tot_tests'] += sum([x[2] for x in results])

        for lower, upper, *_ in results:
            trial_gap_dict['lower'].append(int(lower))
            trial_gap_dict['upper'].append(int(upper))
        
    herlands_dict[gap] = gap_dict
    herlands_trial_dict[gap] = trial_gap_dict

In [23]:
pickle.dump(herlands_dict, open(os.path.join(RESULTS_DIR, "herlands_results.dict"), "wb"), -1)

## Subgroup discovery with sample splitting

In [16]:
bw = 0.25
sample_size = 1000
seed_offset = 2000

def process_subgroup_tree_results_sample_split(fuzzy_gap, seeds):
    """Process subgroup tree results, with a holdout set"""
    pvals = {
        'trial': [],
        'cutoff': [],
        'tau_pval': [],
        'neff_pval': [],
        'rule_length': [],
    }
    for seed in seeds:
        with open(os.path.join(DATA_DIR, "subgroup", f"seed{seed}_blended_rdd_fixed_bw_{fuzzy_gap}.pkl"), "rb") as f:
            result, n_tests = pickle.load(f)    
            
            x_dict = result['x']
            x_thresholds = [np.round(c, decimals=2) for c in x_dict.keys()]
            
            for x_cutoff in x_thresholds:
                nodes = x_dict[x_cutoff]
                
                for node in nodes:
                    if node['llr_results'] is None:
                        continue
                
                    # generate an iid hold-out set
                    holdout = generate_cont_blended_rdd(
                        n=sample_size,
                        # make sure we don't use the same seed as the training set
                        seed=seed + seed_offset,
                        fuzzy_gap=fuzzy_gap
                    )

                    # filter to bandwidth, which was fixed to 0.25
                    holdout = holdout[(holdout['x'] >= x_cutoff - bw) & (holdout['x'] <= x_cutoff + bw)]

                    # apply the rule path to the holdout set
                    rule_path = node['rule_path']

                    # omit the last rule, which is the terminal node
                    for rule in rule_path[:-1]:
                        if rule.path_dir == '<':
                            holdout = holdout[holdout[rule.feature] < rule.threshold]
                        elif rule.path_dir == '>=':
                            holdout = holdout[holdout[rule.feature] >= rule.threshold]
                        elif rule.path_dir == '<=':
                            holdout = holdout[holdout[rule.feature] <= rule.threshold]
                        elif rule.path_dir == '>':
                            holdout = holdout[holdout[rule.feature] > rule.threshold]
                        elif rule.path_dir == '==':
                            holdout = holdout[holdout[rule.feature] == rule.threshold]
                        
                    # test the discovered subgroup on the holdout set
                    llr_results, _, _ = test_discontinuity(holdout, x_cutoff, 'x', treat='t', bw=bw, kernel='triangular')

                    pvals['trial'].append(seed)
                    pvals['cutoff'].append(x_cutoff)
                    pvals['neff_pval'].append(node['neff_pval'])
                    pvals['tau_pval'].append(llr_results.pvalues['z'])
                    pvals['rule_length'].append(len(node['rule_path']))
                    #pvals[seed]['x_all'][x_cutoff].append((node['llr_results'].pvalues['z'], node['neff_pval'], len(node['rule_path'])))
    
    pval_df = pd.DataFrame.from_dict(pvals)
    pval_df['fuzzy_gap'] = fuzzy_gap
    return pval_df

In [17]:
%%time
#subgroup_results_dict = {}

seeds = range(0, n_trials)

f_args = [(fuzzy_gap, seeds) for fuzzy_gap in fuzzy_gaps]
with multiprocessing.Pool(8) as p:
    results = p.starmap(process_subgroup_tree_results_sample_split, f_args)

#subgroup_results_dict = {fuzzy_gap: pvals for fuzzy_gap, pvals in results}
pval_df = pd.concat(results)

invalid value encountered in sqrt
invalid value encountered in sqrt
invalid value encountered in sqrt
invalid value encountered in sqrt
invalid value encountered in sqrt
invalid value encountered in sqrt


CPU times: user 753 ms, sys: 352 ms, total: 1.11 s
Wall time: 15min 12s


In [19]:
#pval_df.to_pickle(os.path.join(RESULTS_DIR, "subgroup_neff_pval_sample_split.df"))
#pval_df = pd.read_pickle(os.path.join(RESULTS_DIR, "subgroup_neff_pval_sample_split.df"))

## Compute corrected p-values

In [20]:
alpha = 0.05
low_cutoff = 0.25
upper_cutoff = 0.75

#tp_all = {}

nb_dict = {}

subgroup_trial_dict = {}

#pval_df = result_dict['x_all']
#pval_df.shape
for gap in tqdm(fuzzy_gaps):
    gap_dict = {
        'upper_tp': 0,
        'lower_tp': 0,
        'fp': 0,
        'tot_tests': 0,
        'tot_upper_tp': 0,
        'tot_lower_tp': 0,
        #'fp_cutoffs': set()
    }
    
    for trial in range(n_trials):
        trial_df = pval_df[(pval_df['trial'] == trial) & (pval_df['fuzzy_gap'] == gap)]
        gap_dict['tot_tests'] += trial_df.shape[0]
        
        method = 'bonf'
        #reject_neff, _ = pg.multicomp(list(trial_df['neff_pval']), method=method, alpha=alpha)
        reject_neff = trial_df['neff_pval'] < alpha

        test_dict = trial_df.groupby('cutoff')['trial'].count().to_dict()
        trial_df['n_tests'] = trial_df['cutoff'].apply(lambda x: test_dict[x])
        
        #print(reject_neff.sum())
        #print(trial_df['tau_pval'].shape)
        
        reject_z = trial_df['tau_pval'] < (alpha / trial_df['n_tests'])
        
        # original correction
        reject_z, _ = pg.multicomp(list(trial_df['tau_pval']), method=method, alpha=alpha)

        reject = (reject_z & reject_neff) | (reject_z & (trial_df['rule_length'] == 1))
        if not hasattr(reject, '__iter__'):
            reject = [reject]
        sig_df = trial_df[reject]
        
        if low_cutoff in list(sig_df['cutoff']):
            gap_dict['lower_tp'] += 1
            gap_dict['tot_lower_tp'] += sig_df[sig_df['cutoff'] == low_cutoff].shape[0]

        if upper_cutoff in list(sig_df['cutoff']):
            gap_dict['upper_tp'] += 1
            gap_dict['tot_upper_tp'] += sig_df[sig_df['cutoff'] == upper_cutoff].shape[0]
            
        # remaining sig values are false positives
        fp_df = sig_df[~sig_df['cutoff'].isin([low_cutoff, upper_cutoff])]
        #print(fp_df)
        gap_dict['fp'] += fp_df.shape[0]
        # for x in fp_df['cutoff'].unique():
        #     gap_dict['fp_cutoffs'].add(x)
        
    nb_dict[gap] = gap_dict
    #subgroup_trial_dict[gap] = trial_gap_dict
        

  0%|          | 0/6 [00:00<?, ?it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 6/6 [00:10<00:00,  1.76s/it]


In [22]:
pickle.dump(nb_dict, open(os.path.join(RESULTS_DIR, "rdsgd_neff_results_sample_split.dict"), "wb"), -1)