# Description

Notebook for the diabetes-diagnosis regression discontinuity design (RDD) discovery analysis. It exercises the end-to-end RDD discovery process.

**Note**: Because the Optum claims data is private, this notebook cannot be run as-is and is provided for informational purposes only.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import seaborn as sns
import sys

from tqdm import tqdm

In [None]:
# user imports
# Extend the module search path so the project-local packages imported below
# can be resolved. The paths are relative, so they depend on the directory the
# notebook is run from.
# NOTE(review): presumably `rddd` and `utils` live under these two directories — confirm.
sys.path.append("../../../optum-pipeline")
sys.path.append("../../")


from rddd.feat import gen_feat_df, get_descriptives
from utils.rddd import policy_tree_discovery, test_discontinuity, create_feat_df

In [None]:
# notebook magics
# Load IPython's autoreload extension so edited modules can be picked up
# without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all modules before executing code.
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load data

In [None]:
%%time
# Read the diabetes dataset from a parquet file into a DataFrame.
# The path is redacted because the underlying claims data is private.
diabetes_df = pd.read_parquet("/REDACTED/diabetes_30d.parq")

# Clean data

In [None]:
%%time
# Build the feature table from the raw data with the project helper gen_feat_df.
# NOTE(review): rdd_cols presumably names the treatment indicator and the
# running variable to carry through, and compute_age=False presumably skips
# deriving an age feature — confirm against rddd.feat.gen_feat_df.
diabetes_feat = gen_feat_df(diabetes_df, rdd_cols=['indicator', 'lr_fmt'], compute_age=False)

In [None]:
# Keep only rows whose lr_fmt value lies strictly inside the open interval (0, 20).
diabetes_feat = diabetes_feat.loc[diabetes_feat['lr_fmt'].gt(0) & diabetes_feat['lr_fmt'].lt(20)]

In [None]:
# Feature columns are every column after the first two.
# NOTE(review): the first two columns are presumably 'indicator' and 'lr_fmt'
# (they are appended back separately when the sample frame is built) — confirm.
feat_cols = list(diabetes_feat.columns[2:])

# Run assignment tree discovery

In [None]:
# Work on a separate copy of the feature table in which lr_fmt is rounded to
# one decimal place; diabetes_feat itself is left untouched.
test_df = diabetes_feat.assign(lr_fmt=diabetes_feat['lr_fmt'].round(1))

In [None]:
import warnings
# Suppress warnings whose originating module name matches the regex 'sk.*'.
# NOTE(review): presumably aimed at scikit-learn, but the pattern matches any
# module name starting with "sk" — confirm that this breadth is intended.
warnings.filterwarnings("ignore", module='sk.*')

In [None]:
%%time
# --- discovery configuration -------------------------------------------------
alpha = 0.05
treat = 'indicator'
running_cols = ['lr_fmt']
random_state = 42

# Candidate cutoffs for lr_fmt: 5.0, 5.5, ..., 7.5 (rounded to avoid float noise).
grid_dict = dict(lr_fmt=np.round(np.arange(5, 7.6, 0.5), 1))

# Restrict the frame to the feature columns plus the treatment and running columns.
sample_df = test_df[[*feat_cols, 'indicator', 'lr_fmt']]

# Run the project's discovery routine; it returns the per-running-variable
# results and a test count.
# NOTE(review): bw is presumably the bandwidth around each cutoff — confirm in utils.rddd.
subgroup_dict, num_tests = policy_tree_discovery(
    sample_df,
    running_cols=running_cols,
    grid_dict=grid_dict,
    treat=treat,
    alpha=alpha,
    rescale=False,
    bw=0.4,
    random_state=random_state,
)

In [None]:
# Load previously saved discovery results, replacing any subgroup_dict /
# num_tests already present in the session.
out_dir = "/REDACTED/diabetes/"
# Use a context manager so the file handle is closed even if unpickling fails.
# NOTE: pickle can execute arbitrary code while loading; only load result files
# that come from a trusted source.
with open(f"{out_dir}/diabetes_subgroup_results.pkl", "rb") as results_file:
    subgroup_dict, num_tests = pickle.load(results_file)

In [None]:
# Collect (cutoff, node) pairs that pass the selection criteria:
#   * the node has local-linear-regression results,
#   * it either has a positive net benefit or a rule path of length 1
#     (the original author's note says the root node must be considered), and
#   * its p-value for 'z' is below alpha divided by the number of tests.
alpha = 0.05
sel_nodes = []
for cutoff, nodes in subgroup_dict['lr_fmt'].items():
    for node in nodes:
        llr = node['llr_results']
        if llr is None:
            # nothing was fitted for this node, so there is no p-value to check
            continue
        eligible = (node['net_benefit'] > 0) or (len(node['rule_path']) == 1)
        if eligible and (llr.pvalues['z'] < alpha / num_tests):
            sel_nodes.append((cutoff, node))