In [None]:
import pandas as pd
from functools import reduce
# Load ohie_data.csv into `ohiedf`. The "ground truth" cells further down read
# its `D`, `Y` and `phqtot_inp` columns. Note this is a different frame from
# `ohie_df` (with an underscore), which is assembled from the .dta files below.
ohiedf = pd.read_csv('ohie_data.csv')

- Outcome variables: 
`any_hosp_ed`: Any ED visit resulting in a hospitalization in the study period

- oregonhie_survey12m_vars -> `health_gen_bin_12m`: self-reported health. Excellent/good/very good vs. bad/very bad

- Selection parameter: 
- oregonhie_survey12m_vars:`hhinc_cat_12m`

In [None]:
# First-pass build of `ohie_df`: lottery assignment, pre-notification state
# program enrollment, ED history plus the `any_hosp_ed` outcome, pre-lottery
# in-person survey items, and the household income category.
descriptive_stats = pd.read_stata('oregonhie_descriptive_vars.dta')
state_programs = pd.read_stata('oregonhie_stateprograms_vars.dta')
ed_visits = pd.read_stata('oregonhie_ed_vars.dta')
survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
survey_12m = pd.read_stata('oregonhie_survey12m_vars.dta')


descriptive_stats = descriptive_stats[['person_id', 'treatment', 'numhh_list']]

# State program enrollments occurring prior to program notification date
state_programs = state_programs[['person_id', 'snap_ever_prenotify07', 'snap_tot_hh_prenotify07',
                                'tanf_ever_prenotify07', 'tanf_tot_hh_prenotify07']]

# Outcome (`any_hosp_ed`) plus every ED column whose name contains 'pre'
pre_vars = ['person_id', 'any_hosp_ed'] + [c for c in ed_visits.columns.tolist() if 'pre' in c]
ed_visits = ed_visits[pre_vars]

# In-person survey columns whose name contains '_pre_'
pre_survey_vars = ['person_id'] + [c for c in survey_responses.columns.tolist() if '_pre_' in c]
survey_responses = survey_responses[pre_survey_vars]

# Selection variable. `hhinc_cat_12m` is a 12-month mail survey column
# (oregonhie_survey12m_vars), so it is taken from `survey_12m`; asking the
# in-person survey frame for it fails with a KeyError.
income_category = survey_12m[['person_id', 'hhinc_cat_12m']]

tables = [descriptive_stats, state_programs, ed_visits, survey_responses, income_category]

# Inner-join everything on person_id, then keep single-individual households
# (`numhh_list` == 'signed self up'). No other row filter is applied here.
ohie_df = reduce(lambda left, right: pd.merge(left, right, on=['person_id'], how='inner'), tables)
ohie_df = ohie_df[(ohie_df['numhh_list'] == 'signed self up')]

In [None]:
# Build the modelling table `ohie_df`:
#   D  - lottery assignment (integer code of the `treatment` category)
#   Y  - 1 if any `dx_post` in-person survey item was answered 'Yes', else 0
#   everything else - pre-lottery covariates and the poverty selection flag.
descriptive_stats = pd.read_stata('oregonhie_descriptive_vars.dta')
state_programs = pd.read_stata('oregonhie_stateprograms_vars.dta')
ed_visits = pd.read_stata('oregonhie_ed_vars.dta')
survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
survey_12m = pd.read_stata('oregonhie_survey12m_vars.dta')


descriptive_stats = descriptive_stats[['person_id', 'treatment', 'numhh_list']]

# State program enrollments occurring prior to program notification date
state_programs = state_programs[['person_id', 'snap_ever_prenotify07', 'snap_tot_hh_prenotify07',
                                'tanf_ever_prenotify07', 'tanf_tot_hh_prenotify07']]

# Emergency department visit history (every ED column whose name contains 'pre')
pre_vars = ['person_id'] + [c for c in ed_visits.columns.tolist() if 'pre' in c]
ed_visits = ed_visits[pre_vars]

# New conditions: flag rows where at least one `dx_post` item equals 'Yes'.
# NOTE(review): an unanswered item compares unequal to 'Yes', so respondents
# with no post-lottery answers get 0 here - confirm that is intended.
post_survey_vars = [c for c in survey_responses.columns.tolist() if 'dx_post' in c]
new_conditions = (survey_responses[post_survey_vars] == 'Yes').astype(int)
new_condition = (new_conditions.sum(axis=1) > 0).astype(int)

# Health history (in-person survey columns whose name contains '_pre_')
pre_survey_vars = ['person_id'] + [c for c in survey_responses.columns.tolist() if '_pre_' in c]
survey_responses = survey_responses[pre_survey_vars]
health_history = survey_responses.copy()
health_history['new_chronic_condition'] = new_condition

# Below federal poverty limit (selection variable).
# A missing income must stay missing: `NaN < 100` is False, which would
# otherwise record people with unknown income as "not below poverty".
income_pct_fpl = survey_12m['hhinc_pctfpl_12m']
survey_12m['below_federal_pov'] = (income_pct_fpl < 100).astype(float).where(income_pct_fpl.notna())
# `survey_12m` keeps all of its columns; only the two needed ones are merged.
poverty_status = survey_12m[['person_id', 'below_federal_pov']]

tables = [descriptive_stats, state_programs, ed_visits, health_history, poverty_status]

# Inner-join everything on person_id, then keep single-individual households
# (`numhh_list` == 'signed self up').
ohie_df = reduce(lambda left, right: pd.merge(left, right, on=['person_id'], how='inner'), tables)
ohie_df = ohie_df[ohie_df['numhh_list'] == 'signed self up']

# Add the outcome, name the treatment column D, drop identifiers, and remove
# rows with any missing value. The drop has to happen BEFORE the categorical
# encoding below: `.cat.codes` writes -1 for a missing category, after which
# `dropna()` can no longer see it.
ohie_df = (
    ohie_df
    .assign(Y=ohie_df['new_chronic_condition'])
    .rename(columns={'treatment': 'D'})
    .drop(columns=['person_id', 'numhh_list', 'new_chronic_condition'])
    .dropna()
    .astype({'below_federal_pov': int})
)

# Convert categorical columns to their integer codes
cat_columns = ohie_df.select_dtypes(['category']).columns
ohie_df[cat_columns] = ohie_df[cat_columns].apply(lambda x: x.cat.codes)


In [None]:
# Difference in mean outcome between the D == 1 and D == 0 rows, followed by
# the overall mean of the outcome.
outcome = ohie_df['Y']
mean_y_d1 = outcome[ohie_df['D'] == 1].mean()
mean_y_d0 = outcome[ohie_df['D'] == 0].mean()
print('ATE: ', mean_y_d1 - mean_y_d0)
print('Baserate: ', outcome.mean())

## Checking on compliance: how many lottery winners actually end up signing up? 

In [None]:
# Compliance check: share of each lottery arm reporting OHP coverage
# (`ins_ohp_12m` == 'Yes') in the 12-month survey.
#
# The inputs are re-read from the source files because the frames built above
# cannot be used here: `survey_12m` was cut down to the poverty flag,
# and `ohie_df` no longer carries `person_id` or the labelled `treatment`
# column (it was renamed to D and integer-coded).
medicare_status = pd.read_stata('oregonhie_survey12m_vars.dta')[['person_id', 'ins_ohp_12m']]
lottery_assignment = pd.read_stata('oregonhie_descriptive_vars.dta')[['person_id', 'treatment', 'numhh_list']]

# Same household restriction as the analysis table. The other joins and the
# missing-value drop are not replicated, so this sample is close to, but not
# identical with, the rows of `ohie_df`.
lottery_assignment = lottery_assignment[lottery_assignment['numhh_list'] == 'signed self up']

fulldf = pd.merge(medicare_status, lottery_assignment, on='person_id', how='inner')
treatment_group = fulldf[fulldf['treatment'] == 'Selected']
control_group = fulldf[fulldf['treatment'] == 'Not selected']

# A missing `ins_ohp_12m` answer counts as "not signed up" in these rates.
print('treatment signup rate:', (treatment_group['ins_ohp_12m'] == 'Yes').mean())
print('control signup rate:', (control_group['ins_ohp_12m'] == 'Yes').mean())

### What are the actual treatment effects? 

In [None]:
# Ground truth treatment effect on the thresholded outcome (cutoff at 15):
# mean of Y where D == 1 minus mean of Y where D == 0.
(
    ohiedf.loc[ohiedf['D'] == 1, 'Y'].mean()
    - ohiedf.loc[ohiedf['D'] == 0, 'Y'].mean()
)

In [None]:
# Ground truth treatment effect on the raw score (20-point continuous assessment):
# mean of phqtot_inp where D == 1 minus mean of phqtot_inp where D == 0.
(
    ohiedf.loc[ohiedf['D'] == 1, 'phqtot_inp'].mean()
    - ohiedf.loc[ohiedf['D'] == 0, 'phqtot_inp'].mean()
)

In [None]:
# Number of rows in each arm of `ohiedf`
for arm_label, arm_value in (('Treated: ', 1), ('Control: ', 0)):
    print(arm_label, (ohiedf['D'] == arm_value).sum())