In [35]:
import pandas as pd
from functools import reduce
# Load ohie_data.csv into `ohiedf`; later cells read its 'D', 'Y' and
# 'phqtot_inp' columns. Note this is a separate frame from `ohie_df`
# (with an underscore), which a later cell builds from the Stata files.
ohiedf = pd.read_csv('ohie_data.csv')

In [38]:
# Load the raw Stata extracts, one frame per source file.
descriptive_stats = pd.read_stata('oregonhie_descriptive_vars.dta')
state_programs = pd.read_stata('oregonhie_stateprograms_vars.dta')
ed_visits = pd.read_stata('oregonhie_ed_vars.dta')
survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
survey_12m = pd.read_stata('oregonhie_survey12m_vars.dta')

# Narrow each frame to person_id plus the columns used for the merge below.
descriptive_stats = descriptive_stats.loc[:, ['person_id', 'treatment', 'numhh_list']]

# State program enrollments occurring prior to program notification date
state_programs = state_programs.loc[:, [
    'person_id',
    'snap_ever_prenotify07', 'snap_tot_hh_prenotify07',
    'tanf_ever_prenotify07', 'tanf_tot_hh_prenotify07',
]]

# ED visit columns: every column whose name contains the substring 'pre'.
pre_vars = ['person_id', *(col for col in ed_visits.columns if 'pre' in col)]
ed_visits = ed_visits.loc[:, pre_vars]

# In-person survey columns: the depression score plus every column whose name
# contains '_pre_'.
pre_survey_vars = [
    'person_id',
    'phqtot_inp',
    *(col for col in survey_responses.columns if '_pre_' in col),
]
survey_responses = survey_responses.loc[:, pre_survey_vars]

tables = [descriptive_stats, state_programs, ed_visits, survey_responses]

# Inner-join all tables on person_id, then filter to records from
# single-individual homes (numhh_list == 'signed self up') that contain a
# depression score (phqtot_inp not missing).
ohie_df = reduce(
    lambda left, right: left.merge(right, on='person_id', how='inner'),
    tables,
)
ohie_df = ohie_df.loc[
    (ohie_df['numhh_list'] == 'signed self up') & ohie_df['phqtot_inp'].notna()
]

## Checking on compliance: how many lottery winners actually end up signing up? 

In [45]:
# Compliance check: attach the 12-month survey's ins_ohp_12m answer to the
# merged analysis frame and compare the share answering 'Yes' in each arm.
# NOTE(review): the variable is named "medicare", but the column is
# ins_ohp_12m (presumably OHP coverage) — confirm the intended program name.
medicare_status = survey_12m.loc[:, ['person_id', 'ins_ohp_12m']]
fulldf = medicare_status.merge(ohie_df, how='inner', on='person_id')

# Split by lottery assignment as recorded in the 'treatment' column.
treatment_group = fulldf.loc[fulldf['treatment'] == 'Selected']
control_group = fulldf.loc[fulldf['treatment'] == 'Not selected']

# The mean of the boolean mask is the fraction answering 'Yes'; rows holding
# any other value (including missing) remain in the denominator.
print('treatment signup rate:', (treatment_group['ins_ohp_12m'] == 'Yes').mean())
print('control signup rate:', (control_group['ins_ohp_12m'] == 'Yes').mean())

treatment signup rate: 0.1643118785975929
control signup rate: 0.05643513789581205


### What are the actual treatment effects? 

In [9]:
# Ground truth treatment effect (in thresholded outcomes, cutoff at 15):
# mean of Y among D == 1 rows minus mean of Y among D == 0 rows.
ohiedf.loc[ohiedf['D'] == 1, 'Y'].mean() - ohiedf.loc[ohiedf['D'] == 0, 'Y'].mean()

-0.003408385422284027

In [12]:
# Ground truth treatment effect (in raw scores on 20-point continuous assessment):
# mean of phqtot_inp among D == 1 rows minus its mean among D == 0 rows.
(
    ohiedf.loc[ohiedf['D'] == 1, 'phqtot_inp'].mean()
    - ohiedf.loc[ohiedf['D'] == 0, 'phqtot_inp'].mean()
)

-0.2743940081743297

In [25]:
# Row counts per value of D (1 is reported as treated, 0 as control).
print('Treated: ', ohiedf['D'].eq(1).sum())
print('Control: ', ohiedf['D'].eq(0).sum())

Treated:  3816
Control:  3907
