In [1]:
import pandas as pd
from functools import reduce
ohiedf = pd.read_csv('ohie_data.csv')

- Outcome variables: 
`any_hosp_ed`: Any ED visit resulting in a hospitalization in the study period

- oregonhie_survey12m_vars -> health_gen_bin_12m: self reported health. Excelent/good/very good vs. bad/very bad

- Selection parameter: 
- oregonhie_survey12m_vars:`hhinc_cat_12m`

In [174]:


().mean() # removal criteria

0.19382824804463308

In [170]:
descriptive_stats = pd.read_stata('oregonhie_descriptive_vars.dta')
state_programs = pd.read_stata('oregonhie_stateprograms_vars.dta')
ed_visits = pd.read_stata('oregonhie_ed_vars.dta')
survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
survey_12m = pd.read_stata('oregonhie_survey12m_vars.dta')



descriptive_stats = descriptive_stats[['person_id', 'treatment', 'numhh_list']]

# State program enrollments occuring prior to program notification date
state_programs = state_programs[['person_id', 'snap_ever_prenotify07', 'snap_tot_hh_prenotify07',
                                'tanf_ever_prenotify07', 'tanf_tot_hh_prenotify07']]

pre_vars = ['person_id', 'any_hosp_ed'] + [c for c in ed_visits.columns.tolist() if 'pre' in c]
ed_visits = ed_visits[pre_vars]

pre_survey_vars = ['person_id', 'hhinc_cat_12m'] + [c for c in survey_responses.columns.tolist() if '_pre_' in c]
survey_responses = survey_responses[pre_survey_vars]

tables = [descriptive_stats, state_programs, ed_visits, survey_responses]

# Filter to records from single-individual homes that ontain a depression score
ohie_df = reduce(lambda left, right: pd.merge(left, right, on=['person_id'], how='inner'), tables)
ohie_df = ohie_df[(ohie_df['numhh_list'] == 'signed self up')]

In [207]:
descriptive_stats = pd.read_stata('oregonhie_descriptive_vars.dta')
state_programs = pd.read_stata('oregonhie_stateprograms_vars.dta')
ed_visits = pd.read_stata('oregonhie_ed_vars.dta')
survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
survey_12m = pd.read_stata('oregonhie_survey12m_vars.dta')

pre_survey_vars =  ['person_id'] + [c for c in survey_responses.columns.tolist() if 'dx_pre' in c]
survey_responses = survey_responses[pre_survey_vars]

descriptive_stats = descriptive_stats[['person_id', 'treatment', 'numhh_list']]

# State program enrollments occuring prior to program notification date
state_programs = state_programs[['person_id', 'snap_ever_prenotify07', 'snap_tot_hh_prenotify07',
                                'tanf_ever_prenotify07', 'tanf_tot_hh_prenotify07']]

pre_vars = ['person_id', 'any_hosp_ed'] + [c for c in ed_visits.columns.tolist() if 'pre' in c]
ed_visits = ed_visits[pre_vars]

pre_survey_vars = ['person_id'] + [c for c in survey_responses.columns.tolist() if '_pre_' in c]
survey_responses = survey_responses[pre_survey_vars]

# Use whether below federal poverty limit as selection variable
survey_12m['below_federal_pov'] = (survey_12m['hhinc_pctfpl_12m'] < 100).astype(int)
survey_12m = survey_12m[['person_id', 'below_federal_pov']]

tables = [descriptive_stats, state_programs, ed_visits, survey_responses, survey_12m]

# Filter to records from single-individual homes that ontain a depression score
ohie_df = reduce(lambda left, right: pd.merge(left, right, on=['person_id'], how='inner'), tables)
ohie_df = ohie_df[(ohie_df['numhh_list'] == 'signed self up') ]

ohie_df['Y'] = ohie_df['any_hosp_ed']

# Assign treatment variable
ohie_df.rename(columns={'treatment': 'D'}, inplace=True)

# Drop unneeded variables and convert to categorical
ohie_df.drop(columns=['person_id', 'numhh_list', 'any_hosp_ed'], inplace=True)
cat_columns = ohie_df.select_dtypes(['category']).columns
ohie_df[cat_columns] = ohie_df[cat_columns].apply(lambda x: x.cat.codes)

# Remove ~10 rows that contain a missing value
ohie_df = ohie_df.dropna()



In [217]:

survey_responses = pd.read_stata('oregonhie_inperson_vars.dta')
post_survey_vars =   [c for c in survey_responses.columns.tolist() if 'dx_post' in c]
survey_responses = survey_responses[post_survey_vars]

survey_responses = (survey_responses == 'Yes').astype(int)


In [221]:
survey_responses.sum(axis=1)

0        0
1        0
2        0
3        0
4        0
        ..
20740    0
20741    1
20742    0
20743    0
20744    0
Length: 20745, dtype: int64

In [206]:
ohie_df[ohie_df['D'] == 1]['Y'].mean() - ohie_df[ohie_df['D'] == 0]['Y'].mean()



-0.005205580900151233

In [201]:
((ohie_df['D'] == 1) & (ohie_df['below_federal_pov'] == 0)).mean() / (ohie_df['D'] == 1).mean()



0.7750848279205041

## Checking on compliance: how many lottery winners actually end up signing up? 

In [45]:
medicare_status = survey_12m[['person_id', 'ins_ohp_12m']]
fulldf = pd.merge(medicare_status, ohie_df, on='person_id', how='inner')
treatment_group = fulldf[fulldf['treatment'] == 'Selected']
control_group = fulldf[fulldf['treatment'] == 'Not selected']

print('treatment signup rate:', (treatment_group['ins_ohp_12m'] == 'Yes').mean())
print('control signup rate:', (control_group['ins_ohp_12m'] == 'Yes').mean())

treatment signup rate: 0.1643118785975929
control signup rate: 0.05643513789581205


### What are the actual treatment effects? 

In [196]:
#Ground truth treatment effect (in thresholded outcomes cutoff at 15)
ohiedf[(ohiedf['D'] == 1)]['Y'].mean()-ohiedf[(ohiedf['D'] == 0)]['Y'].mean()

-0.003408385422284027

In [12]:
#Ground truth treatment effect (in raw scores on 20-point continuous assessment)
ohiedf[(ohiedf['D'] == 1)]['phqtot_inp'].mean()-ohiedf[(ohiedf['D'] == 0)]['phqtot_inp'].mean()

-0.2743940081743297

In [25]:
print('Treated: ', (ohiedf['D'] == 1).sum())
print('Control: ', (ohiedf['D'] == 0).sum())

Treated:  3816
Control:  3907
