In [None]:
# import packages 
import pandas as pd
import numpy as np
import pandasql as psql
import re
import os

In [None]:
# --- Input tables ---
# Maternal mortality estimates and the merged health-indicator table
maternal_mortality = pd.read_csv(
    'data/processed/maternal_mortality_country_all_race_all_age_2019.csv'
)
health_indicators = pd.read_csv(
    'data/processed/merged_health_indicators.csv'
)

# State-level political context: abortion-law index and state government control
abortion_laws_index = pd.read_csv(
    'data/raw/abortion_laws_index.csv'
)
state_gov_control = pd.read_csv(
    'data/processed/state_gov_control_2017.csv'
)

# Lookup table for translating between state names and state abbreviations
states = pd.read_csv(
    'data/raw/project_political_data - States.csv'
)


In [None]:
# Sanity check: count the distinct states present in the mortality table
maternal_mortality['state_name'].nunique()

50

In [None]:
# Narrow the health indicators to the county key plus the target control variables
target_columns = [
    'county_fips',
    'low_income_pct',
    'obgyn_provider_rate_100k',
    'prenatal_care_first_trimester_pct',
    'obesity_pre_pregnancy_pct',
    'diabetes_pre_pregnancy_pct',
    'hypertension_pre_pregnancy_pct',
]

health_indicators_target_var = health_indicators[target_columns]

In [None]:
# Left-join the selected health indicators onto the mortality rows:
# mortality `fips` is matched against indicator `county_fips`, and
# mortality rows without a match are kept with NaN indicator values.
maternal_mortality = maternal_mortality.merge(
    health_indicators_target_var,
    how='left',
    left_on='fips',
    right_on='county_fips',
)

In [64]:
# Attach the states lookup (matched on state name) so that later joins
# can use either the state name or the state abbreviation as key.
maternal_mortality = maternal_mortality.merge(
    states,
    how='left',
    left_on='state_name',
    right_on='State',
)

In [None]:
# Keep only the join key and the Guttmacher index from the abortion-law data
abortion_columns = ['State Abbreviation', 'guttmacher_index']
abortion_laws_index_target_var = abortion_laws_index[abortion_columns]

# Left-join the abortion-law index onto the mortality rows; the key column
# has the same name on both sides, so a single `on` key is used.
maternal_mortality = maternal_mortality.merge(
    abortion_laws_index_target_var,
    how='left',
    on='State Abbreviation',
)

In [None]:
# Keep only the join key and the GOP-trifecta flag from the state government data
gov_control_columns = ['state', 'gop_trifecta']
state_gov_control_target_var = state_gov_control[gov_control_columns]

# Left-join state government control onto the mortality rows by state name
# (`State` on the mortality side, `state` on the government-control side).
maternal_mortality = maternal_mortality.merge(
    state_gov_control_target_var,
    how='left',
    left_on='State',
    right_on='state',
)

In [None]:
# Preview the first five rows of the fully merged frame
maternal_mortality.head(n=5)

Unnamed: 0.1,Unnamed: 0,measure_id,measure_name,location_id,location_name,fips,race_id,race_name,sex_id,sex_name,...,obgyn_provider_rate_100k,prenatal_care_first_trimester_pct,obesity_pre_pregnancy_pct,diabetes_pre_pregnancy_pct,hypertension_pre_pregnancy_pct,State Abbreviation,State,guttmacher_index,state,gop_trifecta
0,3648030,1,Deaths,614,Autauga County (Alabama),1001.0,1,Total,2,Female,...,3.5,71.8,32.1,1.1,3.0,AL,Alabama,most_restrictive,Alabama,1
1,3648036,1,Deaths,637,Baldwin County (Alabama),1003.0,1,Total,2,Female,...,19.4,78.3,28.0,1.2,3.3,AL,Alabama,most_restrictive,Alabama,1
2,3648042,1,Deaths,624,Barbour County (Alabama),1005.0,1,Total,2,Female,...,0.0,67.4,41.2,1.1,4.6,AL,Alabama,most_restrictive,Alabama,1
3,3648048,1,Deaths,603,Bibb County (Alabama),1007.0,1,Total,2,Female,...,0.0,66.4,37.5,1.1,3.1,AL,Alabama,most_restrictive,Alabama,1
4,3648054,1,Deaths,588,Blount County (Alabama),1009.0,1,Total,2,Female,...,3.4,72.3,32.8,1.2,2.9,AL,Alabama,most_restrictive,Alabama,1


In [None]:
# Count missing values per column in the merged frame
maternal_mortality.isna().sum(axis=0)

Unnamed: 0                             0
measure_id                             0
measure_name                           0
location_id                            0
location_name                          0
fips                                   0
race_id                                0
race_name                              0
sex_id                                 0
sex_name                               0
age_group_id                           0
age_name                               0
cause_id                               0
cause_name                             0
year                                   0
metric_id                              0
metric_name                            0
val                                   31
upper                                 31
lower                                 31
state_name                             0
county_fips                           10
low_income_pct                        19
obgyn_provider_rate_100k              13
prenatal_care_fi

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
maternal_mortality.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0.1,Unnamed: 0,measure_id,location_id,fips,race_id,sex_id,age_group_id,cause_id,year,metric_id,...,upper,lower,county_fips,low_income_pct,obgyn_provider_rate_100k,prenatal_care_first_trimester_pct,obesity_pre_pregnancy_pct,diabetes_pre_pregnancy_pct,hypertension_pre_pregnancy_pct,gop_trifecta
count,3112.0,3112.0,3112.0,3112.0,3112.0,3112.0,3112.0,3112.0,3112.0,3112.0,...,3081.0,3081.0,3102.0,3093.0,3099.0,3099.0,3099.0,3008.0,3069.0,3112.0
mean,3845659.0,1.0,3964.326157,30053.951157,1.0,2.0,22.0,962.0,2019.0,3.0,...,4.9e-05,2.5e-05,30112.270471,26.90569,9.8091,76.683349,32.025653,1.056217,2.351548,0.664846
std,116481.6,0.0,12880.776428,15129.727079,0.0,0.0,0.0,0.0,0.0,0.0,...,1.8e-05,1e-05,15087.083227,9.466455,13.519115,6.934325,4.891153,0.394139,1.097705,0.47212
min,3648030.0,1.0,574.0,1001.0,1.0,2.0,22.0,962.0,2019.0,3.0,...,1e-05,4e-06,1001.0,0.0,0.0,36.7,8.7,0.2,0.1,0.0
25%,3742156.0,1.0,1378.75,18144.5,1.0,2.0,22.0,962.0,2019.0,3.0,...,3.7e-05,1.8e-05,18155.5,20.1,0.0,72.5,29.4,0.8,1.5,0.0
50%,3845157.0,1.0,2160.5,29130.0,1.0,2.0,22.0,962.0,2019.0,3.0,...,4.6e-05,2.3e-05,29136.0,25.8,4.0,77.8,32.4,1.0,2.2,1.0
75%,3943946.0,1.0,2942.25,45023.5,1.0,2.0,22.0,962.0,2019.0,3.0,...,5.7e-05,3e-05,45024.5,32.6,16.4,81.6,35.0,1.2,2.9,1.0
max,4044456.0,1.0,94092.0,56045.0,1.0,2.0,22.0,962.0,2019.0,3.0,...,0.000182,8.8e-05,56045.0,73.3,172.5,93.0,54.7,10.7,8.5,1.0


In [None]:
# Export the merged DataFrame to CSV.
# index=False keeps the positional row index out of the file; writing it would
# add an unnamed first column that comes back as `Unnamed: 0` (or
# `Unnamed: 0.1` when such a column already exists) the next time the CSV is read.
maternal_mortality.to_csv(
    'data/processed/maternal_mortality_complete_dataset.csv',
    index=False,
)