# Machine Learning
### Input variables
### State abortion policy as of 8/7/22 
### State Community Health Centers
### Health insurance coverage of women
### Maternal death and mortality rates
### Legal abortions obtained out of state
### Legal abortions obtained in state of occurrence
### Population distribution by sex
### Reported women not seeing a doctor in past 12 months
### Reported women having a mammogram in last 2 years
### Reported women not having a personal doctor or health care provider
### Reported women having a pap smear in the last 3 years
### State requirements for insurance coverage of contraceptives
### Crisis Pregnancy Centers (fake abortion clinics) by county
### Poverty levels by county

In [1]:
import pandas as pd

In [2]:
# State abortion-policy status, indexed by state name ('Location').
# Rows 0, 1, 3 and 55-80 of the raw CSV are skipped on read
# (presumably title/summary lines and trailing notes -- confirm against the file).
abortion_policy_tracker_df = (
    pd.read_csv(
        "Resources/All_States_Abortion_Policy_Tracker.csv",
        skiprows=[0, 1, 3, *range(55, 81)],
        index_col="Location",
    )
    .drop(columns="Footnotes")  # footnote markers are not used as a feature
)
abortion_policy_tracker_df

Unnamed: 0_level_0,Status of Abortion
Location,Unnamed: 1_level_1
Alabama,Abortion Ban In Effect
Alaska,Abortion Available
Arizona,Status of pre-Roe ban unclear
Arkansas,Abortion Ban In Effect
California,Abortion Available
Colorado,Abortion Available
Connecticut,Abortion Available
Delaware,Abortion Available
District of Columbia,Abortion Available
Florida,"Abortion available, pre-viability gestational ..."


In [3]:
# Community Health Center counts per state, indexed by 'Location'.
# NOTE(review): besides rows 0, 1, 3 and the trailing rows 56-79, row 42 is
# also skipped here -- confirm which record that is and why it is excluded.
CHC_df = (
    pd.read_csv(
        "Resources/Community_Health_Center_Delivery_Sites_and_Patient_Visits.csv",
        skiprows=[0, 1, 3, 42, *range(56, 80)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    # Spell out the abbreviation so the merged table is self-explanatory.
    .rename(columns={"Total CHCs": "Total Community Health Centers"})
)
CHC_df

Unnamed: 0_level_0,Total Community Health Centers
Location,Unnamed: 1_level_1
Alabama,17
Alaska,27
Arizona,23
Arkansas,12
California,175
Colorado,19
Connecticut,16
Delaware,3
District of Columbia,8
Florida,47


In [4]:
# Health insurance coverage of women, indexed by 'Location'.
# Only the columns left after the drop below are kept; the per-payer
# breakdown (Employer, Non-Group, Medicaid, Medicare, Military) is discarded.
# NOTE(review): 'Total' is renamed to 'Total Insured per State' while an
# 'Uninsured' column sits next to it -- confirm 'Total' is not the all-women total.
ins_df = (
    pd.read_csv(
        "Resources/Health_Ins_Coverage_of_Women_19-64_in_2020.csv",
        skiprows=[0, 1, 3, *range(55, 82)],
        index_col="Location",
    )
    .drop(columns=["Employer", "Non-Group", "Medicaid", "Medicare", "Military"])
    .rename(columns={"Total": "Total Insured per State"})
)
ins_df

Unnamed: 0_level_0,Uninsured,Total Insured per State
Location,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,165700,1462900
Alaska,29200,207200
Arizona,309000,2132900
Arkansas,83800,865900
California,1034300,11885100
Colorado,204300,1758600
Connecticut,58100,1038000
Delaware,24800,295800
District of Columbia,7600,251600
Florida,1011800,6381600


In [5]:
# Maternal death counts per state, indexed by 'Location'.
# The rate column shipped in the file is dropped; only the raw count is kept
# and given a more specific name.
maternal_deaths_df = (
    pd.read_csv(
        "Resources/Maternal_deaths_and_mortality_rates.csv",
        skiprows=[0, 1, 3, *range(55, 70)],
        index_col="Location",
    )
    .drop(columns=["Maternal Mortality Rate per 100,000 live Births"])
    .rename(columns={"Number of Deaths": "Number Maternal Deaths"})
)
maternal_deaths_df

Unnamed: 0_level_0,Number Maternal Deaths
Location,Unnamed: 1_level_1
Alabama,63
Alaska,8
Arizona,67
Arkansas,44
California,135
Colorado,27
Connecticut,18
Delaware,5
District of Columbia,7
Florida,141


In [6]:
# Total births per state, indexed by 'Location'.
# The counts are text with thousands separators; the commas are stripped here
# but the column is still a string column after this cell.
total_births_df = (
    pd.read_csv(
        "Resources/Total_Number_of_Births.csv",
        skiprows=[0, 1, 3, *range(55, 73)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    .assign(**{
        "Number of Births": lambda births: births["Number of Births"].str.replace(",", ""),
    })
)
total_births_df

Unnamed: 0_level_0,Number of Births
Location,Unnamed: 1_level_1
Alabama,57647
Alaska,9469
Arizona,76947
Arkansas,35251
California,420259
Colorado,61494
Connecticut,33460
Delaware,10392
District of Columbia,8874
Florida,209671


In [7]:
# Maternal mortality rate per 100,000 live births, recomputed from raw counts.
# NOTE(review): the rate column that ships with the maternal-deaths file is
# dropped when that file is loaded and rebuilt here from a separate births
# file -- confirm that the death and birth counts cover the same time period.

# Birth counts are loaded as strings with the commas stripped; make them numeric.
total_births_df['Number of Births'] = pd.to_numeric(total_births_df['Number of Births'])

# Series division aligns the two frames on their shared 'Location' index.
death_div_births = (maternal_deaths_df["Number Maternal Deaths"] / total_births_df["Number of Births"])
death_div_births_mult = death_div_births * 100000

# to_frame(name=...) labels the column directly. Building the frame with
# pd.DataFrame(series, columns=[...]) only works while the Series is unnamed;
# a named Series would be re-selected by column label and come out all-NaN.
maternal_mortality_df = death_div_births_mult.to_frame(
    name='Maternal Mortality Rate (Per 100,000 Live Births)'
)
maternal_mortality_df

Unnamed: 0_level_0,"Maternal Mortality Rate (Per 100,000 Live Births)"
Location,Unnamed: 1_level_1
Alabama,109.285826
Alaska,84.486218
Arizona,87.07292
Arkansas,124.819154
California,32.123048
Colorado,43.906723
Connecticut,53.795577
Delaware,48.113934
District of Columbia,78.882128
Florida,67.248213


In [8]:
# Share of legal abortions obtained by out-of-state residents, indexed by 'Location'.
# NOTE(review): some states carry the placeholder 'NR' instead of a number
# (California in the displayed rows), so this column loads as object dtype
# rather than float -- it will need cleaning before numeric use.
abortion_out_of_state_df = (
    pd.read_csv(
        "Resources/Percentage_Legal_Abortions_Obtained_Out_of_State_Residents.csv",
        skiprows=[0, 1, 3, *range(55, 72)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
)
abortion_out_of_state_df

Unnamed: 0_level_0,Abortions by Out-of-State Residents
Location,Unnamed: 1_level_1
Alabama,0.17
Alaska,0.02
Arizona,0.01
Arkansas,0.11
California,NR
Colorado,0.11
Connecticut,0.04
Delaware,0.14
District of Columbia,0.69
Florida,0.03


In [9]:
# State population, indexed by 'Location'.
# The Male/Female split is discarded; only the overall total is kept.
pop_by_sex_df = (
    pd.read_csv(
        "Resources/Population_Distribution_by_Sex.csv",
        skiprows=[0, 1, 3, *range(55, 70)],
        index_col="Location",
    )
    .drop(columns=["Footnotes", "Male", "Female"])
    .rename(columns={"Total": "Total State Population"})
)
pop_by_sex_df

Unnamed: 0_level_0,Total State Population
Location,Unnamed: 1_level_1
Alabama,4767100
Alaska,701700
Arizona,7098000
Arkansas,2922500
California,38642700
Colorado,5611800
Connecticut,3453300
Delaware,940300
District of Columbia,671300
Florida,20992000


In [10]:
# Legal abortion rate by state of occurrence, indexed by 'Location'.
# NOTE(review): some states carry the placeholder 'NR' instead of a number
# (California in the displayed rows), so this column loads as object dtype
# rather than float -- it will need cleaning before numeric use.
abortion_rate_df = (
    pd.read_csv(
        "Resources/Rate_of_Legal_Abortions_by_State_of_Occurrence.csv",
        skiprows=[0, 1, 3, *range(55, 70)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    # Put the unit into the column name so it survives the merge.
    .rename(columns={"Abortion Rate": "Abortion Rate (Legal Abortions per 1000 Women)"})
)
abortion_rate_df

Unnamed: 0_level_0,Abortion Rate (Legal Abortions per 1000 Women)
Location,Unnamed: 1_level_1
Alabama,6.3
Alaska,8.8
Arizona,9.4
Arkansas,5.1
California,NR
Colorado,7.6
Connecticut,13.7
Delaware,11.3
District of Columbia,23.9
Florida,18.5


In [11]:
# Share of women who reported not seeing a doctor in the past 12 months due
# to cost, indexed by 'Location'. The generic 'All Women' column is renamed
# so it stays distinguishable once merged with the other survey tables.
no_doctor_visit_df = (
    pd.read_csv(
        "Resources/Report_Not_Seeing_Doctor_in_Past_12_Months_Due_to_Cost.csv",
        skiprows=[0, 1, 3, *range(55, 76)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    .rename(columns={"All Women": "No Doctor Visit in Past 12 Months Due to Cost"})
)
no_doctor_visit_df

Unnamed: 0_level_0,No Doctor Visit in Past 12 Months Due to Cost
Location,Unnamed: 1_level_1
Alabama,0.12
Alaska,0.12
Arizona,0.11
Arkansas,0.12
California,0.09
Colorado,0.11
Connecticut,0.07
Delaware,0.09
District of Columbia,0.06
Florida,0.14


In [12]:
# Share of women reporting a mammogram within the past two years, indexed by
# 'Location'. The generic 'All Women' column is renamed so it stays
# distinguishable once merged with the other survey tables.
mammogram_df = (
    pd.read_csv(
        "Resources/Reported_Mammogram_2_Years.csv",
        skiprows=[0, 1, 3, *range(55, 74)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    .rename(columns={"All Women": "Women Ages 40+ Who Had a Mammogram within the Past Two Years"})
)
mammogram_df

Unnamed: 0_level_0,Women Ages 40+ Who Had a Mammogram within the Past Two Years
Location,Unnamed: 1_level_1
Alabama,0.74
Alaska,0.63
Arizona,0.68
Arkansas,0.7
California,0.68
Colorado,0.67
Connecticut,0.78
Delaware,0.74
District of Columbia,0.74
Florida,0.74


In [13]:
# Share of women reporting no personal doctor or health care provider,
# indexed by 'Location'. The generic 'All Women' column is renamed so it stays
# distinguishable once merged with the other survey tables.
no_doctor_df = (
    pd.read_csv(
        "Resources/Reported_No_Personal_Doctor_or_Health_Care_Provider.csv",
        skiprows=[0, 1, 3, *range(55, 76)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    .rename(columns={"All Women": "Women Without a Healthcare Provider"})
)
no_doctor_df

Unnamed: 0_level_0,Women Without a Healthcare Provider
Location,Unnamed: 1_level_1
Alabama,0.13
Alaska,0.23
Arizona,0.19
Arkansas,0.13
California,0.19
Colorado,0.17
Connecticut,0.1
Delaware,0.12
District of Columbia,0.15
Florida,0.2


In [14]:
# Share of women reporting a pap smear within the past three years, indexed by
# 'Location'. The generic 'All Women' column is renamed so it stays
# distinguishable once merged with the other survey tables.
pap_smear_df = (
    pd.read_csv(
        "Resources/Reported_Pap_Smear_3_Years.csv",
        skiprows=[0, 1, 3, *range(55, 72)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
    .rename(columns={"All Women": "Women Ages 18-64 with Pap Smear in Past Three Years"})
)
pap_smear_df

Unnamed: 0_level_0,Women Ages 18-64 with Pap Smear in Past Three Years
Location,Unnamed: 1_level_1
Alabama,0.74
Alaska,0.65
Arizona,0.69
Arkansas,0.68
California,0.74
Colorado,0.69
Connecticut,0.8
Delaware,0.75
District of Columbia,0.8
Florida,0.73


In [15]:
# State requirements for insurance coverage of contraceptives, indexed by
# 'Location'. The displayed rows hold Yes/No text flags, so these columns
# are strings rather than booleans.
ins_contraceptives_df = (
    pd.read_csv(
        "Resources/State_Requirements_Ins_Coverage_Contraceptives.csv",
        skiprows=[0, 1, 3, *range(55, 75)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
)
ins_contraceptives_df

Unnamed: 0_level_0,State Requires Coverage of Prescription Contraception,State Requires Coverage of Over-the-counter Methods,State Requires Coverage of Male Sterilization,State Requires Coverage of Female Sterilization,State Prohibits cost sharing
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alabama,No,No,No,No,No
Alaska,No,No,No,No,No
Arizona,Yes,No,No,No,No
Arkansas,Yes,No,No,No,No
California,Yes,Yes,No,Yes,Yes
Colorado,Yes,No,No,No,No
Connecticut,Yes,Yes,No,Yes,Yes
Delaware,Yes,Yes,No,Yes,Yes
District of Columbia,Yes,Yes,No,Yes,Yes
Florida,No,No,No,No,No


In [16]:
# Teen birth rate per state, indexed by 'Location'.
# Rows 0, 1, 3 and 55-72 of the raw CSV are skipped on read
# (presumably title/summary lines and trailing notes -- confirm against the file).
teen_birth_df = (
    pd.read_csv(
        "Resources/Teen_Birth_Rate.csv",
        skiprows=[0, 1, 3, *range(55, 73)],
        index_col="Location",
    )
    .drop(columns="Footnotes")
)
teen_birth_df

Unnamed: 0_level_0,"Teen Birth Rate per 1,000"
Location,Unnamed: 1_level_1
Alabama,24.8
Alaska,17.7
Arizona,16.6
Arkansas,27.8
California,11.0
Colorado,12.5
Connecticut,7.6
Delaware,14.6
District of Columbia,15.6
Florida,15.2


In [17]:
# Crisis pregnancy center listings: one row per center, with address, state,
# county and lat/lng columns. Loaded as-is with the default integer index;
# this frame is not part of the state-level merge list.
crisis_center_df = pd.read_csv(
    filepath_or_buffer="CrisisCenterData/crisispregnancycenter_data.csv",
)
crisis_center_df

Unnamed: 0,id,store_name,store_number,contact_info,address,state,country,county,email,store_hours,dealer_website,image,additional_information,lat,lng,so_id
0,51012,Legacy Pregnancy Center,na,na,"847 Coffeen Ave\r\nSheridan, WY 82801",Wyoming,United States,Sheridan,na,na,na,na,na,44.7883,-106.9450,2
1,51011,Inside Connection,na,na,"2712 Commercial Way\r\nRock Springs, WY 82901",Wyoming,United States,Sweetwater,na,na,na,na,na,41.5825,-109.2540,2
2,51010,ABBA's House Pregnancy Medical Clinic,na,na,"2420 Watt Ct\r\nRiverton, WY 82501",Wyoming,United States,Fremont,na,na,na,na,na,43.0304,-108.4220,2
3,51009,Serenity Pregnancy Resource Center,na,na,"215 S Division St\r\nPowell, WY 82435",Wyoming,United States,Park,na,na,na,na,na,44.7514,-108.7650,2
4,51008,Heart to Heart Pregnancy Resource Clinic,na,na,"920 E Sheridan St\r\nLaramie, WY 82070",Wyoming,United States,Albany,na,na,na,na,na,41.3065,-105.5850,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2541,1005,Elmore County Pregnancy Center,na,na,"104 Company St Wetumpka, AL 36092",Alabama,United States,Elmore,na,na,na,na,na,32.5379,-86.2042,2
2542,1004,River Region Pregnancy Center,na,na,"101 E 4th St \r\nPrattville, AL 36067",Alabama,United States,Prattville,na,na,na,na,na,32.4631,-86.4722,2
2543,1003,Pregnancy Care Center,na,na,"5389 US Hwy 11 \r\nSpringville, AL 35146",Alabama,United States,St. Clair,na,na,na,na,na,33.7636,-86.4831,2
2544,1002,PRC Hueytown,na,na,"2719 19th St N \r\nHueytown, AL 35023",Alabama,United States,Jefferson,na,na,na,na,na,33.4229,-86.9850,2


In [18]:
# State-level frames to combine into a single table, in merge order.
# Each one is indexed by 'Location'. Note that the derived
# maternal_mortality_df is used rather than the raw death/birth counts.
# Merge-many-frames pattern adapted from:
# https://stackoverflow.com/questions/44327999/python-pandas-merge-multiple-dataframes
data_frames = [
    abortion_policy_tracker_df,
    CHC_df,
    ins_df,
    maternal_mortality_df,
    abortion_out_of_state_df,
    pop_by_sex_df,
    abortion_rate_df,
    no_doctor_visit_df,
    mammogram_df,
    no_doctor_df,
    pap_smear_df,
    ins_contraceptives_df,
    teen_birth_df,
]

In [19]:
from functools import reduce

In [20]:
def _outer_merge_on_location(left, right):
    """Outer-join two frames on 'Location', keeping states present in either."""
    return pd.merge(left, right, on=['Location'], how='outer')


# Fold the list left-to-right into one wide table keyed by 'Location'.
df_merged = reduce(_outer_merge_on_location, data_frames)

In [21]:
# Sanity check: (rows, columns) of the merged table.
df_merged.shape

(51, 18)

In [22]:
# Preview the first five merged rows to eyeball the combined columns.
df_merged.head(5)

Unnamed: 0_level_0,Status of Abortion,Total Community Health Centers,Uninsured,Total Insured per State,"Maternal Mortality Rate (Per 100,000 Live Births)",Abortions by Out-of-State Residents,Total State Population,Abortion Rate (Legal Abortions per 1000 Women),No Doctor Visit in Past 12 Months Due to Cost,Women Ages 40+ Who Had a Mammogram within the Past Two Years,Women Without a Healthcare Provider,Women Ages 18-64 with Pap Smear in Past Three Years,State Requires Coverage of Prescription Contraception,State Requires Coverage of Over-the-counter Methods,State Requires Coverage of Male Sterilization,State Requires Coverage of Female Sterilization,State Prohibits cost sharing,"Teen Birth Rate per 1,000"
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Alabama,Abortion Ban In Effect,17,165700,1462900,109.285826,0.17,4767100,6.3,0.12,0.74,0.13,0.74,No,No,No,No,No,24.8
Alaska,Abortion Available,27,29200,207200,84.486218,0.02,701700,8.8,0.12,0.63,0.23,0.65,No,No,No,No,No,17.7
Arizona,Status of pre-Roe ban unclear,23,309000,2132900,87.07292,0.01,7098000,9.4,0.11,0.68,0.19,0.69,Yes,No,No,No,No,16.6
Arkansas,Abortion Ban In Effect,12,83800,865900,124.819154,0.11,2922500,5.1,0.12,0.7,0.13,0.68,Yes,No,No,No,No,27.8
California,Abortion Available,175,1034300,11885100,32.123048,NR,38642700,NR,0.09,0.68,0.19,0.74,Yes,Yes,No,Yes,Yes,11.0


In [23]:
# Inspect column dtypes; `object` columns here are non-numeric text
# (status labels, Yes/No flags, or rate columns containing 'NR').
df_merged.dtypes

Status of Abortion                                               object
Total Community Health Centers                                    int64
Uninsured                                                         int64
Total Insured per State                                           int64
Maternal Mortality Rate (Per 100,000 Live Births)               float64
Abortions by Out-of-State Residents                              object
Total State Population                                            int64
Abortion Rate (Legal Abortions per 1000 Women)                   object
No Doctor Visit in Past 12 Months Due to Cost                   float64
Women Ages 40+ Who Had a Mammogram within the Past Two Years    float64
Women Without a Healthcare Provider                             float64
Women Ages 18-64 with Pap Smear in Past Three Years             float64
State Requires Coverage of Prescription Contraception            object
State Requires Coverage of Over-the-counter Methods             

In [24]:
# Persist the merged table (with its 'Location' index) as comma-separated
# text; missing values are written out as '.'.
df_merged.to_csv('merged.csv', sep=',', na_rep='.', index=True)