# Combined Data Exploration

In this data set, I will explore how our three key datasets (socioeconomic, health, and election) interact with each other.

In [9]:
import pandas as pd
from IPython.display import HTML, Markdown, display
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [6]:
health_df = pd.read_csv(r'../data/health_clean.csv')
voting_df = pd.read_csv(r'../data/election_clean.csv')
socioecon_df = pd.read_csv(r'../data/socioeconomic_data_clean.csv')

In [7]:
health_df.head()

Unnamed: 0,state_fips,county_fips,fips5,state_abbreviation,name,Release Year,poor_fair_health,poor_physical_days,poor_mental_days,life_exp
0,0,0,0,US,United States,2020,0.171987,3.752828,3.970321,79.08913
1,1,0,1000,AL,Alabama,2020,0.220287,4.918052,4.939753,75.416194
2,1,1,1001,AL,Autauga County,2020,0.20883,4.743889,4.654031,76.879477
3,1,3,1003,AL,Baldwin County,2020,0.175091,4.224519,4.304056,78.450258
4,1,5,1005,AL,Barbour County,2020,0.295918,5.430279,5.185594,75.341935


In [14]:
health_df.shape

(3194, 10)

In [8]:
voting_df.head()

Unnamed: 0,state_name,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,dem_majority,state_proportion
0,Alabama,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184,False,
1,Alabama,Baldwin County,83544,24578,109679,58966,0.761714,0.22409,0.537623,False,
2,Alabama,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631,False,
3,Alabama,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.57728,False,
4,Alabama,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022,False,


In [10]:
socioecon_df.head()

Unnamed: 0,State FIPS Code,County FIPS Code,5-digit FIPS Code,State,County Name,% Severe Housing Problems,% Uninsured,High school graduation rate,% Unemployed,Income Inequality,Median household income,% Rural,Residential Segregation,Median household income (White),Median household income (Black),Population
0,0,0,0,US,United States,0.179136,0.102234,0.846,0.038953,4.920018,61937.0,0.19269,46.773464,,,327167434.0
1,1,0,1000,AL,Alabama,0.143407,0.110448,0.893,0.039336,5.261136,49881.0,0.409632,50.777776,,,4887871.0
2,1,1,1001,AL,Autauga County,0.146635,0.087217,0.9,0.036291,5.234597,59338.0,0.420022,23.628395,65047.0,27643.0,55601.0
3,1,3,1003,AL,Baldwin County,0.135662,0.113334,0.863616,0.036154,4.417767,57588.0,0.422791,31.825343,59418.0,31112.0,218022.0
4,1,5,1005,AL,Barbour County,0.145833,0.122428,0.814103,0.051714,5.68141,34382.0,0.677896,23.449713,47031.0,23013.0,24881.0


In [11]:
combined_df = pd.concat([socioecon_df, voting_df, health_df], axis=1)

In [12]:
combined_df.head()

Unnamed: 0,State FIPS Code,County FIPS Code,5-digit FIPS Code,State,County Name,% Severe Housing Problems,% Uninsured,High school graduation rate,% Unemployed,Income Inequality,...,state_fips,county_fips,fips5,state_abbreviation,name,Release Year,poor_fair_health,poor_physical_days,poor_mental_days,life_exp
0,0,0,0,US,United States,0.179136,0.102234,0.846,0.038953,4.920018,...,0,0,0,US,United States,2020,0.171987,3.752828,3.970321,79.08913
1,1,0,1000,AL,Alabama,0.143407,0.110448,0.893,0.039336,5.261136,...,1,0,1000,AL,Alabama,2020,0.220287,4.918052,4.939753,75.416194
2,1,1,1001,AL,Autauga County,0.146635,0.087217,0.9,0.036291,5.234597,...,1,1,1001,AL,Autauga County,2020,0.20883,4.743889,4.654031,76.879477
3,1,3,1003,AL,Baldwin County,0.135662,0.113334,0.863616,0.036154,4.417767,...,1,3,1003,AL,Baldwin County,2020,0.175091,4.224519,4.304056,78.450258
4,1,5,1005,AL,Barbour County,0.145833,0.122428,0.814103,0.051714,5.68141,...,1,5,1005,AL,Barbour County,2020,0.295918,5.430279,5.185594,75.341935


In [None]:
# explore relationship between health and health insurance

In [None]:
# explore relationship between health, wealth, and party allegiance

In [15]:
# Opiod deaths

health_df_raw = pd.read_csv("../data/county_health.csv", 
                            low_memory=False)

list(health_df_raw)

['State FIPS Code',
 'County FIPS Code',
 '5-digit FIPS Code',
 'State Abbreviation',
 'Name',
 'Release Year',
 'County Ranked (Yes=1/No=0)',
 'Premature death raw value',
 'Premature death numerator',
 'Premature death denominator',
 'Premature death CI low',
 'Premature death CI high',
 'Premature death flag (0 = No Flag/1=Unreliable/2=Suppressed)',
 'Premature death (AIAN)',
 'Premature death CI low (AIAN)',
 'Premature death CI high (AIAN)',
 'Premature death flag (AIAN) (. = No Flag/1=Unreliable/2=Suppressed)',
 'Premature death (Asian/Pacific Islander)',
 'Premature death CI low (Asian/Pacific Islander)',
 'Premature death CI high (Asian/Pacific Islander)',
 'Premature death flag (Asian/Pacific Islander) (. = No Flag/1=Unreliable/2=Suppressed)',
 'Premature death (Black)',
 'Premature death CI low (Black)',
 'Premature death CI high (Black)',
 'Premature death flag (Black) (. = No Flag/1=Unreliable/2=Suppressed)',
 'Premature death (Hispanic)',
 'Premature death CI low (Hispanic

In [None]:
#write in columns of interest here
columns = ['Drug overdose deaths raw value',]

opiod_df = health_df_raw[columns]