# Import dependencies, API key and set output file name

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime
import os
import gmaps
import seaborn as sn

# Import API key from a file that is ignored by Git (.gitignore file) so the key isn't exposed to the public
#from config import gkey

# Configure gmaps
#gmaps.configure(api_key=gkey)

## Store County Health Rankings Excel file results into DataFrame

* Load the Excel files downloaded from https://www.countyhealthrankings.org/app/texas/2019/measure/outcomes/144/description?sort=desc-2

In [2]:
# Read the 'AMD' and 'RMD' sheets of the health-data workbook and the 'RVU'
# sheet of the county-status workbook into separate DataFrames.
health_workbook = 'input_data/health_data.xls'
status_workbook = 'input_data/county_status.xlsx'

amd_df, rmd_df = (
    pd.read_excel(health_workbook, sheet_name=sheet) for sheet in ('AMD', 'RMD')
)
cs_df = pd.read_excel(status_workbook, sheet_name='RVU')

In [3]:
# Strip helper columns from the AMD sheet in three passes. Each pattern is a
# regex anchored at the start of the column name: '9', then '#', then 'Score'.
mod_amd_df = amd_df.drop(columns=amd_df.columns[amd_df.columns.str.contains('^9')])
mod_amd_1df = mod_amd_df.drop(columns=mod_amd_df.columns[mod_amd_df.columns.str.contains('^#')])
# reset_index returns a new frame rather than modifying in place, so it is
# chained into the assignment; called on its own line its result was discarded.
mod_amd_2df = (
    mod_amd_1df
    .drop(columns=mod_amd_1df.columns[mod_amd_1df.columns.str.contains('^Score')])
    .reset_index(drop=True)
)
mod_amd_2df.head(1)

Unnamed: 0,FIPS,State,County,Life Expectancy,Life Expectancy (Black),Life Expectancy (Hispanic),Life Expectancy (White),Age-Adjusted Mortality,Age-Adjusted Mortality (Black),Age-Adjusted Mortality (Hispanic),...,% 65 and over,% African American,% American Indian/Alaskan Native,% Asian,% Native Hawaiian/Other Pacific Islander,% Hispanic,% Non-Hispanic White,% Not Proficient in English,% Female,% Rural
0,48000,Texas,,79.021563,,,,341.1,,,...,12.269075,11.900799,1.015072,5.008215,0.143934,39.41591,41.994526,7.662459,50.319754,15.300999


In [4]:
# Strip helper columns from the RMD sheet in three passes. Each pattern is a
# regex anchored at the start of the column name: '9', then '#', then 'Z-Score'.
mod_rmd_df = rmd_df.drop(columns=rmd_df.columns[rmd_df.columns.str.contains('^9')])
mod_rmd_1df = mod_rmd_df.drop(columns=mod_rmd_df.columns[mod_rmd_df.columns.str.contains('^#')])
# reset_index returns a new frame rather than modifying in place, so it is
# chained into the assignment; called on its own line its result was discarded.
mod_rmd_2df = (
    mod_rmd_1df
    .drop(columns=mod_rmd_1df.columns[mod_rmd_1df.columns.str.contains('^Z-Score')])
    .reset_index(drop=True)
)
mod_rmd_2df.head(1)

Unnamed: 0,FIPS,State,County,Years of Potential Life Lost Rate,YPLL Rate (Black),YPLL Rate (Hispanic),YPLL Rate (White),% Fair/Poor,Physically Unhealthy Days,Mentally Unhealthy Days,...,Presence of violation,% Severe Housing Problems,Severe Housing Cost Burden,Overcrowding,Inadequate Facilities,% Drive Alone,% Drive Alone (Black),% Drive Alone (Hispanic),% Drive Alone (White),% Long Commute - Drives Alone
0,48000,Texas,,6681.323364,,,,18.173766,3.539468,3.415822,...,,17.969559,,,,80.458438,,,,37.7


In [5]:
# Remove every county-status column whose name starts with 'Border'.
border_mask = cs_df.columns.str.contains('^Border')
mod_cs_df = cs_df.drop(columns=cs_df.columns[border_mask])

In [6]:
# Inner-join the cleaned RMD and AMD frames on the county name, then keep the
# resulting column labels around (as an Index and as a plain list) for inspection.
merged_df = mod_rmd_2df.merge(mod_amd_2df, on='County', how='inner')
cols = merged_df.columns
col_list = list(cols)

In [7]:
# Inner-join the county-status frame with the merged health data on county
# name, then refresh the column-label helpers for the combined frame.
final_merged_df = mod_cs_df.merge(merged_df, on='County', how='inner')
cols = final_merged_df.columns
col_list = list(cols)

In [8]:
# Discard the suffixed FIPS/State duplicates produced by the merges.
final_merged_df = final_merged_df.drop(columns=['FIPS_y', 'State_x', 'State_y'])

In [9]:
# Give the remaining FIPS column its plain name back.
final_merged_df = final_merged_df.rename(columns={'FIPS_x': 'FIPS'})

In [10]:
# Save the combined frame. A forward-slash path (matching the input paths
# above) works on every OS; a backslash is only a separator on Windows.
final_merged_df.to_csv('output_data/final_merged_df.csv')

In [11]:
# Pearson correlation matrix for all counties. The frame also holds text
# columns (County, Status); select numeric/bool columns explicitly because
# pandas >= 2.0 no longer drops non-numeric columns silently in corr().
corr = final_merged_df.select_dtypes(include=['number', 'bool']).corr()

In [12]:
# Save the Pearson matrix; forward slashes keep the path portable across OSes.
corr.to_csv('output_data/corr.csv')

In [13]:
# Spearman rank correlation for all counties, restricted to numeric/bool
# columns so the text columns (County, Status) do not raise on pandas >= 2.0.
corr_spear = final_merged_df.select_dtypes(include=['number', 'bool']).corr(method='spearman')

In [14]:
# Save the Spearman matrix; forward slashes keep the path portable across OSes.
corr_spear.to_csv('output_data/corr_spear.csv')

In [15]:
#sn.heatmap(corr,annot=True)

In [16]:
#hmap

In [17]:
# Keep only the counties whose Status is "Rural". reset_index returns a new
# frame, so it is chained onto the filter; as a bare statement its result was
# thrown away and the subset kept the row labels of the full frame.
rural_df = final_merged_df[final_merged_df["Status"] == "Rural"].reset_index(drop=True)
rural_df.head(1)

Unnamed: 0,County,Status,FIPS,Years of Potential Life Lost Rate,YPLL Rate (Black),YPLL Rate (Hispanic),YPLL Rate (White),% Fair/Poor,Physically Unhealthy Days,Mentally Unhealthy Days,...,% 65 and over,% African American,% American Indian/Alaskan Native,% Asian,% Native Hawaiian/Other Pacific Islander,% Hispanic,% Non-Hispanic White,% Not Proficient in English,% Female,% Rural
0,Anderson,Rural,48001,10272.949118,12962.159784,10880.315229,9971.540573,19.529585,3.92613,3.642847,...,14.535599,20.687207,0.677162,0.900573,0.143745,17.815763,59.126098,2.715874,39.06756,67.063533


In [18]:
# Save the rural subset; forward slashes keep the path portable across OSes.
rural_df.to_csv('output_data/rural_df.csv')

In [19]:
# Pearson correlation for rural counties, restricted to numeric/bool columns
# so the text columns (County, Status) do not raise on pandas >= 2.0.
corr = rural_df.select_dtypes(include=['number', 'bool']).corr()

In [20]:
# Save the rural Pearson matrix; forward slashes keep the path portable.
corr.to_csv('output_data/rural_corr.csv')

In [21]:
# Spearman rank correlation for rural counties, restricted to numeric/bool
# columns so the text columns (County, Status) do not raise on pandas >= 2.0.
corr_spear = rural_df.select_dtypes(include=['number', 'bool']).corr(method='spearman')

In [22]:
# Save the rural Spearman matrix; forward slashes keep the path portable.
corr_spear.to_csv('output_data/rural_corr_spear.csv')

In [23]:
# Keep only the counties whose Status is "Urban". reset_index returns a new
# frame, so it is chained onto the filter; as a bare statement its result was
# thrown away and the subset kept the row labels of the full frame.
urban_df = final_merged_df[final_merged_df["Status"] == "Urban"].reset_index(drop=True)
urban_df.head(1)

Unnamed: 0,County,Status,FIPS,Years of Potential Life Lost Rate,YPLL Rate (Black),YPLL Rate (Hispanic),YPLL Rate (White),% Fair/Poor,Physically Unhealthy Days,Mentally Unhealthy Days,...,% 65 and over,% African American,% American Indian/Alaskan Native,% Asian,% Native Hawaiian/Other Pacific Islander,% Hispanic,% Non-Hispanic White,% Not Proficient in English,% Female,% Rural
3,Aransas,Urban,48007,9315.870361,,6088.571549,11277.610901,19.517678,3.859044,3.7431,...,27.295479,1.360863,1.243548,1.9318,0.082121,27.655248,67.171125,1.588916,50.527921,27.260558


In [24]:
# Save the urban subset; forward slashes keep the path portable across OSes.
urban_df.to_csv('output_data/urban_df.csv')

In [25]:
# Pearson correlation for urban counties, restricted to numeric/bool columns
# so the text columns (County, Status) do not raise on pandas >= 2.0.
corr = urban_df.select_dtypes(include=['number', 'bool']).corr()

In [26]:
# Save the urban Pearson matrix. The file name previously read 'ubran_corr.csv';
# it is corrected to match the rural_corr / urban_corr_spear naming.
# Forward slashes keep the path portable across OSes.
corr.to_csv('output_data/urban_corr.csv')

In [27]:
# Spearman rank correlation for urban counties, restricted to numeric/bool
# columns so the text columns (County, Status) do not raise on pandas >= 2.0.
corr_spear = urban_df.select_dtypes(include=['number', 'bool']).corr(method='spearman')

In [28]:
# Save the urban Spearman matrix; forward slashes keep the path portable.
corr_spear.to_csv('output_data/urban_corr_spear.csv')