# Resources

HCAHPS: https://data.medicare.gov/Hospital-Compare/Patient-survey-HCAHPS-Hospital/dgck-syfz

Population: https://www2.census.gov/programs-surveys/popest/datasets/2010-2016/cities/totals/sub-est2016_all.csv
    
Mortality: https://www.cdc.gov/nchs/data_access/vitalstatsonline.htm#Mortality_Multiple , https://catalog.data.gov/dataset/cdc-wonder-mortality-multiple-cause-of-death , https://wonder.cdc.gov/wonder/help/WONDER-API.html

In [2]:
# Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
# Load the HCAHPS patient survey export (CSV) into a pandas DataFrame
survey_csv_path = '../Resources/Patient_survey__HCAHPS__-_Hospital.csv'
survey_raw_df = pd.read_csv(survey_csv_path)

# Preview the first few rows to check that the file loaded as expected
survey_raw_df.head()

Unnamed: 0,Provider ID,Hospital Name,Address,City,State,ZIP Code,County Name,Phone Number,HCAHPS Measure ID,HCAHPS Question,...,HCAHPS Answer Percent,HCAHPS Answer Percent Footnote,HCAHPS Linear Mean Value,Number of Completed Surveys,Number of Completed Surveys Footnote,Survey Response Rate Percent,Survey Response Rate Percent Footnote,Measure Start Date,Measure End Date,Location
0,240018,MAYO CLINIC HEALTH SYSTEM - RED WING,"701 HEWITT BOULEVARD, PO BOX 95",RED WING,MN,55066,GOODHUE,6512675000,H_COMP_3_LINEAR_SCORE,Staff responsiveness - linear mean score,...,Not Applicable,,93,347,,33,,07/01/2016,06/30/2017,"701 HEWITT BOULEVARD, PO BOX 95\nRED WING, MN\..."
1,231334,PROMEDICA HERRICK HOSPITAL,500 E POTTAWATAMIE STREET,TECUMSEH,MI,49286,LENAWEE,5174243000,H_QUIET_HSP_SN_P,Patients who reported that the area around the...,...,Not Available,1 - The number of cases/patients is too few to...,Not Applicable,Not Available,1 - The number of cases/patients is too few to...,Not Available,1 - The number of cases/patients is too few to...,07/01/2016,06/30/2017,"500 E POTTAWATAMIE STREET\nTECUMSEH, MI\n(42.0..."
2,231332,BRONSON LAKEVIEW HOSPITAL,408 HAZEN STREET,PAW PAW,MI,49079,VAN BUREN,2696571400,H_COMP_1_A_P,"Patients who reported that their nurses ""Alway...",...,85,11 - There were discrepancies in the data coll...,Not Applicable,130,11 - There were discrepancies in the data coll...,30,11 - There were discrepancies in the data coll...,07/01/2016,06/30/2017,"408 HAZEN STREET\nPAW PAW, MI\n(42.221009, -85..."
3,240010,MAYO CLINIC HOSPITAL ROCHESTER,1216 SECOND STREET SOUTHWEST,ROCHESTER,MN,55902,OLMSTED,5072555123,H_RECMND_PY,"Patients who reported YES, they would probably...",...,15,,Not Applicable,708,,38,,07/01/2016,06/30/2017,"1216 SECOND STREET SOUTHWEST\nROCHESTER, MN\n(..."
4,231330,MARLETTE REGIONAL HOSPITAL,2770 MAIN STREET,MARLETTE,MI,48453,SANILAC,9896354000,H_COMP_5_LINEAR_SCORE,Communication about medicines - linear mean score,...,Not Applicable,,76,119,,38,,07/01/2016,06/30/2017,"2770 MAIN STREET\nMARLETTE, MI\n(43.332579, -8..."


In [4]:
# Build the working table from a subset of the raw survey columns, renamed to
# shorter labels. Columns set to "" are placeholders to be filled in by later
# steps (populations, county mortality rates, survey star rating).
data_df = pd.DataFrame({
    "Hospital": survey_raw_df["Hospital Name"],
    "Location": survey_raw_df["Location"],
    "City": survey_raw_df["City"],
    "City Population": "",
    "County": survey_raw_df["County Name"],
    "County Population": "",
    "Mortality Rates (by county)": "",
    "State": survey_raw_df["State"],
    "Survey Rating": "",
    "Surveys Completed": survey_raw_df["Number of Completed Surveys"],
    "Survey Response Rate (%)": survey_raw_df["Survey Response Rate Percent"],
})

# TODO: only include survey rows with "HCAHPS Measure ID" == "H_STAR_RATING"
# (not implemented yet -- every survey row is currently kept)

# Show the first rows of the assembled dataframe
data_df.head()

Unnamed: 0,Hospital,Location,City,City Population,County,County Population,Mortality Rates (by county),State,Survey Rating,Surveys Completed,Survey Response Rate (%)
0,MAYO CLINIC HEALTH SYSTEM - RED WING,"701 HEWITT BOULEVARD, PO BOX 95\nRED WING, MN\...",RED WING,,GOODHUE,,,MN,,347,33
1,PROMEDICA HERRICK HOSPITAL,"500 E POTTAWATAMIE STREET\nTECUMSEH, MI\n(42.0...",TECUMSEH,,LENAWEE,,,MI,,Not Available,Not Available
2,BRONSON LAKEVIEW HOSPITAL,"408 HAZEN STREET\nPAW PAW, MI\n(42.221009, -85...",PAW PAW,,VAN BUREN,,,MI,,130,30
3,MAYO CLINIC HOSPITAL ROCHESTER,"1216 SECOND STREET SOUTHWEST\nROCHESTER, MN\n(...",ROCHESTER,,OLMSTED,,,MN,,708,38
4,MARLETTE REGIONAL HOSPITAL,"2770 MAIN STREET\nMARLETTE, MI\n(43.332579, -8...",MARLETTE,,SANILAC,,,MI,,119,38


In [7]:
# Load the census population estimates (through 2016) from CSV.
# The file is read with latin-1 encoding, as passed to read_csv below.
population_csv_path = '../Resources/sub-est2016_all.csv'
population_raw_df = pd.read_csv(population_csv_path, encoding='latin-1')

# Preview the first few rows
population_raw_df.head()


Unnamed: 0,SUMLEV,STATE,COUNTY,PLACE,COUSUB,CONCIT,PRIMGEO_FLAG,FUNCSTAT,NAME,STNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016
0,40,1,0,0,0,0,0,A,Alabama,Alabama,4779736,4780131,4785492,4799918,4815960,4829479,4843214,4853875,4863300
1,162,1,0,124,0,0,0,A,Abbeville city,Alabama,2688,2688,2683,2685,2647,2631,2619,2616,2603
2,162,1,0,460,0,0,0,A,Adamsville city,Alabama,4522,4522,4517,4495,4472,4447,4428,4395,4360
3,162,1,0,484,0,0,0,A,Addison town,Alabama,758,756,754,753,748,748,747,740,738
4,162,1,0,676,0,0,0,A,Akron town,Alabama,356,356,355,345,345,342,337,337,334


In [None]:
# add column to survey_df called "Population (2016 est.)"

# for each survey city, find the population_raw_df row whose NAME starts with that city name (we may have to do this state by state)

# add city population to column in survey_df


In [None]:
# optional: create a county_df with county level data (population, hospital count, hospital per capita, mortality rate, etc)

In [None]:
# use Google API to search hospital address and gather county name

# insert column on survey_df and add county name

In [None]:
# make API call to CDC WONDER to pull mortality rates by county

# add to survey_df

# average rates and add to state_df

In [None]:
# visualize relationships between county mortality rates, hospital count, hospital star rating,
# and hospitals per capita, using matplotlib (optionally seaborn, or a Tableau plot on a map)