In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states 
import gmaps

In [2]:
api_key = "1383ed6d1c178075bb6c33d544f4a17d700c9607"

In [3]:
c = Census(api_key, year=2016)

In [35]:
# Run Census Search to retrieve data on all states
# Note the addition of "B23025_005E" for unemployment count
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"), {'for': 'state:*'})


# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add in Employment Rate (Employment Count / Population)
census_pd["Unemployment Rate"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd = census_pd[["State", "Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate"]]

census_pd

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,1,Alabama,4841164.0,38.6,44758.0,24736.0,868666.0,17.943329,3.810633
1,2,Alaska,736855.0,33.6,74444.0,34191.0,72826.0,9.883356,4.090221
2,4,Arizona,6728577.0,37.1,51340.0,26686.0,1165636.0,17.323663,3.71508
3,5,Arkansas,2968472.0,37.7,42336.0,23401.0,542431.0,18.273071,3.139326
4,6,California,38654206.0,36.0,63783.0,31458.0,6004257.0,15.533257,4.355868
5,8,Colorado,5359295.0,36.4,62520.0,33230.0,637938.0,11.903394,3.203686
6,9,Connecticut,3588570.0,40.6,71755.0,39906.0,360464.0,10.044781,4.319409
7,10,Delaware,934695.0,39.6,61017.0,31118.0,109448.0,11.709488,3.602138
8,11,District of Columbia,659009.0,33.8,72935.0,48781.0,112060.0,17.00432,5.056987
9,12,Florida,19934451.0,41.6,48900.0,27598.0,3139258.0,15.747903,4.023266


In [5]:
gr_population = census_pd.groupby('State')
gr_population["Population"]


<pandas.core.groupby.groupby.SeriesGroupBy object at 0x10c1454a8>

In [9]:
state_table = pd.read_csv("csv_data/state_abbreviations.csv")
state_table

Unnamed: 0,CAPS,Name,State
0,ALABAMA,Alabama,AL
1,ALASKA,Alaska,AK
2,ARIZONA,Arizona,AZ
3,ARKANSAS,Arkansas,AR
4,CALIFORNIA,California,CA
5,COLORADO,Colorado,CO
6,CONNECTICUT,Connecticut,CT
7,DELAWARE,Delaware,DE
8,FLORIDA,Florida,FL
9,GEORGIA,Georgia,GA


In [36]:
merge_table = pd.merge(census_pd, state_table, on="Name", how="outer")
merge_table

Unnamed: 0,State_x,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate,CAPS,State_y
0,1,Alabama,4841164.0,38.6,44758.0,24736.0,868666.0,17.943329,3.810633,ALABAMA,AL
1,2,Alaska,736855.0,33.6,74444.0,34191.0,72826.0,9.883356,4.090221,ALASKA,AK
2,4,Arizona,6728577.0,37.1,51340.0,26686.0,1165636.0,17.323663,3.71508,ARIZONA,AZ
3,5,Arkansas,2968472.0,37.7,42336.0,23401.0,542431.0,18.273071,3.139326,ARKANSAS,AR
4,6,California,38654206.0,36.0,63783.0,31458.0,6004257.0,15.533257,4.355868,CALIFORNIA,CA
5,8,Colorado,5359295.0,36.4,62520.0,33230.0,637938.0,11.903394,3.203686,COLORADO,CO
6,9,Connecticut,3588570.0,40.6,71755.0,39906.0,360464.0,10.044781,4.319409,CONNECTICUT,CT
7,10,Delaware,934695.0,39.6,61017.0,31118.0,109448.0,11.709488,3.602138,DELAWARE,DE
8,11,District of Columbia,659009.0,33.8,72935.0,48781.0,112060.0,17.00432,5.056987,,
9,12,Florida,19934451.0,41.6,48900.0,27598.0,3139258.0,15.747903,4.023266,FLORIDA,FL


In [39]:
newtable_df = merge_table.rename(columns={"State_x": "Index", "Name": "Name", "Population": "Population",
                                      "Median Age": "Median Age", "Household Income": "Household Income", "Per Capita Income": "BPer Capita Income",
                                         "Poverty Count": "Poverty Count", "Unemployment Rate": "Unemployment Rate", "CAPS":"CAPS", "State_y":"State"})

newtable_df

Unnamed: 0,Index,Name,Population,Median Age,Household Income,BPer Capita Income,Poverty Count,Poverty Rate,Unemployment Rate,CAPS,State
0,1,Alabama,4841164.0,38.6,44758.0,24736.0,868666.0,17.943329,3.810633,ALABAMA,AL
1,2,Alaska,736855.0,33.6,74444.0,34191.0,72826.0,9.883356,4.090221,ALASKA,AK
2,4,Arizona,6728577.0,37.1,51340.0,26686.0,1165636.0,17.323663,3.71508,ARIZONA,AZ
3,5,Arkansas,2968472.0,37.7,42336.0,23401.0,542431.0,18.273071,3.139326,ARKANSAS,AR
4,6,California,38654206.0,36.0,63783.0,31458.0,6004257.0,15.533257,4.355868,CALIFORNIA,CA
5,8,Colorado,5359295.0,36.4,62520.0,33230.0,637938.0,11.903394,3.203686,COLORADO,CO
6,9,Connecticut,3588570.0,40.6,71755.0,39906.0,360464.0,10.044781,4.319409,CONNECTICUT,CT
7,10,Delaware,934695.0,39.6,61017.0,31118.0,109448.0,11.709488,3.602138,DELAWARE,DE
8,11,District of Columbia,659009.0,33.8,72935.0,48781.0,112060.0,17.00432,5.056987,,
9,12,Florida,19934451.0,41.6,48900.0,27598.0,3139258.0,15.747903,4.023266,FLORIDA,FL


In [29]:
hospitalgen = pd.read_csv("csv_data/Hospital General Information.csv")
hospitalgen.head()

Unnamed: 0,Provider ID,Hospital Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes
1,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,Acute Care Hospitals,Government - Hospital District or Authority,Yes
2,10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,LAUDERDALE,Acute Care Hospitals,Government - Hospital District or Authority,Yes
3,10007,MIZELL MEMORIAL HOSPITAL,702 N MAIN ST,OPP,AL,36467,COVINGTON,Acute Care Hospitals,Voluntary non-profit - Private,Yes
4,10008,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,LUVERNE,AL,36049,CRENSHAW,Acute Care Hospitals,Proprietary,Yes


In [8]:
hospitalstr = pd.read_csv("csv_data/Structural Measures - Hospital.csv")
hospitalstr.head()


Unnamed: 0,Provider ID,Hospital Name,Address,City,State,ZIP Code,County Name,Measure Name,Measure ID,Measure Response
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Able to receive lab results electronically,OP_12,Yes
1,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,"Able to track patients' lab results, tests, an...",OP_17,No
2,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,Able to receive lab results electronically,OP_12,Yes
3,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,"Able to track patients' lab results, tests, an...",OP_17,Yes
4,10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,LAUDERDALE,Able to receive lab results electronically,OP_12,Yes


In [9]:
hospitaldetail = pd.read_csv("csv_data/HCAHPS - Hospital.csv")
hospitaldetail.head()



Unnamed: 0,Provider ID,Hospital Name,Address,City,State,ZIP Code,County Name
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON
1,10005,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL
2,10006,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,LAUDERDALE
3,10007,MIZELL MEMORIAL HOSPITAL,702 N MAIN ST,OPP,AL,36467,COVINGTON
4,10008,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,LUVERNE,AL,36049,CRENSHAW


In [22]:
count = hospitaldetail["Hospital Name"].count()
count

4744

In [11]:
len(set(zip(hospitaldetail['State'],hospitaldetail['Hospital Name'])))

4744

In [13]:
state_counts = hospitaldetail["State"].value_counts()
state_counts.head()

TX    396
CA    345
FL    187
IL    174
NY    174
Name: State, dtype: int64

In [26]:
grouped_df = hospitaldetail.groupby('State',as_index =False)
count_df = grouped_df["Hospital Name"].count()
count_df





Unnamed: 0,State,Hospital Name
0,AK,22
1,AL,94
2,AR,77
3,AS,1
4,AZ,80
5,CA,345
6,CO,82
7,CT,32
8,DC,8
9,DE,8


In [17]:
#highest_hospital = df.loc[["State", "Berry", "Hudson",
                                #"Mcdonald", "Morales"], ["id", "first_name", "Phone Number"]]
#print(richardson_to_morales)

In [41]:
hospital_table = pd.merge(newtable_df, count_df, on="State", how="outer")
hospital_table

Unnamed: 0,Index,Name,Population,Median Age,Household Income,BPer Capita Income,Poverty Count,Poverty Rate,Unemployment Rate,CAPS,State,Hospital Name
0,1.0,Alabama,4841164.0,38.6,44758.0,24736.0,868666.0,17.943329,3.810633,ALABAMA,AL,94.0
1,2.0,Alaska,736855.0,33.6,74444.0,34191.0,72826.0,9.883356,4.090221,ALASKA,AK,22.0
2,4.0,Arizona,6728577.0,37.1,51340.0,26686.0,1165636.0,17.323663,3.71508,ARIZONA,AZ,80.0
3,5.0,Arkansas,2968472.0,37.7,42336.0,23401.0,542431.0,18.273071,3.139326,ARKANSAS,AR,77.0
4,6.0,California,38654206.0,36.0,63783.0,31458.0,6004257.0,15.533257,4.355868,CALIFORNIA,CA,345.0
5,8.0,Colorado,5359295.0,36.4,62520.0,33230.0,637938.0,11.903394,3.203686,COLORADO,CO,82.0
6,9.0,Connecticut,3588570.0,40.6,71755.0,39906.0,360464.0,10.044781,4.319409,CONNECTICUT,CT,32.0
7,10.0,Delaware,934695.0,39.6,61017.0,31118.0,109448.0,11.709488,3.602138,DELAWARE,DE,8.0
8,11.0,District of Columbia,659009.0,33.8,72935.0,48781.0,112060.0,17.00432,5.056987,,,
9,72.0,Puerto Rico,3529385.0,39.4,19606.0,11688.0,1577075.0,44.684131,6.428315,,,


In [48]:
#emergency_df = hospitalgen.groupby('State',as_index =False)
#emcount_df = emergency_df["Emergency Services"].count()
#emcount_df
#emergencydf.groupby('hospitalgen')['Emergency Services'].value_counts().unstack(1)