In [160]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json
import csv

import gmaps

# Census and Google Maps API keys
from config import api_key
from config import gkey



In [39]:
# Download the 2019 state-level POP values from the Census PEP endpoint.
pop_reply = requests.get(
    f"https://api.census.gov/data/2019/pep/population?get=POP&for=state:*&key={api_key}",
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Surface HTTP failures (4xx/5xx) here rather than as a confusing JSON decode
# error on the next line.
pop_reply.raise_for_status()
response = pop_reply.json()

# The payload is loaded positionally: column 0 becomes "Population" and
# column 1 becomes "State Number". Row 0 is dropped because it is not a data
# row (presumably the API's header row - confirm against the raw payload).
# Values are kept exactly as returned; no dtype conversion happens here.
response_df = pd.DataFrame(response)
rename_df = response_df.rename(columns={0: "Population", 1: "State Number"})
pop_df = rename_df.drop([0])

# Keep a local copy so later steps can be re-run without hitting the API.
pop_df.to_csv("state_pop.csv", encoding="utf-8", index=False, header=True)
print(len(pop_df))
pop_df.head()

52


Unnamed: 0,Population,State Number
1,2976149,28
2,6137428,29
3,1068778,30
4,1934408,31
5,3080156,32


In [40]:
# Download the 2018 SAIPE state-level estimates (poverty count, median
# household income, state name) from the Census timeseries endpoint.
income_reply = requests.get(
    f"https://api.census.gov/data/timeseries/poverty/saipe?get=SAEPOVALL_PT,SAEMHI_PT,NAME&for=state:*&time=2018&key={api_key}",
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Surface HTTP failures (4xx/5xx) here rather than as a confusing JSON decode
# error on the next line.
income_reply.raise_for_status()
income_response = income_reply.json()

# Columns are addressed by position: 1 -> median household income,
# 2 -> state name, 4 -> state code.
income_response_df = pd.DataFrame(income_response)
rename_inc_df = income_response_df.rename(
    columns={1: "Median Household Income Est.", 2: "State", 4: "State Number"}
)

# Drop row 0 (not a data row - presumably the API's header row) and the two
# positional columns this analysis does not use (0 and 3).
income_df = rename_inc_df.drop([0]).drop(columns=[0, 3])

income_df.to_csv("pop_income.csv", encoding="utf-8", index=False, header=True)
print(len(income_df))
income_df.head()

51


Unnamed: 0,Median Household Income Est.,State,State Number
1,49881,Alabama,1
2,74912,Alaska,2
3,59079,Arizona,4
4,47094,Arkansas,5
5,75250,California,6


In [60]:
# Join population and income on the shared state code, then index the result
# by state name.
merged_df = pd.merge(pop_df, income_df, on="State Number")
merged_df = merged_df.set_index("State")

# "State" now lives in the index, so the index must be written out; with
# index=False the saved CSV silently lost every state name.
merged_df.to_csv("merged_pop_income_state.csv", encoding="utf-8", index=True, header=True)
print(len(merged_df))
merged_df.head()

51


Unnamed: 0_level_0,Population,State Number,Median Household Income Est.
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mississippi,2976149,28,44740
Missouri,6137428,29,54412
Montana,1068778,30,55248
Nebraska,1934408,31,59724
Nevada,3080156,32,58740


In [175]:
# Load the NYT state-level Covid-19 time series and keep a raw local copy.
df = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")
df.to_csv('covid_data.csv')
df = df.drop(columns="fips")
df = df.rename(columns={"state": "State", "cases": "Cases", "deaths": "Deaths"})

# Keep each state's most recent row. A column-wise max() per state can pair a
# case count from one day with a death count (and a date) from another, which
# makes the Deaths/Cases ratio below inconsistent.
# NOTE(review): assumes "date" holds ISO-formatted strings (YYYY-MM-DD) so that
# sorting is chronological, and that the counts are running totals - confirm
# against the dataset README.
df = (
    df.sort_values("date")
    .drop_duplicates(subset="State", keep="last")
    .set_index("State")
    .sort_index()
    .reset_index()
)

# merged_df is indexed by "State"; merging on that name matches the index level
# against the "State" column of df.
covid_merge = pd.merge(merged_df, df, on="State")
covid_merge = covid_merge.drop(columns=["State Number", "date"])
covid_merge = covid_merge.sort_values("State")

# Deaths per 100 reported cases.
covid_merge["Mortality Rate"] = covid_merge["Deaths"] / covid_merge["Cases"] * 100
covid_merge

Unnamed: 0,State,Population,Median Household Income Est.,Cases,Deaths,Mortality Rate
27,Alabama,4903185,49881,113632,1996,1.756547
28,Alaska,731545,74912,5401,28,0.518423
29,Arizona,7278717,59079,196925,4698,2.38568
30,Arkansas,3017804,47094,55652,663,1.191332
31,California,39512223,75250,659991,11984,1.815782
32,Colorado,5758736,71949,54658,1915,3.503604
35,Connecticut,3565287,76366,51519,4460,8.657
33,Delaware,973764,65467,16770,600,3.577818
34,District of Columbia,705749,82533,13469,602,4.469523
36,Florida,21477737,55433,593278,10167,1.713699


In [153]:
# Attach each state's land area and derive population density.
state_size = pd.read_csv("state_size.csv")
state_size = state_size.drop(columns="index")
state_size = state_size.rename(columns={"state": "State"})
covid = pd.merge(covid_merge, state_size, on="State")

# Population is never converted to a numeric dtype upstream, so cast it
# explicitly before dividing. astype is the idiomatic pandas spelling of the
# previous np.dtype('int64').type(...) call and keeps the result a Series.
covid["Population"] = covid["Population"].astype("int64")
covid["Population Density (people/square mile)"] = (
    covid["Population"] / covid["Land Area (square miles)"]
)
covid

Unnamed: 0,State,Population,Median Household Income Est.,Cases,Deaths,Mortality Rate,Land Area (square miles),Population Density (people/square mile)
0,Alabama,4903185,49881,110361,1936,1.754243,50744,96.625907
1,Alaska,731545,74912,5175,27,0.521739,571951,1.279034
2,Arizona,7278717,59079,194931,4535,2.326464,113635,64.053478
3,Arkansas,3017804,47094,53487,619,1.157291,52068,57.9589
4,California,39512223,75250,640499,11523,1.799066,155959,253.350066
5,Colorado,5758736,71949,53674,1906,3.551068,103718,55.523014
6,Connecticut,3565287,76366,51267,4456,8.691751,4845,735.86935
7,Delaware,973764,65467,16593,593,3.573796,1954,498.34391
8,Florida,21477737,55433,579924,9757,1.682462,53927,398.274278
9,Georgia,10617423,58634,224681,4691,2.087849,57906,183.356181


In [171]:
# Add blank placeholder columns for the coordinates; they are filled in per
# state by the geocoding cell.
for coord_col in ("Lat", "Lng"):
    covid[coord_col] = ""
covid

Unnamed: 0,State,Population,Median Household Income Est.,Cases,Deaths,Mortality Rate,Land Area (square miles),Population Density (people/square mile),Lat,Lng
0,Alabama,4903185,49881,110361,1936,1.754243,50744,96.625907,,
1,Alaska,731545,74912,5175,27,0.521739,571951,1.279034,,
2,Arizona,7278717,59079,194931,4535,2.326464,113635,64.053478,,
3,Arkansas,3017804,47094,53487,619,1.157291,52068,57.9589,,
4,California,39512223,75250,640499,11523,1.799066,155959,253.350066,,
5,Colorado,5758736,71949,53674,1906,3.551068,103718,55.523014,,
6,Connecticut,3565287,76366,51267,4456,8.691751,4845,735.86935,,
7,Delaware,973764,65467,16593,593,3.573796,1954,498.34391,,
8,Florida,21477737,55433,579924,9757,1.682462,53927,398.274278,,
9,Georgia,10617423,58634,224681,4691,2.087849,57906,183.356181,,


In [172]:
# Heatmap of Covid-19 Cases in the US by state
# This cell only looks up each state's coordinates and stores them in the
# Lat/Lng columns of `covid`.

# Set parameters shared by every request (only the address changes).
params = {
    "key": gkey
}

# Hit the Google Geocoding API once per state for its coordinates.
target_url = "https://maps.googleapis.com/maps/api/geocode/json"
for index, row in covid.iterrows():
    target_state = row['State']

    # Let requests build and URL-encode the query string, so names containing
    # spaces are sent correctly and the key is supplied exactly once.
    reply = requests.get(
        target_url,
        params={**params, "address": target_state},
        timeout=30,
    )
    reply.raise_for_status()
    geo_data = reply.json()

    # Avoid printing the request URL in public GitHub repos: it contains the key.

    # Fail fast on key/quota/request problems, which would affect every state.
    status = geo_data.get("status")
    if status not in ("OK", "ZERO_RESULTS"):
        raise RuntimeError(
            f"Geocoding failed for {target_state!r}: {status} "
            f"{geo_data.get('error_message', '')}"
        )

    # extract results; a state with no match keeps its blank placeholders
    results = geo_data.get("results", [])
    if not results:
        print(f"No geocoding match for {target_state!r}; leaving Lat/Lng blank")
        continue

    location = results[0]["geometry"]["location"]
    covid.loc[index, 'Lat'] = location["lat"]
    covid.loc[index, 'Lng'] = location["lng"]

In [173]:
# Display the table to check the Lat/Lng values written by the geocoding loop.
covid

Unnamed: 0,State,Population,Median Household Income Est.,Cases,Deaths,Mortality Rate,Land Area (square miles),Population Density (people/square mile),Lat,Lng
0,Alabama,4903185,49881,110361,1936,1.754243,50744,96.625907,32.3182,-86.9023
1,Alaska,731545,74912,5175,27,0.521739,571951,1.279034,64.2008,-149.494
2,Arizona,7278717,59079,194931,4535,2.326464,113635,64.053478,34.0489,-111.094
3,Arkansas,3017804,47094,53487,619,1.157291,52068,57.9589,35.2011,-91.8318
4,California,39512223,75250,640499,11523,1.799066,155959,253.350066,36.7783,-119.418
5,Colorado,5758736,71949,53674,1906,3.551068,103718,55.523014,39.5501,-105.782
6,Connecticut,3565287,76366,51267,4456,8.691751,4845,735.86935,41.6032,-73.0877
7,Delaware,973764,65467,16593,593,3.573796,1954,498.34391,38.9108,-75.5277
8,Florida,21477737,55433,579924,9757,1.682462,53927,398.274278,27.6648,-81.5158
9,Georgia,10617423,58634,224681,4691,2.087849,57906,183.356181,32.1656,-82.9001


In [174]:
# Save the assembled table (header row, no index column) and display it.
covid.to_csv(
    "complete_dataset.csv",
    header=True,
    index=False,
    encoding="utf-8",
)
covid

Unnamed: 0,State,Population,Median Household Income Est.,Cases,Deaths,Mortality Rate,Land Area (square miles),Population Density (people/square mile),Lat,Lng
0,Alabama,4903185,49881,110361,1936,1.754243,50744,96.625907,32.3182,-86.9023
1,Alaska,731545,74912,5175,27,0.521739,571951,1.279034,64.2008,-149.494
2,Arizona,7278717,59079,194931,4535,2.326464,113635,64.053478,34.0489,-111.094
3,Arkansas,3017804,47094,53487,619,1.157291,52068,57.9589,35.2011,-91.8318
4,California,39512223,75250,640499,11523,1.799066,155959,253.350066,36.7783,-119.418
5,Colorado,5758736,71949,53674,1906,3.551068,103718,55.523014,39.5501,-105.782
6,Connecticut,3565287,76366,51267,4456,8.691751,4845,735.86935,41.6032,-73.0877
7,Delaware,973764,65467,16593,593,3.573796,1954,498.34391,38.9108,-75.5277
8,Florida,21477737,55433,579924,9757,1.682462,53927,398.274278,27.6648,-81.5158
9,Georgia,10617423,58634,224681,4691,2.087849,57906,183.356181,32.1656,-82.9001
