<a href="https://colab.research.google.com/github/katherinewilner/Visualizing-COVID-19-in-US/blob/master/Trend_Data_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:

# standard library: os to read secrets from the environment,
# urlopen/json to access the geojson file (for animated choropleth map)
import json
import os
from urllib.request import urlopen

# import packages
import pandas as pd
import plotly.express as px

In [0]:
# URL of the JHU CSSE COVID-19 daily report CSV for 04-14-2020 (raw file on GitHub)
data_covid = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-14-2020.csv"


In [0]:
# read the daily report CSV at `data_covid` into a DataFrame
df_covid_0414 = pd.read_csv(data_covid)

In [0]:
# restrict the daily report to the rows whose Country_Region is "US"
df_covid_0414_us = df_covid_0414.loc[df_covid_0414["Country_Region"].eq("US")]

In [0]:
# URL of the US Census Bureau county population estimates CSV (co-est2019-alldata.csv)
data_population = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv"

In [0]:
# import county population total data from 2019; the file is decoded explicitly as latin1
# NOTE(review): presumably the CSV is not valid UTF-8 (e.g. accented place names) - confirm
df_countypop = pd.read_csv(data_population,encoding='latin1')

In [0]:
# cast the STATE and COUNTY code columns to strings in a single call
df_countypop = df_countypop.astype({"STATE": str, "COUNTY": str})

In [0]:
# left-pad the codes with zeros: STATE to 2 characters, COUNTY to 3 characters
df_countypop = df_countypop.assign(
    STATE=df_countypop["STATE"].str.zfill(2),
    COUNTY=df_countypop["COUNTY"].str.zfill(3),
)

In [0]:
# concatenate the state and county code strings into one FIPS column (state code first)
df_countypop["FIPS"] = df_countypop["STATE"].str.cat(df_countypop["COUNTY"])

In [0]:
# keep only the rows that carry a FIPS code, since FIPS is the key used to join the
# census population table further down.
# .copy() makes the result an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning.
df_covid_0414_us = df_covid_0414_us[df_covid_0414_us["FIPS"].notnull()].copy()

In [0]:
# convert JHU df FIPS to integer and then to string
# (going through int presumably drops a float ".0" suffix before the string cast - confirm dtype)
df_covid_0414_us["FIPS"] = df_covid_0414_us["FIPS"].astype(int).astype(str)

In [0]:
# left-pad FIPS with zeros to five characters so it has the same form as the
# STATE (2) + COUNTY (3) key built on the census table
df_covid_0414_us["FIPS"] = df_covid_0414_us["FIPS"].str.zfill(5)

In [0]:
# left-join the 2019 population estimate onto the covid-19 df, keyed on FIPS
df_covid_0414_us = df_covid_0414_us.merge(
    df_countypop[["FIPS", "POPESTIMATE2019"]],
    how="left",
    on="FIPS",
)

In [0]:
# per-capita rates: counts divided by the 2019 population estimate, scaled to 1000 people
df_covid_0414_us = df_covid_0414_us.assign(
    cases_per1000=lambda frame: frame["Confirmed"].div(frame["POPESTIMATE2019"]).mul(1000),
    deaths_per1000=lambda frame: frame["Deaths"].div(frame["POPESTIMATE2019"]).mul(1000),
)

In [0]:
# round both per capita columns to two decimal places
df_covid_0414_us = df_covid_0414_us.round({"cases_per1000": 2, "deaths_per1000": 2})

In [0]:
# preview the first rows of the merged frame, including the new per-capita columns
df_covid_0414_us.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,POPESTIMATE2019,cases_per1000,deaths_per1000
0,45001,Abbeville,South Carolina,US,2020-04-14 23:33:31,34.223334,-82.461707,9,0,0,9,"Abbeville, South Carolina, US",24527.0,0.37,0.0
1,22001,Acadia,Louisiana,US,2020-04-14 23:33:31,30.295065,-92.414197,104,5,0,99,"Acadia, Louisiana, US",62045.0,1.68,0.08
2,51001,Accomack,Virginia,US,2020-04-14 23:33:31,37.767072,-75.632346,15,0,0,15,"Accomack, Virginia, US",32316.0,0.46,0.0
3,16001,Ada,Idaho,US,2020-04-14 23:33:31,43.452658,-116.241552,538,9,0,529,"Ada, Idaho, US",481587.0,1.12,0.02
4,19001,Adair,Iowa,US,2020-04-14 23:33:31,41.330756,-94.471059,1,0,0,1,"Adair, Iowa, US",7152.0,0.14,0.0


In [0]:
# set up mapbox access token
# The token is read from the MAPBOX_ACCESS_TOKEN environment variable instead of being
# written into the notebook, so the credential is not committed or shared with the file.
# (A token that was previously committed should be rotated in the Mapbox account.)
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_access_token:
    # fail early with a clear message rather than at map-rendering time
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable before running the mapbox cells."
    )
px.set_mapbox_access_token(mapbox_access_token)

In [0]:
# bubble map: one marker per row of the county frame, sized by the confirmed case count
bubble_map = px.scatter_mapbox(
    df_covid_0414_us,
    lat="Lat",
    lon="Long_",
    size="Confirmed",
    hover_name="Combined_Key",
    hover_data=["Confirmed", "Deaths", "cases_per1000", "deaths_per1000"],
    # display names used in place of the raw column names
    labels={
        "Confirmed": "Cumulative Confirmed Cases",
        "Deaths": "Cumulative Confirmed Deaths",
        "cases_per1000": "Cases per 1,000 People",
        "deaths_per1000": "Deaths per 1,000 People",
    },
    title="Confirmed COVID-19 Cases in the United States",
    center={"lat": 37.0902, "lon": -95.7129},  # center of the US
    zoom=2.5,
)

In [0]:
# display the bubble map figure as the cell output
bubble_map

In [0]:
# Frame used by the density heatmap: one row per county with at least one confirmed case,
# with a fresh index (the previous index is kept as an "index" column by reset_index()).
# The heatmap cell already weights every point by its "Confirmed" value (z = "Confirmed"),
# so repeating each row once per confirmed case as well would weight a county by roughly
# Confirmed squared and grow the frame to one row per case.
# The variable name is kept because the cells below read it.
df_covid_0414_repeat = df_covid_0414_us[df_covid_0414_us["Confirmed"] > 0].reset_index()

In [0]:
# rename the coordinate columns (Lat -> lat, Long_ -> lon) for use with mapbox
df_covid_0414_repeat = df_covid_0414_repeat.rename(columns=dict(Lat="lat", Long_="lon"))

In [0]:
# density heatmap of confirmed cases, built from the lat/lon frame prepared above
density_heatmap = px.density_mapbox(
    df_covid_0414_repeat,  # dataframe with data for the heatmap
    lat="lat",
    lon="lon",
    z="Confirmed",
    radius=25,
    hover_name="Combined_Key",
    hover_data=["Confirmed", "Deaths", "cases_per1000", "deaths_per1000"],
    # display names used in place of the raw column names
    labels={
        "Confirmed": "Cumulative Confirmed Cases",
        "Deaths": "Cumulative Confirmed Deaths",
        "cases_per1000": "Cases per 1,000 People",
        "deaths_per1000": "Deaths per 1,000 People",
    },
    color_continuous_scale="Inferno",
    title="Density Heatmap of Confirmed COVID-19 Cases in the US",
    center={"lat": 37.0902, "lon": -95.7129},  # center of the US
    zoom=2.5,
)

In [0]:
# instead of viewing the figure inline, export it as an HTML file
# (written to the current working directory) that can be opened separately
density_heatmap.write_html("density_heatmap_covid19_april14.html")

In [0]:
# display the density heatmap figure as the cell output
density_heatmap

In [0]:
# download the county shapes file (geojson-counties-fips.json) and parse it into a Python object
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    # read the whole payload first, then decode the JSON text
    us_counties_shape = json.loads(response.read())

In [0]:
# URL of the New York Times county-level covid-19 CSV (us-counties.csv, master branch)
covid_nyt_data = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"

In [0]:
import pandas as pd


In [0]:
# read the NYT county-level CSV at `covid_nyt_data` into a DataFrame
df_county_nyt = pd.read_csv(covid_nyt_data)

In [0]:
# preview the first rows of the NYT frame
# (the cell previously ended in a dangling "." which is a syntax error)
df_county_nyt.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [0]:
# normalize fips to a zero-padded, five-character string

# remove rows without a fips code; .copy() makes the result an independent frame so the
# column assignments here and in the following cells do not raise SettingWithCopyWarning
df_county_nyt = df_county_nyt[df_county_nyt["fips"].notnull()].copy()
# edit datatypes: float -> int -> str (drops the trailing ".0" seen in the preview),
# then left-pad with zeros to five characters
df_county_nyt["fips"] = df_county_nyt["fips"].astype(int).astype(str).str.zfill(5)

In [0]:
# parse the "date" column into pandas datetimes so the .dt accessors used in the
# following cells are available
df_county_nyt["date"] = pd.to_datetime(df_county_nyt["date"])

In [0]:
# make column with the month name taken from the date (via .dt.month_name())
df_county_nyt["month_name"] = df_county_nyt["date"].dt.month_name()

In [0]:
# make column with the day of the month taken from the date (via .dt.day)
df_county_nyt["day_of_month"] = df_county_nyt["date"].dt.day

In [0]:
# build a "<month name> <day>" label by joining the two columns with a single space
df_county_nyt["month_day_name"] = df_county_nyt["month_name"].str.cat(
    df_county_nyt["day_of_month"].astype(str),
    sep=" ",
)

In [0]:
# left-join the 2019 population estimate onto the nyt covid-19 df
# (key is "fips" on the nyt side and "FIPS" on the census side)
df_county_nyt = df_county_nyt.merge(
    df_countypop[["FIPS", "POPESTIMATE2019"]],
    how="left",
    left_on="fips",
    right_on="FIPS",
)

In [0]:
# cases and deaths per 100000 residents (count / 2019 population estimate * 100000),
# rounded to the 3rd decimal
df_county_nyt = df_county_nyt.assign(
    cases_per100000=lambda frame: frame["cases"].div(frame["POPESTIMATE2019"]).mul(100000).round(3),
    deaths_per100000=lambda frame: frame["deaths"].div(frame["POPESTIMATE2019"]).mul(100000).round(3),
)

In [0]:
# print the hex color codes of plotly's sequential "Plasma" palette
print(px.colors.sequential.Plasma)

['#0d0887', '#46039f', '#7201a8', '#9c179e', '#bd3786', '#d8576b', '#ed7953', '#fb9f3a', '#fdca26', '#f0f921']


In [0]:
# the Plasma hex codes written out as a literal list (same values as px.colors.sequential.Plasma)
['#0d0887', '#46039f', '#7201a8', '#9c179e', '#bd3786', '#d8576b', '#ed7953', '#fb9f3a', '#fdca26', '#f0f921']

['#0d0887',
 '#46039f',
 '#7201a8',
 '#9c179e',
 '#bd3786',
 '#d8576b',
 '#ed7953',
 '#fb9f3a',
 '#fdca26',
 '#f0f921']

In [0]:
# make a logarithmic color scale: each stop sits at a power of ten of the normalized
# color range (e.g. 0, 10, 100, 1000 and 10000 when the range maximum is 10000)
covid_colorscale = [
    [position, color]
    for position, color in zip(
        (0, 1e-3, 1e-2, 1e-1, 1.0),
        ('#0d0887', '#7201a8', '#bd3786', '#ed7953', '#fdca26'),
    )
]

In [0]:
# update plotly version
!pip install plotly --upgrade plotly

Requirement already up-to-date: plotly in /usr/local/lib/python3.6/dist-packages (4.6.0)


In [0]:
import plotly.express as px

In [0]:
# URL of the New York Times county-level covid-19 CSV
# (same value as assigned to covid_nyt_data earlier in this notebook)
covid_nyt_data = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"

In [0]:
# download the county shapes file (geojson-counties-fips.json) and parse it into a Python object
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    # read the whole payload first, then decode the JSON text
    us_counties_shape = json.loads(response.read())

In [0]:
# make a logarithmic color scale: each stop sits at a power of ten of the normalized
# color range (e.g. 0, 10, 100, 1000 and 10000 when the range maximum is 10000)
covid_colorscale = [
    [position, color]
    for position, color in zip(
        (0, 1e-3, 1e-2, 1e-1, 1.0),
        ('#0d0887', '#7201a8', '#bd3786', '#ed7953', '#fdca26'),
    )
]

In [33]:
# animated choropleth of cumulative reported cases per 100,000 residents by county,
# one animation frame per "month_day_name" value

# Rows are limited to the period named in the title. The CSV is read from the repository's
# master branch, so it may contain later dates, and "month_day_name" carries no year, so
# rows from another year would fall into the same frames.
# The hover title is stored as a regular column so it stays aligned with the filtered rows.
covid_map_rows = (
    df_county_nyt
    .loc[df_county_nyt["date"] <= pd.Timestamp("2020-04-15")]
    .assign(county_state=lambda frame: frame["county"] + ", " + frame["state"])
)

covid_uscounty_color = px.choropleth(
    covid_map_rows,  # dataframe with data for choropleth
    geojson=us_counties_shape,  # shape, geospatial data geojson
    locations="fips",  # column in the dataframe that is matched to the geojson shapes
    color="cases_per100000",  # column in df that denotes the color scale
    animation_frame="month_day_name",
    # rows sharing an animation_group describe the same object across frames,
    # so the group is the county (fips), not the frame key
    animation_group="fips",
    hover_name="county_state",
    hover_data=["cases", "deaths", "cases_per100000", "deaths_per100000"],
    color_continuous_scale=covid_colorscale,  # custom color scale to better show exponential growth
    scope="usa",  # scope of map (world, USA, or any continent)
    # renaming any columns used ("Cumulative" spelling corrected)
    labels={
        "cases": "Cumulative Reported COVID-19 Cases",
        "deaths": "Cumulative Reported COVID-19 Deaths",
        "month_day_name": "Date",
        "fips": "FIPS Code",
        "cases_per100000": "Cumulative Reported Cases per 100,000 People",
        "deaths_per100000": "Cumulative Reported Deaths per 100,000 People",
    },
    title="Spread of COVID-19 Cases in US Counties Per Capita<br>January 21, 2020-April 15, 2020",
)

ValueError: ignored