# Geog 80 Final - Ryan Meyer
## COVID-19 Pandemic in the United States

### Section 1: Data loading and processing
First, we load the COVID-19 case data, the US Census 2019 population estimates, and GeoJSON data containing county geometry. Some cleaning is needed so that FIPS codes have the same format in every dataset. We also clean the population dataset and join it to the case data by county and state name. Finally, we divide the case counts in the joined table by each county's population to yield the final dataset: a dataframe of COVID-19 cases per capita for each county on each day.

In [4]:
import pandas as pd
import json
from urllib.request import urlopen
import warnings

# Remote inputs: the confirmed-case time series and the county population
# estimates live in the project repository; the county GeoJSON (keyed by FIPS)
# comes from the plotly datasets repository.
_PROJECT_DATA_ROOT = "https://raw.githubusercontent.com/ryanpmeyer/Geog80Final/master/data/"
caseDataFile = _PROJECT_DATA_ROOT + "time_series_covid19_confirmed_US.csv"
populationDataFile = _PROJECT_DATA_ROOT + "co-est2019-alldata.csv"
geoJSONFile = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"

def getData(case_source=None, population_source=None, geojson_source=None):
  """Load, clean and combine the case, population and county-geometry data.

  Args:
    case_source: Anything ``pd.read_csv`` accepts for the confirmed-case
      table. Defaults to the module-level ``caseDataFile``.
    population_source: Anything ``pd.read_csv`` accepts for the population
      table (read as latin1). Defaults to ``populationDataFile``.
    geojson_source: URL of the county GeoJSON, opened with ``urlopen``.
      Defaults to ``geoJSONFile``.

  Returns:
    A ``(counties, joined)`` tuple. ``counties`` is the GeoJSON dict with its
    ``features`` restricted to ids that occur in ``joined['FIPS']``.
    ``joined`` is the case table indexed by ``"<county>,<state>"``, with the
    date columns divided by ``POPESTIMATE2019`` (cases per capita) and the
    population kept as the last column.
  """
  # Defaults are resolved here rather than in the signature so the function
  # picks up the module-level constants at call time.
  if case_source is None:
    case_source = caseDataFile
  if population_source is None:
    population_source = populationDataFile
  if geojson_source is None:
    geojson_source = geoJSONFile

  with urlopen(geojson_source) as response:
    counties = json.load(response)

  # Case data: drop incomplete rows, then left-pad FIPS codes with zeros to
  # the longest code present so they match the string ids used in the GeoJSON.
  covid_cases = pd.read_csv(case_source).dropna().copy()
  fips = covid_cases['FIPS'].astype('int32').astype(str)
  covid_cases['FIPS'] = fips.str.zfill(int(fips.str.len().max()))

  # Population data: drop rows whose county name equals the state name, then
  # strip administrative designations so names match the case table.
  # .copy() makes the filtered frame independent, so the assignments below
  # modify it directly instead of a view of the raw table.
  us_pop_raw = pd.read_csv(population_source, encoding='latin1')
  us_pop = us_pop_raw[us_pop_raw['CTYNAME'] != us_pop_raw['STNAME']].copy()
  strings2remove = [' County', ' Borough', ' Census Area', ' Municipality',
                    ' Parish', ' city', ' City']
  for string in strings2remove:
    us_pop['CTYNAME'] = us_pop['CTYNAME'].str.replace(string, "", case=False)
  us_pop['CTYNAME'] = us_pop['CTYNAME'].str.strip()

  # Where cleaning left several rows with the same (state, county) name, keep
  # only the most populous one. idxmax() yields an index *label*, which is
  # what must be compared with rows.index; argmax() yields a position and
  # would cause every duplicate, including the largest, to be dropped.
  name_cols = ['STNAME', 'CTYNAME']
  duplicated_rows = us_pop[us_pop.duplicated(name_cols, keep=False)]
  indices_to_remove = []
  for _, rows in duplicated_rows.groupby(name_cols):
    keep_label = rows['POPESTIMATE2019'].idxmax()
    indices_to_remove.extend(
        label for label in rows.index if label != keep_label)
  us_pop = us_pop.drop(index=indices_to_remove)

  # Join on "<county>,<state>" and convert case counts to cases per capita.
  us_pop['place'] = us_pop['CTYNAME'] + ',' + us_pop['STNAME']
  covid_cases['place'] = covid_cases['Admin2'] + ',' + covid_cases['Province_State']
  joined = covid_cases.set_index('place').join(
      us_pop.set_index('place')['POPESTIMATE2019'],
      how='inner', lsuffix='County', rsuffix='').sort_index()

  # The first 11 columns are treated as metadata and the last one is the
  # population; everything in between is divided by the population.
  first_date_col = 11
  date_cols = joined.columns[first_date_col:-1]
  joined[date_cols] = joined[date_cols].div(joined['POPESTIMATE2019'], axis=0)

  # Keep only GeoJSON features whose id has a matching FIPS code in the data.
  used_fips = set(joined['FIPS'])
  counties['features'] = [
      feature for feature in counties['features']
      if feature['id'] in used_fips
  ]

  return counties, joined

# Build both datasets once. Warnings raised while getData() runs are silenced
# so they do not clutter the notebook output; the filter is restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    counties, caseData = getData()
# Preview the first five rows of the per-capita table.
caseData.head(n=5)

Unnamed: 0_level_0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,POPESTIMATE2019
place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Abbeville,South Carolina",84045001,US,USA,840,45001,Abbeville,South Carolina,US,34.223334,-82.461707,...,0.037632,0.03808,0.03808,0.038651,0.039141,0.039181,0.039426,0.03963,0.039874,24527
"Acadia,Louisiana",84022001,US,USA,840,22001,Acadia,Louisiana,US,30.295065,-92.414197,...,0.062358,0.0626,0.0626,0.063663,0.063663,0.064179,0.064115,0.064937,0.065743,62045
"Accomack,Virginia",84051001,US,USA,840,51001,Accomack,Virginia,US,37.767072,-75.632346,...,0.040537,0.040692,0.04097,0.041156,0.041466,0.04162,0.041651,0.041992,0.042053,32316
"Ada,Idaho",84016001,US,USA,840,16001,Ada,Idaho,US,43.452658,-116.241552,...,0.049887,0.05052,0.05052,0.05154,0.052184,0.052668,0.053367,0.054281,0.055076,481587
"Adair,Iowa",84019001,US,USA,840,19001,Adair,Iowa,US,41.330756,-94.471059,...,0.058865,0.059424,0.059983,0.059983,0.061521,0.061661,0.061661,0.062081,0.06264,7152


### Section 2: Choropleth analysis

We now have confirmed cases per capita for each US county for each day from 1/22/20 through 12/2/20. Next, we will look at a choropleth map of this data.

In [5]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipyleaflet
import ipywidgets as widgets
import datetime
import branca

# Date that slider offset 0 corresponds to.
STARTDATE = datetime.datetime(year=2020, month=1, day=22)
# Largest slider offset: 315 days after STARTDATE is 12/2/20.
NUMDAYS = 315
# Upper end of the colour scale: the largest per-capita value on 12/2/20.
max_caseload = max(caseData['12/2/20'])


def get_colorscale():
  """Return branca's ``YlOrRd_09`` linear colormap scaled to 0..``max_caseload``."""
  palette = branca.colormap.linear.YlOrRd_09
  return palette.scale(0, max_caseload)

def get_date_string(date, start=None):
  """Return the case-table column label for a day offset.

  Args:
    date: Number of days after the start date (0 is the start date itself).
    start: Optional date/datetime to count from. Defaults to the
      module-level ``STARTDATE``.

  Returns:
    The resulting date as ``M/D/YY`` with no leading zero on the month or
    the day, e.g. ``"1/22/20"`` or ``"12/2/20"``.
  """
  if start is None:
    start = STARTDATE
  day = start + datetime.timedelta(days=date)
  # Assemble the label from the numeric fields instead of strftime("%x"):
  # "%x" follows the process locale, so its field order and padding are not
  # guaranteed to be M/D/YY and the label could fail to match a column name.
  return '{}/{}/{:02d}'.format(day.month, day.day, day.year % 100)

def get_styler(data_series):
    """Return a style callback that colours a feature by its looked-up value.

    ``data_series`` is queried with ``.get(feature['id'])``. A feature with no
    entry (or whose value is -1) is filled black; any other value is mapped
    through the colormap from ``get_colorscale()``. Outlines are always black.
    """
    to_colour = get_colorscale()

    def style(feature):
      cases = data_series.get(feature['id'])
      # None (no entry) and -1 both mean "no data" and are filled black.
      no_data = cases is None or cases == -1
      if no_data:
        fill = 'black'
      else:
        fill = to_colour(cases)
      return {'color': 'black', 'fillColor': fill}

    return style

def get_choropleth(date):
  """Build a map with a county choropleth layer for one day.

  Args:
    date: Day offset passed to ``get_date_string`` to select the column of
      ``caseData`` that is displayed.

  Returns:
    A ``(map, layer)`` tuple: the ``ipyleaflet.Map`` and the
    ``ipyleaflet.GeoJSON`` layer that was added to it. The layer is returned
    so callers can replace its ``style_callback`` later.
  """
  date_string = get_date_string(date)
  # Values for the selected day, keyed by FIPS so the style callback can look
  # them up by feature id.
  data_series = caseData.set_index('FIPS')[date_string]

  m = ipyleaflet.Map(center=[48, -102], zoom=4)

  choropleth_layer = ipyleaflet.GeoJSON(
      data=counties,
      style={
          # This key was previously misspelled 'opcaity', so the intended
          # stroke opacity was never actually passed under a recognised name.
          'opacity': 1,
          'fillOpacity': .7,
          'weight': 1
      },
      style_callback=get_styler(data_series)
  )
  m.add_layer(choropleth_layer)

  return m, choropleth_layer

def get_map_updater(layer):
  """Return a callback that restyles ``layer`` for a given day offset.

  The returned ``update(date)`` looks up that day's column of ``caseData``
  (indexed by FIPS) and assigns a new style callback built from it to
  ``layer.style_callback``.
  """
  def update(date):
    column = get_date_string(date)
    values_by_fips = caseData.set_index('FIPS')[column]
    layer.style_callback = get_styler(values_by_fips)

  return update

def get_map():
  """Return the ``(map, layer)`` pair that ``get_choropleth`` builds for day offset 0."""
  return get_choropleth(0)

# Create the day-0 map, then drive the layer's styling from a day-offset slider.
m, layer = get_map()
update = get_map_updater(layer)
date_slider = widgets.IntSlider(min=0, max=NUMDAYS, step=1, value=0)
interact(update, date=date_slider)
# The map is the cell's final expression so the notebook displays it.
m

ModuleNotFoundError: No module named 'ipyleaflet'