In [1]:
import pandas as pd
import numpy as np
import math
import glob
import re
from datetime import date, timedelta
import io
import requests

# Standard plotly imports
import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [2]:
path = 'data'
all_files = glob.glob(path + "/*.csv")

files = []

valid = False

for filename in all_files:
    file = re.search(r'([0-9]{2}\-[0-9]{2}\-[0-9]{4})', filename)[0]
    if file == '03-22-2020':
        valid = True
    if valid:
        print(file)
        df = pd.read_csv(filename, index_col=None, header=0)
        df['date'] = pd.to_datetime(file)
        files.append(df)
df = pd.concat(files, axis=0, ignore_index=True, sort=False)
df = df[df['Country_Region'] == 'US']

03-22-2020
03-23-2020
03-24-2020
03-25-2020
03-26-2020


In [3]:
file_date = date(2020, 3, 22)
dates = []

while file_date <= date.today():
    dates.append(file_date)
    file_date += timedelta(days=1)

In [4]:
states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida',
    'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
    'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
    'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
    'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
    'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']
df = df[df['Province_State'].isin(states)]

In [5]:
def plot_map(data, date):

    df = data[data['date'] == date].groupby('Combined_Key').agg({'Confirmed': 'sum',
                                                                        'Long_': 'first',
                                                                        'Lat': 'first',
                                                                        'Province_State': 'first',
                                                                        'Admin2': 'first'})
    df['log_confirmed'] = np.log(df['Confirmed'])
    
    if len(data['date'].unique()) >= 7:
        week = pd.to_datetime(date) - timedelta(7)
        df['share_of_last_week'] = ((data[data['date'] == date].groupby('Admin2')['Confirmed'].sum() -
                                    data[data['date'] == week].groupby('Admin2')['Confirmed'].sum()) /
                                    data[data['date'] == date].groupby('Admin2')['Confirmed'].sum()) * 100
    else:
        df['share_of_last_week'] = [100] * len(df)
    
    df['percentage'] = df['share_of_last_week'].fillna(0).apply(lambda x: '{:.1f}'.format(x))


    fig = go.Figure(data=go.Scattergeo(
            lon = df['Long_'],
            lat = df['Lat'],
            text = df['Admin2'] + ' County, ' + df['Province_State'] + ': ' +\
                        ['{:,}'.format(i) for i in df['Confirmed']] +\
                        ' total cases, ' + df['percentage'] +\
                        '% from previous week',
            hoverinfo='text',
            mode = 'markers',
#             marker_size = np.sqrt(df['Confirmed']),
            marker = dict(reversescale = False,
                          autocolorscale = False,
                          symbol = 'circle',
                          line = dict(width=1, color='rgba(102, 102, 102)'),
                          colorscale = 'Reds',
                          cmin = 0,
                          color = df['share_of_last_week'],
                          cmax = 100,
                          size = np.sqrt(df['Confirmed']),
#                           size = [math.log(val + 1, 20) for val in df['Confirmed']],
#                           sizeref = 1/5,
                          colorbar_title="Confirmed Cases")))

    fig.update_layout(title = 'Number of cumulative confirmed cases by county, {}'.format(date),
                      geo=dict(scope='usa',
                               projection_type='albers usa',
                               showland = True,
                               landcolor = "rgb(100, 125, 100)",
                               showocean = True,
                               oceancolor = "rgb(150, 150, 250)",
                               showcountries=True,
                               showsubunits=True,
                               showlakes=True,))
    fig.show()

In [6]:
for date in dates:
    plot_map(df, date)


Comparing Series of datetimes with 'datetime.date'.  Currently, the
'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and 'the values will not compare equal to the
'datetime.date'. To retain the current behavior, convert the
'datetime.date' to a datetime with 'pd.Timestamp'.


divide by zero encountered in log

