In [3]:
import sys
import warnings
import numpy as np
import pandas as pd
import argparse
import pprint
import plotly.graph_objects as go
import plotly.express as p

In [47]:
# Lookup table from two-letter postal abbreviation to full name.
# Besides the states it carries DC, several territories (AS, GU, MP, PR, VI)
# and a catch-all "NA" entry that maps to "National".
states_abbreviations_mapper = {
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AS": "American Samoa",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DC": "District of Columbia",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "GU": "Guam",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MP": "Northern Mariana Islands",
    "MS": "Mississippi",
    "MT": "Montana",
    "NA": "National",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VI": "Virgin Islands",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}


# Mapbox configuration.
# Alternative custom style, kept for reference:
# mapbox_style = "mapbox://styles/plotlymapbox/cjvprkf3t1kns1cqjxuxmwixz"
mapbox_style = 'dark'
# The access token is the first line of the local `.mapbox_token` file.
# The context manager closes the file handle (the bare open() never did), and
# strip() removes the trailing newline that readlines()[0] left on the token.
with open('.mapbox_token') as token_file:
    mapbox_access_token = token_file.readline().strip()

# Import from S3: one gzipped CSV per aggregation level, all read with the
# first column as the index.
_DATASET_URLS = {
    'merged': 'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/Merged_df.csv.gz',
    'state': 'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/per_day_stats_by_state.csv.gz',
    'country': 'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/per_day_stats_by_country.csv.gz',
    'county': 'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/per_day_stats_by_county.csv.gz',
}

# Only the merged frame has its missing values replaced by empty strings;
# later code compares columns such as 'County' against ''.
merged_df = pd.read_csv(_DATASET_URLS['merged'], index_col=0)
merged_df = merged_df.fillna('')

per_day_stats_by_state = pd.read_csv(_DATASET_URLS['state'], index_col=0)
per_day_stats_by_country = pd.read_csv(_DATASET_URLS['country'], index_col=0)
per_day_stats_by_county = pd.read_csv(_DATASET_URLS['county'], index_col=0)


# Parse the 'Date' column of every frame into datetimes.
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# The per-day frames get the same conversion, followed by fillna('').
# NOTE(review): filling a datetime column with '' looks unintended (it only
# has an effect when NaT values are present) — confirm before relying on it.
for _stats_frame in (per_day_stats_by_state,
                     per_day_stats_by_country,
                     per_day_stats_by_county):
    _stats_frame['Date'] = pd.to_datetime(_stats_frame['Date']).fillna('')

# Map an integer position to each distinct date found in merged_df.
# The dates are stripped of NaT, de-duplicated and sorted explicitly so that
# position 0 is the earliest date and the last position is the latest one,
# independent of the row order of the source CSV.
date_mapper = (
    merged_df['Date']
    .dropna()
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .to_frame(name='Date')
)
# Human-readable label (mm/dd/yy) for each date.
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')
# First and last valid positions in date_mapper.
min_date = min(date_mapper.index)
max_date = max(date_mapper.index)

def _coords_of_largest_case_row(country_rows):
    """Return the 'Lat'/'Long' of the row with the highest 'Cases' value."""
    # Ascending sort, so the last row carries the largest case count.
    return country_rows.sort_values('Cases').iloc[-1][['Lat', 'Long']]


# One representative coordinate pair per country: the location of the row
# with the most cases, used to place the country-level marker.
_country_coords = merged_df.groupby('Country/Region').apply(
    _coords_of_largest_case_row)
centroid_country_mapper = {
    country: {'Long': coords['Long'], 'Lat': coords['Lat']}
    for country, coords in _country_coords.iterrows()
}


In [55]:
# 'Cases' values of the rows that carry a county name.
merged_df.loc[merged_df['County'] != "", 'Cases']

2370

In [123]:
def get_graph_state(date_int, group, metrics, figure):
    """Build the bubble-map figure for one date at one aggregation level.

    Parameters
    ----------
    date_int : int
        Positional index into ``date_mapper``; selects the date to plot.
    group : str
        Column of ``merged_df`` to aggregate by. ``'Country/Region'`` and
        ``'Province/State'`` have dedicated handling; any other value is
        aggregated over the rows that have a non-empty ``'County'``.
    metrics : container of str
        Which traces to draw: any of ``'cases'``, ``'deaths'``,
        ``'recovery'``, ``'active'``.
    figure : mapping or None
        A previously rendered figure. When it has a ``"layout"`` entry, its
        mapbox center and zoom are reused; otherwise a default view is used.

    Returns
    -------
    plotly.graph_objects.Figure
        A new figure with one ``Scattermapbox`` trace per requested metric.
    """
    # Default view. It is kept unless an already-drawn figure provides its
    # own view; this also covers a truthy `figure` without a "layout" entry,
    # which previously left lat/lon/zoom unbound.
    lat, lon, zoom = 15.74, -1.4, 1.6
    if figure and "layout" in figure:
        mapbox_view = figure["layout"]["mapbox"]
        lat = mapbox_view['center']["lat"]
        lon = mapbox_view['center']["lon"]
        zoom = mapbox_view["zoom"]

    print(group, metrics)

    official_date = date_mapper.iloc[date_int]['Date']
    print(date_int, official_date)

    on_date = merged_df['Date'] == official_date

    # Largest per-(date, group) case total over the whole data set, used to
    # scale marker sizes consistently across dates. Only 'Cases' is summed
    # instead of aggregating every column and then picking 'Cases'.
    peak_cases = merged_df.groupby(['Date', group])['Cases'].sum().max()

    # NOTE(review): groupby(...).sum() below also adds up 'Lat'/'Long'. In the
    # 'Province/State' and fallback branches a group with several rows on one
    # date therefore gets a summed coordinate, and states that only appear in
    # county rows end up at 0/0 after fillna(0) — confirm this is intended.
    if group == 'Country/Region':
        sub_df = merged_df[on_date & (merged_df[group] != "")].groupby(
            group).sum().reset_index()
        # Replace the summed coordinates with the per-country lookup.
        sub_df['Lat'] = sub_df['Country/Region'].apply(
            lambda name: centroid_country_mapper[name]['Lat'])
        sub_df['Long'] = sub_df['Country/Region'].apply(
            lambda name: centroid_country_mapper[name]['Long'])
        sizeref = 2. * peak_cases / (20 ** 2)

    elif group == 'Province/State':
        # Rows reported at state level (no county) ...
        sub_df = merged_df[
            on_date & (merged_df[group] != "") & (merged_df['County'] == '')
        ].groupby(group).sum().reset_index()
        # ... plus rows reported at county level, rolled up to their state.
        county_df = merged_df[
            on_date & (merged_df['County'] != '')
        ].groupby(group).sum().reset_index()
        sub_df = sub_df.merge(
            county_df, on=['Province/State'],
            suffixes=('', '_county'), how='outer').fillna(0)
        # Add the county totals to the state totals, column by column.
        # (Recovery used to be computed from Deaths by mistake.)
        for metric_col in ('Cases', 'Deaths', 'Recovery'):
            sub_df[metric_col] = (
                sub_df[metric_col] + sub_df[metric_col + '_county'])
        sizeref = 2. * peak_cases / (50 ** 2)

    else:
        sub_df = merged_df[
            on_date & (merged_df['County'] != '')
        ].groupby(group).sum().reset_index()
        sizeref = 2. * peak_cases / (50 ** 2)

    sub_df['Active'] = sub_df['Cases'] - sub_df['Deaths'] - sub_df['Recovery']

    # (metric key, value column, hover-text column, hover-text template, color)
    trace_specs = (
        ('cases', 'Cases', 'Text_Cases',
         '<br>Total Cases at {} : ', 'yellow'),
        ('deaths', 'Deaths', 'Text_Death',
         '<br>Total Deaths at {} : ', 'red'),
        ('recovery', 'Recovery', 'Text_Recover',
         '<br>Total Recoveries at {} : ', 'green'),
        ('active', 'Active', 'Text_Active',
         '<br>Total Active at {} : ', 'orange'),
    )
    date_label = official_date.strftime('%m/%d/%y')

    # Has to take in a figure state eventually
    fig = go.Figure()

    for metric, value_col, text_col, template, color in trace_specs:
        # Hover text is built for every metric, drawn or not.
        sub_df[text_col] = (
            sub_df[group]
            + template.format(date_label)
            + sub_df[value_col].apply(lambda x: "{:,}".format(int(x)))
        )
        if metric not in metrics:
            continue
        # A little random jitter keeps the different metric markers of one
        # location from sitting exactly on top of each other.
        fig.add_trace(go.Scattermapbox(
            lon=sub_df['Long'] + np.random.normal(0, .02, len(sub_df)),
            lat=sub_df['Lat'] + np.random.normal(0, .02, len(sub_df)),
            customdata=sub_df[group],
            textposition='top right',
            text=sub_df[text_col],
            hoverinfo='text',
            mode='markers',
            marker=dict(
                sizeref=sizeref,
                sizemin=3,
                size=sub_df[value_col],
                color=color)))

    layout = dict(
        title_text='The Corona is Coming',
        autosize=True,
        showlegend=False,
        mapbox=dict(
            accesstoken=mapbox_access_token,
            style=mapbox_style,
            zoom=zoom,
            center=dict(lat=lat, lon=lon)
        ),
        hovermode="closest",
        margin=dict(r=0, l=0, t=0, b=0),
        dragmode="pan",
    )

    fig.update_layout(layout)
    return fig

# Draw the state-level map of active cases for the last position in
# date_mapper, starting from the default view (no previous figure).
fig = get_graph_state(
    date_int=max_date,
    group='Province/State',
    metrics=['active'],
    figure=None,
)
fig

Province/State ['active']
60 2020-03-22 00:00:00


In [72]:
# Join the state-level rows with the county-level totals of the same state
# and fold the county numbers into the state numbers.
# NOTE(review): `state_df` and `county_df` are not defined at module level in
# any visible cell — presumably leftover kernel state; define them explicitly
# so this cell survives Restart & Run All.
sub = state_df.merge(
    county_df, on=['Province/State'], suffixes=('', '_county'))
# Each metric is combined with its own '_county' counterpart (Recovery used
# to be computed from Deaths by mistake).
for _metric_col in ('Cases', 'Deaths', 'Recovery'):
    sub[_metric_col] = sub[_metric_col] + sub[_metric_col + '_county']

In [71]:
# Display the merged frame `sub` (state columns plus their '_county' counterparts).
sub

Unnamed: 0,Province/State,Lat,Long,Cases,Deaths,Recovery,Lat_county,Long_county,Cases_county,Deaths_county,Recovery_county
0,Arizona,33.7298,-111.4312,152,2,0,98.1655,-335.3797,0,0,0
1,California,36.1162,-119.6816,1642,30,0,968.5389,-3143.5417,0,0,0
2,Colorado,39.0598,-105.3111,476,6,0,355.5969,-949.4127,0,0,0
3,Connecticut,41.5978,-72.7554,223,5,0,83.0426,-146.6474,0,0,0
4,Delaware,39.3185,-75.5071,47,0,0,39.5393,-75.6674,0,0,0
5,District of Columbia,38.8974,-77.0268,102,2,0,38.9072,-77.0369,0,0,0
6,Florida,27.7663,-81.6868,830,13,0,368.2484,-1074.5497,0,0,0
7,Georgia,33.0406,-83.6431,600,23,0,305.4701,-756.1757,0,0,0
8,Hawaii,21.0943,-157.4983,48,0,0,21.307,-157.8584,0,0,0
9,Illinois,40.3495,-88.9861,1049,9,0,168.4279,-352.1932,0,0,0


In [63]:
# Stand-alone preview of the 'cases' bubble layer at the default view.
# NOTE(review): `sub_df` is not assigned at module level in any visible cell
# — presumably leftover kernel state; confirm where it comes from.
group = 'Province/State'
lat, lon, zoom = 15.74, -1.4, 1.6

# Marker scale: twice the largest per-(date, group) case total over 20**2.
sizeref = 2. * merged_df.groupby(['Date', group]).sum().max()['Cases'] / (20 ** 2)

# Longitude jitter is drawn before latitude jitter, each with one value per row.
cases_trace = go.Scattermapbox(
    lon=sub_df['Long'] + np.random.normal(0, .02, len(sub_df['Long'])),
    lat=sub_df['Lat'] + np.random.normal(0, .02, len(sub_df['Lat'])),
    customdata=sub_df[group],
    textposition='top right',
    text=sub_df['Text_Cases'],
    hoverinfo='text',
    mode='markers',
    marker={
        'sizeref': sizeref,
        'sizemin': 3,
        'size': sub_df['Cases'],
        'color': 'yellow',
    },
)

# hovermode="closest" is intentionally left out of this layout.
layout = {
    'title_text': 'The Corona is Coming',
    'autosize': True,
    'showlegend': False,
    'mapbox': {
        'accesstoken': mapbox_access_token,
        'style': mapbox_style,
        'zoom': zoom,
        'center': {'lat': lat, 'lon': lon},
    },
    'margin': {'r': 0, 'l': 0, 't': 0, 'b': 0},
    'dragmode': "pan",
}

fig = go.Figure()
fig.add_trace(cases_trace)
fig.update_layout(layout)
fig

In [27]:
# Dump every value of the 'Cases' column as a plain list for inspection.
# NOTE(review): `sub_df` is not assigned at module level in any visible cell
# — presumably leftover kernel state; confirm where it comes from.
list(sub_df['Cases'])

[6,
 1,
 29,
 990,
 12,
 12,
 2,
 1,
 437,
 0,
 64,
 340,
 1,
 2,
 576,
 101,
 22,
 1,
 6,
 827,
 742,
 10,
 9,
 76,
 4469,
 5,
 3,
 0,
 296,
 129,
 66,
 1,
 23,
 0,
 1,
 0,
 1356,
 252,
 146,
 168,
 4,
 318,
 482,
 1273,
 140,
 67790,
 1018,
 2,
 64,
 16,
 75,
 17,
 0,
 631,
 935,
 93,
 8,
 14,
 125,
 77,
 10,
 3,
 4,
 26,
 138,
 0,
 25,
 21,
 6,
 4,
 5,
 0,
 14,
 959,
 21,
 1,
 0,
 7,
 69,
 10,
 112,
 525,
 0,
 75,
 24,
 1,
 1,
 0,
 0,
 26,
 4,
 79,
 32,
 47,
 0,
 3,
 18,
 17,
 46,
 6,
 20,
 1,
 2,
 245,
 760,
 353,
 133,
 539,
 0,
 19,
 19,
 9,
 2,
 5,
 32,
 57,
 136,
 1,
 1140,
 10,
 5,
 49,
 1,
 41,
 572,
 0,
 17,
 27,
 2,
 76,
 174,
 1227]