In [None]:
# Two-letter abbreviation -> full name lookup used when splitting "County, ST"
# labels. Besides the states it carries DC, several territories (AS, GU, MP,
# PR, VI) and an 'NA' -> 'National' entry.
states_abbreviations_mapper = {
    'AK': 'Alaska',
    'AL': 'Alabama',
    'AR': 'Arkansas',
    'AS': 'American Samoa',
    'AZ': 'Arizona',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DC': 'District of Columbia',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'GU': 'Guam',
    'HI': 'Hawaii',
    'IA': 'Iowa',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts',
    'MD': 'Maryland',
    'ME': 'Maine',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MO': 'Missouri',
    'MP': 'Northern Mariana Islands',
    'MS': 'Mississippi',
    'MT': 'Montana',
    'NA': 'National',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NV': 'Nevada',
    'NY': 'New York',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'PR': 'Puerto Rico',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia',
    'VI': 'Virgin Islands',
    'VT': 'Vermont',
    'WA': 'Washington',
    'WI': 'Wisconsin',
    'WV': 'West Virginia',
    'WY': 'Wyoming',
}

In [295]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go

# In-place edits on slices are intentional in this notebook, so silence pandas'
# SettingWithCopyWarning. The class has lived in different modules across pandas
# releases (and may be absent altogether), so look it up defensively instead of
# hard-coding a single attribute path that raises AttributeError elsewhere.
_setting_with_copy_warning = getattr(pd.errors, "SettingWithCopyWarning", None)
if _setting_with_copy_warning is None:
    _setting_with_copy_warning = getattr(pd.core.common, "SettingWithCopyWarning", None)
if _setting_with_copy_warning is not None:
    warnings.filterwarnings("ignore", category=_setting_with_copy_warning)

# Location of the time-series CSVs; each TS_* path is appended to BASE_URL.
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
TS_CONFIRMED_CASES = 'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
TS_DEATH_CASES = 'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
TS_RECOVERED_CASES = 'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'

# Mapbox credentials: prefer the MAPBOX_ACCESS_TOKEN environment variable.
# NOTE(review): the literal fallback is kept only so existing runs keep working;
# a token committed to a notebook should be rotated and this default removed.
mapbox_access_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoiandpbGxpczA3MjAiLCJhIjoiY2s4MHhoYmF6MDFoODNpcnVyNGR2bWk1bSJ9.YNwklD1Aa6DihVblHr3GVg")
mapbox_style = 'dark'

def get_time_series(url, outcome):
    """Download one time-series CSV and reshape it to one row per location and date.

    Parameters
    ----------
    url : str
        Path of the CSV, appended to ``BASE_URL``.
    outcome : str
        Name given to the value column produced by the reshape.

    Returns
    -------
    pandas.DataFrame
        ``Province/State``, ``Country/Region``, ``Lat``, ``Long``, ``Date``
        (parsed with ``%m/%d/%y``), the ``outcome`` column, and ``Date_text``
        holding the original date header string. Every CSV column other than
        the four location columns is assumed to be a date header.
    """
    location_keys = ['Province/State', 'Country/Region', 'Lat', 'Long']

    # Locations become columns, date headers become the row index.
    wide = pd.read_csv(BASE_URL + url).set_index(location_keys).T

    # Stack back to long form: one row per (location, date header).
    long_form = wide.unstack(fill_value="").reset_index()
    long_form = long_form.rename(columns={'level_4': 'Date', 0: outcome})

    # Keep the raw header text before converting the column to datetimes.
    long_form['Date_text'] = long_form['Date']
    long_form['Date'] = pd.to_datetime(long_form['Date'], format='%m/%d/%y')
    return long_form

def parse_into_city(df, abbreviations=None):
    """Split ``"County, ST"`` labels in ``Province/State`` into county and state name.

    A label is treated as ``"County, ST"`` only when it is a string with exactly
    one ``', '`` separator and its suffix (dots removed, whitespace stripped) is
    a key of ``abbreviations``. For such rows ``County`` receives the part before
    the comma and ``Province/State`` is replaced by the mapped full name. Every
    other row (plain names, empty strings, non-strings, unknown suffixes) gets
    ``County == ""`` and keeps its ``Province/State`` value untouched, instead
    of raising ``AttributeError``/``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Province/State`` column. It is modified in place.
    abbreviations : dict, optional
        Abbreviation -> full-name lookup. Defaults to the notebook-level
        ``states_abbreviations_mapper``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``County`` added and ``Province/State`` updated.
    """
    if abbreviations is None:
        abbreviations = states_abbreviations_mapper

    def _split_label(label):
        # Check the type first: only strings can be split.
        if not isinstance(label, str):
            return "", label
        parts = label.split(', ')
        if len(parts) != 2:
            return "", label
        state_name = abbreviations.get(parts[1].replace('.', '').strip())
        if state_name is None:
            # Suffix is not a known abbreviation, so this is not a county label.
            return "", label
        return parts[0], state_name

    pairs = [_split_label(label) for label in df['Province/State']]
    df['County'] = pd.Series(
        [county for county, _ in pairs], index=df.index, dtype=object)
    df['Province/State'] = pd.Series(
        [state for _, state in pairs], index=df.index, dtype=object)
    return df


# Download each outcome series, blank out missing values, then split
# "County, ST" labels into County / Province/State.
confirmed_cases = parse_into_city(
    get_time_series(TS_CONFIRMED_CASES, 'Cases').fillna(''))
confirmed_deaths = parse_into_city(
    get_time_series(TS_DEATH_CASES, 'Deaths').fillna(''))
confirmed_recovery = parse_into_city(
    get_time_series(TS_RECOVERED_CASES, 'Recovery').fillna(''))

# Join the three outcomes on the shared location/date keys (default inner join).
_merge_keys = ["Province/State", "Country/Region", "Lat", "Long",
               "Date", "Date_text", "County"]
merged_df = confirmed_cases.merge(confirmed_deaths, on=_merge_keys)
merged_df = merged_df.merge(confirmed_recovery, on=_merge_keys)

# One row per distinct date, with its mm/dd/yy label.
date_mapper = pd.DataFrame(merged_df['Date'].unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')

# For each country, take the Lat/Long of the row that sorts last by 'Cases'
# (i.e. a highest-case row) and use it as that country's marker position.
_top_location = merged_df.groupby('Country/Region').apply(
    lambda rows: rows.sort_values('Cases')[::-1].iloc[0][['Lat', 'Long']])
centroid_country_mapper = {
    country: {'Long': location['Long'], 'Lat': location['Lat']}
    for country, location in _top_location.iterrows()
}

In [296]:
def per_x_cases(grouper, data=None, dates=None):
    """Compute day-over-day changes in cases, deaths and recoveries per group.

    Parameters
    ----------
    grouper : str
        Column to aggregate by (e.g. ``'Country/Region'``). Rows whose value in
        this column equals ``""`` are ignored.
    data : pandas.DataFrame, optional
        Frame with ``grouper``, ``Date``, ``Cases``, ``Deaths`` and ``Recovery``
        columns. Defaults to the notebook-level ``merged_df``.
    dates : sequence of dates, optional
        Dates to report, in order. Defaults to ``date_mapper['Date']``.

    Returns
    -------
    pandas.DataFrame
        One row per (group, date) with columns ``grouper``, ``Date``,
        ``New Cases``, ``New Deaths`` and ``New Recovery``. Groups appear in
        order of first appearance; a date with no rows for a group counts as 0.

    Notes
    -----
    On the first date ``New Cases`` is the total for that date while
    ``New Deaths`` and ``New Recovery`` are reported as 0.
    NOTE(review): that asymmetry is preserved from the original; confirm it is intended.
    """
    if data is None:
        data = merged_df
    if dates is None:
        dates = date_mapper['Date']
    dates = list(dates)
    if not dates:
        return pd.DataFrame([])

    # Sum only the numeric outcome columns. Summing whole rows also tries to
    # add up the datetime 'Date' column and string columns, which raises on
    # recent pandas versions.
    value_columns = ['Cases', 'Deaths', 'Recovery']
    sub_group = data[data[grouper] != ""]

    records = []
    for group in sub_group[grouper].unique():
        rows = sub_group[sub_group[grouper] == group]
        # Totals per date, computed once and reused for both sides of each difference.
        totals = [rows.loc[rows['Date'] == date, value_columns].sum()
                  for date in dates]

        records.append(
            {grouper: group, 'Date': dates[0],
             'New Cases': totals[0]['Cases'],
             'New Deaths': 0,
             'New Recovery': 0})
        for previous, current, date in zip(totals, totals[1:], dates[1:]):
            change = current - previous
            records.append(
                {grouper: group, 'Date': date,
                 'New Cases': change['Cases'],
                 'New Deaths': change['Deaths'],
                 'New Recovery': change['Recovery']})
    return pd.DataFrame(records)


# Daily-change tables at each aggregation level, computed in the order
# state, country, county.
per_day_stats_by_state, per_day_stats_by_country, per_day_stats_by_county = [
    per_x_cases(level)
    for level in ('Province/State', 'Country/Region', 'County')
]

In [298]:
# Pick the date at position 53 of date_mapper and group that day's rows by country.
official_date = date_mapper['Date'].iloc[53]
_rows_on_date = merged_df.loc[merged_df['Date'] == official_date]
sub_df = _rows_on_date.groupby('Country/Region')

In [302]:
# Sum only the numeric columns per country. Summing every column would also
# try to add the datetime 'Date' column and string columns, which fails on
# recent pandas versions.
# NOTE: this rebinds sub_df from a groupby object to a DataFrame, so the cell
# cannot be re-run without first re-running the cell that creates the groupby.
sub_df = sub_df[['Lat', 'Long', 'Cases', 'Deaths', 'Recovery']].sum().reset_index()

In [304]:
# Display the per-country table held in sub_df.
sub_df

Unnamed: 0,Country/Region,Lat,Long,Cases,Deaths,Recovery
0,Afghanistan,33.0000,65.0000,16,0,0
1,Albania,41.1533,20.1683,42,1,0
2,Algeria,28.0339,1.6596,48,4,12
3,Andorra,42.5063,1.5218,1,0,1
4,Angola,-11.2027,17.8739,0,0,0
...,...,...,...,...,...,...
166,Uzbekistan,41.3775,64.5853,1,0,0
167,Venezuela,6.4238,-66.5897,10,0,0
168,Vietnam,16.0000,108.0000,56,0,16
169,Zambia,-15.4167,28.2833,0,0,0


In [317]:
# Reload the merged dataset from the hosted snapshot; this replaces any
# merged_df already in memory. parse_dates keeps 'Date' as datetime64 so that
# equality checks against the Timestamps in date_mapper still match rows
# (a plain read leaves the column as strings).
merged_df = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/Merged_df.csv.gz',
    index_col=0, parse_dates=['Date'])

In [321]:
# Preview the 'Date' column converted to datetimes. The converted Series is
# only displayed here; it is not assigned back to merged_df.
pd.to_datetime(merged_df['Date'])

0       2020-01-22
1       2020-01-23
2       2020-01-24
3       2020-01-25
4       2020-01-26
           ...    
29702   2020-03-18
29703   2020-03-19
29704   2020-03-20
29705   2020-03-21
29706   2020-03-22
Name: Date, Length: 29707, dtype: datetime64[ns]

In [316]:
import pandas as pd
import warnings
import plotly.graph_objects as go
import numpy as np



def get_graph_state(date_int):
    """Build the world bubble map of cases, deaths and recoveries for one date.

    Parameters
    ----------
    date_int : int
        Positional row index into ``date_mapper`` selecting the date to plot.

    Returns
    -------
    plotly.graph_objects.Figure
        Mapbox figure with three marker traces (cases in yellow, deaths in red,
        recoveries in green), one marker per country.

    Notes
    -----
    Reads the notebook-level ``date_mapper``, ``merged_df``,
    ``centroid_country_mapper``, ``mapbox_access_token`` and ``mapbox_style``.
    Marker positions get a small random jitter, so repeated calls differ slightly.
    """
    # Fixed initial viewport.
    # TODO: eventually accept the current figure and reuse its
    # layout.mapbox lat/lon/zoom so a redraw keeps the user's view.
    lat = 36.017295
    lon = -39.471056
    zoom = 1.5

    official_date = date_mapper.iloc[date_int]['Date']
    print(date_int, official_date)
    date_label = official_date.strftime('%m/%d/%y')

    # Per-country totals for the selected date. Only the outcome columns are
    # summed: summing every column would also try to add the datetime 'Date'
    # column and string columns, which fails on recent pandas versions.
    value_columns = ['Cases', 'Deaths', 'Recovery']
    sub_df = (merged_df.loc[merged_df['Date'] == official_date]
              .groupby('Country/Region')[value_columns]
              .sum()
              .reset_index())
    sub_df['Lat'] = sub_df['Country/Region'].apply(
        lambda country: centroid_country_mapper[country]['Lat'])
    sub_df['Long'] = sub_df['Country/Region'].apply(
        lambda country: centroid_country_mapper[country]['Long'])

    # Shared marker scale: the largest per-country, per-date case total maps to
    # a marker of size 20. Computed from merged_df; the previous code referenced
    # a `world_confirmed_cases` frame that is not defined in this notebook.
    # NOTE(review): confirm merged_df is the intended source for this scale.
    max_cases = merged_df.groupby(['Date', 'Country/Region'])['Cases'].sum().max()
    sizeref = 2. * max_cases / (20 ** 2)

    # (value column, hover label, marker colour) for each trace, in draw order.
    trace_specs = (
        ('Cases', 'Total Cases', 'yellow'),
        ('Deaths', 'Total Deaths', 'red'),
        # The recovery hover text previously said "Total Cases" (copy-paste slip).
        ('Recovery', 'Total Recovered', 'green'),
    )

    # Has to take in a figure state eventually
    fig = go.Figure()
    for value_column, label, color in trace_specs:
        hover_text = (sub_df['Country/Region']
                      + '<br>{} at {} : '.format(label, date_label)
                      + sub_df[value_column].astype(str))
        fig.add_trace(go.Scattermapbox(
            # Jitter keeps the three markers of a country from overlapping exactly.
            lon=sub_df['Long'] + np.random.normal(0, .02, len(sub_df)),
            lat=sub_df['Lat'] + np.random.normal(0, .02, len(sub_df)),
            customdata=sub_df['Country/Region'],
            textposition='top right',
            text=hover_text,
            hoverinfo='text',
            mode='markers',
            marker=dict(
                sizeref=sizeref,
                sizemin=3,
                size=sub_df[value_column],
                color=color)))

    layout = dict(
        title_text='The Corona is Coming',
        autosize=True,
        showlegend=False,
        mapbox=dict(
            accesstoken=mapbox_access_token,
            style=mapbox_style,
            zoom=zoom,
            center=dict(lat=lat, lon=lon)
        ),
        hovermode="closest",
        margin=dict(r=0, l=0, t=0, b=0),
        dragmode="pan",
    )

    fig.update_layout(layout)
    return fig

# get_graph_state looks the date up positionally (.iloc), so pass the last
# position rather than the last index label.
fig = get_graph_state(len(date_mapper) - 1)
fig

60 2020-03-22 00:00:00


In [None]:
# Load the country-centroid table; both names refer to the same DataFrame.
centroids = pd.read_csv('country_centroids_az8.csv')
centroid_country = centroids
centroid = centroid_country[['admin', 'Longitude', 'Latitude']]

# 'admin' value -> {'Lat': ..., 'Long': ...}
centroid_hash = {
    admin: {'Lat': latitude, 'Long': longitude}
    for admin, longitude, latitude in centroid.itertuples(index=False, name=None)
}

In [None]:
# Look up the centroid latitude for every country name, after rewriting
# 'Bahamas, The' to the spelling used as a centroid_hash key.
# NOTE(review): `world_confirmed` is not defined in the visible cells — confirm where it comes from.
[
    centroid_hash[country]['Lat']
    for country in world_confirmed['Country/Region'].str.replace('Bahamas, The','The Bahamas')
]

In [None]:
# List the names available as keys in centroid_hash.
centroid_hash.keys()