In [50]:
%load_ext autoreload

from tools import write_pandas_to_gsheet



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [650]:
import warnings
import pandas as pd
import glob
import os
import logging
import boto3
from botocore.exceptions import ClientError
import COVID19Py
import pandas

# Cancel copy warnings of pandas
warnings.filterwarnings(
    "ignore", category=pd.core.common.SettingWithCopyWarning)


def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(
            file_name, bucket, object_name,  ExtraArgs={'ACL': 'public-read'})
    except ClientError as e:
        logging.error(e)
        return False
    return True


def parse_timeline_date_api_json(json,source):
    for_pandas = []
    
    status_dfs = []
    if 'timelines' not in json['locations'][0].keys():
        raise ReferenceError('This is not a timeline json')
    
    dates = []
    timestamp = []
    confirmed = []
    deaths = []
    location = []
    ids = []
    lats = []
    lons = []
    province = []
    country_code = []
    country = []
    county = []
    for location in json['locations']:
        d = list(location['timelines']['confirmed']['timeline'].keys())
        size_len = len(d)
        confirmed_ = list(location['timelines']['confirmed']['timeline'].values())
        deaths_ = list(location['timelines']['deaths']['timeline'].values())
        timestamp += pandas.to_datetime(d)
        assert(len(confirmed_) == len(deaths_) == size_len)
        ids += [location['id']]*size_len
        lats += [location['coordinates']['latitude']]*size_len
        lons +=  [location['coordinates']['longitude']]*size_len
        province += [location['province']]*size_len
        country_code += [location['country_code']]*size_len
        country +=  [location['country']]*size_len
        if 'county' in location.keys():
            county += [location['county']]*size_len
        else:
            county += ['']*size_len
        confirmed += confirmed_
        deaths += deaths_

    #print(len(lats),len(lons),len(timestamp))
    df = pandas.DataFrame({'id':ids,'lat':lats,'lon':lons,'Timestamp':timestamp,'Date':"",'province':province,'country_code':country_code,'country':country,'county':county,'confirmed':confirmed,'deaths':deaths})
    df['source'] = source
    df['Date'] = df['Timestamp'].dt.date
    df['Date'] = pandas.to_datetime(df['Date'])
    return df
                    

def parse_current_date_api_json(json, source):
    for_pandas = []
    confirmed = []
    deaths = []
    recovered = []
    for location in json['locations']:
        entry = {
            'id': location['id'],
            'lat': location['coordinates']['latitude'],
            'lon': location['coordinates']['longitude'],
            'Timestamp': pandas.to_datetime(location['last_updated']),
            'Date': "",
            'province': location['province'],
            'country_code': location['country_code'],
            'country': location['country'],
            'county':'',
            'confirmed': location['latest']['confirmed'],
            'deaths': location['latest']['deaths']}
        if 'county' in location.keys():
            entry['county'] = location['county']
        if 'state' in location.keys():
            entry['state'] = location['state']
        entry['source'] = source
        for_pandas.append(entry)
    df = pd.DataFrame(for_pandas)
    df['Date'] = df['Timestamp'].dt.date
    df['Date'] = pandas.to_datetime(df['Date'])
    return df

def per_x_cases(grouper, df):
    new_cases_by_country = []
    date_mapper = pd.DataFrame(
        df['Date'].unique(), columns=['Date']).sort_values('Date').reset_index(drop=True)
    dates = date_mapper['Date']
    #print(dates)
    sub_group = df[df[grouper] != ""]
    groupers = sub_group[grouper].unique()

    for group in groupers:
        sub_country = sub_group[sub_group[grouper] == group]
        new_cases_by_country.append(
            {grouper: group, 'Date': dates[0],
             'New Cases': sub_country.loc[sub_country['Date'] == dates[0], 'confirmed'].sum(),
             'New Deaths': 0})
        for date_index in range(1, len(dates)):
            current_date = dates[date_index]
            day_before = dates[date_index-1]
            # print(current_date,day_before)
            t_c, t_d = sub_country.loc[sub_country['Date']
                                            == current_date, :].sum()[['confirmed', 'deaths']]

            y_c, y_d = sub_country.loc[sub_country['Date']
                                            == day_before, :].sum()[['confirmed', 'deaths']]

            new_cases = t_c - y_c
            new_deaths = t_d - y_d
            if new_cases < 0:
                new_cases = 0
            if new_deaths < 0:
                new_deaths = 0
                print(current_date,day_before,t_c,y_c,group)
                #return sub_country
            new_cases_by_country.append(
                {grouper: group, 'Date': current_date, 'New Cases': new_cases,
                 'New Deaths': new_deaths})
    return pd.DataFrame(new_cases_by_country)



##Get current streaming API
covid19_csbs = COVID19Py.COVID19(data_source="csbs").getAll(timelines=True)
covid19_jhu = COVID19Py.COVID19(data_source="jhu").getAll(timelines=True)


##Gets current values
jhu_current = parse_current_date_api_json(covid19_jhu,'JHU')
csbs_current = parse_current_date_api_json(covid19_csbs,'CSBS')

##Gets timeline values
jhu_time = parse_timeline_date_api_json(covid19_jhu,'JHU')

##Get Date text
csbs_current['Date_text'] = csbs_current['Timestamp'].dt.strftime('%m/%d/%y')
jhu_time['Date_text'] = jhu_time['Timestamp'].dt.strftime('%m/%d/%y')
jhu_current['Date_text'] = jhu_current['Timestamp'].dt.strftime('%m/%d/%y')


##Lets get the current one
csbs_df_past = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/csbs_df.csv.gz', index_col=0)
csbs_df_past['Timestamp'] = pd.to_datetime(csbs_df_past['Date'])
csbs_df_past['Date'] = csbs_df_past['Timestamp'].dt.date
csbs_df_past['Date'] = pandas.to_datetime(csbs_df_past['Date'])
csbs_df_past['Date_text'] = csbs_df_past['Timestamp'].dt.strftime('%m/%d/%y')

#sort for columns
csbs_df_past = csbs_df_past[csbs_current.columns]

##This should be okay since we stored it this way
csbs_df_past = csbs_df_past.sort_values('confirmed')[::-1].groupby(['lat','lon','Date','province','country_code','country','county','source']).head(1)

##Before we merge lets write out todays date
today = date.today()
csbs_current.to_csv('Data/csbs_df_Archive_{}_{}_{}.csv.gz'.format(today.month,today.day,today.year))
print('Data/csbs_df_Archive_{}_{}_{}.csv.gz'.format(today.month,today.day,today.year))

##Lets add together the past and current
csbs_new = pd.concat([csbs_df_past,csbs_current])

##lets ensure that csbs_new has just one date
csbs_new = csbs_new.sort_values('confirmed').groupby(['Date','province','country_code','country','county','source']).head(1)
csbs_new['Timestamp'] = pandas.to_datetime(csbs_new['Timestamp'],utc=True)
csbs_new['Date_text'] = csbs_new['Timestamp'].dt.strftime('%m/%d/%y')

##Make a Date Mapper
date_mapper = pd.DataFrame(
    pd.to_datetime(jhu_time['Date']).unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')

# provence_df_per_day = per_x_cases('province', jhu_df_time, date_mapper)
# country_df_per_day = per_x_cases('country', jhu_df_time, date_mapper)
# print('Generated Data')

##We can combine both csbs and jhu for one awesome timescale
combined_time_scales = pandas.concat([jhu_time,csbs_new])

##Now get a per day basis
province_df = per_x_cases('province',jhu_time)
country_df = per_x_cases('country',jhu_time)

assert((jhu_time.groupby(['Date','country','province']).count() == 1).all().all())
assert((csbs_new.groupby(['Date','country','province','county']).count() == 1).all().all())
assert((jhu_current.groupby(['Date','country','province','county']).count() == 1).all().all())
assert((all_time_scales.groupby(['Date','country','province','county']).count() == 1).all().all())
assert((jhu_current.columns == csbs_new.columns).all())


##Lets Write everything out
jhu_current.to_csv('Data/jhu_df.csv.gz', compression='gzip')

##Write Out Time Course
jhu_time.to_csv('Data/jhu_df_time.csv.gz', compression='gzip')

##Write out Current CSV
csbs_new.to_csv('Data/csbs_df.csv.gz', compression='gzip')

##Write oute combined time course
combined_time_scales.to_csv('Data/combined_time_scales.csv.gz',compression='gzip')

##Province
province_jh.to_csv('Data/per_day_stats_by_state.csv.gz')

##Province per case by day
country_df.to_csv('Data/per_day_stats_by_country.csv.gz')


# provence_df_per_day.to_csv(
#     'Data/provence_df_per_day.csv.gz', compression='gzip')
# country_df_per_day.to_csv('Data/country_df_per_day.csv.gz', compression='gzip')


print('Syncing Data')
ea = ExtraArgs = {'ACL': 'public-read'}
gs = glob.glob('Data/*.csv.gz')
for file in gs:
    upload_file(file, 'jordansdatabucket', os.path.join(
        'covid19data', os.path.basename(file)))
    print("Uploaded " + os.path.basename(file))

Data/csbs_df_Archive_3_26_2020.csv.gz
2020-03-22 00:00:00 2020-03-21 00:00:00 219 181 Quebec
2020-03-16 00:00:00 2020-03-15 00:00:00 180 171 Iceland
2020-03-20 00:00:00 2020-03-19 00:00:00 409 330 Iceland
2020-03-21 00:00:00 2020-03-20 00:00:00 330 244 India
2020-03-21 00:00:00 2020-03-20 00:00:00 53 49 Kazakhstan
2020-03-19 00:00:00 2020-03-18 00:00:00 217 202 Philippines
2020-03-22 00:00:00 2020-03-21 00:00:00 185 178 Slovakia
2020-03-25 00:00:00 2020-03-24 00:00:00 -1 59 West Bank and Gaza
Syncing Data
Uploaded provence_df_per_day.csv.gz
Uploaded jhu_df_time.csv.gz
Uploaded country_df_per_day.csv.gz
Uploaded Merged_df.csv.gz
Uploaded csbs_df_Archive_03_25_2020.csv.gz
Uploaded per_day_stats_by_country.csv.gz
Uploaded jhu_df.csv.gz
Uploaded csbs_df.csv.gz
Uploaded csbs_df_Archive_3_26_2020.csv.gz
Uploaded per_day_stats_by_state.csv.gz
Uploaded per_day_stats_by_county.csv.gz
Uploaded combined_time_scales.csv.gz


In [651]:
jhu_df_time = jhu_time

date_mapper = pd.DataFrame(
    jhu_df_time['Date'].unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')

In [672]:
csbs_df = csbs_new

In [733]:
import sys
import warnings
import numpy as np
import pandas as pd
import argparse
import pprint
from datetime import date, timedelta
try:
    import dash
    import dash_core_components as dcc
    import dash_html_components as html
    from dash.exceptions import PreventUpdate
    import dash_table
    from dash.dependencies import Input, Output, State
    import plotly.graph_objects as go
    import plotly.express as px
    from plotly.subplots import make_subplots
except ImportError:
    sys.exit('Please install dash, e.g, pip install dash')

mapbox_style = 'dark'
mapbox_access_token = open('.mapbox_token').readlines()[0]

def get_graph_state(date_int, group, metrics, figure):
    
    if not figure:
        lat = 15.74
        lon = -1.4
        zoom = 1.6
    elif "layout" in figure:
        lat = figure["layout"]["mapbox"]['center']["lat"]
        lon = figure["layout"]["mapbox"]['center']["lon"]
        zoom = figure["layout"]["mapbox"]["zoom"]

    if 'cases' in metrics:
        normalizer = 'confirmed'
    else:
        normalizer = 'deaths'

    official_date = date_mapper.iloc[date_int]['Date']
    # print(date_int, official_date)

    if group == 'country':
        country_mapper = jhu_df_time[
            jhu_df_time['Date'] == official_date].groupby(
            'country', as_index=False).apply(lambda x: x.sort_values('confirmed')[::-1].head(1)[['country', 'lat', 'lon']])
        sub_df = jhu_df_time[
            jhu_df_time['Date'] == official_date].groupby('country', as_index=False).sum()[['country', 'confirmed', 'deaths']].merge(
                country_mapper, on=['country'])
        sizeref = 2. * jhu_df_time.groupby(
            ['Date', group]).sum().max()[normalizer] / (20 ** 2)

        print(sub_df.columns)
        

    elif group == 'province':
        sub_df = jhu_df_time[jhu_df_time[group] != '']
#         province_mapper = sub_df[
#             sub_df['Date'] == official_date].groupby(
#                 group, as_index=False).apply(lambda x: x.sort_values('confirmed')[::-1].head(1)[[group, 'lat', 'lon']])
        sub_df = sub_df[
            sub_df['Date'] == official_date]#.groupby(group, as_index=False)#.sum()[['province', 'confirmed', 'deaths']]#.merge(province_mapper, on=['province'])

    
#         # Now do it for the CSBS
        sub_df_csbs = csbs_df[csbs_df[group] != '']
        sub_df_csbs = sub_df_csbs[sub_df_csbs['Date'] == official_date]
        province_mapper = csbs_df.sort_values('confirmed')[::-1].groupby(
            group, as_index=False).head(1)[[group,  'lat', 'lon']]
        sub_df_csbs = sub_df_csbs.groupby(group, as_index=False).sum()[
            ['province', 'confirmed', 'deaths']].merge(province_mapper, on=['province'])
        sub_df = pd.concat([sub_df, sub_df_csbs])
        sizeref = 2. * jhu_df_time[jhu_df_time['province'] !=''].groupby(
            ['Date', group]).sum().max()[normalizer] / (20 ** 2)

    elif group == 'county':
        sub_df = csbs_df.groupby(
            ['county', 'state', 'lat', 'lon'], as_index=False).sum()
        sub_df.rename({'cases': 'confirmed'}, axis=1, inplace=1)
        sizeref = 2. * sub_df.max()[normalizer] / (20 ** 2)

    sub_df['Text_Cases'] = sub_df[group] + '<br>Total Cases at {} : '.format(
        official_date.strftime('%m/%d/%y')) + sub_df['confirmed'].apply(lambda x: "{:,}".format(int(x)))
    sub_df['Text_Death'] = sub_df[group] + '<br>Total Deaths at {} : '.format(
        official_date.strftime('%m/%d/%y')) + sub_df['deaths'].apply(lambda x: "{:,}".format(int(x)))

    sub_df.loc[sub_df['confirmed'] < 0, 'confirmed'] = 0    
    fig = go.Figure()
    if 'cases' in metrics:
        fig.add_trace(go.Scattermapbox(
            lon=sub_df['lon'].astype(float) +
            np.random.normal(0, .02, len(sub_df['lon'])),
            lat=sub_df['lat'].astype(float) +
            np.random.normal(0, .02, len(sub_df['lat'])),
            customdata=sub_df[group],
            textposition='top right',
            text=sub_df['Text_Cases'],
            hoverinfo='text',
            mode='markers',
            name='cases',
            marker=dict(
                sizeref=sizeref,
                sizemin=10,
                size=sub_df['confirmed'],
                color='yellow')))

    if 'deaths' in metrics:
        fig.add_trace(go.Scattermapbox(
            lon=sub_df['lon'] +
            np.random.normal(0, .02, len(sub_df['lon'])),
            lat=sub_df['lat'] +
            np.random.normal(0, .02, len(sub_df['lat'])),
            customdata=sub_df[group],
            textposition='top right',
            text=sub_df['Text_Death'],
            hoverinfo='text',
            name='deaths',
            mode='markers',
            marker=dict(
                sizeref=sizeref,
                sizemin=10,
                size=sub_df['deaths'],
                color='red')))

    if not metrics:
        fig.add_trace(go.Scattermapbox(
            lon=[],
            lat=[]
        ))
    layout = dict(
        title_text='The Corona is Coming',
        autosize=True,
        showlegend=True,
        mapbox=dict(
            accesstoken=mapbox_access_token,
            style=mapbox_style,
            zoom=zoom,
            center=dict(lat=lat, lon=lon)
        ),
        hovermode="closest",
        margin=dict(r=0, l=0, t=0, b=0),
        dragmode="pan",
        legend=dict(
            x=0.92,
            y=1,
            traceorder="normal",
            font=dict(
                family="sans-serif",
                size=14,
                color="white"
            ),
            bgcolor='rgba(0,0,0,0)',
            # bordercolor="",
            # borderwidth=2
        )
    )

    fig.update_layout(layout)
    return fig

f = get_graph_state(63,'province',['cases'],None)
f.show()

In [730]:
2. * jhu_df_time.groupby(
            ['Date', group]).sum().max()['confirmed'] / (20 ** 2)

1898.375

In [735]:
sub_df = csbs_df[csbs_df['Date'] == official_date]
sub_df = csbs_df.groupby(
    ['county', 'province', 'lat', 'lon'], as_index=False).sum()
sub_df.rename({'cases': 'confirmed'}, axis=1, inplace=1)
sizeref = 2. * sub_df.max()['confirmed'] / (20 ** 2)

In [739]:
sub_df.groupby('county').get_group('New York')

Unnamed: 0,county,province,lat,lon,id,confirmed,deaths
1793,New York,New York,40.7146,-74.0071,0,28716,317
1794,New York,New York,40.71455,-74.00714,0,20011,280


In [701]:
sub_df = jhu_df_time[jhu_df_time[group] != '']

sub_df = sub_df[
    sub_df['Date'] == official_date]#.groupby(group, as_index=False)#.sum()[['province', 'confirmed', 'deaths']]#.merge(province_mapper, on=['province'])


#         # Now do it for the CSBS
sub_df_csbs = csbs_df[csbs_df[group] != '']
sub_df_csbs = sub_df_csbs[sub_df_csbs['Date'] == official_date]
province_mapper = csbs_df.sort_values('confirmed')[::-1].groupby(
    group, as_index=False).head(1)[[group,  'lat', 'lon']]
#sub_df_csbs = csbs_df.groupby(group, as_index=False).sum()[
#    ['province', 'confirmed', 'deaths']].merge(province_mapper, on=['province'])
#sub_df = pd.concat([sub_df, sub_df_csbs])
#sizeref = 2. * jhu_df_time.groupby(
#    ['Date', group]).sum().max()[normalizer] / (20 ** 2)


In [694]:
province_mapper = csbs_df.sort_values('confirmed')[::-1].groupby(
    group, as_index=False).head(1)[[group,  'lat', 'lon']]

In [703]:
sub_df_csbs.groupby(group, as_index=False).get_group('New York').sum()['confirmed']

26376