In [62]:
# Import the relevent packages
import urllib.request
from datetime import date
from pathlib import Path
import pandas as pd
import numpy as np
import datetime

import ipywidgets as widgets

# Download the dataset

In [19]:
# Download the dataset
# This only downloads a new file once a day

## Make a new folder for each day's data
todayfolder = date.today().strftime("%d/%m/%Y").replace('/','.')
Path(todayfolder).mkdir(parents=True, exist_ok=True)

# Reference to file names being created
datafiles = {'confirmed':f'{todayfolder}/time_series_2019-ncov-Confirmed.csv',
            'deaths':f'{todayfolder}/time_series_2019-ncov-Deaths.csv',
            'recoverd':f'{todayfolder}/time_series_2019-ncov-Recovered.csv'}

# if the data has not been downloaded, download it

if not Path(datafiles['confirmed']).is_file():
    urllib.request.urlretrieve('https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_confirmed_global.csv&filename=time_series_covid19_confirmed_global.csv', datafiles['confirmed'])

if not Path(datafiles['deaths']).is_file():
    urllib.request.urlretrieve('https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_deaths_global.csv&filename=time_series_covid19_deaths_global.csv', datafiles['deaths'])

if not Path(datafiles['recoverd']).is_file():
    urllib.request.urlretrieve('https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_recovered_global.csv&filename=time_series_covid19_recovered_global.csv', datafiles['recoverd'])

print('Downloaded latest data')


Downloaded latest data


# Read the downloaded data

In [30]:
# Read the data files

conf_df = pd.read_csv(datafiles['confirmed'])
death_df = pd.read_csv(datafiles['deaths'])
cure_df = pd.read_csv(datafiles['recoverd'])

# Add identification for each dataset
dateCols = conf_df.columns[conf_df.columns.get_loc("Long")+1:]
conf_df['Attribute'] = 'confirmed'
death_df['Attribute'] = 'deaths'
cure_df['Attribute'] = 'cured'

# Change the order of columns for each dataset
newCols = ['Country/Region','Province/State','Attribute','Lat','Long']
conf_df = conf_df[[*newCols, *dateCols]]
death_df = death_df[[*newCols, *dateCols]]
cure_df = cure_df[[*newCols, *dateCols]]

# Set up widgets

In [41]:
countryList = conf_df['Country/Region'].unique()
selectedCountry = widgets.Dropdown(
    options=countryList,
    value='Australia',
    description='Country:',
    disabled=False,
)
selectedCountry

Dropdown(description='Country:', index=8, options=('Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', '…

In [42]:
stateList = conf_df[conf_df['Country/Region'] == selectedCountry.value]['Province/State'].unique()
selectedState = widgets.Dropdown(
    options=stateList,
    description='Country:',
    disabled=False,
)
selectedState

Dropdown(description='Country:', options=('Australian Capital Territory', 'New South Wales', 'Northern Territo…

In [45]:
# Calculate the number of new cases per day
conf_new_df = conf_df.copy()
conf_new_df[dateCols] = conf_new_df[dateCols].iloc[:,:].rolling(window=2, axis = 1).apply(lambda x: x[-1] - x[0],raw=True)


In [87]:
start_date = datetime.datetime.strptime(conf_df.columns[5:][0], '%m/%d/%y')
end_date = datetime.datetime.strptime(conf_df.columns[5:][-1], '%m/%d/%y')
dateRange = pd.date_range(start_date, end_date, freq='D')
dateRangetxt = [(date.strftime(' %d %b %y '), date) for date in dateRange]
dtrindex = (0, len(dateRangetxt)-1)

date_range_slider = widgets.SelectionRangeSlider(
    options=dateRangetxt,
    index=dtrindex,
    description='Dates',
    orientation='horizontal',
    layout={'width': '400px'}
)

date_range_slider


SelectionRangeSlider(description='Dates', index=(0, 133), layout=Layout(width='400px'), options=((' 22 Jan 202…

In [60]:
data = conf_new_df[conf_new_df['Country/Region'] == selectedCountry.value]
y = data.columns[5:]

In [74]:
dtPickStart = widgets.DatePicker(
 description= 'Start:',
 value = datetime.datetime.strptime(conf_df.columns[5:][0], '%m/%d/%y')
 )
dtPickStart

DatePicker(value=datetime.datetime(2020, 1, 22, 0, 0), description='Start:')

In [68]:
y[0].split()

'1/22/20'

In [61]:
y

Index(['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20', '1/30/20', '1/31/20',
       ...
       '5/25/20', '5/26/20', '5/27/20', '5/28/20', '5/29/20', '5/30/20',
       '5/31/20', '6/1/20', '6/2/20', '6/3/20'],
      dtype='object', length=134)

In [55]:
data.columns

Index(['Country/Region', 'Province/State', 'Attribute', 'Lat', 'Long',
       '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20',
       ...
       '5/25/20', '5/26/20', '5/27/20', '5/28/20', '5/29/20', '5/30/20',
       '5/31/20', '6/1/20', '6/2/20', '6/3/20'],
      dtype='object', length=139)

In [8]:
#df = df[df['Country/Region'] == selectedCountry.value]
aggregate_window = 7

# Get the number of active cases
active_df = conf_df.copy()
active_df[dateCols] = active_df[dateCols].values - death_df[dateCols].values - cure_df[dateCols].values

# Calculate the n day moving average
avr_window = aggregate_window
conf_avr_df = conf_df.copy()
conf_avr_df[dateCols] = conf_avr_df[dateCols].iloc[:,:].rolling(window=avr_window, axis = 1).mean()

# Calculate the number of new cases per day
conf_new_df = conf_df.copy()
conf_new_df[dateCols] = conf_new_df[dateCols].iloc[:,:].rolling(window=2, axis = 1).apply(lambda x: x[-1] - x[0],raw=True)

# Calculate the n day diffrence
diff_Window = aggregate_window
conf_nday_diff_df = conf_df.copy()
conf_nday_diff_df[dateCols] = conf_nday_diff_df[dateCols].iloc[:,:].rolling(window=aggregate_window, axis = 1).apply(lambda x: x[-1] - x[0],raw=True)

# Calculate the growth rate per day
conf_new_pct_df = conf_df.copy()
conf_new_pct_df[dateCols] = conf_new_pct_df[dateCols].iloc[:,:].rolling(window=2, axis = 1).apply(lambda x: (x[1] - x[0])/x[0],raw=True)


# Calculate the n day average growth rate
avr_nday_growth_window = aggregate_window
conf_nday_avr_new_df = conf_new_pct_df.copy()
conf_nday_avr_new_df[dateCols] = conf_nday_avr_new_df[dateCols].iloc[:,:].rolling(window=aggregate_window, axis = 1).mean()


ValueError: operands could not be broadcast together with shapes (266,134) (253,134) 

Dropdown(description='Country:', index=8, options=('Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', '…

In [15]:
conf_nday_avr_new_df[conf_new_pct_df['Country/Region'] == selectedCountry.value]

NameError: name 'conf_nday_avr_new_df' is not defined