In [116]:
import pandas as pd
import numpy as np

# COVID-19

In [117]:
# Total cases

In [118]:
world_total = pd.read_csv('output_data/time_series/total-world-covid19.csv')
world_total['Date_Confirmed'] = pd.DatetimeIndex(world_total['Date_Confirmed'])
world_total = world_total[['Date_Confirmed','Italy','Korea, South','Mexico','Spain','United Kingdom','US','Mexico']]

In [119]:
# Daily cases

In [120]:
world_daily = pd.read_csv('output_data/time_series/daily-world_covid19.csv')
world_daily['Date_Confirmed'] = pd.DatetimeIndex(world_daily['Date_Confirmed'])
world_daily = world_daily[['Date_Confirmed','Italy','Korea, South','Spain','United Kingdom','US','Mexico']]

In [121]:
world_daily

Unnamed: 0,Date_Confirmed,Italy,"Korea, South",Spain,United Kingdom,US,Mexico
0,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,1.0,0.0,0.0,1.0,0.0
3,2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,0.0,1.0,0.0,0.0,3.0,0.0
...,...,...,...,...,...,...,...
87,2020-04-18,3491.0,18.0,887.0,504.0,32491.0,578.0
88,2020-04-19,3047.0,8.0,6948.0,533.0,26612.0,622.0
89,2020-04-20,2256.0,13.0,1536.0,425.0,25517.0,764.0
90,2020-04-21,2729.0,9.0,3968.0,393.0,27539.0,511.0


In [122]:
# Process

In [123]:
total = world_total
total = total.fillna(method='ffill') # First fill missing time series totals with previous row value
#total = total.replace(0, np.nan) # Now convert 0 to NaN

daily = world_daily

In [124]:
# Make sure datetimes are correct

In [125]:
    # Total
total['Date_Confirmed'] = pd.to_datetime(total.Date_Confirmed, format='%Y-%m-%d', errors='coerce')
total = total.set_index(pd.DatetimeIndex(total['Date_Confirmed']))

    # Daily
daily['Date_Confirmed'] = pd.to_datetime(daily.Date_Confirmed, format='%Y-%m-%d', errors='coerce')
daily = daily.set_index(pd.DatetimeIndex(daily['Date_Confirmed']))

In [126]:
# Apply rolling window

In [127]:
total = total.dropna().rolling(window=7).mean().round().reset_index()
daily = daily.dropna().rolling(window=7).mean().round().reset_index()

In [128]:
# Rename index

In [129]:
total = total.rename(columns={'index':'Date_Confirmed'})
daily = daily.rename(columns={'index':'Date_Confirmed'})

In [130]:
# Melt to long format

In [131]:
tbl_total = pd.DataFrame(total.set_index(['Date_Confirmed']).rename_axis(['Region'], axis=1).stack())

In [132]:
tbl_daily = pd.DataFrame(daily.set_index(['Date_Confirmed']).rename_axis(['Region'], axis=1).stack())

In [133]:
tbl = pd.merge(tbl_total, tbl_daily, how='inner', on=['Date_Confirmed', 'Region'])

In [134]:
tbl

Unnamed: 0_level_0,Unnamed: 1_level_0,0_x,0_y
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,Italy,0.0,0.0
2020-01-28,"Korea, South",2.0,0.0
2020-01-28,Mexico,0.0,0.0
2020-01-28,Mexico,0.0,0.0
2020-01-28,Spain,0.0,0.0
...,...,...,...
2020-04-22,Mexico,7579.0,586.0
2020-04-22,Mexico,7579.0,586.0
2020-04-22,Spain,196995.0,4392.0
2020-04-22,United Kingdom,10923.0,457.0


In [135]:
tbl = tbl.rename(columns={'0_x': 'total', '0_y': 'daily'})

In [136]:
tbl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total,daily
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,Italy,0.0,0.0
2020-01-28,"Korea, South",2.0,0.0
2020-01-28,Mexico,0.0,0.0
2020-01-28,Mexico,0.0,0.0
2020-01-28,Spain,0.0,0.0


In [137]:
# Convert format to fit this plotly express template (long, instead of wide):
# https://plotly.com/python/animations/#using-a-slider-and-buttons
# https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv
tbl_long = tbl.reset_index()
tbl_long.to_csv('output_data/time_series/rolling-time-series.csv', index=False)

In [138]:
tbl_long

Unnamed: 0,Date_Confirmed,Region,total,daily
0,2020-01-28,Italy,0.0,0.0
1,2020-01-28,"Korea, South",2.0,0.0
2,2020-01-28,Mexico,0.0,0.0
3,2020-01-28,Mexico,0.0,0.0
4,2020-01-28,Spain,0.0,0.0
...,...,...,...,...
597,2020-04-22,Mexico,7579.0,586.0
598,2020-04-22,Mexico,7579.0,586.0
599,2020-04-22,Spain,196995.0,4392.0
600,2020-04-22,United Kingdom,10923.0,457.0
