In [24]:
import pandas as pd
import numpy as np

# COVID-19

In [25]:
# Total cases

In [26]:
# Load the cumulative-case table, convert the date column to datetimes, and
# keep only the date plus the countries compared in this notebook.
world_total = (
    pd.read_csv('output_data/time_series/total-world-covid19.csv')
    .assign(Date_Confirmed=lambda frame: pd.DatetimeIndex(frame['Date_Confirmed']))
    [['Date_Confirmed', 'Australia', 'US', 'United Kingdom', 'Korea, South', 'Italy', 'Mexico']]
)

In [27]:
# Daily cases

In [28]:
# Load the daily-case table, convert the date column to datetimes, and keep
# only the date plus the countries compared in this notebook.
world_daily = (
    pd.read_csv('output_data/time_series/daily-world_covid19.csv')
    .assign(Date_Confirmed=lambda frame: pd.DatetimeIndex(frame['Date_Confirmed']))
    [['Date_Confirmed', 'Australia', 'US', 'United Kingdom', 'Korea, South', 'Italy', 'Mexico']]
)

In [29]:
# Display the daily-case frame for the selected countries.
world_daily

Unnamed: 0,Date_Confirmed,Australia,US,United Kingdom,"Korea, South",Italy,Mexico
0,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,1.0,0.0,1.0,0.0,0.0
3,2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,0.0,3.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...
92,2020-04-23,1.0,28819.0,419.0,14.0,2646.0,1089.0
93,2020-04-24,2.0,36188.0,490.0,10.0,3021.0,1239.0
94,2020-04-25,2.0,32796.0,448.0,10.0,2357.0,970.0
95,2020-04-26,2.0,27631.0,406.0,10.0,2324.0,835.0


In [30]:
# Process

In [31]:
# Forward-fill gaps in the cumulative totals so a missing value carries the
# previous row's value. DataFrame.ffill() replaces fillna(method='ffill'),
# whose `method` argument is deprecated in recent pandas releases.
total = world_total.ffill()

# Work on an independent copy so that later column assignments on `daily`
# neither write through to `world_daily` nor raise SettingWithCopyWarning.
daily = world_daily.copy()

In [32]:
# Make sure datetimes are correct

In [33]:
    # Total
# Re-parse the date column (values that do not match %Y-%m-%d become NaT) and
# index each frame by date. `assign` returns a new frame instead of writing
# the column into the existing object, so frames that share data with the
# loaded tables are left untouched and no SettingWithCopyWarning is raised.
# drop=False keeps Date_Confirmed as a column as well as the index, matching
# the previous frame layout, so a later dropna() still discards NaT dates.
total = (
    total
    .assign(Date_Confirmed=pd.to_datetime(total['Date_Confirmed'], format='%Y-%m-%d', errors='coerce'))
    .set_index('Date_Confirmed', drop=False)
)

# Daily
daily = (
    daily
    .assign(Date_Confirmed=pd.to_datetime(daily['Date_Confirmed'], format='%Y-%m-%d', errors='coerce'))
    .set_index('Date_Confirmed', drop=False)
)

In [34]:
# Apply a 7-row (one week of daily rows) rolling mean

In [35]:
# Drop incomplete rows, then smooth every numeric column with a 7-row rolling
# mean rounded to whole numbers. select_dtypes restricts the frame to numeric
# columns first: a datetime column left in the frame cannot be averaged, and
# while older pandas dropped such columns silently, pandas 2.x raises instead.
# reset_index finally turns the date index back into an ordinary column.
total = (
    total
    .dropna()
    .select_dtypes(include='number')
    .rolling(window=7)
    .mean()
    .round()
    .reset_index()
)
daily = (
    daily
    .dropna()
    .select_dtypes(include='number')
    .rolling(window=7)
    .mean()
    .round()
    .reset_index()
)

In [36]:
# Rename index

In [37]:
# Give a column labelled 'index' (if one exists) the name 'Date_Confirmed';
# rename leaves the frame unchanged when that label is absent.
total = total.rename({'index': 'Date_Confirmed'}, axis='columns')
daily = daily.rename({'index': 'Date_Confirmed'}, axis='columns')

In [38]:
# Reshape from wide to long format (one row per date and region)

In [39]:
# Wide -> long for the totals: index by date, label the column axis 'Region',
# and stack the country columns into a (Date_Confirmed, Region) index.
# The resulting single value column is labelled 0.
tbl_total = pd.DataFrame(
    total
    .set_index('Date_Confirmed')
    .rename_axis(columns='Region')
    .stack()
)

In [40]:
# Wide -> long for the daily counts: index by date, label the column axis
# 'Region', and stack the country columns into a (Date_Confirmed, Region)
# index. The resulting single value column is labelled 0.
tbl_daily = pd.DataFrame(
    daily
    .set_index('Date_Confirmed')
    .rename_axis(columns='Region')
    .stack()
)

In [41]:
# Inner-join totals and daily counts on the shared (Date_Confirmed, Region)
# keys; the clashing value columns receive pandas' default _x / _y suffixes.
tbl = tbl_total.merge(
    tbl_daily,
    how='inner',
    on=['Date_Confirmed', 'Region'],
)

In [42]:
# Inspect the merged frame; its value columns still carry the merge suffixes.
tbl

Unnamed: 0_level_0,Unnamed: 1_level_0,0_x,0_y
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,Australia,0.0,0.0
2020-01-28,US,3.0,1.0
2020-01-28,United Kingdom,0.0,0.0
2020-01-28,"Korea, South",2.0,0.0
2020-01-28,Italy,0.0,0.0
...,...,...,...
2020-04-27,US,902697.0,29124.0
2020-04-27,United Kingdom,13125.0,422.0
2020-04-27,"Korea, South",10717.0,11.0
2020-04-27,Italy,192384.0,2598.0


In [43]:
# Replace the suffixed merge labels with descriptive column names.
tbl = tbl.rename({'0_x': 'total', '0_y': 'daily'}, axis='columns')

In [44]:
# Check the first rows now that the value columns are named.
tbl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total,daily
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,Australia,0.0,0.0
2020-01-28,US,3.0,1.0
2020-01-28,United Kingdom,0.0,0.0
2020-01-28,"Korea, South",2.0,0.0
2020-01-28,Italy,0.0,0.0


In [45]:
# Flatten the index into ordinary columns so the table is in the long layout
# (rather than wide) used by this plotly express animation template:
#   https://plotly.com/python/animations/#using-a-slider-and-buttons
#   https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv
tbl_long = tbl.reset_index()

# Persist the long table; the positional row index is not written.
tbl_long.to_csv(
    'output_data/time_series/rolling-time-series.csv',
    index=False,
)

In [46]:
# Display the long-format frame that was written to CSV.
tbl_long

Unnamed: 0,Date_Confirmed,Region,total,daily
0,2020-01-28,Australia,0.0,0.0
1,2020-01-28,US,3.0,1.0
2,2020-01-28,United Kingdom,0.0,0.0
3,2020-01-28,"Korea, South",2.0,0.0
4,2020-01-28,Italy,0.0,0.0
...,...,...,...,...
541,2020-04-27,US,902697.0,29124.0
542,2020-04-27,United Kingdom,13125.0,422.0
543,2020-04-27,"Korea, South",10717.0,11.0
544,2020-04-27,Italy,192384.0,2598.0
