In [30]:
import pandas as pd
import numpy as np

# COVID-19

In [31]:
# Total cases

In [32]:
# Load the cumulative confirmed-case time series, convert the date column to
# datetimes, and keep only the date plus the countries of interest.
world_total = (
    pd.read_csv('output_data/time_series/total-world-covid19.csv')
    .assign(Date_Confirmed=lambda frame: pd.DatetimeIndex(frame['Date_Confirmed']))
    [[
        'Date_Confirmed',
        'US',
        'United Kingdom',
        'Switzerland',
        'Italy',
        'India',
        'Australia',
        'Mexico',
    ]]
)

In [33]:
# Daily cases

In [34]:
# Load the daily new-case time series, convert the date column to datetimes,
# and keep only the date plus the countries of interest.
world_daily = (
    pd.read_csv('output_data/time_series/daily-world_covid19.csv')
    .assign(Date_Confirmed=lambda frame: pd.DatetimeIndex(frame['Date_Confirmed']))
    [[
        'Date_Confirmed',
        'US',
        'United Kingdom',
        'Switzerland',
        'Italy',
        'India',
        'Australia',
        'Mexico',
    ]]
)

In [35]:
# Display the daily-cases frame (date column plus the selected countries).
world_daily

Unnamed: 0,Date_Confirmed,US,United Kingdom,Switzerland,Italy,India,Australia,Mexico
0,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
93,2020-04-24,36188.0,490.0,181.0,3021.0,1453.0,2.0,1239.0
94,2020-04-25,32796.0,448.0,217.0,2357.0,1753.0,2.0,970.0
95,2020-04-26,27631.0,406.0,167.0,2324.0,1607.0,2.0,835.0
96,2020-04-27,22412.0,392.0,103.0,1739.0,1561.0,1.0,852.0


In [36]:
# Process

In [37]:
# Forward-fill missing values in the cumulative series with the previous row's
# value. `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`
# and returns a new frame, so `world_total` itself is left unchanged.
total = world_total.ffill()
# Zeros are kept as they are (the former 0 -> NaN replacement step is disabled).

# Copy rather than alias, so that any later in-place change to `daily` cannot
# leak back into `world_daily`.
daily = world_daily.copy()

In [38]:
# Make sure datetimes are correct

In [39]:
    # Total
# Re-parse the date column with an explicit format (values that do not parse
# become NaT) and use it as the index while also keeping it as a column.
# `assign` builds a new frame instead of writing into the existing one, so a
# frame shared with an earlier cell is never modified from here.
total = total.assign(
    Date_Confirmed=pd.to_datetime(total['Date_Confirmed'], format='%Y-%m-%d', errors='coerce')
).set_index('Date_Confirmed', drop=False)

# Daily: same treatment as the totals above.
daily = daily.assign(
    Date_Confirmed=pd.to_datetime(daily['Date_Confirmed'], format='%Y-%m-%d', errors='coerce')
).set_index('Date_Confirmed', drop=False)

In [40]:
# Apply a 7-day rolling mean to smooth the series

In [41]:
# Smooth each country's series with a 7-row rolling mean, rounded to whole
# numbers, then turn the date index back into a column.
#
# Only numeric columns are passed to `rolling`: the frames may still carry a
# datetime `Date_Confirmed` column next to the index, and pandas >= 2.0 raises
# on non-numeric columns in rolling aggregations (older versions dropped them
# silently). Leaving that column out also lets `reset_index()` restore the
# date column without a name clash.
total = (
    total.dropna()
    .select_dtypes(include='number')
    .rolling(window=7)
    .mean()
    .round()
    .reset_index()
)
daily = (
    daily.dropna()
    .select_dtypes(include='number')
    .rolling(window=7)
    .mean()
    .round()
    .reset_index()
)

In [42]:
# Rename the restored index column to Date_Confirmed (only needed if it came back as 'index')

In [43]:
# If the restored index column came back under the fallback name 'index',
# rename it to 'Date_Confirmed'; frames without an 'index' column pass through
# unchanged.
total, daily = (
    frame.rename(columns={'index': 'Date_Confirmed'})
    for frame in (total, daily)
)

In [44]:
# Melt to long format

In [45]:
# Reshape the wide totals table to long form: one row per (Date_Confirmed,
# Region) pair, with the values in a single column labelled 0.
tbl_total = (
    total.set_index('Date_Confirmed')
    .rename_axis(columns='Region')
    .stack()
    .to_frame()
)

In [46]:
# Reshape the wide daily table to long form: one row per (Date_Confirmed,
# Region) pair, with the values in a single column labelled 0.
tbl_daily = (
    daily.set_index('Date_Confirmed')
    .rename_axis(columns='Region')
    .stack()
    .to_frame()
)

In [47]:
# Inner-join the two long tables on their shared (Date_Confirmed, Region)
# index levels. Both value columns are labelled 0, so the merge
# disambiguates them with its default _x (left) / _y (right) suffixes.
tbl = tbl_total.merge(
    tbl_daily,
    on=['Date_Confirmed', 'Region'],
    how='inner',
)

In [48]:
# Display the merged table; the value columns still carry the merge suffixes.
tbl

Unnamed: 0_level_0,Unnamed: 1_level_0,0_x,0_y
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,US,3.0,1.0
2020-01-28,United Kingdom,0.0,0.0
2020-01-28,Switzerland,0.0,0.0
2020-01-28,Italy,0.0,0.0
2020-01-28,India,0.0,0.0
...,...,...,...
2020-04-28,Switzerland,28832.0,172.0
2020-04-28,Italy,194891.0,2507.0
2020-04-28,India,26275.0,1606.0
2020-04-28,Australia,837.0,2.0


In [49]:
# Give the suffixed merge columns meaningful names: the left-hand frame held
# the totals, the right-hand frame the daily counts.
tbl = tbl.rename({'0_x': 'total', '0_y': 'daily'}, axis='columns')

In [50]:
# Check the renamed columns on the first few rows.
tbl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total,daily
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-28,US,3.0,1.0
2020-01-28,United Kingdom,0.0,0.0
2020-01-28,Switzerland,0.0,0.0
2020-01-28,Italy,0.0,0.0
2020-01-28,India,0.0,0.0


In [51]:
# Flatten the (Date_Confirmed, Region) index into ordinary columns so the
# table is in the long (rather than wide) layout used by this Plotly Express
# animation template and its example dataset:
# https://plotly.com/python/animations/#using-a-slider-and-buttons
# https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv
tbl_long = tbl.reset_index()

# Write the long table to disk without the positional row index.
tbl_long.to_csv(
    'output_data/time_series/rolling-time-series.csv',
    index=False,
)

In [52]:
# Display the long-format table that was written to the CSV file.
tbl_long

Unnamed: 0,Date_Confirmed,Region,total,daily
0,2020-01-28,US,3.0,1.0
1,2020-01-28,United Kingdom,0.0,0.0
2,2020-01-28,Switzerland,0.0,0.0
3,2020-01-28,Italy,0.0,0.0
4,2020-01-28,India,0.0,0.0
...,...,...,...,...
639,2020-04-28,Switzerland,28832.0,172.0
640,2020-04-28,Italy,194891.0,2507.0
641,2020-04-28,India,26275.0,1606.0
642,2020-04-28,Australia,837.0,2.0
