In [1]:
import pandas as pd
import numpy as np

# COVID-19

In [2]:
# Total cases

In [3]:
    # World regions
world_total = pd.read_csv('output_data/time_series/total-world-covid19.csv')
world_total['Date_Confirmed'] = pd.DatetimeIndex(world_total['Date_Confirmed'])
world_total = world_total[['Date_Confirmed','China','Italy','Spain','United Kingdom','Korea, South','US','Mexico']]

    # Mexico regions
mexico_total = pd.read_csv('output_data/time_series/total-mexico-covid19.csv')
mexico_total['Date_Confirmed'] = pd.DatetimeIndex(mexico_total['Date_Confirmed'])
mexico_total['Mexico_SS'] = mexico_total.sum(axis=1) # Sum rows for Mexico total (to indicate: Secretaría de Salud)

In [4]:
# Daily cases

In [5]:
    # World regions
world_daily = pd.read_csv('output_data/time_series/daily-world_covid19.csv')
world_daily['Date_Confirmed'] = pd.DatetimeIndex(world_daily['Date_Confirmed'])
world_daily = world_daily[['Date_Confirmed','China','Italy','Spain','United Kingdom','Korea, South','US','Mexico']]

    # Mexico regions
mexico_daily = pd.read_csv('output_data/time_series/daily-mexico-covid19.csv')
mexico_daily['Date_Confirmed'] = pd.DatetimeIndex(mexico_daily['Date_Confirmed'])
mexico_daily['Mexico_SS'] = mexico_daily.sum(axis=1) # Sum rows for Mexico total (to indicate: Secretaría de Salud)

In [6]:
# Merge world and Mexico data and produce time series

In [7]:
    # Total
total = world_total#.merge(mexico_ts, on='Date_Confirmed', how='outer') # Uncomment to include mexican regions
total = total.fillna(method='ffill') # First fill missing time series totals with previous row value
total = total.replace(0, np.nan) # Now convert 0 to NaN

    # Daily
daily = world_daily#.merge(mexico_daily, on='Date_Confirmed', how='outer') # Uncomment to include mexican regions

In [8]:
# Make sure datetimes are correct

In [9]:
    # Total
total['Date_Confirmed'] = pd.to_datetime(total.Date_Confirmed, format='%d-%m-%Y')
total['Date_Confirmed'] = total['Date_Confirmed'].dt.strftime('%d-%m-%Y')
total = total.set_index(pd.DatetimeIndex(total['Date_Confirmed']))
total.index = total.index.strftime('%d-%m-%Y')

    # Daily
daily['Date_Confirmed'] = pd.to_datetime(daily.Date_Confirmed, format='%d-%m-%Y')
daily['Date_Confirmed'] = daily['Date_Confirmed'].dt.strftime('%d-%m-%Y')
daily = daily.set_index(pd.DatetimeIndex(daily['Date_Confirmed']))
daily.index = daily.index.strftime('%d-%m-%Y')

In [10]:
# Apply rolling window

In [11]:
total_rw = total.dropna().rolling(window=7).mean().round().reset_index()
daily_rw = daily.fillna(0).rolling(window=7).mean().round().reset_index()

In [13]:
# Rename index

In [13]:
total_rw = total_rw.rename(columns={'index':'Date_Confirmed'})
daily_rw = daily_rw.rename(columns={'index':'Date_Confirmed'})

In [21]:
# Melt to long format

In [23]:
tbl_total = pd.DataFrame(total_rw.set_index(['Date_Confirmed']).rename_axis(['Region'], axis=1).stack())

In [24]:
tbl_daily = pd.DataFrame(daily_rw.set_index(['Date_Confirmed']).rename_axis(['Region'], axis=1).stack())

In [25]:
tbl = pd.merge(tbl_total, tbl_daily, how='inner', on=['Date_Confirmed', 'Region'])

In [26]:
tbl

Unnamed: 0_level_0,Unnamed: 1_level_0,0_x,0_y
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
03-05-2020,China,2422.0,8.0
03-05-2020,Italy,2171.0,458.0
03-05-2020,Spain,132.0,35.0
03-05-2020,United Kingdom,5.0,1.0
03-05-2020,"Korea, South",4350.0,617.0
...,...,...,...
21-04-2020,Spain,192603.0,4520.0
21-04-2020,United Kingdom,10466.0,459.0
21-04-2020,"Korea, South",10644.0,17.0
21-04-2020,US,729006.0,30874.0


In [27]:
tbl = tbl.rename(columns={'0_x': 'total', '0_y': 'daily'})

In [28]:
tbl.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,total,daily
Date_Confirmed,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
21-04-2020,Spain,192603.0,4520.0
21-04-2020,United Kingdom,10466.0,459.0
21-04-2020,"Korea, South",10644.0,17.0
21-04-2020,US,729006.0,30874.0
21-04-2020,Mexico,6993.0,537.0


In [29]:
# Right format for plotly (long, instead of wide)
# https://plotly.com/python/animations/#using-a-slider-and-buttons
# https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv
plot_data = (tbl.reset_index()).to_csv('output_data/time_series/rolling-time-series.csv', index=False)