In [1]:
from imputationLibrary import decompose, forwardFilling, hotDeck, meanImputation, movingAverage, splineInterpolation, randomSampleImputation, nature
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from sklearn import preprocessing
from datetime import datetime, timedelta
from tsmoothie.smoother import *

In [2]:
def plot_ac(df, name):
    """Plot the normalised auto-correlation of a single series.

    Parameters
    ----------
    df : pandas.Series (or single-column frame)
        Series to analyse. Missing values are replaced with 0 before the
        correlation is computed.
    name : str
        Label used as the prefix of the plot title.

    Notes
    -----
    The series is standardised along the time axis (zero mean, unit
    population standard deviation; a constant series is only centred).
    The previous implementation wrapped the data in a list before calling
    ``preprocessing.scale``, which produced a (1, n) array that was scaled
    column-wise: every value was centred on itself, so the plotted curve was
    identically zero.

    The full cross-correlation is divided by the number of observations so
    that the zero-lag value is 1 and the curve is directly comparable with
    the dotted +/- 2/sqrt(n) reference lines.
    """
    values = np.asarray(df.fillna(0), dtype=float).ravel()
    n_obs = values.size

    # Standardise over time; fall back to a unit scale for constant input so
    # that no division by zero occurs.
    spread = values.std()
    if spread == 0:
        spread = 1.0
    normalized = (values - values.mean()) / spread

    # Dividing by n turns the raw sums of products into correlation
    # coefficients in [-1, 1].
    corr = signal.correlate(normalized, normalized, mode='full') / n_obs

    plt.plot(corr, 'o-', markersize=2)
    bound = 2 / np.sqrt(n_obs)
    plt.axhline(bound, ls=':')
    plt.axhline(-bound, ls=':')
    plt.title(name + ' auto-correlation')
    plt.show()

In [3]:
def decompose_and_plot(ts, flag_plot = False):
    """Decompose one series and optionally show diagnostics for it.

    The series is handed to ``decompose.decompose`` together with the
    notebook-level ``period``; the decomposition type it reports is printed.
    When ``flag_plot`` is true the decomposition is plotted, the four
    ``nature`` checks are printed for the residual, and the auto-correlation
    of the residual, trend and seasonal components is drawn.

    Returns a tuple ``(resid, trend, seasonal, decomp_type)``.
    """
    decomposition, model_kind = decompose.decompose(ts, period)
    print(model_kind)

    residual = decomposition.resid

    if flag_plot:
        decomposition.plot()
        plt.show()

        # Each check is evaluated lazily, right before its line is printed.
        residual_checks = (
            ("Resid is white noise? ", lambda: nature.isWhiteNoise(residual)),
            ("Resid is seasonal noise? ", lambda: nature.isSeasonal(residual)),
            ("Resid is trended noise? ", lambda: nature.isTrended(residual, period)),
            ("Resid is seasonal and trended noise? ", lambda: nature.isTrendedAndSeasonal(residual, period)),
        )
        for question, run_check in residual_checks:
            print(question, run_check())

        for component, label in (
            (residual, "Resid"),
            (decomposition.trend, "Trend"),
            (decomposition.seasonal, "Seasonal"),
        ):
            plot_ac(component, label)

    return residual, decomposition.trend, decomposition.seasonal, model_kind
    

In [4]:

# --- Load the data and split it into a training and a test window ----------
# Forward slashes resolve on every platform; the previous 'output\/...'
# literal only resolved on Windows.
df = pd.read_csv('output/CompleteCovid.csv', index_col=0, parse_dates=True)
df_train = df.loc['2020-01-22 09:00:00':'2020-02-26 02:00:00'].dropna(how='all', axis=1)  # Selecting training data
df_test = df.loc['2020-02-26 02:30:00':, df_train.columns]  # Selecting test data
period = 30  # read as a global by decompose_and_plot

# Missing values are replaced by a tiny constant before decomposition; the
# original gaps are remembered in df_nan / df_nan_test and restored afterwards.
df_filled = df_train.fillna(0.000001)
df_filled_test = df_test.fillna(0.000001)

df_nan = df_train.isna()
df_nan_test = df_test.isna()


def _decompose_columns(frame):
    """Run decompose_and_plot on every column of ``frame``.

    Returns three frames (resid, trend, seasonal) holding the per-column
    components, plus a dict mapping each column to its decomposition type.
    """
    resid_frame = pd.DataFrame()
    trend_frame = pd.DataFrame()
    seasonal_frame = pd.DataFrame()
    decomp_types = {}
    for col in frame.columns:
        resid, trend, seasonal, decomp_type = decompose_and_plot(frame.loc[:, col])
        decomp_types[col] = decomp_type
        resid_frame[col] = resid
        trend_frame[col] = trend
        seasonal_frame[col] = seasonal
    return resid_frame, trend_frame, seasonal_frame, decomp_types


def _recompose(resid_frame, seasonal_frame, trend_frame, decomp_types, template):
    """Combine imputed components back into one frame shaped like ``template``.

    Columns whose decomposition type is "additive" are summed; every other
    column is multiplied.
    """
    combined = pd.DataFrame(index=template.index, columns=template.columns)
    for col in template.columns:
        if decomp_types[col] == "additive":
            combined[col] = resid_frame[col] + seasonal_frame[col] + trend_frame[col]
        else:
            combined[col] = resid_frame[col] * seasonal_frame[col] * trend_frame[col]
    return combined


def _kalman_smooth(frame):
    """Fit a level + long-season KalmanSmoother on ``frame`` (one row per column)."""
    smoother = KalmanSmoother(component='level_longseason',
                              component_noise={'level': 0.1, 'longseason': 0.1},
                              n_longseasons=365)
    smoother.smooth(frame.T)
    return smoother


# --- Decompose every column of both windows --------------------------------
(df_decomposed_resid, df_decomposed_trend,
 df_decomposed_seasonal, decomp_type_dict_train) = _decompose_columns(df_filled)

(df_decomposed_resid_test, df_decomposed_trend_test,
 df_decomposed_seasonal_test, decomp_type_dict_test) = _decompose_columns(df_filled_test)

# --- Put the gaps back so that each component can be imputed on its own ----
df_final_resid = df_decomposed_resid.mask(df_nan, np.nan)
df_final_trend = df_decomposed_trend.mask(df_nan, np.nan)
df_final_seasonal = df_decomposed_seasonal.mask(df_nan, np.nan)

df_final_resid_test = df_decomposed_resid_test.mask(df_nan_test, np.nan)
df_final_trend_test = df_decomposed_trend_test.mask(df_nan_test, np.nan)
df_final_seasonal_test = df_decomposed_seasonal_test.mask(df_nan_test, np.nan)

# --- Impute each component with its own method ------------------------------
df_white_noise_train, df_white_noise_test = meanImputation.input(df_final_resid, df_final_resid_test)
df_seasonal_train, df_seasonal_test = splineInterpolation.input(df_final_seasonal, df_final_seasonal_test)
df_trended_train, df_trended_test = forwardFilling.input(df_final_trend, df_final_trend_test)

# --- Recompose the imputed components ---------------------------------------
df_final_train = _recompose(df_white_noise_train, df_seasonal_train, df_trended_train,
                            decomp_type_dict_train, df_filled)
df_final_test = _recompose(df_white_noise_test, df_seasonal_test, df_trended_test,
                           decomp_type_dict_test, df_filled_test)

### USE KALMAN FILTER TO SMOOTH ALL DATA (ONLY VISUALIZATION PURPOSE) ###
# NOTE(review): although the heading says visualization only, the smoothed
# frames below are what gets written to the output CSVs - confirm that this
# is intended.
smoother_train = _kalman_smooth(df_final_train)
smoother_test = _kalman_smooth(df_final_test)

# df_filled / df_filled_test are rebound here to the smoothed results.
df_filled = pd.DataFrame(data=smoother_train.smooth_data.T, index=df_train.index, columns=df_train.columns)
df_filled_test = pd.DataFrame(data=smoother_test.smooth_data.T, index=df_test.index, columns=df_test.columns)

# NOTE(review): the df_filled_test output displayed in this notebook contains
# magnitudes of up to ~1e192, so the smoothed test frame appears to have
# diverged numerically - verify before relying on the exported test CSV.
df_filled.to_csv('output/CompleteCovidTrainDecompKalman_0.csv', index=True)
df_filled_test.to_csv('output/CompleteCovidTestDecompKalman_0.csv', index=True)

multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is n

Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative s

Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative s

multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is n

additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multipl

Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
M

additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multipl

In [5]:
df_filled

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-22 09:00:00,88.260251,38.158827,215.101408,36.509017,5.981773,-31.786029,5.779440,1.028195e-08,0.237501,7.094856,-5.255338e-08,0.518457,0.161525,0.0,0.0
2020-01-22 09:30:00,98.235064,42.537796,238.381402,36.316195,5.944033,-31.520517,5.792284,1.018063e-08,0.238825,7.104483,-5.203129e-08,0.519433,0.163107,0.0,0.0
2020-01-22 10:00:00,113.485095,48.899112,272.064639,35.979167,5.881310,-31.100935,5.798041,9.915459e-09,0.240176,7.102840,-5.124467e-08,0.519689,0.164676,0.0,0.0
2020-01-22 10:30:00,130.665094,55.927338,308.977277,35.551995,5.803061,-30.585890,5.799039,9.560054e-09,0.241510,7.094071,-5.029335e-08,0.519488,0.166229,0.0,0.0
2020-01-22 11:00:00,147.720032,62.811800,344.719279,35.069377,5.715356,-30.012987,5.796636,9.161216e-09,0.242791,7.080835,-4.924141e-08,0.518999,0.167768,0.0,0.0
2020-01-22 11:30:00,163.426708,69.064885,376.665566,34.553579,5.622086,-29.406350,5.791489,8.748424e-09,0.243983,7.064838,-4.812995e-08,0.518330,0.169290,0.0,0.0
2020-01-22 12:00:00,177.101340,74.404800,403.321356,34.018884,5.525749,-28.781443,5.783686,8.339897e-09,0.245049,7.047178,-4.698535e-08,0.517551,0.170797,0.0,0.0
2020-01-22 12:30:00,188.414795,78.679413,423.903032,33.474450,5.427944,-28.148170,5.772767,7.946397e-09,0.245933,7.028554,-4.582450e-08,0.516707,0.172286,0.0,0.0
2020-01-22 13:00:00,197.278587,81.815956,438.060925,32.926143,5.329693,-27.512869,5.757627,7.573548e-09,0.246560,7.009419,-4.465824e-08,0.515826,0.173759,0.0,0.0
2020-01-22 13:30:00,203.778889,83.786562,445.688682,32.377718,5.231651,-26.879584,5.736298,7.223132e-09,0.246812,6.990061,-4.349348e-08,0.514927,0.175213,0.0,0.0


In [6]:
df_filled_test

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-02-26 02:30:00,2.975713e+192,2.509729e+190,1.363150e+191,5.339803e+133,4.661303e+128,4.402854e+128,6.212737e+120,2.878805e+116,-1.174946e+117,-1.428587e+60,1.307947e+56,-9.749614e+57,6.517454e+53,-7.411458e+49,-1.080839e+51
2020-02-26 03:00:00,1.859128e+192,1.567996e+190,8.516512e+190,3.394702e+133,2.963312e+128,2.799009e+128,3.966217e+120,1.837832e+116,-7.500866e+116,-9.119292e+59,8.349197e+55,-6.223604e+57,4.162855e+53,-4.733877e+49,-6.903578e+50
2020-02-26 03:30:00,1.141739e+192,9.629472e+189,5.230212e+190,2.145681e+133,1.872968e+128,1.769120e+128,2.523841e+120,1.169476e+116,-4.773060e+116,-5.802086e+59,5.312119e+55,-3.959725e+57,2.651111e+53,-3.014766e+49,-4.396539e+50
2020-02-26 04:00:00,6.806445e+191,5.740584e+189,3.117977e+190,1.343569e+133,1.172758e+128,1.107733e+128,1.597757e+120,7.403551e+115,-3.021661e+116,-3.672250e+59,3.362141e+55,-2.506185e+57,1.680515e+53,-1.911033e+49,-2.786926e+50
2020-02-26 04:30:00,3.840927e+191,3.239453e+189,1.759497e+190,8.283834e+132,7.230210e+127,6.829326e+127,1.003147e+120,4.648299e+115,-1.897141e+116,-2.304738e+59,2.110111e+55,-1.572905e+57,1.057351e+53,-1.202389e+49,-1.753486e+50
2020-02-26 05:00:00,1.931726e+191,1.629225e+189,8.849075e+189,4.974038e+132,4.340885e+127,4.100202e+127,6.213457e+119,2.879139e+115,-1.175082e+116,-1.426643e+59,1.306167e+55,-9.736347e+56,6.572411e+52,-7.473956e+48,-1.089953e+50
2020-02-26 05:30:00,7.005944e+190,5.908836e+188,3.209362e+189,2.846773e+132,2.483859e+127,2.346140e+127,3.761606e+119,1.743021e+115,-7.113908e+115,-8.627377e+58,7.898823e+54,-5.887889e+56,4.003251e+52,-4.552381e+48,-6.638897e+49
2020-02-26 06:00:00,-9.532721e+189,-8.040009e+187,-4.366901e+188,1.478576e+132,1.289466e+127,1.217971e+127,2.186720e+119,1.013264e+115,-4.135501e+115,-5.005168e+58,4.582498e+54,-3.415855e+56,2.353280e+52,-2.676083e+48,-3.902625e+49
2020-02-26 06:30:00,-6.119721e+190,-5.161406e+188,-2.803398e+189,5.975518e+131,5.203548e+126,4.915034e+126,1.174697e+119,5.443211e+114,-2.221574e+115,-2.677428e+58,2.451328e+54,-1.827253e+56,1.293276e+52,-1.470678e+48,-2.144741e+49
2020-02-26 07:00:00,-9.494605e+190,-8.007797e+188,-4.349405e+189,2.912498e+130,2.412704e+125,2.278930e+125,5.238591e+118,2.427414e+114,-9.907164e+114,-1.180340e+58,1.080664e+54,-8.055413e+55,6.118534e+51,-6.957834e+47,-1.014685e+49
