In [1]:
from imputationLibrary import decompose, forwardFilling, hotDeck, meanImputation, movingAverage, splineInterpolation, randomSampleImputation, nature
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from sklearn import preprocessing
from datetime import datetime, timedelta

In [2]:
# Presumably the extension-less base path of the input dataset; it is not
# referenced by any of the cells visible here -- TODO confirm it is still needed.
# Written as a raw string so the backslash is kept literally instead of relying
# on the invalid escape sequence "\C" (which newer Python versions warn about).
# The resulting value is unchanged.
# NOTE(review): the mixed "/\" separator only resolves as a directory boundary
# on Windows -- confirm before using this on another platform.
INPUT = r'output/\CompleteCovid'

In [3]:
# Load the complete series; the first CSV column becomes the index and is
# parsed as dates. The path uses forward slashes only, which resolve on POSIX
# systems as well as on Windows (the previous "output\/" form treats the
# backslash as a separator on Windows only).
df = pd.read_csv('output/CompleteCovid.csv', index_col=0, parse_dates=True)

# Training window; columns that are entirely NaN inside this window are dropped.
df_train = df.loc['2020-01-22 09:00:00':'2020-02-26 02:00:00'].dropna(how='all', axis=1)

# Test window: everything from 02:30 on 2020-02-26 onwards, restricted to the
# columns that survived in the training frame.
df_test = df.loc['2020-02-26 02:30:00':, df_train.columns]

# Period handed to the decomposition and to the trend checks further down.
period=30

# Missing values are replaced by a tiny positive placeholder before decomposing;
# presumably so the decomposition never receives NaN -- TODO confirm the choice
# of 1e-6 rather than 0.
df_filled = df_train.fillna(0.000001)
df_filled_test = df_test.fillna(0.000001)

In [4]:
# Empty frames that later cells fill column by column with the residual,
# trend and seasonal components of the training data ...
df_decomposed_resid, df_decomposed_trend, df_decomposed_seasonal = (
    pd.DataFrame() for _ in range(3)
)
# ... and the same three containers for the test data.
df_decomposed_resid_test, df_decomposed_trend_test, df_decomposed_seasonal_test = (
    pd.DataFrame() for _ in range(3)
)

# Boolean masks of the positions that are missing in the train / test frames.
df_nan, df_nan_test = df_train.isna(), df_test.isna()

In [5]:
def plot_ac(df, name):
    """Plot the auto-correlation of ``df`` with two dotted reference lines.

    Parameters
    ----------
    df :
        Object supporting ``astype``, ``fillna`` and ``len`` (the calls in this
        notebook pass decomposition components).
    name : str
        Prefix of the figure title (``"<name> auto-correlation"``).

    The values are cast to float, missing entries are replaced by 0, the data
    is normalised with ``preprocessing.normalize`` (default settings) and then
    correlated with itself in ``'full'`` mode. Horizontal lines are drawn at
    ``+2/sqrt(len(df))`` and ``-2/sqrt(len(df))``. The figure is shown
    immediately; nothing is returned.
    """
    values = df.astype(float)

    # Disabled alternative scaling, kept for reference:
    #scaled = preprocessing.StandardScaler().fit_transform([np.array(df.fillna(0))])

    # normalize works on 2-D input, so the data is wrapped into a single row.
    single_row = np.array([np.array(values.fillna(0))])
    unit_row = preprocessing.normalize(single_row)
    autocorr = signal.correlate(unit_row, unit_row, mode='full')

    plt.plot(autocorr[0], 'o-', markersize=2)

    bound = 2/np.sqrt(len(values))
    for level in (bound, -bound):
        plt.axhline(level, ls=':')

    plt.title(name + ' auto-correlation')
    plt.show()

In [6]:
def decompose_and_plot(ts, flag_plot = False):
    """Decompose ``ts`` and optionally show diagnostics for its components.

    ``ts`` is passed to ``decompose.decompose`` together with the notebook-level
    ``period``. The decomposition type it reports is always printed.

    When ``flag_plot`` is true, the decomposition is plotted, the result of
    each of the four ``nature`` checks on the residual is printed, and the
    auto-correlation of the residual, trend and seasonal components is plotted
    with ``plot_ac``.

    Returns
    -------
    tuple
        ``(resid, trend, seasonal, decomp_type)``.
    """
    parts, kind = decompose.decompose(ts, period)
    print(kind)

    if flag_plot:
        parts.plot()
        plt.show()

        # Each check is deferred behind a lambda so it is evaluated right
        # before its own line is printed, in the order listed here.
        residual_checks = (
            ("Resid is white noise? ", lambda: nature.isWhiteNoise(parts.resid)),
            ("Resid is seasonal noise? ", lambda: nature.isSeasonal(parts.resid)),
            ("Resid is trended noise? ", lambda: nature.isTrended(parts.resid, period)),
            ("Resid is seasonal and trended noise? ", lambda: nature.isTrendedAndSeasonal(parts.resid, period)),
        )
        for label, run_check in residual_checks:
            print(label, run_check())

        # One auto-correlation figure per component.
        for title, attribute in (("Resid", "resid"), ("Trend", "trend"), ("Seasonal", "seasonal")):
            plot_ac(getattr(parts, attribute), title)

    return parts.resid, parts.trend, parts.seasonal, kind
    

In [7]:
def _decompose_columns(frame, resid_out, trend_out, seasonal_out):
    """Decompose every column of ``frame`` and store the parts column-wise.

    Each column is passed to ``decompose_and_plot``; the residual, trend and
    seasonal parts it returns are written into the column of the same name in
    ``resid_out``, ``trend_out`` and ``seasonal_out`` (all mutated in place).

    Returns a dict mapping each column name to the decomposition type reported
    for that column.
    """
    decomp_types = {}
    for col in frame.columns:
        resid, trend, seasonal, decomp_type = decompose_and_plot(frame.loc[:, col])
        decomp_types[col] = decomp_type
        resid_out[col] = resid
        trend_out[col] = trend
        seasonal_out[col] = seasonal
    return decomp_types


def _recombine(decomp_types, resid, seasonal, trend, template):
    """Rebuild each column from its three components.

    Columns whose entry in ``decomp_types`` is ``"additive"`` are the sum of
    the components; every other column is their product. The result has the
    index and columns of ``template``.
    """
    combined = pd.DataFrame(index = template.index, columns = template.columns)
    for col in template.columns:
        if decomp_types[col] == "additive":
            combined[col] = resid[col] + seasonal[col] + trend[col]
        else:
            combined[col] = resid[col] * seasonal[col] * trend[col]
    return combined


# Decompose the placeholder-filled train and test frames column by column.
decomp_type_dict_train = _decompose_columns(
    df_filled, df_decomposed_resid, df_decomposed_trend, df_decomposed_seasonal)
decomp_type_dict_test = _decompose_columns(
    df_filled_test, df_decomposed_resid_test, df_decomposed_trend_test, df_decomposed_seasonal_test)

# Put NaN back wherever the source frames had a missing value, so that those
# positions (placeholder-filled for the decomposition) are what gets imputed.
# df_nan / df_nan_test are already boolean masks, so no "== True" is needed.
df_final_resid = df_decomposed_resid.mask(df_nan, np.nan)
df_final_trend = df_decomposed_trend.mask(df_nan, np.nan)
df_final_seasonal = df_decomposed_seasonal.mask(df_nan, np.nan)

df_final_resid_test = df_decomposed_resid_test.mask(df_nan_test, np.nan)
df_final_trend_test = df_decomposed_trend_test.mask(df_nan_test, np.nan)
df_final_seasonal_test = df_decomposed_seasonal_test.mask(df_nan_test, np.nan)

# Each component goes through its own imputation module; every call takes the
# (train, test) pair and returns a (train, test) pair.
df_white_noise_train, df_white_noise_test =  meanImputation.input(df_final_resid, df_final_resid_test)
df_seasonal_train, df_seasonal_test = splineInterpolation.input(df_final_seasonal, df_final_seasonal_test)
df_trended_train, df_trended_test = forwardFilling.input(df_final_trend, df_final_trend_test)

# Recombine the imputed components according to each column's decomposition type.
df_final_train = _recombine(
    decomp_type_dict_train, df_white_noise_train, df_seasonal_train, df_trended_train, df_filled)
df_final_test = _recombine(
    decomp_type_dict_test, df_white_noise_test, df_seasonal_test, df_trended_test, df_filled_test)

# Forward slashes only: the previous "output\/" form resolves on Windows but
# not on POSIX systems, where the backslash is an ordinary filename character.
df_final_train.to_csv('output/CompleteCovidTrain_decomp_0.csv', index = True)
df_final_test.loc['2020-02-26 02:30:00':, df_train.columns].to_csv('output/CompleteCovidTest_decomp_0.csv', index = True)

multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is not appropriate for zero and negative values
additive
multiplicative
Multiplicative seasonality is not appropriate for zero and negative values
additive
Multiplicative seasonality is n

In [8]:
# Display the recombined test frame (sum or product of the imputed residual,
# seasonal and trend components) as this cell's output.
df_final_test

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-02-26 02:30:00,-1.104570e+05,-2.149915e+03,-1.139766e+04,7.707450e+03,2.928263e+00,4.636181e+00,-4.458158e+02,1.768513e-06,1.108985e+00,-6.692082e+00,4.842411e-07,-3.938930e-01,1.773743e+00,-6.773488e-03,-8.254515e-03
2020-02-26 03:00:00,-2.148418e+05,-4.348967e+03,-2.330555e+04,9.378470e+03,3.405470e+00,5.103202e+00,-4.665727e+02,1.851475e-06,1.151407e+00,-6.766532e+00,4.895899e-07,-4.031777e-01,1.775192e+00,-6.786172e-03,-8.269973e-03
2020-02-26 03:30:00,7.463075e+03,1.508042e+02,1.205594e+03,1.127169e+04,3.951992e+00,5.631093e+00,-4.879409e+02,1.936873e-06,1.195159e+00,-6.841462e+00,4.949736e-07,-4.125190e-01,1.776641e+00,-6.798857e-03,-8.285431e-03
2020-02-26 04:00:00,1.024162e+06,2.101338e+04,1.146663e+05,1.340050e+04,4.572150e+00,6.223483e+00,-5.099290e+02,2.024742e-06,1.240262e+00,-6.916873e+00,5.003923e-07,-4.219171e-01,1.778090e+00,-6.811541e-03,-8.300888e-03
2020-02-26 04:30:00,3.302960e+06,6.790276e+04,3.696073e+05,1.577829e+04,5.270268e+00,6.884003e+00,-5.325456e+02,2.115115e-06,1.286734e+00,-6.992765e+00,5.058463e-07,-4.313721e-01,1.779539e+00,-6.824225e-03,-8.316346e-03
2020-02-26 05:00:00,7.176976e+03,1.716397e+02,2.004856e+03,1.841843e+04,6.050667e+00,7.616283e+00,-5.557994e+02,2.208027e-06,1.334595e+00,-7.069142e+00,5.113355e-07,-4.408843e-01,1.780988e+00,-6.836910e-03,-8.331804e-03
2020-02-26 05:30:00,-1.765707e+07,-3.631030e+05,-1.970959e+06,2.133432e+04,6.917669e+00,8.423953e+00,-5.796990e+02,2.303513e-06,1.383864e+00,-7.146003e+00,5.168602e-07,-4.504537e-01,1.782437e+00,-6.849594e-03,-8.347262e-03
2020-02-26 06:00:00,-5.848367e+07,-1.202844e+06,-6.532104e+06,2.453933e+04,7.875596e+00,9.310643e+00,-6.042530e+02,2.401606e-06,1.434561e+00,-7.223350e+00,5.224203e-07,-4.600805e-01,1.783886e+00,-6.862279e-03,-8.362720e-03
2020-02-26 06:30:00,-1.312665e+08,-2.699975e+06,-1.466425e+07,2.804686e+04,8.928770e+00,1.027998e+01,-6.294700e+02,2.502340e-06,1.486706e+00,-7.301185e+00,5.280160e-07,-4.697650e-01,1.785334e+00,-6.874963e-03,-8.378178e-03
2020-02-26 07:00:00,1.973353e+03,3.382375e+01,-3.137552e+01,3.187029e+04,1.008151e+01,1.133560e+01,-6.553586e+02,2.605751e-06,1.540317e+00,-7.379510e+00,5.336475e-07,-4.795072e-01,1.786783e+00,-6.887648e-03,-8.393636e-03


In [9]:
# Display the recombined training frame (sum or product of the imputed
# residual, seasonal and trend components) as this cell's output.
df_final_train

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-22 09:00:00,35.078071,17.721425,1.054870e+02,37.620183,6.175199,-32.981784,5.832064,1.167328e-08,2.370044e-01,7.180964,-5.458916e-08,0.523936,,,
2020-01-22 09:30:00,71.789216,32.604766,1.862203e+02,37.006575,6.063852,-32.260112,5.826093,1.099566e-08,2.386452e-01,7.158330,-5.329006e-08,0.522863,,,
2020-01-22 10:00:00,103.732815,45.529847,2.557015e+02,36.399882,5.953964,-31.548267,5.820228,1.036299e-08,2.402217e-01,7.136006,-5.200555e-08,0.521805,,,
2020-01-22 10:30:00,131.150532,56.596364,3.145348e+02,35.800069,5.845529,-30.846182,5.814469,9.774438e-09,2.417349e-01,7.113989,-5.073553e-08,0.520762,,,
2020-01-22 11:00:00,154.284031,65.904013,3.633247e+02,35.207098,5.738535,-30.153793,5.808815,9.229134e-09,2.431859e-01,7.092279,-4.947993e-08,0.519734,,,
2020-01-22 11:30:00,173.374975,73.552490,4.026753e+02,34.620934,5.632974,-29.471032,5.803265,8.726224e-09,2.445756e-01,7.070873,-4.823868e-08,0.518721,,,
2020-01-22 12:00:00,188.665028,79.641492,4.331912e+02,34.041540,5.528836,-28.797835,5.797819,8.264849e-09,2.459053e-01,7.049769,-4.701168e-08,0.517723,,,
2020-01-22 12:30:00,200.395854,84.270714,4.554765e+02,33.468881,5.426111,-28.134136,5.792476,7.844152e-09,2.471758e-01,7.028965,-4.579887e-08,0.516740,,,
2020-01-22 13:00:00,208.809118,87.539854,4.701356e+02,32.902919,5.324791,-27.479868,5.787236,7.463276e-09,2.483883e-01,7.008460,-4.460015e-08,0.515772,,,
2020-01-22 13:30:00,214.146482,89.548607,4.777729e+02,32.343620,5.224867,-26.834966,5.782098,7.121361e-09,2.495438e-01,6.988251,-4.341546e-08,0.514818,,,
