In [6]:
from imputationLibrary import decompose, forwardFilling, hotDeck, meanImputation, movingAverage, splineInterpolation, randomSampleImputation, nature
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from sklearn import preprocessing
from datetime import datetime, timedelta

In [7]:
# Base path (no extension) of the complete COVID data set.
# Written as a raw string: the backslash is a literal path character, and the
# non-raw form relied on the invalid escape sequence "\C", which Python reports
# as a DeprecationWarning/SyntaxWarning. The resulting value is unchanged.
# NOTE(review): no cell visible in this notebook reads INPUT - confirm it is still needed.
INPUT = r'output/\CompleteCovid'

In [26]:
# Load the complete data set; column 0 becomes the index and is parsed as dates.
df = pd.read_csv(
    r'output\/CompleteCovid.csv',
    index_col=0,
    parse_dates=True,
)

# Period passed to the additive decomposition and to the trend checks.
period = 30

# Training window: rows between the two timestamps (inclusive label slice),
# keeping only the columns that hold at least one non-NaN value in that window.
train_window = df.loc['2020-01-22 09:00:00':'2020-02-26 02:00:00']
df_train = train_window.dropna(how='all', axis=1)

# Test window: every row from 02:30 on 2020-02-26 onwards, limited to the
# columns that survived the training-column filter above.
df_test = df.loc['2020-02-26 02:30:00':, df_train.columns]

# Zero-filled copies of both splits (missing values replaced by 0).
df_filled, df_filled_test = df_train.fillna(0), df_test.fillna(0)

In [27]:
# Empty frames that will receive one decomposition component per column
# (residual, trend, seasonal) for the training split ...
df_decomposed_resid, df_decomposed_trend, df_decomposed_seasonal = (
    pd.DataFrame() for _ in range(3)
)

# ... and the same three containers for the test split.
df_decomposed_resid_test, df_decomposed_trend_test, df_decomposed_seasonal_test = (
    pd.DataFrame() for _ in range(3)
)

# Boolean masks marking where the original (un-filled) data is missing.
df_nan, df_nan_test = df_train.isna(), df_test.isna()

In [28]:
def plot_ac(df, name):
    """Plot the full self-correlation of a series together with +/-2/sqrt(n) guides.

    Parameters
    ----------
    df : 1-D pandas object (presumably a Series - confirm against callers)
        Values to correlate with themselves. They are cast to float and
        missing entries are replaced by 0 before processing.
    name : str
        Prefix for the figure title (``name + ' auto-correlation'``).

    Notes
    -----
    The values are placed in a single-row 2-D array, scaled with
    ``preprocessing.normalize`` (default settings) and correlated with
    themselves in ``'full'`` mode; no mean is subtracted beforehand. The
    x axis is the position in the correlation output, not a signed lag.
    The function shows the figure and returns nothing.
    """
    values = df.astype(float).fillna(0)
    # normalize() works on 2-D input, hence the wrapping into one row.
    row = np.array([np.array(values)])
    unit_row = preprocessing.normalize(row)
    self_corr = signal.correlate(unit_row, unit_row, mode='full')

    bound = 2/np.sqrt(len(df))
    plt.plot(self_corr[0], 'o-', markersize=2)
    for level in (bound, -bound):
        plt.axhline(level, ls=':')
    plt.title(name + ' auto-correlation')
    plt.show()

In [29]:
def decompose_and_plot(ts, flag_plot = False):
    """Additively decompose ``ts`` and optionally report on the components.

    Parameters
    ----------
    ts : series passed straight to ``decompose.additive``.
    flag_plot : bool, default False
        When true, plot the decomposition, print the four ``nature`` checks
        on the residual component and draw the ``plot_ac`` figure for the
        residual, trend and seasonal components.

    Returns
    -------
    tuple
        ``(resid, trend, seasonal)`` taken from the decomposition result.

    Notes
    -----
    Reads the module-level ``period`` variable; it must be defined before
    this function is called.
    """
    decomposition = decompose.additive(ts, period)

    if flag_plot:
        decomposition.plot()
        plt.show()

        # Diagnostics on the residual component.
        print("Resid is white noise? ", nature.isWhiteNoise(decomposition.resid))
        print("Resid is seasonal noise? ", nature.isSeasonal(decomposition.resid))
        print("Resid is trended noise? ", nature.isTrended(decomposition.resid, period))
        print("Resid is seasonal and trended noise? ", nature.isTrendedAndSeasonal(decomposition.resid, period))

        # One correlation figure per component, in resid / trend / seasonal order.
        labelled_components = (
            ("Resid", decomposition.resid),
            ("Trend", decomposition.trend),
            ("Seasonal", decomposition.seasonal),
        )
        for label, component in labelled_components:
            plot_ac(component, label)

    return decomposition.resid, decomposition.trend, decomposition.seasonal
    

In [30]:
# Decompose every column of the zero-filled train and test frames and store
# the three components in their matching containers (train first, then test).
decomposition_targets = (
    (df_filled, df_decomposed_resid, df_decomposed_trend, df_decomposed_seasonal),
    (df_filled_test, df_decomposed_resid_test, df_decomposed_trend_test, df_decomposed_seasonal_test),
)
for source, resid_out, trend_out, seasonal_out in decomposition_targets:
    for col in source.columns:
        resid, trend, seasonal = decompose_and_plot(source.loc[:, col])
        resid_out[col] = resid
        trend_out[col] = trend
        seasonal_out[col] = seasonal

# Put NaN back wherever the original data was missing, so the zero-filled
# positions are treated as gaps by the imputers below.
df_final_resid = df_decomposed_resid.mask(df_nan, np.nan)
df_final_trend = df_decomposed_trend.mask(df_nan, np.nan)
df_final_seasonal = df_decomposed_seasonal.mask(df_nan, np.nan)

df_final_resid_test = df_decomposed_resid_test.mask(df_nan_test, np.nan)
df_final_trend_test = df_decomposed_trend_test.mask(df_nan_test, np.nan)
df_final_seasonal_test = df_decomposed_seasonal_test.mask(df_nan_test, np.nan)

# Impute each component with its own method; every call returns a
# (train, test) pair: mean for the residual, spline for the seasonal part,
# forward fill for the trend.
df_white_noise_train, df_white_noise_test = meanImputation.input(
    df_final_resid, df_final_resid_test
)
df_seasonal_train, df_seasonal_test = splineInterpolation.input(
    df_final_seasonal, df_final_seasonal_test
)
df_trended_train, df_trended_test = forwardFilling.input(
    df_final_trend, df_final_trend_test
)

# Additive recomposition: residual + seasonal + trend.
df_final_train = df_white_noise_train + df_seasonal_train + df_trended_train
df_final_test = df_white_noise_test + df_seasonal_test + df_trended_test

# Export both reconstructed frames; the test export is limited to the test
# window and to the training columns.
df_final_train.to_csv(r'output\/CompleteCovidTrain_decomp_add_0.csv', index = True)
test_export = df_final_test.loc['2020-02-26 02:30:00':, df_train.columns]
test_export.to_csv(r'output\/CompleteCovidTest_decomp_add_0.csv', index = True)

In [31]:
# Show the reconstructed test frame (imputed residual + seasonal + trend) as the cell output.
df_final_test

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-02-26 02:30:00,-6.440632e+04,-2.149915e+03,-1.139766e+04,1.155122e+02,2.928264e+00,4.636182e+00,-4.755629e+01,0.000000e+00,1.108984e+00,1.838061e+00,0.000000e+00,-3.938935e-01,1.260508e+00,-6.773453e-03,-8.254480e-03
2020-02-26 03:00:00,-1.300164e+05,-4.348967e+03,-2.330555e+04,1.337625e+02,3.405472e+00,5.103204e+00,-5.010207e+01,0.000000e+00,1.151405e+00,1.786742e+00,0.000000e+00,-4.031782e-01,1.260996e+00,-6.786137e-03,-8.269938e-03
2020-02-26 03:30:00,4.444063e+03,1.508042e+02,1.205594e+03,1.546223e+02,3.951994e+00,5.631094e+00,-5.272282e+01,0.000000e+00,1.195157e+00,1.735094e+00,0.000000e+00,-4.125195e-01,1.261484e+00,-6.798821e-03,-8.285395e-03
2020-02-26 04:00:00,6.274355e+05,2.101338e+04,1.146663e+05,1.782534e+02,4.572153e+00,6.223485e+00,-5.541960e+01,0.000000e+00,1.240260e+00,1.683114e+00,0.000000e+00,-4.219176e-01,1.261972e+00,-6.811506e-03,-8.300853e-03
2020-02-26 04:30:00,2.027418e+06,6.790276e+04,3.696073e+05,2.048175e+02,5.270271e+00,6.884006e+00,-5.819347e+01,0.000000e+00,1.286732e+00,1.630801e+00,0.000000e+00,-4.313727e-01,1.262460e+00,-6.824190e-03,-8.316311e-03
2020-02-26 05:00:00,4.930856e+03,1.716397e+02,2.004856e+03,2.344765e+02,6.050670e+00,7.616286e+00,-6.104548e+01,0.000000e+00,1.334592e+00,1.578156e+00,0.000000e+00,-4.408848e-01,1.262948e+00,-6.836874e-03,-8.331769e-03
2020-02-26 05:30:00,-1.084202e+07,-3.631030e+05,-1.970959e+06,2.673920e+02,6.917672e+00,8.423957e+00,-6.397670e+01,0.000000e+00,1.383862e+00,1.525176e+00,0.000000e+00,-4.504542e-01,1.263436e+00,-6.849559e-03,-8.347227e-03
2020-02-26 06:00:00,-3.591544e+07,-1.202844e+06,-6.532104e+06,3.037259e+02,7.875600e+00,9.310647e+00,-6.698818e+01,0.000000e+00,1.434559e+00,1.471861e+00,0.000000e+00,-4.600811e-01,1.263923e+00,-6.862243e-03,-8.362684e-03
2020-02-26 06:30:00,-8.061732e+07,-2.699975e+06,-1.466425e+07,3.436399e+02,8.928775e+00,1.027999e+01,-7.008097e+01,0.000000e+00,1.486704e+00,1.418210e+00,0.000000e+00,-4.697655e-01,1.264411e+00,-6.874928e-03,-8.378142e-03
2020-02-26 07:00:00,1.623124e+03,3.382375e+01,-3.137552e+01,3.872959e+02,1.008152e+01,1.133561e+01,-7.325614e+01,0.000000e+00,1.540315e+00,1.364221e+00,0.000000e+00,-4.795077e-01,1.264899e+00,-6.887612e-03,-8.393600e-03


In [32]:
# Show the reconstructed training frame (imputed residual + seasonal + trend) as the cell output.
df_final_train

Unnamed: 0_level_0,Asia_confirmed_cases,Asia_deaths,Asia_recovered,Europe_confirmed_cases,Europe_deaths,Europe_recovered,Americas_confirmed_cases,Americas_deaths,Americas_recovered,Oceania_confirmed_cases,Oceania_deaths,Oceania_recovered,Africa_confirmed_cases,Africa_deaths,Africa_recovered
update_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-22 09:00:00,778.952840,17.721424,1.054870e+02,148.621111,6.175199,-32.981784,5.832064,0.0,0.237004,7.180964,0.0,0.523936,,,
2020-01-22 09:30:00,1332.359900,32.604766,1.862203e+02,146.247203,6.063851,-32.260113,5.826093,0.0,0.238645,7.158330,0.0,0.522863,,,
2020-01-22 10:00:00,1812.303090,45.529847,2.557015e+02,143.903831,5.953964,-31.548267,5.820228,0.0,0.240222,7.136006,0.0,0.521805,,,
2020-01-22 10:30:00,2222.562387,56.596364,3.145348e+02,141.590802,5.845528,-30.846183,5.814469,0.0,0.241735,7.113989,0.0,0.520762,,,
2020-01-22 11:00:00,2566.917770,65.904013,3.633247e+02,139.307924,5.738534,-30.153793,5.808815,0.0,0.243186,7.092279,0.0,0.519734,,,
2020-01-22 11:30:00,2849.149217,73.552490,4.026753e+02,137.055006,5.632973,-29.471033,5.803265,0.0,0.244576,7.070873,0.0,0.518721,,,
2020-01-22 12:00:00,3073.036707,79.641491,4.331912e+02,134.831856,5.528835,-28.797835,5.797819,0.0,0.245905,7.049769,0.0,0.517723,,,
2020-01-22 12:30:00,3242.360218,84.270714,4.554765e+02,132.638283,5.426111,-28.134136,5.792476,0.0,0.247176,7.028965,0.0,0.516740,,,
2020-01-22 13:00:00,3360.899728,87.539854,4.701356e+02,130.474094,5.324791,-27.479868,5.787236,0.0,0.248388,7.008460,0.0,0.515772,,,
2020-01-22 13:30:00,3432.435216,89.548607,4.777729e+02,128.339099,5.224866,-26.834967,5.782098,0.0,0.249544,6.988251,0.0,0.514818,,,
