In [1]:
import os

import numpy as np
import pandas as pd
import sqlalchemy as sa

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Connection settings come from the environment so that no password is stored
# in (or leaked through) the notebook file.
#   ORACLE_USER      optional, defaults to the case-sensitive account "MHaleta"
#   ORACLE_PASSWORD  required
#   ORACLE_DSN       optional, defaults to localhost:1521/xe
# NOTE: a password containing URL-special characters (e.g. '@', '/', ':') must
# be URL-encoded before it is placed in the environment variable.
db_user = os.environ.get('ORACLE_USER', '"MHaleta"')
db_password = os.environ.get('ORACLE_PASSWORD')
if not db_password:
    raise RuntimeError('Set the ORACLE_PASSWORD environment variable before running this notebook.')
db_dsn = os.environ.get('ORACLE_DSN', 'localhost:1521/xe')

engine = sa.create_engine(
    'oracle://{}:{}@{}'.format(db_user, db_password, db_dsn),
    max_identifier_length=128,
)

In [3]:
# Daily new cases for Ukraine together with the movement changes from baseline
# for each category, ordered by date and starting on 1 May 2020.
# The left join keeps case rows that have no matching movement record; their
# movement columns come back as NULL/NaN.
# The cut-off is written as an ANSI date literal so the comparison does not
# depend on the session's NLS_DATE_FORMAT (the previous '01.05.2020' string was
# converted implicitly). Assumes cov."Date" is a DATE/TIMESTAMP column, as the
# date-typed output suggests -- TODO confirm against the table definition.
query = """
    select
        cov."Date",
        cov."New_Cases",
        m."Retail_and_Recreation",
        m."Grocery_and_Pharmacy",
        m."Parks",
        m."Transit_Stations",
        m."Workplaces",
        m."Residentials"
    from
        "Covid19_data" cov
        inner join "Countries" c on cov."ISO3_Code" = c."ISO3_Code"
        left join "Movements_Changes_from_Baseline" m on c."ISO2_Code" = m."ISO2_Code" and cov."Date" = m."Date"
    where
        cov."ISO3_Code" = 'UKR'
        and cov."Date" >= date '2020-05-01'
    order by cov."Date"
"""
df = pd.read_sql(query, engine)

In [4]:
# Raise pandas' row-display limit to 200 before showing the query result.
pd.options.display.max_rows = 200
df

Unnamed: 0,Date,New_Cases,Retail_and_Recreation,Grocery_and_Pharmacy,Parks,Transit_Stations,Workplaces,Residentials
0,2020-05-01,455,-46.0,-18.0,15.0,-46.0,-66.0,15.0
1,2020-05-02,550,-50.0,-24.0,9.0,-43.0,-32.0,5.0
2,2020-05-03,502,-50.0,-22.0,-3.0,-43.0,-22.0,3.0
3,2020-05-04,418,-39.0,-17.0,2.0,-42.0,-40.0,12.0
4,2020-05-05,366,-38.0,-15.0,4.0,-41.0,-40.0,12.0
5,2020-05-06,487,-37.0,-12.0,0.0,-40.0,-40.0,12.0
6,2020-05-07,507,-37.0,-11.0,1.0,-39.0,-40.0,12.0
7,2020-05-08,504,-35.0,-6.0,5.0,-36.0,-38.0,12.0
8,2020-05-09,515,-42.0,-10.0,25.0,-34.0,-27.0,3.0
9,2020-05-10,522,-40.0,-10.0,27.0,-32.0,-13.0,1.0


In [5]:
# Target series: the second column of the query result (New_Cases) as a 1-D array.
ds_not_scaled = df.iloc[:, 1].values

# Min-max scale the series to [0, 1]. The scaler works on 2-D input, so the
# series is passed as a single column and flattened back afterwards.
scaler = MinMaxScaler(feature_range=(0, 1))
new_cases_column = df.iloc[:, 1].values.reshape(-1, 1)
ds_scaled = scaler.fit_transform(new_cases_column).ravel()

In [21]:
def make_windows(series, n_lags=8, n_ahead=7):
    """Cut a 1-D series into (input, target) windows for supervised learning.

    Row ``i`` of ``X`` is ``series[i : i + n_lags]`` and row ``i`` of ``y`` is
    the ``n_ahead`` values that immediately follow it,
    ``series[i + n_lags : i + n_lags + n_ahead]``.

    Parameters
    ----------
    series : array-like, 1-D
        Values in chronological order.
    n_lags : int, default 8
        Number of consecutive past values in each input row.
    n_ahead : int, default 7
        Number of consecutive future values in each target row.

    Returns
    -------
    X : ndarray, shape (n_samples, n_lags)
    y : ndarray, shape (n_samples, n_ahead)
        ``n_samples = len(series) - n_lags - n_ahead + 1``; the dtype of
        ``series`` is preserved.

    Raises
    ------
    ValueError
        If the series is too short to yield a single complete window.
    """
    series = np.asarray(series)
    n_samples = len(series) - n_lags - n_ahead + 1
    if n_samples < 1:
        raise ValueError(
            'series of length {} is too short for n_lags={} and n_ahead={}'.format(
                len(series), n_lags, n_ahead
            )
        )

    # Column vector of window start positions; adding a row of offsets
    # broadcasts to a full (n_samples, width) index matrix.
    starts = np.arange(n_samples)[:, np.newaxis]
    X = series[starts + np.arange(n_lags)]
    y = series[starts + n_lags + np.arange(n_ahead)]
    return X, y


# 8 consecutive days in, the following 7 days out. The raw and the scaled
# series are windowed identically so that their rows stay aligned.
X_not_scaled, y_not_scaled = make_windows(ds_not_scaled)
X_scaled, y_scaled = make_windows(ds_scaled)

In [22]:
# Chronological hold-out: the first 80% of the windows are used for training
# and the remaining, most recent 20% for testing (no shuffling).
# All four arrays have one row per window, so a single cut index keeps the
# raw/scaled inputs and targets aligned.
assert X_not_scaled.shape[0] == y_not_scaled.shape[0] == X_scaled.shape[0] == y_scaled.shape[0]

train_fraction = 0.8
n_train = int(X_scaled.shape[0] * train_fraction)

X_train_not_scaled, X_test_not_scaled = X_not_scaled[:n_train, :], X_not_scaled[n_train:, :]
y_train_not_scaled, y_test_not_scaled = y_not_scaled[:n_train, :], y_not_scaled[n_train:, :]

X_train_scaled, X_test_scaled = X_scaled[:n_train, :], X_scaled[n_train:, :]
y_train_scaled, y_test_scaled = y_scaled[:n_train, :], y_scaled[n_train:, :]

In [23]:
# The LSTM layer takes 3-D input (samples, timesteps, features). Each window is
# presented as one timestep whose features are the lag values.
# The feature count is read from the last axis, so re-running this cell on
# arrays that are already 3-D leaves them unchanged instead of failing.
X_train_not_scaled = X_train_not_scaled.reshape(X_train_not_scaled.shape[0], 1, X_train_not_scaled.shape[-1])
X_test_not_scaled = X_test_not_scaled.reshape(X_test_not_scaled.shape[0], 1, X_test_not_scaled.shape[-1])

X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[-1])
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[-1])

In [65]:
def loss_func(y_true, y_pred):
    """Return the sum, over samples, of each sample's mean squared error.

    Both arguments are 2-D arrays of the same shape with one row per sample.
    The squared error is averaged along axis 1 (within a row) and the per-row
    averages are then added together, so the result grows with the number of
    rows and is expressed in squared units of the inputs.
    """
    residuals = y_true - y_pred
    per_sample_mse = np.mean(np.square(residuals), axis=1)
    return np.sum(per_sample_mse)

In [54]:
# One LSTM layer (50 units) followed by a linear Dense layer with one output
# per forecast day. Input and output sizes are both taken from the training
# arrays so the network always matches the window layout.
n_timesteps, n_features = X_train_scaled.shape[1:]
n_outputs = y_train_scaled.shape[1]

model = Sequential()
model.add(LSTM(50, input_shape=(n_timesteps, n_features)))
model.add(Dense(n_outputs))
model.compile(loss="mean_squared_error", optimizer='adam')
model.fit(X_train_scaled, y_train_scaled, epochs=500, batch_size=7, verbose=2)

Epoch 1/500
135/135 - 3s - loss: 0.0197
Epoch 2/500
135/135 - 0s - loss: 0.0104
Epoch 3/500
135/135 - 0s - loss: 0.0051
Epoch 4/500
135/135 - 0s - loss: 0.0035
Epoch 5/500
135/135 - 0s - loss: 0.0026
Epoch 6/500
135/135 - 0s - loss: 0.0019
Epoch 7/500
135/135 - 0s - loss: 0.0014
Epoch 8/500
135/135 - 0s - loss: 0.0011
Epoch 9/500
135/135 - 0s - loss: 9.3774e-04
Epoch 10/500
135/135 - 0s - loss: 8.4931e-04
Epoch 11/500
135/135 - 0s - loss: 7.8958e-04
Epoch 12/500
135/135 - 0s - loss: 7.7149e-04
Epoch 13/500
135/135 - 0s - loss: 7.5849e-04
Epoch 14/500
135/135 - 0s - loss: 7.4847e-04
Epoch 15/500
135/135 - 0s - loss: 7.4757e-04
Epoch 16/500
135/135 - 0s - loss: 7.4248e-04
Epoch 17/500
135/135 - 0s - loss: 7.3490e-04
Epoch 18/500
135/135 - 0s - loss: 7.2729e-04
Epoch 19/500
135/135 - 0s - loss: 7.2821e-04
Epoch 20/500
135/135 - 0s - loss: 7.1679e-04
Epoch 21/500
135/135 - 0s - loss: 7.1811e-04
Epoch 22/500
135/135 - 0s - loss: 7.1222e-04
Epoch 23/500
135/135 - 0s - loss: 7.0457e-04
Epoch 

135/135 - 0s - loss: 4.0723e-04
Epoch 183/500
135/135 - 0s - loss: 4.2154e-04
Epoch 184/500
135/135 - 0s - loss: 4.2592e-04
Epoch 185/500
135/135 - 0s - loss: 4.0089e-04
Epoch 186/500
135/135 - 0s - loss: 4.1386e-04
Epoch 187/500
135/135 - 0s - loss: 4.1408e-04
Epoch 188/500
135/135 - 0s - loss: 4.1629e-04
Epoch 189/500
135/135 - 0s - loss: 4.1036e-04
Epoch 190/500
135/135 - 0s - loss: 4.2750e-04
Epoch 191/500
135/135 - 0s - loss: 4.1920e-04
Epoch 192/500
135/135 - 0s - loss: 4.1545e-04
Epoch 193/500
135/135 - 0s - loss: 4.1458e-04
Epoch 194/500
135/135 - 0s - loss: 4.1801e-04
Epoch 195/500
135/135 - 0s - loss: 4.1994e-04
Epoch 196/500
135/135 - 0s - loss: 4.1076e-04
Epoch 197/500
135/135 - 0s - loss: 4.5563e-04
Epoch 198/500
135/135 - 0s - loss: 4.1120e-04
Epoch 199/500
135/135 - 0s - loss: 4.1193e-04
Epoch 200/500
135/135 - 0s - loss: 4.0621e-04
Epoch 201/500
135/135 - 0s - loss: 4.1288e-04
Epoch 202/500
135/135 - 0s - loss: 4.4351e-04
Epoch 203/500
135/135 - 0s - loss: 4.1911e-04
Ep

Epoch 361/500
135/135 - 0s - loss: 3.9336e-04
Epoch 362/500
135/135 - 0s - loss: 3.9853e-04
Epoch 363/500
135/135 - 0s - loss: 4.3561e-04
Epoch 364/500
135/135 - 0s - loss: 3.9750e-04
Epoch 365/500
135/135 - 0s - loss: 4.1586e-04
Epoch 366/500
135/135 - 0s - loss: 4.2280e-04
Epoch 367/500
135/135 - 0s - loss: 4.0260e-04
Epoch 368/500
135/135 - 0s - loss: 3.9398e-04
Epoch 369/500
135/135 - 0s - loss: 4.0072e-04
Epoch 370/500
135/135 - 0s - loss: 3.9822e-04
Epoch 371/500
135/135 - 0s - loss: 4.1800e-04
Epoch 372/500
135/135 - 0s - loss: 4.1104e-04
Epoch 373/500
135/135 - 0s - loss: 4.0816e-04
Epoch 374/500
135/135 - 0s - loss: 4.0079e-04
Epoch 375/500
135/135 - 0s - loss: 4.0985e-04
Epoch 376/500
135/135 - 0s - loss: 4.1442e-04
Epoch 377/500
135/135 - 0s - loss: 4.0749e-04
Epoch 378/500
135/135 - 0s - loss: 3.9887e-04
Epoch 379/500
135/135 - 0s - loss: 3.9722e-04
Epoch 380/500
135/135 - 0s - loss: 3.8908e-04
Epoch 381/500
135/135 - 0s - loss: 3.9115e-04
Epoch 382/500
135/135 - 0s - loss:

<tensorflow.python.keras.callbacks.History at 0x15209f38f08>

In [None]:
# Second scaler that reuses the first feature's offset and scale from `scaler`
# (copied as scalars). NOTE(review): this cell has no execution count and
# `scaler_new` is not referenced by any later cell visible here.
scaler_new = MinMaxScaler(feature_range=(0, 1))
first_feature_min = scaler.min_[0]
first_feature_scale = scaler.scale_[0]
scaler_new.min_ = first_feature_min
scaler_new.scale_ = first_feature_scale

In [None]:
# Inspect the array currently bound to X_not_scaled.
X_not_scaled

In [55]:
# Run the fitted model on both splits; predictions are in scaled units because
# the model was trained on the scaled targets.
y_train_scaled_pred, y_test_scaled_pred = (
    model.predict(inputs) for inputs in (X_train_scaled, X_test_scaled)
)

In [56]:
# Map the scaled predictions back through the fitted scaler, then round to
# whole numbers and cast to int so they are comparable with the raw counts.
y_train_pred = scaler.inverse_transform(y_train_scaled_pred).round(0).astype(int)
y_test_pred = scaler.inverse_transform(y_test_scaled_pred).round(0).astype(int)

In [59]:
# Actual (un-scaled) target values of the test split, for comparison with the predictions.
y_test_not_scaled

array([[2675, 2884, 3497, 3372, 3565, 3833, 3130],
       [2884, 3497, 3372, 3565, 3833, 3130, 2671],
       [3497, 3372, 3565, 3833, 3130, 2671, 3627],
       [3372, 3565, 3833, 3130, 2671, 3627, 4027],
       [3565, 3833, 3130, 2671, 3627, 4027, 4069],
       [3833, 3130, 2671, 3627, 4027, 4069, 4633],
       [3130, 2671, 3627, 4027, 4069, 4633, 4661],
       [2671, 3627, 4027, 4069, 4633, 4661, 4140],
       [3627, 4027, 4069, 4633, 4661, 4140, 3774],
       [4027, 4069, 4633, 4661, 4140, 3774, 4348],
       [4069, 4633, 4661, 4140, 3774, 4348, 4753],
       [4633, 4661, 4140, 3774, 4348, 4753, 5397],
       [4661, 4140, 3774, 4348, 4753, 5397, 5804],
       [4140, 3774, 4348, 4753, 5397, 5804, 5728],
       [3774, 4348, 4753, 5397, 5804, 5728, 4768],
       [4348, 4753, 5397, 5804, 5728, 4768, 4420],
       [4753, 5397, 5804, 5728, 4768, 4420, 5133],
       [5397, 5804, 5728, 4768, 4420, 5133, 5590],
       [5804, 5728, 4768, 4420, 5133, 5590, 5062],
       [5728, 4768, 4420, 5133,

In [60]:
# Model predictions for the test split after inverse scaling and rounding.
y_test_pred

array([[2951, 3180, 3488, 3728, 3654, 3427, 3268],
       [3080, 3364, 3628, 3659, 3467, 3219, 3187],
       [3313, 3575, 3586, 3429, 3236, 3196, 3388],
       [3761, 3681, 3473, 3194, 3132, 3386, 3778],
       [3431, 3255, 3025, 3021, 3385, 3744, 3976],
       [3449, 3124, 2974, 3262, 3698, 3889, 3902],
       [3350, 3103, 3271, 3657, 3862, 3924, 3937],
       [3049, 3197, 3527, 3830, 3928, 3788, 3626],
       [3145, 3468, 3693, 3899, 3830, 3475, 3266],
       [3818, 3907, 4024, 3869, 3451, 3379, 3689],
       [3942, 4000, 3884, 3461, 3332, 3773, 4288],
       [4055, 3848, 3368, 3264, 3750, 4264, 4417],
       [4058, 3438, 3232, 3720, 4277, 4527, 4511],
       [3605, 3265, 3659, 4294, 4544, 4597, 4551],
       [3605, 3848, 4255, 4615, 4602, 4455, 4203],
       [4043, 4325, 4580, 4769, 4524, 4138, 3880],
       [4407, 4547, 4803, 4706, 4218, 4104, 4355],
       [4712, 4867, 4746, 4302, 4008, 4371, 4831],
       [5112, 4812, 4300, 4050, 4281, 4876, 5204],
       [4881, 4219, 3907, 4309,

In [66]:

# Score both splits with loss_func: the sum over samples of each sample's mean
# squared error across its target columns.
# NOTE: this is NOT a root mean squared error. The value is in squared units
# and grows with the number of samples, so the train and test totals are not
# directly comparable unless they are normalised by their row counts.
trainScore = loss_func(y_train_not_scaled, y_train_pred)
testScore = loss_func(y_test_not_scaled, y_test_pred)

In [67]:
# Show the training-split score returned by loss_func.
trainScore

3462495.428571428

In [47]:
# Mean squared error of every test sample, averaged across its target columns.
np.square(y_test_not_scaled - y_test_pred).mean(axis=1)

array([  79650.28571429,   82453.85714286,   78233.71428571,
         92094.28571429,  104998.71428571,  208600.14285714,
        341803.57142857,  453246.71428571,  485988.        ,
        451426.71428571,  509304.42857143,  750945.14285714,
        995825.71428571, 1273900.85714286, 1317993.28571429,
       1188013.85714286, 1020766.28571429, 1156155.42857143,
       1013047.85714286, 1029546.28571429, 1323846.85714286,
       1494791.14285714, 1251739.28571429,  967458.        ,
       1508804.85714286, 2017077.57142857, 2844207.28571429,
       3824328.14285714, 4096350.28571429, 3450610.42857143,
       3379417.        , 3933910.14285714, 4551678.14285714,
       5289930.85714286])