In [None]:
## Import the required libraries; further imports are added later in the notebook as they are needed

import pandas as pd
from sklearn.model_selection import train_test_split
!pip install tensorflow
!pip install keras
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA

from numpy import concatenate

In [None]:
# Function that trains and evaluates the LSTM model for a given lag

def lstm_lag(n):
    """Train a stacked LSTM on the lag-shifted inflation series and report test error.

    Reads 'converted.csv', shifts the 'Inflation (Can)' column up by ``n``
    rows (so each row's features are paired with the value ``n`` rows later),
    keeps the first 84*30 rows and drops the 'Period' column. The first 72*30
    rows are used for training and the remaining 12*30 rows for testing.
    Features are standardised and reduced to 5 principal components; the
    scalers and the PCA are fitted on the training rows only and then applied
    to the test rows.

    Side effects: shows a predicted-vs-actual plot, saves it as
    'lstm<n>.png' and prints the MSE and MAE on the original (unscaled) scale.

    Parameters
    ----------
    n : int
        Number of rows by which the target is shifted (the notebook calls
        this with 0, 30, 60 or 90 and describes it as a lag in days).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the lagged data has fewer than 84*30 rows or contains missing
        values in those rows.
    """
    target_col = 'Inflation (Can)'
    n_total = 84*30
    n_train = 72*30

    # reading data and shifting the target by the specified lag
    data = pd.read_csv('converted.csv')
    data[target_col] = data[target_col].shift(-1*n)
    # other cleaning (head() also cuts off trailing rows emptied by the shift)
    data = data.head(n_total).drop(columns=['Period'])

    # fail early with a clear message instead of at metric time
    if len(data) < n_total or data.isna().any().any():
        raise ValueError(
            "'converted.csv' must provide " + str(n_total)
            + " complete rows after applying a lag of " + str(n)
        )

    # select the target by name (the same column the lag was applied to)
    # rather than relying on it being the first column
    target = data[[target_col]].to_numpy()
    features = data.drop(columns=[target_col]).to_numpy()

    # scaling the data: statistics come from the training rows only so that
    # nothing from the test period leaks into training
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()
    train_X = x_scaler.fit_transform(features[:n_train])
    test_X = x_scaler.transform(features[n_train:n_total])
    train_y = y_scaler.fit_transform(target[:n_train])
    test_y = y_scaler.transform(target[n_train:n_total])

    # applying PCA to reduce number of features (fitted on training rows only)
    pca = PCA(n_components=5)
    train_X = pca.fit_transform(train_X)
    test_X = pca.transform(test_X)

    # LSTM input layout: (samples, timesteps=1, features)
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

    # training lstm with a network and other parameters
    model = Sequential()
    model.add(LSTM(128,activation='relu', input_shape=(train_X.shape[1], train_X.shape[2]),return_sequences=True))
    model.add(LSTM(128,return_sequences=True))
    model.add(LSTM(64,return_sequences=True))
    # The last recurrent layer returns only its final output, so the network
    # emits one value per sample, shape (samples, 1), matching train_y.
    # With return_sequences=True here the output would be (samples, 1, 1) and
    # the loss would have to broadcast it against the targets.
    model.add(LSTM(16))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    model.fit(train_X, train_y, epochs=10, batch_size=30, validation_data=(test_X, test_y), verbose=0, shuffle=False)

    # predictions on the test rows, still on the standardised scale
    yhat = model.predict(test_X)

    # invert scaling for forecast and for actual values
    inv_yhat = y_scaler.inverse_transform(yhat.reshape(-1, 1))[:, 0]
    inv_y = y_scaler.inverse_transform(test_y)[:, 0]

    # plotting prediction results
    pyplot.plot(inv_yhat, label='predicted')
    pyplot.plot(inv_y, label='actual')
    pyplot.legend()
    pyplot.title(str(n)+'days lag')
    pyplot.savefig('lstm'+str(n)+'.png')
    pyplot.show()

    # printing mse and mae (arguments in scikit-learn's y_true, y_pred order)
    print('mse = ',mean_squared_error(inv_y, inv_yhat))
    print('mae = ',mean_absolute_error(inv_y, inv_yhat))

    return

In [None]:
# Set the argument to 0, 30, 60 or 90: the lag, expressed in days.
lstm_lag(90)

# Running this cell trains the LSTM model on the data with the specified lag,
# prints the MSE and MAE, and shows the prediction plot and saves it
# as 'lstm<n>.png'.

In [None]:
# Same pipeline as lstm_lag, but fitting a grid-searched SVR model

def svm_lag(n):
    """Grid-search an SVR on the lag-shifted inflation series and report test error.

    Reads 'raw_data.csv', shifts the 'Inflation (Can)' column up by ``n``
    rows (so each row's features are paired with the value ``n`` rows later),
    keeps the first 84 rows and drops the 'Period' column. The first 72 rows
    are used for training and the remaining 12 rows for testing. Features are
    standardised and reduced to 5 principal components; the scalers and the
    PCA are fitted on the training rows only and then applied to the test
    rows. An SVR is selected with a 4-fold grid search over kernel, tolerance
    and C, scored by negative mean squared error.

    Side effects: shows a predicted-vs-actual plot, saves it as
    'svm<n>.png' and prints the MSE and MAE on the original (unscaled) scale.

    Parameters
    ----------
    n : int
        Number of rows by which the target is shifted (the notebook calls
        this with 0, 1, 2 or 3 and describes it as a lag in months).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the lagged data has fewer than 84 rows or contains missing values
        in those rows.
    """
    target_col = 'Inflation (Can)'
    n_total = 84
    n_train = 72

    # reading data and shifting the target by the specified lag
    data = pd.read_csv('raw_data.csv')
    data[target_col] = data[target_col].shift(-1*n)
    # head() also cuts off trailing rows emptied by the shift
    data = data.head(n_total).drop(columns=['Period'])

    # fail early with a clear message instead of inside the estimator
    if len(data) < n_total or data.isna().any().any():
        raise ValueError(
            "'raw_data.csv' must provide " + str(n_total)
            + " complete rows after applying a lag of " + str(n)
        )

    # select the target by name (the same column the lag was applied to)
    # rather than relying on it being the first column
    target = data[[target_col]].to_numpy()
    features = data.drop(columns=[target_col]).to_numpy()

    # scaling: statistics come from the training rows only so that nothing
    # from the test period leaks into training
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()
    train_X = x_scaler.fit_transform(features[:n_train])
    test_X = x_scaler.transform(features[n_train:n_total])
    # SVR expects a 1-D target
    train_y = y_scaler.fit_transform(target[:n_train]).ravel()
    test_y = y_scaler.transform(target[n_train:n_total]).ravel()

    # PCA to reduce the number of features (fitted on training rows only)
    pca = PCA(n_components=5)
    train_X = pca.fit_transform(train_X)
    test_X = pca.transform(test_X)

    para = {
    'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
    'tol' : [0.0001,0.0005,0.001,0.01,0.1],
    'C' : [0.1,0.5,1,2]
    }
    grid_ser = GridSearchCV(SVR(),scoring = 'neg_mean_squared_error',param_grid=para ,n_jobs =1,cv = 4,verbose=0)
    grid_ser.fit(train_X,train_y)

    # predictions and actual values mapped back to the original scale
    predicted = grid_ser.best_estimator_.predict(test_X)
    a = y_scaler.inverse_transform(predicted.reshape(-1, 1))[:, 0]
    b = y_scaler.inverse_transform(test_y.reshape(-1, 1))[:, 0]

    pyplot.plot(a, label='predicted')
    pyplot.plot(b, label='actual')
    pyplot.legend()
    # title added so the saved figure identifies its lag, as the LSTM plot does
    pyplot.title(str(n)+' months lag')
    pyplot.savefig('svm'+str(n)+'.png')
    pyplot.show()

    # arguments in scikit-learn's (y_true, y_pred) order
    print('mse = ',mean_squared_error(b, a))
    print('mae = ',mean_absolute_error(b, a))

    return

In [None]:
# Set the argument to 0, 1, 2 or 3: the lag, expressed in months.

svm_lag(3)

# Works like the lstm_lag cell: prints the MSE and MAE, shows the prediction
# plot and saves it as 'svm<n>.png'.