In [26]:
import numpy as np
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import time
import math
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


In [27]:
import os
import sys

# Make the parent and grandparent of the working directory importable so the
# project-local `core` package imported in the next cell can be resolved.
current_path = os.getcwd()
nb_dir_1 = os.path.dirname(current_path)   # parent of the working directory
parent = nb_dir_1
nb_dir_2 = os.path.dirname(parent)         # grandparent of the working directory

# Append each directory only once so re-running the cell does not grow sys.path.
for nb_dir in (nb_dir_1, nb_dir_2):
    if nb_dir not in sys.path:
        sys.path.append(nb_dir)

In [28]:
from core.models import LSTM
from core.time_series_clustering import KmeansClustering
from core.data_preparation import DataPreparation

Load and prepare the data for input to the LSTM model

In [29]:
# Read only CSV columns 0 and 4 and rename them to 'date' and 'close';
# header=0 discards the file's own header row in favour of these names.
filepath = "../Data/BHARTIARTL_2010-01-012022-02-10.csv"
data = pd.read_csv(filepath, usecols=[0,4], names=['date', 'close'], header=0)
# Parse the dates *before* sorting: sorting the raw strings is lexicographic
# and only matches chronological order for zero-padded ISO dates.
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values('date')
data.head()

Unnamed: 0,date,close
0,2010-01-04,325.2
1,2010-01-05,330.35
2,2010-01-06,326.85
3,2010-01-07,329.4
4,2010-01-08,325.05


In [30]:
# Line chart of the closing price over time; the figure is the cell's last
# expression so the notebook renders it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=data['date'],
            y=data['close'],
            mode='lines',
            name='closing price',
        )
    ]
)
fig

In [31]:
# Scaler handed to the data-preparation helper in the next cell; it maps
# values into the range [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))

# Project-local helper (core.data_preparation) that builds the model inputs.
prep = DataPreparation()

# NOTE(review): window_size is not passed to any call in the visible cells.
# The printed x_train shape (…, 20, 1) suggests the helper uses the same
# value internally — TODO confirm and pass it explicitly if supported.
window_size = 20

In [32]:
# Hand the raw closing prices and the scaler to the project helper. It
# returns the scaler together with the train/test inputs and targets
# (presumably torch tensors, since later cells call .detach() on them).
(
    scaler,
    x_train,
    x_test,
    y_train_lstm,
    y_test_lstm,
) = prep.normalize_and_prepare_data(data['close'].values, scaler)

x_train.shape =  (2386, 20, 1)
y_train.shape =  (2386, 1)
x_test.shape =  (582, 20, 1)
y_test.shape =  (582, 1)


In [33]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced by any other visible cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

cuda


In [34]:
# Restore the pickled model object and switch it to inference mode.
#
# map_location='cpu' pins the restored weights to the CPU: the cells below
# call `.detach().numpy()` on the model output, which only works for CPU
# tensors, so a checkpoint saved from a GPU session must not be restored
# onto CUDA here.
#
# SECURITY: torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which
# rejects fully pickled modules — pass weights_only=False there if needed.
model = torch.load('lstm_airtel.pt', map_location='cpu')
model.eval()

LSTM(
  (lstm): LSTM(1, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)

In [35]:
# Forward pass on the training inputs. This notebook only evaluates the
# model, so autograd tracking is switched off to avoid building and holding
# a computation graph for the whole training set.
with torch.no_grad():
    y_train_pred = model(x_train)

In [36]:
# Undo the scaling on the training predictions and training targets and wrap
# each result in a DataFrame.
# NOTE(review): neither frame is used by the other visible cells.
predict, original = (
    pd.DataFrame(scaler.inverse_transform(scaled.detach().numpy()))
    for scaled in (y_train_pred, y_train_lstm)
)

In [37]:
# make predictions
# Inference is run inside this cell (with autograd disabled) instead of
# reusing the `y_train_pred` tensor from an earlier cell: `y_train_pred` is
# rebound to a NumPy array below, so reusing it would make a second run of
# this cell fail on `.detach()`.
with torch.no_grad():
    train_pred_scaled = model(x_train)
    test_pred_scaled = model(x_test)

# invert predictions (undo the scaler so errors are in the original units)
y_train_pred = scaler.inverse_transform(train_pred_scaled.detach().numpy())
y_train = scaler.inverse_transform(y_train_lstm.detach().numpy())
y_test_pred = scaler.inverse_transform(test_pred_scaled.detach().numpy())
y_test = scaler.inverse_transform(y_test_lstm.detach().numpy())

# calculate root mean squared error on the single output column
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print(f'Train Score: {trainScore:.2f} RMSE')
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print(f'Test Score: {testScore:.2f} RMSE')

Train Score: 8.24 RMSE
Test Score: 41.86 RMSE


In [38]:
# Take column 0 of the inverse-transformed targets and predictions to get
# 1-D arrays of ground-truth and predicted values for the training set.
train_gt, train_preds = (values[:, 0] for values in (y_train, y_train_pred))

In [39]:
# Put training ground truth and predictions side by side, one row per sample.
train_preds_df = pd.DataFrame(
    dict(
        zip(
            ('Train GT', 'Train preds'),
            (train_gt, train_preds),
        )
    )
)

train_preds_df.head(n=5)

Unnamed: 0,Train GT,Train preds
0,307.649994,310.843445
1,309.0,308.395264
2,304.100006,307.82785
3,300.050018,306.336426
4,300.399994,303.671539


In [40]:
# Absolute error between ground truth and prediction for every training row.
train_preds_df['diff'] = train_preds_df['Train GT'].sub(train_preds_df['Train preds']).abs()

In [41]:
# Preview the first rows, now including the absolute-error column.
train_preds_df.head(n=5)

Unnamed: 0,Train GT,Train preds,diff
0,307.649994,310.843445,3.193451
1,309.0,308.395264,0.604736
2,304.100006,307.82785,3.727844
3,300.050018,306.336426,6.286407
4,300.399994,303.671539,3.271545


In [42]:
# Overlay ground truth, predictions and absolute error for the whole training
# set; each (column, legend label) pair becomes one line trace, in this order.
fig = go.Figure(
    data=[
        go.Scatter(
            x=train_preds_df.index,
            y=train_preds_df[column],
            mode='lines',
            name=label,
        )
        for column, label in (
            ('Train GT', 'training ground truth'),
            ('Train preds', 'training preds'),
            ('diff', 'training error'),
        )
    ]
)
fig

In [43]:
# Persist ground truth, predictions and absolute error to CSV in the working
# directory, without writing the row index.
train_preds_df.to_csv(
    'Airtel_ErrorDistribution.csv',
    index=False,
)

In [44]:
# Same three traces as the full-range plot, restricted to rows 1850-1899 so
# the individual predictions can be inspected.
fig = go.Figure(
    data=[
        go.Scatter(
            x=train_preds_df.index[1850:1900],
            y=train_preds_df[column][1850:1900],
            mode='lines',
            name=label,
        )
        for column, label in (
            ('Train GT', 'training ground truth'),
            ('Train preds', 'training preds'),
            ('diff', 'training error'),
        )
    ]
)
fig
