In [1]:
import rpy2
import rpy2.robjects
from rpy2.robjects.packages import importr

In [2]:
import rpy2.robjects.numpy2ri
# Switch on rpy2's NumPy conversion globally so that NumPy arrays can be handed
# directly to the R functions called later in this notebook.
# NOTE(review): newer rpy2 releases appear to deprecate global activation in
# favour of a local converter context - confirm against the installed version.
rpy2.robjects.numpy2ri.activate()

In [3]:
import numpy as np
import pandas as pd
import torch
from Models import LSTM
from sklearn.preprocessing import MinMaxScaler


import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

cuda


In [5]:
# Read file columns 0 and 4 as `date` and `close`; `header=0` discards the
# file's own header row in favour of these names.
filepath = "RELIANCE_2010-01-012021-08-30.csv"
data = pd.read_csv(filepath, usecols=[0,4], names=['date', 'close'], header=0)

# Parse the dates BEFORE sorting so the order is chronological rather than a
# lexicographic sort of the raw strings.
data['date'] = pd.to_datetime(data['date'])

# Reset the index after sorting: later cells plot `data.index` next to the
# detector output's positional index and select rows with `iloc`, so labels
# and positions must agree even if the file was not already in date order.
data = data.sort_values('date').reset_index(drop=True)
data.head()

Unnamed: 0,date,close
0,2010-01-04,1075.5
1,2010-01-05,1070.7
2,2010-01-06,1088.0
3,2010-01-07,1106.05
4,2010-01-08,1103.15


In [6]:
data.tail()

Unnamed: 0,date,close
2890,2021-08-23,2162.35
2891,2021-08-24,2183.7
2892,2021-08-25,2202.6
2893,2021-08-26,2230.45
2894,2021-08-27,2227.4


In [10]:
# Line chart of the closing price against the calendar date.
fig = go.Figure(
    data=[go.Scatter(x=data['date'], y=data['close'], mode='lines', name='closing price')]
)

# Pin both axes to fixed windows so the chart framing does not depend on autoscaling.
fig.update_xaxes(range=["2009-11-01", "2021-11-01"])
fig.update_yaxes(range=[500, 2500])

In [11]:
# Load the R package `otsad` through rpy2; its functions are then reachable as
# attributes of `TsAD` (e.g. `TsAD.OcpTsSdEwma` used below).
TsAD = importr('otsad')



In [12]:
# Number of leading observations handed to the detector as its training phase
# (the first 80% of the series). Cast to int because this is a count of
# observations: for most series lengths 0.8 * len(data) is fractional, and a
# fractional count would be passed on to R as-is.
train_len = int(0.8 * len(data))

In [13]:
# Run otsad's OcpTsSdEwma detector on the closing prices, passing the series,
# the training length and three numeric settings positionally.
# NOTE(review): going by the otsad signature the trailing 0.01, 3, 10 look like
# `threshold`, `l` and `m` - confirm against the package documentation.
res = TsAD.OcpTsSdEwma(data['close'].values, train_len, 0.01, 3, 10 )

In [14]:
# Wrap the detector result in a DataFrame; the preview below shows the columns
# `is.anomaly`, `lcl` and `ucl`, one row per observation.
df = pd.DataFrame(res)

In [15]:
df.head()

Unnamed: 0,is.anomaly,lcl,ucl
0,0.0,1075.5,1075.5
1,0.0,1070.7,1070.7
2,0.0,1088.0,1088.0
3,0.0,1106.05,1106.05
4,0.0,1103.15,1103.15


In [16]:
# Overlay the detector's lower/upper control limits on the closing price.
# All three traces are drawn against the integer row index, not calendar dates.
fig = go.Figure(data=[
    go.Scatter(x=data.index, y=data['close'], mode='lines', name='close price'),
    go.Scatter(x=df.index, y=df['lcl'], mode='lines', name='lower control limit'),
    go.Scatter(x=df.index, y=df['ucl'], mode='lines', name='upper control limit'),
])

# Only the price axis is pinned; the x axis keeps its automatic range.
fig.update_yaxes(range=[500, 2500])

In [17]:
# Index labels of every row the detector flagged (is.anomaly equal to 1.0).
anomaly_indices = df.loc[df['is.anomaly'].eq(1.0)].index

In [18]:
# Pull the flagged observations (date and close) out of the price frame by position.
rows = data.iloc[anomaly_indices]

In [19]:
rows


Unnamed: 0,date,close
2388,2019-08-13,1274.75
2413,2019-09-20,1254.35
2529,2020-03-09,1114.15
2531,2020-03-12,1063.0
2533,2020-03-16,1015.7
2540,2020-03-25,1082.25
2547,2020-04-07,1206.1
2556,2020-04-22,1363.6
2655,2020-09-10,2314.0
2831,2021-05-28,2094.8


In [20]:
# Closing price as a line, with the flagged observations drawn as markers on top.
fig = go.Figure(data=[
    go.Scatter(x=data['date'], y=data['close'], mode='lines', name='lines'),
    go.Scatter(x=rows['date'], y=rows['close'], mode='markers', name='markers'),
])

# Same fixed framing as the plain closing-price chart.
fig.update_xaxes(range=["2009-11-01", "2021-11-01"])
fig.update_yaxes(range=[500, 2500])

In [21]:
def splitData(ts):
    """Split a series chronologically into a leading train part and a trailing test part.

    The last 20% of the observations (rounded with ``np.round``) become the
    test set; everything before them is the train set.

    Args:
        ts: Sliceable sequence (e.g. a NumPy array) ordered oldest to newest.

    Returns:
        Tuple ``(train_set, test_set)`` of slices of ``ts``.
    """
    test_set_size = int(np.round(0.2 * len(ts)))

    # Slice from an explicit boundary instead of using negative offsets: when
    # the test size rounds to 0, `ts[:-0]` is empty and `ts[-0:]` is the whole
    # series, which would silently swap the two sets.
    split_at = len(ts) - test_set_size
    train_set = ts[:split_at]
    test_set = ts[split_at:]

    return train_set, test_set

In [22]:
# Scaler mapping values into the range [-1, 1]. This single instance is shared:
# `normalize_data` fits it on the training split, and the prediction cell uses
# it again to invert the scaling.
scaler = MinMaxScaler(feature_range=(-1, 1))

In [23]:
# Scale the train and test splits with the shared module-level `scaler`.

def normalize_data(train_set, test_set):
    """Scale both splits as single-column arrays using the global ``scaler``.

    The scaler is (re)fitted on the training split only and then applied
    unchanged to the test split.

    Args:
        train_set: Array of training values; reshaped to one column.
        test_set: Array of test values; reshaped to one column.

    Returns:
        Tuple ``(train_norm, test_norm)`` of scaled column arrays.
    """
    fitted_train = scaler.fit_transform(train_set.reshape(-1, 1))
    return fitted_train, scaler.transform(test_set.reshape(-1, 1))

In [24]:
window_size = 20
def prepareDataForTraining(seq, window=None):
    """Cut a sequence into sliding input windows and their next-step labels.

    For every start position ``i`` the input is ``seq[i:i + window]`` and the
    label is the one-element slice that immediately follows it.

    Args:
        seq: Sliceable sequence (list, NumPy array, ...) in time order.
        window: Number of steps per input window. Defaults to the module-level
            ``window_size``, read at call time.

    Returns:
        Tuple ``(x_data, y_data)`` of equally long lists. Both are empty when
        ``seq`` has no more than ``window`` elements.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window is None:
        window = window_size
    if window < 1:
        raise ValueError("window must be at least 1")

    starts = range(len(seq) - window)
    x_data = [seq[i:i + window] for i in starts]
    # Labels stay one-element slices (not scalars) so they keep the container type of `seq`.
    y_data = [seq[i + window:i + window + 1] for i in starts]
    return x_data, y_data

In [25]:
# Chronological split of the closing prices, then scaling (the scaler is fitted
# on the training part only, inside normalize_data).
train_set, test_set = splitData(data['close'].values)
train_norm, test_norm = normalize_data(train_set, test_set)

# Sliding windows for the training split: inputs shaped
# (samples, window_size, 1), targets shaped (samples, 1).
x_train, y_train = prepareDataForTraining(train_norm)
x_train = np.asarray(x_train).reshape(-1, window_size, 1)
y_train = np.asarray(y_train).reshape(-1, 1)

# Same treatment for the test split.
x_test, y_test = prepareDataForTraining(test_norm)
x_test = np.asarray(x_test).reshape(-1, window_size, 1)
y_test = np.asarray(y_test).reshape(-1, 1)

print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)

# Convert to tensors of PyTorch's default tensor type. The inputs replace the
# NumPy arrays under the same names; the targets get separate `*_lstm` names so
# the NumPy versions stay available.
x_train, x_test = (
    torch.from_numpy(arr).type(torch.Tensor) for arr in (x_train, x_test)
)
y_train_lstm, y_test_lstm = (
    torch.from_numpy(arr).type(torch.Tensor) for arr in (y_train, y_test)
)

x_train.shape =  (2296, 20, 1)
y_train.shape =  (2296, 1)
x_test.shape =  (559, 20, 1)
y_test.shape =  (559, 1)


In [26]:
# Network size and training length.
input_dim, hidden_dim, num_layers, output_dim = 1, 32, 2, 1
num_epochs = 100

# LSTM regressor (project-local class), mean-squared-error loss, Adam optimiser.
model = LSTM(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_layers=num_layers,
)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

# Per-epoch training loss.
hist = np.zeros(num_epochs)
lstm = []

# Full-batch training: each epoch is one forward/backward pass over all windows.
for t in range(num_epochs):
    # Clear gradients left over from the previous step before the new backward pass.
    optimiser.zero_grad()

    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train_lstm)

    loss.backward()
    optimiser.step()

    # The loss value was fixed by the forward pass, so recording it after the
    # update stores the same number as recording it before.
    hist[t] = loss.item()
    


In [37]:
# Number of test targets that actually get a prediction: the first
# `window_size` test points only serve as input for the first window, so the
# windowed test set is that much shorter than `test_set`.
actual_test_len = len(test_set) - window_size

In [27]:
# Make predictions on the test windows. Inference needs no gradients, so run
# the forward pass under no_grad to avoid building an autograd graph.
with torch.no_grad():
    y_test_pred = model(x_test)

# Invert the scaling so predictions and targets are back in price units.
# Both names are rebound from scaled values to NumPy arrays of prices.
y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler.inverse_transform(y_test_lstm.detach().numpy())

In [41]:
# Test-window comparison: actual close, the detector's control limits and the
# LSTM prediction, all drawn over the last `actual_test_len` row positions.
fig = go.Figure(data=[
    go.Scatter(x=data.index[-actual_test_len:], y=y_test[:,0], mode='lines', name='actual close price'),
    go.Scatter(x=df.index[-actual_test_len:], y=df['lcl'][-actual_test_len:], mode='lines', name='lower control limit'),
    go.Scatter(x=df.index[-actual_test_len:], y=df['ucl'][-actual_test_len:], mode='lines', name='upper control limit'),
    go.Scatter(x=df.index[-actual_test_len:], y= y_test_pred[:,0], mode='lines', name='predicted close price'),
])

# Pin the price axis to the same band used by the earlier charts.
fig.update_yaxes(range=[500, 2500])

In [32]:
data['close'][-len(test_set):]

2316    1392.80
2317    1405.05
2318    1408.85
2319    1384.90
2320    1343.50
         ...   
2890    2162.35
2891    2183.70
2892    2202.60
2893    2230.45
2894    2227.40
Name: close, Length: 579, dtype: float64

In [34]:
len(y_test[:,0])

559