In [19]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import *
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Read every line of the raw text file up front; parsing happens once the
# handle has been closed.
with open('./data/IBM.txt', 'r') as file:
    lines = file.readlines()

# Collect (date, value) pairs from every line after the first (presumably a
# header row). Rows whose date string lies outside the study period are
# dropped. The range test is a plain string comparison, so it assumes the
# file's dates use the same YYYY-MM-DD text layout as the bounds.
dates = []
data = []
for line in lines[1:]:
    parts = line.strip().split(',')
    date = parts[0]
    if not ('1980-12-12' <= date <= '2022-07-22'):
        continue
    dates.append(date)
    data.append(float(parts[4]))  # fifth field: 'Close' column

# Date-indexed frame holding the selected field in a single 'Value' column.
df = pd.DataFrame({'Date': pd.to_datetime(dates), 'Value': data}).set_index('Date')

# Column vector of shape (n_rows, 1): the 2-D layout the scaler is fitted on.
signal = df['Value'].values.reshape(-1, 1)

# Split by position: the first 80% of the rows train, the remainder tests.
train_size = int(len(signal) * 0.8)
train_data, test_data = signal[:train_size, :], signal[train_size:, :]

# normalization: the scaler learns its range from the training rows only and
# that same fitted transform is applied to both partitions.
scaler = MinMaxScaler().fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

def _window_means(dataset, start, window_size, timeslice):
    """Average column 0 over consecutive, non-overlapping windows.

    Starting at row `start`, walks `timeslice` back-to-back windows of
    `window_size` rows and returns the list of their means. A window whose
    values are all NaN contributes nothing to the list.
    """
    means = []
    offset = 0
    while offset < timeslice * window_size:
        current_slice = dataset[start + offset:start + offset + window_size, 0]
        if not np.isnan(current_slice).all():
            means.append(np.mean(current_slice))
        offset += window_size
    return means


def create_groups(dataset, window_size_1, window_size_2, window_size_3, timeslice, step):
    """Build multi-resolution samples and next-value targets from a series.

    For every start row `index` (0, step, 2*step, ...) the feature vector is
    the concatenation of three groups of `timeslice` window means over
    column 0 of `dataset`, one group per window size, each group starting at
    `index`. The target is the row at `index + timeslice * window_size_3`,
    i.e. the first row after the span covered by the third resolution.

    Parameters
    ----------
    dataset : 2-D array indexed as `dataset[rows, 0]`.
    window_size_1, window_size_2, window_size_3 : int
        Rows averaged per window at each resolution. `window_size_3` must be
        the widest, because it alone sets the look-back span and the position
        of the target.
    timeslice : int
        Number of windows taken at each resolution.
    step : int
        Distance in rows between consecutive sample starts; must be >= 1.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        `X` has shape (n_samples, 3 * timeslice) when no window was skipped;
        `y` has shape (n_samples, 1, n_columns). Both are empty 1-D arrays
        when `dataset` is too short to yield a sample.

    Raises
    ------
    ValueError
        If `step` is below 1 (the scan would never advance) or if
        `window_size_3` is narrower than another window (the target would
        fall inside a feature window and windows would run off the data).

    Notes
    -----
    * A window that is entirely NaN is skipped, which shortens that sample's
      feature vector; a partially NaN window yields a NaN mean.
    * NOTE(review): all three resolutions start at `index`, so the finest
      windows cover the rows farthest from the target. Confirm this alignment
      is intended.
    """
    if step < 1:
        raise ValueError("step must be >= 1, got %r" % (step,))
    if window_size_3 < max(window_size_1, window_size_2):
        raise ValueError(
            "window_size_3 must be the widest window; got window sizes %r"
            % ((window_size_1, window_size_2, window_size_3),)
        )

    # Rows consumed by the widest resolution; the target sits right after them.
    horizon = timeslice * window_size_3

    X_data, y_data = [], []
    for index in range(0, len(dataset) - horizon, step):
        groups = [
            _window_means(dataset, index, window_size, timeslice)
            for window_size in (window_size_1, window_size_2, window_size_3)
        ]
        X_data.append(np.concatenate(groups))
        # Wrapped in a list so each target keeps its (1, n_columns) shape.
        y_data.append(np.array([dataset[index + horizon]]))

    return np.array(X_data), np.array(y_data)

# Windowing configuration: three averaging widths, the number of windows taken
# at each width, and the stride between consecutive samples.
window_size_1, window_size_2, window_size_3 = 1, 3, 9
timeslice = 4
step = 1

# Build samples and targets separately for the training and test partitions.
X_train, y_train = create_groups(
    train_data, window_size_1, window_size_2, window_size_3, timeslice, step
)
X_test, y_test = create_groups(
    test_data, window_size_1, window_size_2, window_size_3, timeslice, step
)

# Add a trailing axis of length 1 so each sample is (n_features, 1), matching
# the input_shape given to the first LSTM layer.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [20]:
# Display the first training target returned by create_groups (shape/scale check).
y_train[0]

array([[0.03268669]])

In [21]:
# Define the LSTM model architecture: three stacked 64-unit LSTM layers, each
# followed by dropout at rate 0.2, ending in a single-unit dense output. Only
# the last LSTM collapses the sequence (return_sequences=False).
model = Sequential([
    LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=64, return_sequences=True),
    Dropout(0.2),
    LSTM(units=64, return_sequences=False),
    Dropout(0.2),
    Dense(1),
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model; the test split is passed as the validation data.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=128,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x291dbfb80>

In [22]:
# Make predictions
y_pred = model.predict(X_test)

# Flatten the targets to 2-D and map both targets and predictions back through
# the fitted scaler so the metrics are expressed on the unscaled values.
# NOTE(review): y_test is overwritten with its inverse-transformed values, so
# running this cell again without first rebuilding y_test applies the inverse
# transform twice. Re-run the windowing cell before re-running this one.
y_test = y_test.reshape((y_test.shape[0], y_test.shape[1]))
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
msle = mean_squared_log_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# RMSE: derived from the MSE computed above rather than recomputing it.
rmse = np.sqrt(mse)

# MAPE
# Note: make sure y_test does not contain zeros to avoid division by zero.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100


print('RMSE:', rmse)
print('MAPE:', mape)


print('MSE: ', mse)
print('MSLE: ', msle)
print('MAE: ', mae)
print('R-squared: ', r2)

RMSE: 5.829913366149171
MAPE: 3.1513523969415114
MSE:  33.987889856804756
MSLE:  0.0019353868755328366
MAE:  4.222430059186883
R-squared:  0.8922566906678941


# Results: window sizes 1-3

RMSE: 5.829913366149171
MAPE: 3.1513523969415114
MSE:  33.987889856804756
MSLE:  0.0019353868755328366
MAE:  4.222430059186883
R-squared:  0.8922566906678941

# Results: window sizes 1-3-9

RMSE: 10.743495846758746
MAPE: 6.150518474732841
MSE:  115.42270300932242
MSLE:  0.006470582817612666
MAE:  8.509658938709455
R-squared:  0.6053778770352263

