In [1]:
import warnings
# Install a global "ignore" filter so warnings do not clutter the cell outputs.
# NOTE(review): this also hides deprecation and numerical warnings - consider narrowing it.
warnings.filterwarnings("ignore")

# data ingestion and data transformation
from src.dataset.data_ingestion import DataIngestion
from src.dataset.data_transformation import DataTransformation

# pandas and numpy
import pandas as pd
import numpy as np

# sklearn metrics and MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# torch
import torch
import torch.nn as nn

# device: use the GPU when torch can see one, otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

# import helpers
from helpers.sliding_window import sliding_window, convert_array_to_tensor



# lstm
from src.models.lstm import LSTM

# plots through matplotlib
import matplotlib.pyplot as plt


# windowing / batching constants
# NOTE(review): BATCH_SIZE is not referenced by the cells visible here - confirm it is still needed
WINDOW_SIZE = 14
BATCH_SIZE = 64

# identifier of the series to load
SERIES_ID = "DEXUSEU"

# fetch the series and preview its first ten rows
data = DataIngestion(SERIES_ID).fetch_data()
data.head(10)






Unnamed: 0,DEXUSEU,date
0,1.1812,1999-01-04
1,1.176,1999-01-05
2,1.1636,1999-01-06
3,1.1672,1999-01-07
4,1.1554,1999-01-08
5,1.1534,1999-01-11
6,1.1548,1999-01-12
7,1.1698,1999-01-13
8,1.1689,1999-01-14
9,1.1591,1999-01-15


- Data Transformation

In [2]:
# build the transformer for this series, then ask it for the scaled
# training / testing split
transformer = DataTransformation(SERIES_ID)
train, test = transformer.split_data()

2025-12-31 17:50:26 - INFO - tools.logger - 43 - Train Size: 3030
2025-12-31 17:50:26 - INFO - tools.logger - 55 - Shape of scaled training data: (3030, 1)
2025-12-31 17:50:26 - INFO - tools.logger - 56 - Shape of scaled testing data: (758, 1)


In [3]:
# window each split into (inputs, targets) pairs - train first, then test
(X_train, y_train), (X_test, y_test) = (
    sliding_window(split, WINDOW_SIZE) for split in (train, test)
)

# convert all four arrays to tensors, in the same order as before:
# X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test = map(
    convert_array_to_tensor, (X_train, X_test, y_train, y_test)
)



In [4]:
# training hyperparameters
learning_rate = 0.001
epochs = 100

# LSTM model: one input feature, one output value
model = LSTM(input_size=1, hidden_size=512, num_layers=2, output_size=1)

# mean-squared-error loss, minimised with Adam
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
# Training loop: each epoch performs one optimisation step on the whole
# X_train tensor, then (every LOG_EVERY epochs) reports RMSE on both splits.
LOG_EVERY = 1  # report metrics every LOG_EVERY epochs; 1 = every epoch

for epoch in range(epochs):
    # --- optimisation step ---
    model.train()
    y_pred = model(X_train)
    loss = loss_fn(y_pred.float(), y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # skip the (comparatively expensive) evaluation on non-reporting epochs
    if epoch % LOG_EVERY != 0:
        continue

    # --- evaluation: eval mode, no gradient tracking ---
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train)
        # torch.sqrt keeps the computation inside torch (np.sqrt on a tensor
        # fails once the tensors live on a GPU); .item() gives a Python float
        # so the format spec below applies to a plain number.
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train)).item()
        y_pred_test = model(X_test)
        test_rmse = torch.sqrt(loss_fn(y_pred_test, y_test)).item()
        print(f'Epoch: {epoch}; train_rmse: {train_rmse:.4}; Test rmse: {test_rmse:.4}')


Epoch: 0; train_rmse: 0.433; Test rmse: 0.2568
Epoch: 1; train_rmse: 0.2884; Test rmse: 0.09859
Epoch: 2; train_rmse: 0.3023; Test rmse: 0.3329
Epoch: 3; train_rmse: 0.2222; Test rmse: 0.1925
Epoch: 4; train_rmse: 0.2211; Test rmse: 0.09097
Epoch: 5; train_rmse: 0.24; Test rmse: 0.07191
Epoch: 6; train_rmse: 0.2403; Test rmse: 0.07122
Epoch: 7; train_rmse: 0.2226; Test rmse: 0.06991
Epoch: 8; train_rmse: 0.1952; Test rmse: 0.0901
Epoch: 9; train_rmse: 0.1803; Test rmse: 0.1466
Epoch: 10; train_rmse: 0.1975; Test rmse: 0.208
Epoch: 11; train_rmse: 0.1889; Test rmse: 0.2012
Epoch: 12; train_rmse: 0.1534; Test rmse: 0.1383
Epoch: 13; train_rmse: 0.1432; Test rmse: 0.07809
Epoch: 14; train_rmse: 0.1472; Test rmse: 0.05074
Epoch: 15; train_rmse: 0.1337; Test rmse: 0.04423
Epoch: 16; train_rmse: 0.08645; Test rmse: 0.03872
Epoch: 17; train_rmse: 0.08273; Test rmse: 0.06952
Epoch: 18; train_rmse: 0.09317; Test rmse: 0.05186
Epoch: 19; train_rmse: 0.0506; Test rmse: 0.05454
Epoch: 20; train_rm

In [6]:
# load in MinMaxScaler
# NOTE(review): this scaler is fitted here on every row of `data`; it only undoes
# the scaling correctly if DataTransformation scaled the series the same way - confirm.
scaler = MinMaxScaler()
training = data.iloc[:, 0:1].values  # first column of `data`, kept 2-D via the 0:1 slice
scaler.fit(training)

# training length: row count of the leading TRAIN_FRACTION of the series
# (presumably mirrors the split made inside DataTransformation - verify)
TRAIN_FRACTION = 0.80
train_length = int(len(training) * TRAIN_FRACTION)

# undo the scaling on y_pred and y_test

# set eval mode explicitly so this cell does not depend on the mode the
# training cell happened to leave the model in
model.eval()
with torch.no_grad():
    y_pred = model(X_test)
    # bring the tensors to host memory and pass sklearn real NumPy arrays
    y_pred_np = y_pred.detach().cpu().numpy()
    y_test_np = y_test.detach().cpu().numpy()
    pred_rescaled = scaler.inverse_transform(y_pred_np)
    actual_rescaled = scaler.inverse_transform(y_test_np)





In [7]:
# compute every metric on the rescaled actual / predicted values first ...
r2 = r2_score(actual_rescaled, pred_rescaled)
mse = mean_squared_error(actual_rescaled, pred_rescaled)
mape = mean_absolute_percentage_error(actual_rescaled, pred_rescaled)
mae = mean_absolute_error(actual_rescaled, pred_rescaled)

# ... then report them: R2 (scaled by 100), MSE, MAPE, MAE
print(f"R2 Score: {r2*100:.2f}")
print(f"Mean Squared Error: {mse:.4}")
print(f"Mean Absolute Percentage Error: {mape:.4f}")
print(f"Mean Absolute Error: {mae}")

R2 Score: 92.90
Mean Squared Error: 0.0002735
Mean Absolute Percentage Error: 0.0116
Mean Absolute Error: 0.012878069414847315


In [8]:
# pair each prediction with its date: take the `date` column from row
# train_length + WINDOW_SIZE onward and renumber it from zero
# (assumes sliding_window consumes the first WINDOW_SIZE rows of the test split - TODO confirm)
test_dates = data["date"].iloc[train_length + WINDOW_SIZE:].reset_index(drop=True)

# side-by-side table of actual vs. predicted spot rate
comparison_df = pd.DataFrame(
    {
        "date": test_dates,
        "Actual Spot Rate": actual_rescaled.flatten(),
        "Predicted Spot Rate": pred_rescaled.flatten(),
    }
)

# show the first and the last 20 rows
print(comparison_df.head(20))
print(comparison_df.tail(20))

         date  Actual Spot Rate  Predicted Spot Rate
0  2016-07-12            1.1074             1.109992
1  2016-07-14            1.1109             1.108923
2  2016-07-15            1.1059             1.108649
3  2016-07-20            1.1007             1.108445
4  2016-07-22            1.0968             1.107634
5  2016-07-25            1.0980             1.106199
6  2016-07-27            1.0988             1.104517
7  2016-07-28            1.1094             1.103429
8  2016-07-29            1.1168             1.103316
9  2016-08-01            1.1176             1.104540
10 2016-08-02            1.1225             1.106463
11 2016-08-04            1.1134             1.108951
12 2016-08-10            1.1171             1.110842
13 2016-08-16            1.1277             1.112356
14 2016-08-18            1.1334             1.114376
15 2016-08-19            1.1326             1.117113
16 2016-08-22            1.1314             1.120012
17 2016-08-26            1.1237             1.