In [1]:
import warnings
warnings.filterwarnings("ignore")

# data ingestion and data transformation
from src.dataset.data_ingestion import DataIngestion
from src.dataset.data_transformation import DataTransformation

# pandas and numpy
import pandas as pd
import numpy as np

# sklearn metrics and MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# torch
import torch
import torch.nn as nn

# device: use the GPU when CUDA is available, otherwise fall back to the CPU
# so the notebook still runs on machines without a GPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# import helpers
from helpers.sliding_window import sliding_window, convert_array_to_tensor



# config for lstm


# model architectures (GRU and LSTM)
from src.models.gru_model import GRUModel
from src.models.lstm import LSTM

# plots through matplotlib
import matplotlib.pyplot as plt


# identifier of the series to load (it is also the value column in the preview)
SERIES_ID = "DEXUSEU"

# sliding-window length and batch size
# (BATCH_SIZE is not referenced by the cells visible in this notebook section)
WINDOW_SIZE = 14
BATCH_SIZE = 64

# fetch the series through the project's ingestion class and preview 10 rows
ingestion = DataIngestion(SERIES_ID)
data = ingestion.fetch_data()
data.head(10)






Unnamed: 0,DEXUSEU,date
0,1.1812,1999-01-04
1,1.176,1999-01-05
2,1.1636,1999-01-06
3,1.1672,1999-01-07
4,1.1554,1999-01-08
5,1.1534,1999-01-11
6,1.1548,1999-01-12
7,1.1698,1999-01-13
8,1.1689,1999-01-14
9,1.1591,1999-01-15


- Data Transformation

In [2]:
# build the scaled train/test split for the series via the project's
# DataTransformation class (its logger reports the resulting sizes and shapes)
transformer = DataTransformation(SERIES_ID)
train, test = transformer.split_data()

2025-12-31 14:36:25 - INFO - tools.logger - 43 - Train Size: 3333
2025-12-31 14:36:25 - INFO - tools.logger - 55 - Shape of scaled training data: (3333, 1)
2025-12-31 14:36:25 - INFO - tools.logger - 56 - Shape of scaled testing data: (455, 1)


In [3]:
# window each split into an (inputs, targets) pair, then convert both halves
# of the pair to tensors in one pass
X_train, y_train = (
    convert_array_to_tensor(part) for part in sliding_window(train, WINDOW_SIZE)
)
X_test, y_test = (
    convert_array_to_tensor(part) for part in sliding_window(test, WINDOW_SIZE)
)



In [4]:
# fix the RNG seed before the model is built so weight initialisation, and
# therefore the whole training run, is repeatable across kernel restarts
SEED = 42
torch.manual_seed(SEED)

# LSTM model (project class) with a single input and a single output feature
model = LSTM(input_size=1, hidden_size=512, num_layers=1, output_size=1)

# optimisation settings: Adam on the model parameters with an MSE objective
learning_rate = 0.001
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()

In [5]:
# report metrics every LOG_EVERY epochs (1 = every epoch); raise it to
# shorten the printed log
LOG_EVERY = 1

for epoch in range(epochs):
    # --- one optimisation step on the whole training set (single batch) ---
    model.train()
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = loss_fn(y_pred.float(), y_train)
    loss.backward()
    optimizer.step()

    if epoch % LOG_EVERY != 0:
        continue

    # --- evaluation pass: eval mode, no gradient tracking ---
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train)
        # torch.sqrt + .item() works for tensors on any device and yields a
        # plain Python float (np.sqrt only works for CPU tensors)
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train)).item()
        y_pred_test = model(X_test)
        test_rmse = torch.sqrt(loss_fn(y_pred_test, y_test)).item()
    print(f'Epoch: {epoch}; train_rmse: {train_rmse:.4}; Test rmse: {test_rmse:.4}')


Epoch: 0; train_rmse: 0.4996; Test rmse: 0.3247
Epoch: 1; train_rmse: 0.4209; Test rmse: 0.2437
Epoch: 2; train_rmse: 0.3201; Test rmse: 0.139
Epoch: 3; train_rmse: 0.199; Test rmse: 0.1114
Epoch: 4; train_rmse: 0.4687; Test rmse: 0.5292
Epoch: 5; train_rmse: 0.2203; Test rmse: 0.2377
Epoch: 6; train_rmse: 0.2056; Test rmse: 0.09135
Epoch: 7; train_rmse: 0.2527; Test rmse: 0.08022
Epoch: 8; train_rmse: 0.2858; Test rmse: 0.1073
Epoch: 9; train_rmse: 0.3045; Test rmse: 0.1261
Epoch: 10; train_rmse: 0.3133; Test rmse: 0.1356
Epoch: 11; train_rmse: 0.315; Test rmse: 0.138
Epoch: 12; train_rmse: 0.3113; Test rmse: 0.1348
Epoch: 13; train_rmse: 0.3029; Test rmse: 0.127
Epoch: 14; train_rmse: 0.2901; Test rmse: 0.1151
Epoch: 15; train_rmse: 0.2731; Test rmse: 0.09977
Epoch: 16; train_rmse: 0.2519; Test rmse: 0.08282
Epoch: 17; train_rmse: 0.2269; Test rmse: 0.07074
Epoch: 18; train_rmse: 0.2006; Test rmse: 0.07897
Epoch: 19; train_rmse: 0.1809; Test rmse: 0.117
Epoch: 20; train_rmse: 0.1841;

In [6]:
# fraction of rows treated as the training split when locating the test rows
# NOTE(review): assumed to mirror the split made inside DataTransformation — confirm
TRAIN_FRACTION = 0.88

# refit a MinMaxScaler on the first column of the raw frame so that model
# outputs can be mapped back to the original units
# NOTE(review): this is only exact if DataTransformation scaled the data with
# a MinMaxScaler fitted on the same values — confirm against that class
scaler = MinMaxScaler()
training = data.iloc[:, 0:1].values
scaler.fit(training)

# number of rows in the training split (used later to align the test dates)
train_length = int(len(training) * TRAIN_FRACTION)

# undo the scaling on the test predictions and the test targets
model.eval()  # do not rely on the training cell having left the model in eval mode
with torch.no_grad():
    y_pred = model(X_test)

# hand scikit-learn real NumPy arrays rather than torch tensors
y_pred_np = y_pred.detach().cpu().numpy()
y_test_np = y_test.detach().cpu().numpy()
pred_rescaled = scaler.inverse_transform(y_pred_np)
actual_rescaled = scaler.inverse_transform(y_test_np)





In [7]:
# score the rescaled predictions against the rescaled targets
r2 = r2_score(actual_rescaled, pred_rescaled)
mse = mean_squared_error(actual_rescaled, pred_rescaled)
mape = mean_absolute_percentage_error(actual_rescaled, pred_rescaled)
mae = mean_absolute_error(actual_rescaled, pred_rescaled)

# report in the order: R2 (scaled by 100), MSE, MAPE, MAE
report_lines = (
    f"R2 Score: {r2*100:.2f}",
    f"Mean Squared Error: {mse:.4}",
    f"Mean Absolute Percentage Error: {mape:.4f}",
    f"Mean Absolute Error: {mae}",
)
for line in report_lines:
    print(line)

R2 Score: 91.02
Mean Squared Error: 0.0003745
Mean Absolute Percentage Error: 0.0138
Mean Absolute Error: 0.015142834313669981


In [8]:
# side-by-side table of actual vs. predicted spot rate, keyed by date

# the date rows start WINDOW_SIZE rows after the end of the training split
first_target_row = train_length + WINDOW_SIZE
test_dates = data["date"].iloc[first_target_row:].reset_index(drop=True)

columns = {
    "date": test_dates,
    "Actual Spot Rate": actual_rescaled.flatten(),
    "Predicted Spot Rate": pred_rescaled.flatten(),
}
comparison_df = pd.DataFrame(columns)

# show the first and the last 20 rows
for preview in (comparison_df.head(20), comparison_df.tail(20)):
    print(preview)

         date  Actual Spot Rate  Predicted Spot Rate
0  2019-09-26            1.0938             1.105866
1  2019-09-27            1.0942             1.104107
2  2019-09-30            1.0905             1.102412
3  2019-10-02            1.0951             1.100682
4  2019-10-04            1.0974             1.099436
5  2019-10-15            1.1036             1.098639
6  2019-10-17            1.1129             1.098563
7  2019-10-18            1.1155             1.099535
8  2019-10-23            1.1118             1.101091
9  2019-10-25            1.1081             1.102697
10 2019-10-30            1.1123             1.103697
11 2019-11-01            1.1169             1.104906
12 2019-11-08            1.1019             1.106283
13 2019-11-12            1.1017             1.106504
14 2019-11-21            1.1067             1.106368
15 2019-11-22            1.1029             1.106521
16 2019-11-25            1.1009             1.106383
17 2019-11-26            1.1012             1.