In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Define Functions
class Time_Series_Dataset(Dataset):
    """Map-style dataset that pairs each input window with its target window.

    Args:
        inputs: Indexable collection of input windows.
        outputs: Indexable collection of target windows, looked up with the
            same index as ``inputs``.
    """

    def __init__(self, inputs, outputs):
        self.inputs, self.outputs = inputs, outputs

    @staticmethod
    def _as_float_tensor(values):
        """Copy ``values`` into a new float32 tensor."""
        return torch.tensor(values, dtype=torch.float32)

    def __len__(self):
        # The dataset size is taken from the inputs only.
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair at ``idx`` as float32 tensors."""
        window = self.inputs[idx]
        target = self.outputs[idx]
        return self._as_float_tensor(window), self._as_float_tensor(target)

def split_series(series, input_size, output_size, train_ratio, seed, shuffle=True):
    """Slice a univariate series into sliding windows and split them into train/test.

    Window ``i`` uses ``series[i:i + input_size]`` as input and the following
    ``output_size`` values as target, e.g. input ``[10, 20, 30]`` and target
    ``[40, 50]``.

    Args:
        series: Sliceable 1-D sequence of observations.
        input_size: Number of observations in each input window.
        output_size: Number of observations in each target window.
        train_ratio: Passed to ``train_test_split`` as ``train_size``.
        seed: Passed to ``train_test_split`` as ``random_state``.
        shuffle: Passed to ``train_test_split``. The default ``True`` keeps the
            original behaviour (windows are shuffled before splitting). Because
            consecutive windows overlap, a shuffled split places the same
            observations in both train and test; pass ``False`` to keep the
            windows in order so the test windows are the last ones.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` as returned by ``train_test_split``.

    Raises:
        ValueError: If the series is too short to form a single window.
    """
    n_windows = len(series) - input_size - output_size + 1
    if n_windows <= 0:
        # Fail early with an actionable message instead of handing empty
        # lists to train_test_split.
        raise ValueError(
            f"series of length {len(series)} is too short for "
            f"input_size={input_size} and output_size={output_size}"
        )

    # 1. split univariate series to input (X) and output (y)
    X = [series[i:i + input_size] for i in range(n_windows)]
    y = [series[i + input_size:i + input_size + output_size] for i in range(n_windows)]

    # 2. (optionally) shuffle the windows and split into train/test
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, train_size=train_ratio, random_state=seed, shuffle=shuffle
    )

    return X_train, X_test, Y_train, Y_test

# Quantile loss function
def quantile_loss(preds, target, quantiles):
    """Mean pinball (quantile) loss averaged over all requested quantiles.

    Args:
        preds: Tensor indexed as ``preds[:, :, i]``; slice ``i`` holds the
            prediction for ``quantiles[i]``.
        target: Tensor indexed the same way as ``preds``; slice ``i`` is the
            ground truth compared against prediction slice ``i``.
        quantiles: Iterable of quantile levels.

    Returns:
        Scalar tensor: the mean over quantiles of the per-quantile mean loss.
    """
    losses = []
    for i, quantile in enumerate(quantiles):
        # Use the `target` parameter. The previous code read `targets`, which
        # is not defined in this function and raised NameError (or silently
        # picked up an unrelated global of that name).
        errors = target[:, :, i] - preds[:, :, i]
        # Pinball loss: under-prediction is weighted by q, over-prediction by (1 - q).
        losses.append(torch.mean(torch.max((quantile - 1) * errors, quantile * errors)))
    return torch.mean(torch.stack(losses))

def evaluate_model(model, dataloader, quantiles):
    """Collect the model's predictions and quantile-expanded targets for a loader.

    The model is switched to evaluation mode and run without gradient
    tracking. Each batch of inputs gets a trailing axis added before the
    forward pass; each batch of targets gets a trailing axis that is expanded
    to ``len(quantiles)`` so it lines up with one prediction per quantile.

    Args:
        model: Module to evaluate.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        quantiles: Sequence whose length sets the size of the targets' last axis.

    Returns:
        ``(all_preds, all_targets)``: the per-batch results concatenated
        along axis 0.
    """
    model.eval()  # evaluation mode
    pred_batches, target_batches = [], []

    with torch.no_grad():  # no gradients needed for evaluation
        for batch_x, batch_y in dataloader:
            features = batch_x.unsqueeze(-1)
            expanded = batch_y.unsqueeze(-1).expand(-1, -1, len(quantiles))
            pred_batches.append(model(features))
            target_batches.append(expanded)

    return torch.cat(pred_batches, dim=0), torch.cat(target_batches, dim=0)

# Plus-minus sign (U+00B1). Not referenced elsewhere in the visible code: the
# summary prints below embed the character directly in their f-strings.
pm = "\u00B1"

class BDLSTMClassic(nn.Module):
    """Bidirectional LSTM followed by a linear head for multi-step forecasting.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward hidden states are concatenated, hence 2 * hidden_size.
        self.fc = nn.Linear(2 * hidden_size, output_size)

    def forward(self, x):
        """Return the head's output for the last time step of each sequence in ``x``."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Load and preprocess the data
Bitcoin = pd.read_csv('data/coin_Bitcoin.csv')
# NOTE(review): column 7 is presumably the closing price (going by the variable
# name) — confirm against the CSV header.
Close_Price = Bitcoin.iloc[:, 7].copy()
Close_Price_reshaped = np.array(Close_Price).reshape(-1, 1)
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split, so test-period values influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
Close_Price_scaled = scaler.fit_transform(Close_Price_reshaped).flatten()

# Experiment configuration
# NOTE(review): input_size is used both as the LSTM feature count and as the
# window length passed to split_series, so every input window holds a single
# observation — confirm this is intended.
input_size = 1
hidden_size = 50
num_layers = 2
output_size = 5
train_ratio = 0.8
seed = 5925            # base seed; run k uses seed + k
num_experiments = 30
num_epochs = 100


def _predict(model, dataloader):
    """Run ``model`` over ``dataloader`` without gradients.

    Returns ``(predictions, targets)`` as NumPy arrays with the batches
    concatenated along axis 0.
    """
    model.eval()
    pred_batches, target_batches = [], []
    with torch.no_grad():
        for inputs, targets in dataloader:
            outputs = model(inputs.unsqueeze(-1).float())
            pred_batches.append(outputs.numpy())
            target_batches.append(targets.numpy())
    return np.concatenate(pred_batches, axis=0), np.concatenate(target_batches, axis=0)


def _to_price_scale(values):
    """Undo the min-max scaling on an array of any shape.

    The scaler was fitted on a single column, so the values are flattened to
    one column for the inverse transform and then restored to their original
    shape instead of relying on broadcasting across several columns.
    """
    return scaler.inverse_transform(values.reshape(-1, 1)).reshape(values.shape)


def _record_metrics(pred_scaled, actual_scaled,
                    rmse_all, mae_all, mape_all,
                    rmse_steps, mae_steps, mape_steps):
    """Append one run's overall and per-step metrics to the given accumulators.

    RMSE is computed on the scaled values; MAE and MAPE are computed after
    mapping the values back to the price scale.

    Returns ``(actual_prices, predicted_prices)`` on the price scale.
    """
    rmse_all.append(np.sqrt(mean_squared_error(actual_scaled, pred_scaled)))

    pred_price = _to_price_scale(pred_scaled)
    actual_price = _to_price_scale(actual_scaled)
    mae_all.append(mean_absolute_error(actual_price, pred_price))
    mape_all.append(mean_absolute_percentage_error(actual_price, pred_price))

    # One column per forecast step.
    for step in range(pred_scaled.shape[1]):
        rmse_steps[step].append(
            np.sqrt(mean_squared_error(actual_scaled[:, step], pred_scaled[:, step])))
        mae_steps[step].append(
            mean_absolute_error(actual_price[:, step], pred_price[:, step]))
        mape_steps[step].append(
            mean_absolute_percentage_error(actual_price[:, step], pred_price[:, step]))

    return actual_price, pred_price


def _format_stats(rmse, mae, mape):
    """Format mean ± std of the three metric collections for the summary lines."""
    return (
        f"Avg RMSE: {np.mean(rmse):.4f} ± {np.std(rmse):.4f}, "
        f"Avg MAE: {np.mean(mae):.2f} ± {np.std(mae):.2f}, "
        f"Avg MAPE: {np.mean(mape) * 100:.3f}% ± {np.std(mape) * 100:.3f}%"
    )


def _print_summary(split, rmse_all, mae_all, mape_all, rmse_steps, mae_steps, mape_steps):
    """Print the overall and per-step summary for ``split`` ('train' or 'test')."""
    print(f"BDLSTM Classic {split.capitalize()}: After {num_experiments} experimental runs, here are the results:")
    print(f"Across {output_size} predictive time steps on {split} data, "
          + _format_stats(rmse_all, mae_all, mape_all))
    for step in range(output_size):
        print(f"At time step {step + 1} on {split} data, "
              + _format_stats(rmse_steps[step], mae_steps[step], mape_steps[step]))


# Metric accumulators: one entry per experiment (overall) and one list per
# forecast step (per-step).
rmse_train, mae_train, mape_train = [], [], []
rmse_steps_train = [[] for _ in range(output_size)]
mae_steps_train = [[] for _ in range(output_size)]
mape_steps_train = [[] for _ in range(output_size)]

rmse_test, mae_test, mape_test = [], [], []
rmse_steps_test = [[] for _ in range(output_size)]
mae_steps_test = [[] for _ in range(output_size)]
mape_steps_test = [[] for _ in range(output_size)]

for exp in range(num_experiments):
    # Same split seeds as before (seed, seed + 1, ...) but without mutating
    # `seed`, so re-running this cell reproduces the same splits.
    run_seed = seed + exp
    # Seed torch as well so weight initialisation is reproducible per run.
    torch.manual_seed(run_seed)

    # NOTE(review): split_series shuffles overlapping windows before splitting,
    # so train and test windows share observations.
    X_train, X_test, y_train, y_test = split_series(
        Close_Price_scaled, input_size, output_size, train_ratio, run_seed)
    train_dataset = Time_Series_Dataset(X_train, y_train)
    test_dataset = Time_Series_Dataset(X_test, y_test)
    train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    model = BDLSTMClassic(input_size, hidden_size, num_layers, output_size)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Training loop
    for epoch in range(1, num_epochs + 1):
        model.train()
        for inputs, targets in train_dataloader:
            inputs = inputs.unsqueeze(-1).float()
            targets = targets.float()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate the model on the train set
    y_pred_train, y_train_actual = _predict(model, train_dataloader)
    actual_values_train, predicted_values_train = _record_metrics(
        y_pred_train, y_train_actual,
        rmse_train, mae_train, mape_train,
        rmse_steps_train, mae_steps_train, mape_steps_train)

    # Evaluate the model on the test set.
    # actual_values_test / predicted_values_test are overwritten every run, so
    # after the loop they hold the values of the final experiment only.
    y_pred_test, y_test_actual = _predict(model, test_dataloader)
    actual_values_test, predicted_values_test = _record_metrics(
        y_pred_test, y_test_actual,
        rmse_test, mae_test, mape_test,
        rmse_steps_test, mae_steps_test, mape_steps_test)

    print(f"Experiment {exp+1}/{num_experiments} done")

_print_summary("train", rmse_train, mae_train, mape_train,
               rmse_steps_train, mae_steps_train, mape_steps_train)

_print_summary("test", rmse_test, mae_test, mape_test,
               rmse_steps_test, mae_steps_test, mape_steps_test)


Experiment 1/30 done
Experiment 2/30 done
Experiment 3/30 done
Experiment 4/30 done
Experiment 5/30 done
Experiment 6/30 done
Experiment 7/30 done
Experiment 8/30 done
Experiment 9/30 done
Experiment 10/30 done
Experiment 11/30 done
Experiment 12/30 done
Experiment 13/30 done
Experiment 14/30 done
Experiment 15/30 done
Experiment 16/30 done
Experiment 17/30 done
Experiment 18/30 done
Experiment 19/30 done
Experiment 20/30 done
Experiment 21/30 done
Experiment 22/30 done
Experiment 23/30 done
Experiment 24/30 done
Experiment 25/30 done
Experiment 26/30 done
Experiment 27/30 done
Experiment 28/30 done
Experiment 29/30 done
Experiment 30/30 done
BDLSTM Classic Train: After 30 experimental runs, here are the results:
Across 5 predictive time steps on train data, Avg RMSE: 0.0159 ± 0.0004, Avg MAE: 439.17 ± 58.01, Avg MAPE: 23.473% ± 14.421%
At time step 1 on train data, Avg RMSE: 0.0097 ± 0.0006, Avg MAE: 298.66 ± 77.28, Avg MAPE: 24.150% ± 16.871%
At time step 2 on train data, Avg RMSE: 0

In [4]:
# Persist the test-set artefacts as CSV files in the working directory.
# The actual/predicted values are written as-is; the per-step metric lists are
# transposed before writing so that each forecast step becomes a column.
for csv_path, values, transpose_first in (
    ('bitcoin_uni_classic_bdlstm_actual.csv', actual_values_test, False),
    ('bitcoin_uni_classic_bdlstm_pred.csv', predicted_values_test, False),
    ('bitcoin_uni_classic_bdlstm_rmse.csv', rmse_steps_test, True),
    ('bitcoin_uni_classic_bdlstm_mae.csv', mae_steps_test, True),
    ('bitcoin_uni_classic_bdlstm_mape.csv', mape_steps_test, True),
):
    frame = pd.DataFrame(values)
    if transpose_first:
        frame = frame.transpose()
    frame.to_csv(csv_path)