In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Define Classes
class Time_Series_Dataset(Dataset):
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = self.inputs[idx]
        y = self.outputs[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# BD-LSTM Quantile Regression Model
class BDLSTM_Quantile(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_quantiles, num_steps_ahead):
        super(BDLSTM_Quantile, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_quantiles = num_quantiles
        self.num_steps_ahead = num_steps_ahead

        self.lstms = nn.ModuleList()
        self.lstms.append(nn.LSTM(input_size, hidden_size[0], batch_first=True, bidirectional=True))
        for i in range(1, num_layers):
            self.lstms.append(nn.LSTM(hidden_size[i-1] * 2, hidden_size[i], batch_first=True, bidirectional=True))
        
        # Define separate linear layers for each quantile
        self.fc = nn.ModuleList([nn.Linear(hidden_size[-1] * 2, self.num_steps_ahead) for _ in range(num_quantiles)])

    def forward(self, x):
        for lstm in self.lstms:
            x, _ = lstm(x)
        
        lstm_out = x[:, -1, :]  # Use the output of the last time step
        
        # Compute the outputs for each quantile
        quantile_outputs = [fc(lstm_out) for fc in self.fc]
        
        # Stack the quantile outputs
        output = torch.stack(quantile_outputs, dim=2)
        return output

# Define functions
def split_data(data, input_size, output_size, train_ratio, seed):
    X, y = [], [] 
    total_size = input_size + output_size
    for i in range(len(data) - total_size + 1):
        X.append(features[i:i + input_size])                            
        y.append(target[i + input_size:i + total_size])        
    # Shuffle batches and split into train/test
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size=train_ratio, random_state=seed)
    
    return X_train, X_test, Y_train, Y_test

# Quantile loss function
def quantile_loss(preds, targets, quantiles):
    losses = []
    for i, quantile in enumerate(quantiles):
        errors = targets[:, :, i] - preds[:, :, i]
        losses.append(torch.mean(torch.max((quantile - 1) * errors, quantile * errors)))
    return torch.mean(torch.stack(losses))

def evaluate_model(model, dataloader, quantiles):
    model.eval()  # Set the model to evaluation mode
    all_preds = []
    all_targets = []
    
    with torch.no_grad():  # Disable gradient calculation
        for inputs, targets in dataloader:
            targets = targets.unsqueeze(-1).expand(-1, -1, len(quantiles))  # Reshape targets
            outputs = model(inputs)  # Forward pass
            all_preds.append(outputs)
            all_targets.append(targets)
    
    all_preds = torch.cat(all_preds, dim=0)
    all_targets = torch.cat(all_targets, dim=0)
    
    return all_preds, all_targets

pm = "\u00B1"

# Data handling
Bitcoin = pd.read_csv('data/coin_Ethereum.csv')
data = Bitcoin.iloc[:, 4:]

features = data[['High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']]
features = MinMaxScaler().fit_transform(features)  # Normalize input
target = data['Close']
target_reshaped = np.array(target).reshape(-1, 1)  # Normalize output
scaler = MinMaxScaler(feature_range=(0, 1))
target = scaler.fit_transform(target_reshaped).flatten()

# Define parameters
input_size = 6        # 6 steps input
output_size = 5       # 5 steps output
train_ratio = 0.8
seed = 5925
num_experiments = 30  # Default: 30
quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]

rmse, mae, mape = [], [], []
rmse_steps = [[] for _ in range(output_size)]
mae_steps = [[] for _ in range(output_size)]
mape_steps = [[] for _ in range(output_size)]
train_rmse, train_mae, train_mape = [], [], []
train_rmse_steps = [[] for _ in range(output_size)]
train_mae_steps = [[] for _ in range(output_size)]
train_mape_steps = [[] for _ in range(output_size)]
rmse_005, rmse_025, rmse_050, rmse_075, rmse_095 = [], [], [], [], []

for exp in range(num_experiments):
    X_train, X_test, y_train, y_test = split_data(data, input_size, output_size, train_ratio, seed)
    train_dataset = Time_Series_Dataset(X_train, y_train)
    test_dataset = Time_Series_Dataset(X_test, y_test)
    train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)  # Changing batch size affect model accuracy significantly
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    
    # Hyperparameters
    hidden_size = [50, 50]
    num_layers = len(hidden_size)
    num_quantiles = len(quantiles)  # Number of quantiles to predict
    
    # Create the model
    model = BDLSTM_Quantile(input_size, hidden_size, num_layers, num_quantiles, output_size)

    # Loss and optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Training loop
    num_epochs = 100  # Default: 100
    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_dataloader:
            targets = targets.unsqueeze(-1).expand(-1, -1, len(quantiles))  # [size, num_steps_ahead, num_quantiles]
            
            optimizer.zero_grad()
            outputs = model(inputs)  # Forward pass - [size, num_steps_ahead, num_quantiles]
            loss = quantile_loss(outputs, targets, quantiles)
            loss.backward()
            optimizer.step()
    
            running_loss += loss.item()
        
        # Print epoch information for the first epoch and every 10th epoch thereafter
        # if epoch == 1 or epoch % 10 == 0 or epoch == num_epochs:
            # print(f'Epoch [{epoch}/{num_epochs}], Loss: {running_loss / len(train_dataloader):.6f}')

    predicted_values, actual_values = evaluate_model(model, test_dataloader, quantiles)
    predicted_values_train, actual_values_train = evaluate_model(model, train_dataloader, quantiles)
    
    predicted_values = predicted_values.numpy()
    actuals = actual_values.numpy()[:, :, 0]
    predicted_values_train = predicted_values_train.numpy()
    actuals_train = actual_values_train.numpy()[:, :, 0]

    pred_005 = predicted_values[:, :, 0]
    pred_025 = predicted_values[:, :, 1]
    pred_050 = predicted_values[:, :, 2]
    pred_075 = predicted_values[:, :, 3]
    pred_095 = predicted_values[:, :, 4]

    mse_005 = mean_squared_error(pred_005.flatten(), actuals.flatten())
    mse_025 = mean_squared_error(pred_025.flatten(), actuals.flatten())
    mse_050 = mean_squared_error(pred_050.flatten(), actuals.flatten())
    mse_075 = mean_squared_error(pred_075.flatten(), actuals.flatten())
    mse_095 = mean_squared_error(pred_095.flatten(), actuals.flatten())

    rmse_005.append(np.sqrt(mse_005))
    rmse_025.append(np.sqrt(mse_025))
    rmse_050.append(np.sqrt(mse_050))
    rmse_075.append(np.sqrt(mse_075))
    rmse_095.append(np.sqrt(mse_095))

    pred_values = predicted_values.reshape(-1, 1)
    pred_values = scaler.inverse_transform(pred_values)
    pred_values = pred_values.reshape(predicted_values.shape)
    actual_values = scaler.inverse_transform(actuals)

    predicted_005 = pred_values[:, :, 0]
    predicted_025 = pred_values[:, :, 1]
    predicted_050 = pred_values[:, :, 2]
    predicted_075 = pred_values[:, :, 3]
    predicted_095 = pred_values[:, :, 4]

    mae.append(mean_absolute_error(actual_values, predicted_050))
    mape.append(mean_absolute_percentage_error(actual_values, predicted_050))

    for step in range(output_size):
        mse_step = mean_squared_error(pred_050[:, step], actuals[:, step])
        rmse_steps[step].append(np.sqrt(mse_step))
        mae_steps[step].append(mean_absolute_error(predicted_050[:, step], actual_values[:, step]))
        mape_steps[step].append(mean_absolute_percentage_error(predicted_050[:, step], actual_values[:, step]))

    pred_005_train = predicted_values_train[:, :, 0]
    pred_025_train = predicted_values_train[:, :, 1]
    pred_050_train = predicted_values_train[:, :, 2]
    pred_075_train = predicted_values_train[:, :, 3]
    pred_095_train = predicted_values_train[:, :, 4]

    train_mse_050 = mean_squared_error(pred_050_train, actuals_train)
    train_rmse.append(np.sqrt(train_mse_050))

    train_pred_values = predicted_values_train.reshape(-1, 1)
    train_pred_values = scaler.inverse_transform(train_pred_values)
    train_pred_values = train_pred_values.reshape(predicted_values_train.shape)
    train_actual_values = scaler.inverse_transform(actuals_train)

    train_predicted_050 = train_pred_values[:, :, 2]

    train_mae.append(mean_absolute_error(train_actual_values, train_predicted_050))
    train_mape.append(mean_absolute_percentage_error(train_actual_values, train_predicted_050))

    for step in range(output_size):
        train_mse_step = mean_squared_error(pred_050_train[:, step], actuals_train[:, step])
        train_rmse_steps[step].append(np.sqrt(train_mse_step))
        train_mae_steps[step].append(mean_absolute_error(train_predicted_050[:, step], train_actual_values[:, step]))
        train_mape_steps[step].append(mean_absolute_percentage_error(train_predicted_050[:, step], train_actual_values[:, step]))

    print(f"Experiment {exp+1}/{num_experiments} done")
    seed += 1

print(f'Bitcoin BD-LSTM Average RMSE across {output_size} time steps at different quantiles:')
print(f'Quantile 0.05: {np.mean(rmse_005):.6f} {pm} {np.std(rmse_005):.6f}')
print(f'Quantile 0.25: {np.mean(rmse_025):.6f} {pm} {np.std(rmse_025)::.6f}')
print(f'Quantile 0.50: {np.mean(rmse_050):.6f} {pm} {np.std(rmse_050):.6f}')
print(f'Quantile 0.75: {np.mean(rmse_075):.6f} {pm} {np.std(rmse_075):.6f}')
print(f'Quantile 0.95: {np.mean(rmse_095):.6f} {pm} {np.std(rmse_095):.6f}')

print("--------------------------------------------")

print(f"BD-LSTM Quantile Regression Average Performance on Test Data: RMSE: {np.mean(rmse_050):.4f} {pm} {np.std(rmse_050):.4f}, MAE: {np.mean(mae):.4f} {pm} {np.std(mae):.4f}, MAPE: {np.mean(mape)*100:.4f}% {pm} {np.std(mape)*100:.4f}%")
for step in range(output_size):
    print(f"At time step {step + 1}, test predictions have RMSE: {np.mean(rmse_steps[step]):.4f} {pm} {np.std(rmse_steps[step]):.4f}, MAE: {np.mean(mae_steps[step]):.4f} {pm} {np.std(mae_steps[step]):.4f} and MAPE: {np.mean(mape_steps[step])*100:.4f}% {pm} {np.std(mape_steps[step])*100:.4f}%")

print("--------------------------------------------")

print(f"BD-LSTM Quantile Regression Average Performance on Train Data: RMSE: {np.mean(train_rmse):.4f} {pm} {np.std(train_rmse):.4f}, MAE: {np.mean(train_mae):.4f} {pm} {np.std(train_mae):.4f}, MAPE: {np.mean(train_mape)*100:.4f}% {pm} {np.std(train_mape)*100:.4f}%")
for step in range(output_size):
    print(f"At time step {step + 1}, train predictions have RMSE: {np.mean(train_rmse_steps[step]):.4f} {pm} {np.std(train_rmse_steps[step]):.4f}, MAE: {np.mean(train_mae_steps[step]):.4f} {pm} {np.std(train_mae_steps[step]):.4f} and MAPE: {np.mean(train_mape_steps[step])*100:.4f}% {pm} {np.std(train_mape_steps[step])*100:.4f}%")


Experiment 1/30 done
Experiment 2/30 done
Experiment 3/30 done
Experiment 4/30 done
Experiment 5/30 done
Experiment 6/30 done
Experiment 7/30 done
Experiment 8/30 done
Experiment 9/30 done
Experiment 10/30 done
Experiment 11/30 done
Experiment 12/30 done
Experiment 13/30 done
Experiment 14/30 done
Experiment 15/30 done
Experiment 16/30 done
Experiment 17/30 done
Experiment 18/30 done
Experiment 19/30 done
Experiment 20/30 done
Experiment 21/30 done
Experiment 22/30 done
Experiment 23/30 done
Experiment 24/30 done
Experiment 25/30 done
Experiment 26/30 done
Experiment 27/30 done
Experiment 28/30 done
Experiment 29/30 done
Experiment 30/30 done
Bitcoin BD-LSTM Average RMSE across 5 time steps at different quantiles:
Quantile 0.05: 0.038480 ± 0.004178


ValueError: Invalid format specifier

In [2]:
print(f'Bitcoin BD-LSTM Average RMSE across {output_size} time steps at different quantiles:')
print(f'Quantile 0.05: {np.mean(rmse_005):.6f} {pm} {np.std(rmse_005):.6f}')
print(f'Quantile 0.25: {np.mean(rmse_025):.6f} {pm} {np.std(rmse_025):.6f}')
print(f'Quantile 0.50: {np.mean(rmse_050):.6f} {pm} {np.std(rmse_050):.6f}')
print(f'Quantile 0.75: {np.mean(rmse_075):.6f} {pm} {np.std(rmse_075):.6f}')
print(f'Quantile 0.95: {np.mean(rmse_095):.6f} {pm} {np.std(rmse_095):.6f}')

print("--------------------------------------------")

print(f"BD-LSTM Quantile Regression Average Performance on Test Data: RMSE: {np.mean(rmse_050):.4f} {pm} {np.std(rmse_050):.4f}, MAE: {np.mean(mae):.4f} {pm} {np.std(mae):.4f}, MAPE: {np.mean(mape)*100:.4f}% {pm} {np.std(mape)*100:.4f}%")
for step in range(output_size):
    print(f"At time step {step + 1}, test predictions have RMSE: {np.mean(rmse_steps[step]):.4f} {pm} {np.std(rmse_steps[step]):.4f}, MAE: {np.mean(mae_steps[step]):.4f} {pm} {np.std(mae_steps[step]):.4f} and MAPE: {np.mean(mape_steps[step])*100:.4f}% {pm} {np.std(mape_steps[step])*100:.4f}%")

print("--------------------------------------------")

print(f"BD-LSTM Quantile Regression Average Performance on Train Data: RMSE: {np.mean(train_rmse):.4f} {pm} {np.std(train_rmse):.4f}, MAE: {np.mean(train_mae):.4f} {pm} {np.std(train_mae):.4f}, MAPE: {np.mean(train_mape)*100:.4f}% {pm} {np.std(train_mape)*100:.4f}%")
for step in range(output_size):
    print(f"At time step {step + 1}, train predictions have RMSE: {np.mean(train_rmse_steps[step]):.4f} {pm} {np.std(train_rmse_steps[step]):.4f}, MAE: {np.mean(train_mae_steps[step]):.4f} {pm} {np.std(train_mae_steps[step]):.4f} and MAPE: {np.mean(train_mape_steps[step])*100:.4f}% {pm} {np.std(train_mape_steps[step])*100:.4f}%")


Bitcoin BD-LSTM Average RMSE across 5 time steps at different quantiles:
Quantile 0.05: 0.038480 ± 0.004178
Quantile 0.25: 0.022783 ± 0.002483
Quantile 0.50: 0.020219 ± 0.003040
Quantile 0.75: 0.024856 ± 0.005336
Quantile 0.95: 0.038268 ± 0.005633
--------------------------------------------
BD-LSTM Quantile Regression Average Performance on Test Data: RMSE: 0.0202 ± 0.0030, MAE: 31.7566 ± 3.2381, MAPE: 27.7637% ± 12.5915%
At time step 1, test predictions have RMSE: 0.0128 ± 0.0024, MAE: 20.7363 ± 2.8725 and MAPE: 29.8036% ± 32.2427%
At time step 2, test predictions have RMSE: 0.0172 ± 0.0026, MAE: 27.7738 ± 3.0369 and MAPE: 44.2163% ± 44.1468%
At time step 3, test predictions have RMSE: 0.0204 ± 0.0038, MAE: 32.6860 ± 4.0473 and MAPE: 38.7799% ± 38.7359%
At time step 4, test predictions have RMSE: 0.0227 ± 0.0034, MAE: 36.6205 ± 3.6755 and MAPE: 34.6962% ± 38.9120%
At time step 5, test predictions have RMSE: 0.0253 ± 0.0043, MAE: 40.9664 ± 4.0903 and MAPE: 36.5177% ± 32.3593%
--------

In [3]:
quantile_dfs = []
for i in range(pred_values.shape[2]):
    quantile_df = pd.DataFrame(
        pred_values[:, :, i], 
        columns=[f'Quantile_{i+1}_Timestep_{j+1}' for j in range(pred_values.shape[1])]
    )
    quantile_dfs.append(quantile_df)
predicted_df = pd.concat(quantile_dfs, axis=1)

predicted_df.to_csv('ethereum_multi_bdlstm_quantile_results.csv')
pd.DataFrame(actual_values).to_csv('ethereum_multi_bdlstm_quantile_actual.csv')
pd.DataFrame(rmse_steps).transpose().to_csv('ethereum_multi_bdlstm_quantile_rmse.csv')
pd.DataFrame(mae_steps).transpose().to_csv('ethereum_multi_bdlstm_quantile_mae.csv')
pd.DataFrame(mape_steps).transpose().to_csv('ethereum_multi_bdlstm_quantile_mape.csv')