In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Define Classes
class Time_Series_Dataset(Dataset):
    """Pairs each input window with its matching output window.

    Both containers are stored as given and indexed in parallel; conversion to
    float32 tensors happens lazily, one sample at a time, in ``__getitem__``.
    """

    def __init__(self, inputs, outputs):
        self.inputs, self.outputs = inputs, outputs

    def __len__(self):
        # The number of samples is defined by the inputs container alone.
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx`` as two float32 tensors."""
        sample = (self.inputs[idx], self.outputs[idx])
        x_tensor, y_tensor = (torch.tensor(part, dtype=torch.float32) for part in sample)
        return x_tensor, y_tensor

# Bitcoin Second Best DL Model: Bi-directional Long-Short Term Memory Model (BD-LSTM) Multivariate
class BDLSTM(nn.Module):
    """Stacked bidirectional LSTM with a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step fed to the first LSTM layer.
        hidden_size: Sequence of hidden widths; entry ``i`` is the width of layer ``i``.
        num_layers: Number of LSTM layers to stack. One layer is always built,
            even when this is less than 1.
        output_size: Width of the linear head's output.

    Raises:
        IndexError: If ``num_layers`` exceeds ``len(hidden_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(BDLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstms = nn.ModuleList()
        self.lstms.append(nn.LSTM(input_size, hidden_size[0], batch_first=True, bidirectional=True))

        for i in range(1, num_layers):
            # A bidirectional layer emits forward + backward states, hence the * 2 on the input width.
            self.lstms.append(nn.LSTM(hidden_size[i - 1] * 2, hidden_size[i], batch_first=True, bidirectional=True))

        # Size the head from the last layer that was actually built. Using
        # hidden_size[-1] breaks forward() whenever num_layers < len(hidden_size);
        # the two agree when num_layers == len(hidden_size).
        last_hidden = hidden_size[len(self.lstms) - 1]
        self.fc = nn.Linear(last_hidden * 2, output_size)  # * 2 because of bidirectional

    def forward(self, x):
        """Map a batch-first 3-D input ``(batch, steps, features)`` to ``(batch, output_size)``."""
        out = x
        for lstm in self.lstms:
            out, _ = lstm(out)
        # Only the final time step of the top layer feeds the head.
        return self.fc(out[:, -1, :])

class BDLSTM_Quantile(torch.nn.Module):
    """Stacked bidirectional LSTM with one linear head per quantile.

    Args:
        input_size: Number of features per time step fed to the first LSTM layer.
        hidden_size: Sequence of hidden widths; entry ``i`` is the width of layer ``i``.
        num_layers: Number of LSTM layers to stack. One layer is always built,
            even when this is less than 1.
        num_quantiles: Number of quantile heads.
        num_steps_ahead: Output width of each quantile head.

    Raises:
        IndexError: If ``num_layers`` exceeds ``len(hidden_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_quantiles, num_steps_ahead):
        super(BDLSTM_Quantile, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_quantiles = num_quantiles
        self.num_steps_ahead = num_steps_ahead

        self.lstms = nn.ModuleList()
        self.lstms.append(nn.LSTM(input_size, hidden_size[0], batch_first=True, bidirectional=True))
        for i in range(1, num_layers):
            # A bidirectional layer emits forward + backward states, hence the * 2 on the input width.
            self.lstms.append(nn.LSTM(hidden_size[i - 1] * 2, hidden_size[i], batch_first=True, bidirectional=True))

        # Size the heads from the last layer that was actually built. Using
        # hidden_size[-1] breaks forward() whenever num_layers < len(hidden_size);
        # the two agree when num_layers == len(hidden_size).
        head_in = hidden_size[len(self.lstms) - 1] * 2

        # Define separate linear layers for each quantile
        self.fc = nn.ModuleList([nn.Linear(head_in, self.num_steps_ahead) for _ in range(num_quantiles)])

    def forward(self, x):
        """Map a batch-first 3-D input to ``(batch, num_steps_ahead, num_quantiles)``."""
        for lstm in self.lstms:
            x, _ = lstm(x)

        lstm_out = x[:, -1, :]  # Use the output of the last time step

        # Each head yields (batch, num_steps_ahead); stacking on dim=2 puts quantiles last.
        quantile_outputs = [fc(lstm_out) for fc in self.fc]
        return torch.stack(quantile_outputs, dim=2)

# Define functions
def split_data(data, input_size, output_size, train_ratio, seed, features=None, target=None):
    """Cut sliding input/output windows and split them randomly into train and test sets.

    Window ``i`` pairs ``features[i : i + input_size]`` with
    ``target[i + input_size : i + input_size + output_size]``.

    Args:
        data: Only its length is used, to decide how many windows fit.
        input_size: Number of consecutive rows in each input window.
        output_size: Number of consecutive target values in each output window.
        train_ratio: Passed to ``train_test_split`` as ``train_size``.
        seed: Passed to ``train_test_split`` as ``random_state``.
        features: Sliceable container of model inputs. When omitted, the
            module-level ``features`` is used (the original behaviour).
        target: Sliceable container of values to predict. When omitted, the
            module-level ``target`` is used (the original behaviour).

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` as returned by ``train_test_split``.

    Raises:
        KeyError: If ``features`` / ``target`` are omitted and no module-level
            variable of that name exists.
    """
    # Backward compatibility: existing callers rely on the notebook-level
    # globals, so fall back to them when the arrays are not passed explicitly.
    if features is None:
        features = globals()["features"]
    if target is None:
        target = globals()["target"]

    total_size = input_size + output_size
    n_windows = len(data) - total_size + 1
    X = [features[i:i + input_size] for i in range(n_windows)]
    y = [target[i + input_size:i + total_size] for i in range(n_windows)]

    # Windows (not raw rows) are what gets split into train/test.
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size=train_ratio, random_state=seed)

    return X_train, X_test, Y_train, Y_test

# Quantile loss function
def quantile_loss(preds, targets, quantiles):
    """Average pinball loss over all quantiles.

    ``preds`` and ``targets`` are indexed as ``[:, :, i]``, where ``i`` is the
    position of a quantile in ``quantiles``. For each quantile ``q`` the
    per-element loss is ``max((q - 1) * e, q * e)`` with ``e = target - pred``;
    it is averaged per quantile, then averaged across quantiles.
    """
    def _pinball(index, q):
        residual = targets[:, :, index] - preds[:, :, index]
        return torch.mean(torch.max((q - 1) * residual, q * residual))

    per_quantile = [_pinball(index, q) for index, q in enumerate(quantiles)]
    return torch.mean(torch.stack(per_quantile))

def evaluate_model(model, dataloader, quantiles):
    """Run ``model`` over ``dataloader`` in eval mode and gather outputs and targets.

    Each batch of targets gets a trailing axis that is expanded to
    ``len(quantiles)``, so every quantile is compared against the same target.

    Returns:
        ``(all_preds, all_targets)``: the per-batch tensors concatenated along dim 0.
    """
    model.eval()  # Set the model to evaluation mode
    pred_batches, target_batches = [], []

    with torch.no_grad():  # Disable gradient calculation
        for inputs, targets in dataloader:
            # Repeat each target across a new last axis, one copy per quantile.
            target_batches.append(targets.unsqueeze(-1).expand(-1, -1, len(quantiles)))
            pred_batches.append(model(inputs))  # Forward pass

    return torch.cat(pred_batches, dim=0), torch.cat(target_batches, dim=0)

pm = "\u00B1"  # printed between each mean and its standard deviation

# Data handling
# NOTE(review): the variable is called Bitcoin but the file read is the Ethereum CSV.
Bitcoin = pd.read_csv('data/coin_Ethereum.csv')
data = Bitcoin.iloc[:, 4:]  # keep the columns from position 4 onward

# Normalize input: the six market columns, rescaled with a throwaway scaler
# that is fit on the whole series before any train/test split.
features = MinMaxScaler().fit_transform(
    data[['High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']]
)

# Normalize output: 'Close' gets its own scaler, which is kept so predictions
# can later be mapped back with scaler.inverse_transform.
target_reshaped = np.array(data['Close']).reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
target = scaler.fit_transform(target_reshaped).flatten()

# Define parameters
input_size, output_size = 6, 5  # 6 steps input, 5 steps output
train_ratio = 0.8
seed = 5925
num_experiments = 30  # Default: 30

# Test-set scores: one entry per experiment, overall and per forecast step.
rmse, mae, mape = ([] for _ in range(3))
rmse_steps, mae_steps, mape_steps = (
    [[] for _ in range(output_size)] for _ in range(3)
)

# To store training evaluation (same layout as the test-set containers)
rmse_train, mae_train, mape_train = ([] for _ in range(3))
rmse_steps_train, mae_steps_train, mape_steps_train = (
    [[] for _ in range(output_size)] for _ in range(3)
)

def _collect_predictions(model, dataloader):
    """Return (predictions, actuals) as numpy arrays for every batch in `dataloader`."""
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():  # inference only, no gradients needed
        for inputs, targets in dataloader:
            predictions.append(model(inputs).numpy())
            actuals.append(targets.numpy())
    return np.concatenate(predictions, axis=0), np.concatenate(actuals, axis=0)


def _record_metrics(y_actual, y_pred, overall, per_step):
    """Append one experiment's scores to the running lists.

    RMSE is computed on the scaled values; MAE and MAPE are computed after
    mapping both arrays back with the module-level `scaler`.

    Args:
        y_actual, y_pred: 2-D arrays, one column per forecast step.
        overall: (rmse_list, mae_list, mape_list) receiving one value each.
        per_step: (rmse_steps, mae_steps, mape_steps); each is a list holding
            one list per forecast step.

    Returns:
        (actual_values, predicted_values) after `scaler.inverse_transform`.
    """
    rmse_out, mae_out, mape_out = overall
    rmse_steps_out, mae_steps_out, mape_steps_out = per_step

    rmse_out.append(np.sqrt(mean_squared_error(y_actual, y_pred)))

    actual_values = scaler.inverse_transform(y_actual)
    predicted_values = scaler.inverse_transform(y_pred)
    mae_out.append(mean_absolute_error(actual_values, predicted_values))
    mape_out.append(mean_absolute_percentage_error(actual_values, predicted_values))

    for step in range(len(rmse_steps_out)):
        rmse_steps_out[step].append(
            np.sqrt(mean_squared_error(y_actual[:, step], y_pred[:, step]))
        )
        mae_steps_out[step].append(
            mean_absolute_error(actual_values[:, step], predicted_values[:, step])
        )
        mape_steps_out[step].append(
            mean_absolute_percentage_error(actual_values[:, step], predicted_values[:, step])
        )

    return actual_values, predicted_values


# Hyperparameters (constant across experiments, so defined once)
hidden_sizes = [50, 50]
num_layers = len(hidden_sizes)
num_epochs = 100  # Default: 100
batch_size = 16  # Changing batch size affect model accuracy significantly
learning_rate = 0.0001
# The LSTM input width is the number of feature columns per time step. It is
# NOT the window length `input_size`; the two merely happen to both equal 6.
num_features = features.shape[1]

for exp in range(num_experiments):
    # Seed torch as well as the split so weight initialisation is reproducible.
    torch.manual_seed(seed)

    X_train, X_test, y_train, y_test = split_data(data, input_size, output_size, train_ratio, seed)
    train_dataset = Time_Series_Dataset(X_train, y_train)
    test_dataset = Time_Series_Dataset(X_test, y_test)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = BDLSTM(num_features, hidden_sizes, num_layers, output_size)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(1, num_epochs + 1):
        model.train()
        for inputs, targets in train_dataloader:
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate the trained model on both the train and the test windows
    y_pred_train, y_train_actual = _collect_predictions(model, train_dataloader)
    y_pred_test, y_test_actual = _collect_predictions(model, test_dataloader)

    # Calculate train metrics
    actual_values_train, predicted_values_train = _record_metrics(
        y_train_actual, y_pred_train,
        (rmse_train, mae_train, mape_train),
        (rmse_steps_train, mae_steps_train, mape_steps_train),
    )

    # Calculate test metrics
    actual_values_test, predicted_values_test = _record_metrics(
        y_test_actual, y_pred_test,
        (rmse, mae, mape),
        (rmse_steps, mae_steps, mape_steps),
    )

    print(f"Experiment {exp+1}/{num_experiments} done")
    seed += 1  # each experiment uses a different split (and now a different init seed)

def _format_scores(rmse_runs, mae_runs, mape_runs):
    """Format mean and std of RMSE, MAE and MAPE over runs (MAPE shown as a percentage)."""
    return (
        f"Avg RMSE: {np.mean(rmse_runs):.4f} {pm} {np.std(rmse_runs):.4f}, "
        f"Avg MAE: {np.mean(mae_runs):.2f} {pm} {np.std(mae_runs):.2f}, "
        f"Avg MAPE: {np.mean(mape_runs) * 100:.3f}% {pm} {np.std(mape_runs) * 100:.3f}%"
    )


def _print_report(split_name, overall, per_step):
    """Print the overall summary line, then one line per forecast step, for one data split."""
    print(f"{split_name} set: Across {output_size} predictive time steps, " + _format_scores(*overall))
    for step in range(output_size):
        step_scores = [runs[step] for runs in per_step]
        print(f"At time step {step + 1}, " + _format_scores(*step_scores))


# The data file read and the CSVs written by this notebook are Ethereum, so the
# header is labelled accordingly (it previously said "Bitcoin").
print(f"Ethereum BD-LSTM Regression: After {num_experiments} experimental runs, here are the results:")
_print_report("Test", (rmse, mae, mape), (rmse_steps, mae_steps, mape_steps))
_print_report(
    "Train",
    (rmse_train, mae_train, mape_train),
    (rmse_steps_train, mae_steps_train, mape_steps_train),
)


Experiment 1/30 done
Experiment 2/30 done
Experiment 3/30 done
Experiment 4/30 done
Experiment 5/30 done
Experiment 6/30 done
Experiment 7/30 done
Experiment 8/30 done
Experiment 9/30 done
Experiment 10/30 done
Experiment 11/30 done
Experiment 12/30 done
Experiment 13/30 done
Experiment 14/30 done
Experiment 15/30 done
Experiment 16/30 done
Experiment 17/30 done
Experiment 18/30 done
Experiment 19/30 done
Experiment 20/30 done
Experiment 21/30 done
Experiment 22/30 done
Experiment 23/30 done
Experiment 24/30 done
Experiment 25/30 done
Experiment 26/30 done
Experiment 27/30 done
Experiment 28/30 done
Experiment 29/30 done
Experiment 30/30 done
Bitcoin BD-LSTM Regression: After 30 experimental runs, here are the results:
Test set: Across 5 predictive time steps, Avg RMSE: 0.0205 ± 0.0031, Avg MAE: 35.39 ± 4.89, Avg MAPE: 94.927% ± 62.214%
At time step 1, Avg RMSE: 0.0135 ± 0.0027, Avg MAE: 24.89 ± 5.54, Avg MAPE: 90.782% ± 77.936%
At time step 2, Avg RMSE: 0.0173 ± 0.0025, Avg MAE: 31.25

In [2]:
# Write the results to CSV. actual/pred come from the last experiment only,
# because those variables are reassigned on every loop iteration. The metric
# tables are transposed so each row is one experiment and each column one step.
for _values, _transpose, _csv_path in (
    (actual_values_test, False, 'ethereum_multi_bdlstm_classic_actual.csv'),
    (predicted_values_test, False, 'ethereum_multi_bdlstm_classic_pred.csv'),
    (rmse_steps, True, 'ethereum_multi_bdlstm_classic_rmse.csv'),
    (mae_steps, True, 'ethereum_multi_bdlstm_classic_mae.csv'),
    (mape_steps, True, 'ethereum_multi_bdlstm_classic_mape.csv'),
):
    _frame = pd.DataFrame(_values)
    if _transpose:
        _frame = _frame.transpose()
    _frame.to_csv(_csv_path)