In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np

# Time Series Dataset Class
class Time_Series_Dataset(Dataset):
    """In-memory dataset that pairs each input window with its target window.

    Both arrays are converted to float32 tensors once, at construction time,
    so indexing afterwards is a plain tensor lookup.
    """

    def __init__(self, X, y):
        """Store the inputs and targets as float32 tensors.

        Args:
            X: array-like of input windows; the first axis indexes samples.
            y: array-like of targets; must hold one entry per sample in ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` disagree on the number of samples.
        """
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # __len__ is driven by X alone, so a shorter y would otherwise only
        # surface later as an out-of-range index in the middle of an epoch.
        if self.X.shape[:1] != self.y.shape[:1]:
            raise ValueError(
                "X and y must contain the same number of samples, got shapes "
                f"{tuple(self.X.shape)} and {tuple(self.y.shape)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Split Series Function
def split_series(series, input_size, output_size, train_ratio, seed):
    """Cut ``series`` into sliding windows and randomly split them into train/test.

    Window ``i`` uses ``series[i:i + input_size]`` as input and the following
    ``output_size`` values as target. Windows advance one step at a time.

    Args:
        series: sliceable sequence of observations (e.g. a 1-D NumPy array).
        input_size: number of past observations in each input window.
        output_size: number of future observations in each target window.
        train_ratio: fraction of the windows assigned to the training set.
        seed: seed for the shuffle; note that it reseeds NumPy's global RNG.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays. All four are
        empty when the series is too short to hold a single window.

    Note:
        Windows are shuffled before splitting. Because consecutive windows
        overlap, the train and test sets share observations; use a
        chronological split if a leakage-free evaluation is required.
    """
    np.random.seed(seed)
    data_length = len(series)

    # A window starting at i needs i + input_size + output_size <= data_length,
    # so the last valid start index is data_length - input_size - output_size
    # (inclusive). The "+ 1" keeps that final window.
    num_windows = max(data_length - input_size - output_size + 1, 0)

    X = np.array([series[i:i + input_size] for i in range(num_windows)])
    y = np.array([series[i + input_size:i + input_size + output_size] for i in range(num_windows)])

    # Size the training set from the number of windows actually produced.
    # Using the raw series length inflates the train share and can leave the
    # test set empty when the windows are long.
    train_size = int(num_windows * train_ratio)

    indices = np.arange(num_windows)
    np.random.shuffle(indices)

    train_indices = indices[:train_size]
    test_indices = indices[train_size:]

    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

# Convolutional LSTM Model for Quantile Regression with Multi-Step Prediction
class Conv_LSTM_Quantile(nn.Module):
    """Conv1d -> bidirectional LSTM -> linear head producing quantile forecasts.

    The network maps a batch of sequences shaped
    ``(batch_size, seq_length, input_size)`` to a tensor shaped
    ``(batch_size, num_steps_ahead, num_quantiles)``.
    """

    def __init__(self, input_size, hidden_size, num_quantiles, num_steps_ahead, num_filters, kernel_size):
        """Build the three layers.

        Args:
            input_size: number of input channels fed to the convolution.
            hidden_size: hidden units per LSTM direction.
            num_quantiles: number of quantiles predicted for every step.
            num_steps_ahead: number of future steps predicted.
            num_filters: output channels of the convolution (LSTM input width).
            kernel_size: width of the convolution kernel.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_quantiles = num_quantiles
        self.num_steps_ahead = num_steps_ahead

        # 'same' padding keeps the sequence length unchanged through the convolution.
        self.conv1 = nn.Conv1d(input_size, num_filters, kernel_size, padding='same')

        # One bidirectional layer, so every time step emits 2 * hidden_size features.
        self.lstm = nn.LSTM(input_size=num_filters, hidden_size=hidden_size, batch_first=True, bidirectional=True)

        # One output per (step, quantile) pair.
        self.fc = nn.Linear(2 * hidden_size, num_steps_ahead * num_quantiles)

    def forward(self, x):
        """Return quantile forecasts shaped ``(batch_size, num_steps_ahead, num_quantiles)``."""
        # Conv1d wants channels before time: (batch, channels, seq_length).
        channels_first = x.transpose(1, 2)
        activated = self.conv1(channels_first).relu()
        # Back to (batch, seq_length, num_filters) for the batch_first LSTM.
        sequence = activated.transpose(1, 2)

        lstm_out, _ = self.lstm(sequence)

        # Only the final time step feeds the head.
        flat = self.fc(lstm_out[:, -1])

        return flat.view(flat.size(0), self.num_steps_ahead, self.num_quantiles)

# Quantile loss function
def quantile_loss(preds, targets, quantiles):
    """Mean pinball (quantile) loss over every sample, step and quantile.

    Args:
        preds: tensor indexed as ``[sample, step, quantile]``.
        targets: tensor indexed the same way as ``preds``.
        quantiles: iterable of quantile levels; entry ``i`` scores column ``i``.

    Returns:
        A scalar tensor holding the averaged loss.
    """
    def pinball(column, level):
        # Under-prediction is weighted by `level`, over-prediction by `1 - level`.
        residual = targets[:, :, column] - preds[:, :, column]
        return torch.max((level - 1) * residual, level * residual)

    per_quantile = [pinball(column, level) for column, level in enumerate(quantiles)]
    return torch.stack(per_quantile, dim=2).mean()

# Evaluate model function
def evaluate_model(model, dataloader, quantiles):
    """Run ``model`` over ``dataloader`` without gradients and gather the results.

    Each input batch gets a trailing feature axis before it is passed to the
    model. Each target batch is repeated along a new last axis, once per
    quantile, so it lines up with the model output.

    Args:
        model: module to evaluate; it is switched to eval mode.
        dataloader: iterable yielding ``(inputs, targets)`` batches.
        quantiles: sequence of quantile levels; only its length is used.

    Returns:
        ``(predictions, targets)``, each concatenated along the batch axis.
    """
    n_quantiles = len(quantiles)
    pred_batches, target_batches = [], []

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            features = batch_x.unsqueeze(-1).float()  # add the feature axis
            tiled = batch_y.unsqueeze(-1).expand(-1, batch_y.shape[1], n_quantiles).float()
            pred_batches.append(model(features))
            target_batches.append(tiled)

    return torch.cat(pred_batches, dim=0), torch.cat(target_batches, dim=0)

# Load the price history and scale the closing prices into [0, 1].
# NOTE: despite its name, `Bitcoin` holds the Ethereum CSV read on the next line.
Bitcoin = pd.read_csv('data/coin_Ethereum.csv')
Close_Price = Bitcoin['Close'].copy()  # closing-price column
Close_Price_reshaped = np.array(Close_Price).reshape(-1, 1)  # 2-D column, as the scaler requires
# NOTE(review): the scaler is fitted on the full series, before any train/test split.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(Close_Price_reshaped)
Close_Price_scaled = scaler.transform(Close_Price_reshaped).flatten()

# Experiment configuration
input_size, output_size = 1, 5  # window length / forecast horizon handed to split_series
hidden_size = 20
train_ratio = 0.8
seed = 5925
num_experiments = 30
quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]
num_quantiles = len(quantiles)

# Metric accumulators (train): one entry is appended per experiment.
rmse_train, mae_train, mape_train = ([] for _ in range(3))
rmse_005_train, rmse_025_train, rmse_050_train, rmse_075_train, rmse_095_train = ([] for _ in range(5))
# One independent list per forecast step.
rmse_steps_train, mae_steps_train, mape_steps_train = (
    [[] for _ in range(output_size)] for _ in range(3)
)

# Metric accumulators (test): same layout as the train accumulators.
rmse_test, mae_test, mape_test = ([] for _ in range(3))
rmse_005_test, rmse_025_test, rmse_050_test, rmse_075_test, rmse_095_test = ([] for _ in range(5))
rmse_steps_test, mae_steps_test, mape_steps_test = (
    [[] for _ in range(output_size)] for _ in range(3)
)

# Hyperparameters shared by every experiment (loop-invariant, so defined once).
num_features = 1  # univariate series: one value per time step -> Conv1d in_channels
num_filters = 64  # Number of filters for Conv1D layer
kernel_size = 2  # Kernel size for Conv1D layer
num_steps_ahead = output_size  # forecast horizon; must equal the target width built by split_series
num_epochs = 100
median_idx = quantiles.index(0.5)  # column of the median forecast, used for the point metrics

# Per-quantile RMSE accumulators, listed in the same order as `quantiles`.
rmse_quantiles_train = [rmse_005_train, rmse_025_train, rmse_050_train, rmse_075_train, rmse_095_train]
rmse_quantiles_test = [rmse_005_test, rmse_025_test, rmse_050_test, rmse_075_test, rmse_095_test]


def _evaluate_split(model, dataloader, quantiles, scaler):
    """Predict on one split and return the results in scaled and original units.

    Returns:
        A 4-tuple of NumPy arrays:
        ``scaled_preds`` (samples, steps, quantiles) - raw model output;
        ``scaled_actuals`` (samples, steps) - targets as seen by the model;
        ``preds`` - ``scaled_preds`` passed through ``scaler.inverse_transform``;
        ``actuals`` - ``scaled_actuals`` passed through ``scaler.inverse_transform``.
    """
    preds_tensor, targets_tensor = evaluate_model(model, dataloader, quantiles)
    scaled_preds = preds_tensor.numpy()
    # evaluate_model repeats the target once per quantile; one copy is enough.
    scaled_actuals = targets_tensor.numpy()[:, :, 0]
    # The scaler works on a single column, so flatten, invert, and restore the shape.
    preds = scaler.inverse_transform(scaled_preds.reshape(-1, 1)).reshape(scaled_preds.shape)
    actuals = scaler.inverse_transform(scaled_actuals.reshape(-1, 1)).reshape(scaled_actuals.shape)
    return scaled_preds, scaled_actuals, preds, actuals


def _record_metrics(scaled_preds, scaled_actuals, preds, actuals, median_idx,
                    rmse_quantile_lists, mae_list, mape_list,
                    rmse_steps, mae_steps, mape_steps):
    """Append one experiment's metrics for a single split to the given accumulators.

    RMSE values are computed on the scaled data; MAE and MAPE are computed on the
    inverse-transformed data. All scikit-learn metrics are called as
    ``(y_true, y_pred)``.
    """
    # RMSE of every quantile forecast, pooled over all forecast steps.
    for q_idx, rmse_list in enumerate(rmse_quantile_lists):
        mse = mean_squared_error(scaled_actuals.flatten(), scaled_preds[:, :, q_idx].flatten())
        rmse_list.append(np.sqrt(mse))

    median_scaled = scaled_preds[:, :, median_idx]
    median_preds = preds[:, :, median_idx]

    # Point-forecast quality of the median, pooled over all forecast steps.
    mae_list.append(mean_absolute_error(actuals.flatten(), median_preds.flatten()))
    mape_list.append(mean_absolute_percentage_error(actuals.flatten(), median_preds.flatten()))

    # The same metrics broken down by forecast step.
    for step, (rmse_acc, mae_acc, mape_acc) in enumerate(zip(rmse_steps, mae_steps, mape_steps)):
        step_mse = mean_squared_error(scaled_actuals[:, step], median_scaled[:, step])
        rmse_acc.append(np.sqrt(step_mse))
        mae_acc.append(mean_absolute_error(actuals[:, step], median_preds[:, step]))
        # y_true must come first: MAPE divides by |y_true|, so swapping the
        # arguments would normalise by the predictions instead of the actuals.
        mape_acc.append(mean_absolute_percentage_error(actuals[:, step], median_preds[:, step]))


for exp in range(num_experiments):
    # Seed torch too, so weight initialisation is reproducible per experiment
    # (split_series only seeds NumPy).
    torch.manual_seed(seed)

    X_train, X_test, y_train, y_test = split_series(Close_Price_scaled, input_size, output_size, train_ratio, seed)
    train_dataset = Time_Series_Dataset(X_train, y_train)
    test_dataset = Time_Series_Dataset(X_test, y_test)
    train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    # The convolution's channel count is the number of features per time step,
    # not the window length, so it is passed separately from `input_size`.
    model = Conv_LSTM_Quantile(num_features, hidden_size, num_quantiles, num_steps_ahead, num_filters, kernel_size)

    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_dataloader:
            inputs = inputs.unsqueeze(-1).float()  # (batch, window, 1): add the feature axis
            # Repeat the target once per quantile so it lines up with the model output.
            targets = targets.unsqueeze(-1).expand(-1, num_steps_ahead, num_quantiles).float()

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = quantile_loss(outputs, targets, quantiles)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        if epoch % 10 == 0 or epoch == num_epochs:
            print(f'Epoch [{epoch}/{num_epochs}], Loss: {running_loss / len(train_dataloader):.6f}')

    # Evaluate the model on the train set
    predicted_values_train, actuals_train, pred_values_train, actual_values_train = _evaluate_split(
        model, train_dataloader, quantiles, scaler
    )
    _record_metrics(
        predicted_values_train, actuals_train, pred_values_train, actual_values_train, median_idx,
        rmse_quantiles_train, mae_train, mape_train,
        rmse_steps_train, mae_steps_train, mape_steps_train,
    )

    # Evaluate the model on the test set. `pred_values_test` and
    # `actual_values_test` stay bound after the loop (last experiment only).
    predicted_values_test, actuals_test, pred_values_test, actual_values_test = _evaluate_split(
        model, test_dataloader, quantiles, scaler
    )
    _record_metrics(
        predicted_values_test, actuals_test, pred_values_test, actual_values_test, median_idx,
        rmse_quantiles_test, mae_test, mape_test,
        rmse_steps_test, mae_steps_test, mape_steps_test,
    )

    print(f"Experiment {exp+1}/{num_experiments} done")
    seed += 1
pm = "\u00B1"

# Per-quantile RMSE: mean and standard deviation over all experiments.
quantile_labels = ("0.05", "0.25", "0.50", "0.75", "0.95")
rmse_by_quantile = {
    "Train": (rmse_005_train, rmse_025_train, rmse_050_train, rmse_075_train, rmse_095_train),
    "Test": (rmse_005_test, rmse_025_test, rmse_050_test, rmse_075_test, rmse_095_test),
}
for split_label, accumulators in rmse_by_quantile.items():
    print(f'Average RMSE across {output_size} time steps at different quantiles ({split_label}):')
    for label, scores in zip(quantile_labels, accumulators):
        print(f'Quantile {label}: {np.mean(scores):.6f} {pm} {np.std(scores):.6f}')

print("--------------------------------------------")

# Median-forecast summary, overall and per forecast step, for each split.
split_summaries = (
    ("Train", rmse_050_train, mae_train, mape_train, rmse_steps_train, mae_steps_train, mape_steps_train),
    ("Test", rmse_050_test, mae_test, mape_test, rmse_steps_test, mae_steps_test, mape_steps_test),
)
for split_label, rmse_all, mae_all, mape_all, rmse_by_step, mae_by_step, mape_by_step in split_summaries:
    print(f"Univariate Quantile Regression Average Performance ({split_label}): RMSE: {np.mean(rmse_all):.4f} {pm} {np.std(rmse_all):.4f}, MAE: {np.mean(mae_all):.4f} {pm} {np.std(mae_all):.4f}, MAPE: {np.mean(mape_all)*100:.4f}% {pm} {np.std(mape_all)*100:.4f}%")
    for step in range(output_size):
        print(f"At time step {step + 1} ({split_label}), predictions have RMSE: {np.mean(rmse_by_step[step]):.4f} {pm} {np.std(rmse_by_step[step]):.4f}, MAE: {np.mean(mae_by_step[step]):.4f} {pm} {np.std(mae_by_step[step]):.4f} and MAPE: {np.mean(mape_by_step[step])*100:.4f}% {pm} {np.std(mape_by_step[step])*100:.4f}%")


  return F.conv1d(input, weight, bias, self.stride,


Epoch [10/100], Loss: 0.005287
Epoch [20/100], Loss: 0.002311
Epoch [30/100], Loss: 0.002207
Epoch [40/100], Loss: 0.002194
Epoch [50/100], Loss: 0.002176
Epoch [60/100], Loss: 0.002161
Epoch [70/100], Loss: 0.002160
Epoch [80/100], Loss: 0.002153
Epoch [90/100], Loss: 0.002146
Epoch [100/100], Loss: 0.002142
Experiment 1/30 done
Epoch [10/100], Loss: 0.008608
Epoch [20/100], Loss: 0.002539
Epoch [30/100], Loss: 0.002401
Epoch [40/100], Loss: 0.002361
Epoch [50/100], Loss: 0.002335
Epoch [60/100], Loss: 0.002319
Epoch [70/100], Loss: 0.002313
Epoch [80/100], Loss: 0.002306
Epoch [90/100], Loss: 0.002300
Epoch [100/100], Loss: 0.002295
Experiment 2/30 done
Epoch [10/100], Loss: 0.013843
Epoch [20/100], Loss: 0.002545
Epoch [30/100], Loss: 0.002347
Epoch [40/100], Loss: 0.002312
Epoch [50/100], Loss: 0.002299
Epoch [60/100], Loss: 0.002285
Epoch [70/100], Loss: 0.002275
Epoch [80/100], Loss: 0.002270
Epoch [90/100], Loss: 0.002267
Epoch [100/100], Loss: 0.002266
Experiment 3/30 done
Epoc

In [2]:
# Export the test-set forecasts held in `pred_values_test`, one column per
# (quantile, step) pair, grouped by quantile.
n_steps, n_quantiles = pred_values_test.shape[1], pred_values_test.shape[2]
quantile_dfs = [
    pd.DataFrame(
        pred_values_test[:, :, q],
        columns=[f'Quantile_{q+1}_Timestep_{t+1}' for t in range(n_steps)],
    )
    for q in range(n_quantiles)
]
predicted_df = pd.concat(quantile_dfs, axis=1)

# Forecasts and matching actuals.
predicted_df.to_csv('ethereum_uni_clstm_quantile_results.csv')
pd.DataFrame(actual_values_test).to_csv('ethereum_uni_clstm_quantile_actual.csv')

# Per-step metrics: transposed so each row is an experiment and each column a step.
per_step_exports = (
    (rmse_steps_test, 'ethereum_uni_clstm_quantile_rmse.csv'),
    (mae_steps_test, 'ethereum_uni_clstm_quantile_mae.csv'),
    (mape_steps_test, 'ethereum_uni_clstm_quantile_mape.csv'),
)
for per_step_scores, csv_path in per_step_exports:
    pd.DataFrame(per_step_scores).transpose().to_csv(csv_path)