In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import numpy as np

def quantile_loss(outputs, targets, quantile):
    """
    Calculate the quantile loss for a specific quantile of the target distribution.
    
    Args:
        outputs (torch.Tensor): The predictions from the model.
        targets (torch.Tensor): The actual values of the targets.
        quantile (float): The quantile to calculate loss for.
    
    Returns:
        torch.Tensor: The calculated quantile loss.
    """
    errors = targets - outputs
    loss = torch.max((quantile - 1) * errors, quantile * errors)
    return torch.mean(loss)

file_path = '/home/yui/Downloads/p122_synthetic_large.csv'
data = pd.read_csv(file_path)

# Feature target split
target = data['value']
features = data.drop(['value',"draw","scen"], axis=1)

# Convert features and target to tensors
X = torch.tensor(features.values, dtype=torch.float32)
y = torch.tensor(target.values, dtype=torch.float32).unsqueeze(1)  # Ensure y is the correct shape

# Initialize the scaler
scaler = StandardScaler()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on your TRAINING data only
X_train_scaled = scaler.fit_transform(X_train.numpy())  # Convert to NumPy array to fit
X_test_scaled = scaler.transform(X_test.numpy())  # Apply the same transform to the test data

# Convert scaled features back to tensors
X_train = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test = torch.tensor(X_test_scaled, dtype=torch.float32)

# Create DataLoader for training
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the Neural Network Model
class NeuralNet(nn.Module):
    def __init__(self, output_size):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(features.shape[1], 64)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, output_size)
        
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.output_layer(x)
        return x

# Instantiate the models for each quantile
#quantiles = [0.05,0.25, 0.5,0.75, 0.95]
quantiles = np.concatenate([
    np.linspace(0.01, 0.49, num=24),  # Quantiles up to just below the median
    [0.5],  # The median
    np.linspace(0.51, 0.99, num=25)  # Quantiles above the median
])
models = [NeuralNet(1) for _ in quantiles]

# Instantiate the loss function and optimizer for each model
criterion = nn.MSELoss()
optimizers = [optim.Adam(model.parameters(), lr=0.01) for model in models]

# Training the Models
num_epochs = 100
for epoch in range(num_epochs):
    total_losses = [0] * len(quantiles)
    for inputs, labels in train_loader:
        for i, quantile in enumerate(quantiles):
            models[i].train()  # Set the model to training mode
            # Forward pass
            outputs = models[i](inputs)
            loss = quantile_loss(outputs, labels, quantile)

            # Backward and optimize
            optimizers[i].zero_grad()
            loss.backward()
            optimizers[i].step()
            total_losses[i] += loss.item()

    # Average loss per epoch
    avg_losses = [total_loss / len(train_loader) for total_loss in total_losses]
    print(f'Epoch [{epoch+1}/{num_epochs}], Losses: {avg_losses}')

# Evaluate the models
for i, model in enumerate(models):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        predictions = model(X_test)
        predictions = predictions.numpy()  # Convert to numpy array for comparison
        r2 = r2_score(y_test.numpy(), predictions)
        print(f'Quantile: {quantiles[i]}, R-squared: {r2:.4f}')

Epoch [1/100], Losses: [18439038.35260566, 56753337.369918086, 90315876.8624554, 127695060.70013568, 160342947.65686718, 201638952.9405498, 230934393.59163776, 266547472.30574402, 300141241.11080956, 362856308.1811146, 373378851.4751495, 404259449.1682999, 425743054.3106689, 461652265.89999497, 483490706.7800392, 517149114.0266345, 605315563.8651189, 580056122.9460777, 594858306.4523846, 644405350.1329715, 665665428.3833358, 692687413.87326, 769883048.5761094, 756046143.2787577, 777459189.8298407, 798975164.2410171, 842415228.132067, 821316563.9041158, 854981738.2308658, 929392149.2585557, 914539302.9808533, 970931004.3632343, 945885491.6560631, 1009018820.572692, 1026293670.8670788, 1034014856.5367104, 1105652010.443138, 1086802202.5436454, 1167091696.812905, 1145019163.3931353, 1153599646.658023, 1209209771.7951655, 1175344632.7843611, 1228400551.708528, 1185192072.5612342, 1137699241.85939, 1278821629.35444, 1363820384.4082618, 1314811675.442183, 1312039463.3696668]
Epoch [2/100], L