In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as dist
import torch.nn.utils as utils

class GammaNAMLSS(nn.Module):
    """Neural additive model for location, scale and shape with a Gamma response.

    Every covariate gets its own small network (``Linear -> Tanh -> Linear``) that maps
    the scalar covariate to two raw outputs. After a softplus, the first output is that
    covariate's contribution to the Gamma shape ``alpha`` and the second its contribution
    to the Gamma rate ``beta``. Contributions are summed over covariates.
    """

    def __init__(self, n_covariates, hidden_size=8, intercept=False):
        """Build one sub-network per covariate.

        Args:
            n_covariates: Number of input columns; one sub-network is created per column.
            hidden_size: Width of the single hidden layer of each sub-network.
            intercept: If True, learn a two-element intercept (shape, rate) that is passed
                through a softplus and added to the summed contributions.
        """
        super().__init__()

        self.submodules = nn.ModuleList([
            nn.Sequential(
                nn.Linear(1, hidden_size),
                nn.Tanh(),
                nn.Linear(hidden_size, 2)
            ) for _ in range(n_covariates)
        ])

        self.use_intercept = intercept
        if self.use_intercept:
            self.intercept = nn.Parameter(torch.zeros(2))

    def _component_params(self, x):
        """Return the per-covariate (alpha, beta) contributions, each of shape (rows, columns)."""
        raw = torch.stack(
            [self.submodules[i](x[:, i:i + 1]) for i in range(x.shape[1])], dim=1
        )
        return F.softplus(raw[:, :, 0]), F.softplus(raw[:, :, 1])

    def _to_model_device(self, x):
        """Move ``x`` to the device of the model parameters (no-op for a parameter-free model)."""
        first_param = next(self.parameters(), None)
        return x if first_param is None else x.to(first_param.device)

    def _snapshot_state(self):
        """Return a detached copy of the state dict that later optimizer steps cannot modify."""
        return {key: value.detach().clone() for key, value in self.state_dict().items()}

    def forward(self, x):
        """Compute the Gamma parameters for every row of ``x``.

        Args:
            x: Tensor whose columns are the covariates (one column per sub-network).

        Returns:
            Tuple ``(alpha, beta)`` of strictly positive tensors of shape ``(rows, 1)``.
        """
        alpha_components, beta_components = self._component_params(x)

        # The small constant keeps both parameters strictly positive even if softplus underflows.
        alpha = torch.sum(alpha_components, dim=1).unsqueeze(dim=1) + 1e-10
        beta = torch.sum(beta_components, dim=1).unsqueeze(dim=1) + 1e-10

        if self.use_intercept:
            alpha = alpha + F.softplus(self.intercept[0])
            beta = beta + F.softplus(self.intercept[1])

        return alpha, beta

    def nll_loss(self, alpha, beta, y_true, robustness_factor=None):
        """Mean negative log-likelihood of ``y_true`` under Gamma(shape=alpha, rate=beta).

        Args:
            alpha: Shape parameters as returned by ``forward``.
            beta: Rate parameters as returned by ``forward``.
            y_true: Observed responses. A tensor with the same number of elements as
                ``alpha`` but a different shape (e.g. ``(rows,)``) is reshaped to match.
            robustness_factor: Optional scalar ``c`` (tensor or number). When given, each
                log-likelihood ``l`` is replaced by ``log((1 + exp(l + c)) / (1 + exp(c)))``.

        Returns:
            Scalar tensor with the mean negative (optionally robustified) log-likelihood.
        """
        # Without this, (rows, 1) parameters and a (rows,) target broadcast to a
        # (rows, rows) matrix and every observation is scored against every prediction.
        if y_true.shape != alpha.shape and y_true.numel() == alpha.numel():
            y_true = y_true.reshape(alpha.shape)

        log_likelihood = (
            torch.xlogy(alpha - 1, y_true)
            - beta * y_true
            - torch.lgamma(alpha)
            + alpha * torch.log(beta)
        )

        if robustness_factor is not None:
            c = torch.as_tensor(
                robustness_factor, dtype=log_likelihood.dtype, device=log_likelihood.device
            )
            # softplus(l + c) - softplus(c) equals log((1 + exp(l + c)) / (1 + exp(c)))
            # but does not overflow to inf / inf for large c.
            log_likelihood = F.softplus(log_likelihood + c) - F.softplus(c)

        return -log_likelihood.mean()

    def fit(self, X_train, y_train, X_val=None, y_val=None, n_epochs=10000, lr=1e-3, weight_decay=0.0,
            early_stopping_patience=10, robustness_factor=None, gradient_clip_value=1.0, min_epochs=1000):
        """Train with full-batch Adam and optional early stopping on a validation set.

        Args:
            X_train, y_train: Training covariates and responses.
            X_val, y_val: Optional validation data; early stopping is active only if both are given.
            n_epochs: Maximum number of full-batch updates.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.
            early_stopping_patience: Number of consecutive epochs without validation
                improvement after which training stops.
            robustness_factor: Forwarded to ``nll_loss``.
            gradient_clip_value: Accepted for interface compatibility; gradient clipping is
                currently disabled, so the value is not applied.
            min_epochs: Early stopping is not triggered before this epoch index.

        Returns:
            ``self``. If early stopping triggered, the weights are those of the best validation epoch.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(device)

        X_train, y_train = X_train.to(device), y_train.to(device)
        has_validation = X_val is not None and y_val is not None
        if has_validation:
            X_val, y_val = X_val.to(device), y_val.to(device)

        optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)

        best_val_loss = float('inf')
        best_model_state = None
        patience_counter = 0

        for epoch in range(n_epochs):
            self.train()

            # Forward pass and loss computation
            alpha, beta = self.forward(X_train)
            train_loss = self.nll_loss(alpha, beta, y_train, robustness_factor)

            optimizer.zero_grad()
            train_loss.backward()

            # Gradient clipping is currently disabled; gradient_clip_value is not applied.

            if epoch % 10 == 0:
                # Gradient statistics are only needed for the log line, so compute them
                # here rather than paying for the .item() synchronisations every epoch.
                params = list(self.parameters())
                grads = [p.grad.abs() for p in params if p.grad is not None]
                max_grad = max((g.max().item() for g in grads), default=0)
                mean_grad = sum(g.mean().item() for g in grads) / len(params)
                print(f"Epoch {epoch} - Train Loss: {train_loss.item():.4f} - Max Grad: {max_grad:.4f} - Mean Grad: {mean_grad:.4f}")

            optimizer.step()

            val_loss = None
            if has_validation:
                self.eval()
                with torch.no_grad():
                    alpha_val, beta_val = self.forward(X_val)
                    val_loss = self.nll_loss(alpha_val, beta_val, y_val, robustness_factor).item()

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # state_dict() returns references to the live parameters, so the
                    # snapshot must be cloned or later updates would overwrite it.
                    best_model_state = self._snapshot_state()
                else:
                    patience_counter += 1

                if (patience_counter >= early_stopping_patience) and (epoch >= min_epochs):
                    print(f"Early stopping at epoch {epoch}. Best validation loss: {best_val_loss:.4f}")
                    # No snapshot exists if the validation loss was never finite.
                    if best_model_state is not None:
                        self.load_state_dict(best_model_state)
                    break

            if epoch % 100 == 0:
                if val_loss is not None:
                    print(f"Epoch {epoch} - Train Loss: {train_loss.item():.4f} - Val Loss: {val_loss:.4f}")
                else:
                    print(f"Epoch {epoch} - Train Loss: {train_loss.item():.4f}")

        return self

    def predict(self, x):
        """Return ``(alpha, beta)`` for ``x`` as tensors detached from the autograd graph.

        ``x`` is moved to the device of the model parameters first, so CPU inputs also
        work after ``fit`` has moved the model to a GPU.
        """
        with torch.no_grad():
            alpha, beta = self.forward(self._to_model_device(x))

        return alpha, beta

    def marginal_effects(self, x):
        """Per-covariate mean and variance implied by each covariate's own contributions.

        With shape ``alpha`` and rate ``beta`` (the parameterisation used by ``nll_loss``),
        a Gamma distribution has mean ``alpha / beta`` and variance ``alpha / beta ** 2``.
        These formulas are applied column-wise to the per-covariate contributions.

        Returns:
            Tuple ``(mean_components, variance_components)`` of NumPy arrays of shape
            ``(rows, columns)``.
        """
        with torch.no_grad():
            alpha_t, beta_t = self._component_params(self._to_model_device(x))
            alpha_components = alpha_t.cpu().numpy()
            beta_components = beta_t.cpu().numpy()

            mean_components = alpha_components / beta_components
            variance_components = alpha_components / beta_components ** 2

        return mean_components, variance_components

In [None]:
import pandas as pd

# parking_df = pd.read_csv("C:/Users/Tobias/Desktop/Workspace/Robust Neural Networks/Pytorch-Workspace/parking_ohe.csv", sep = ";")
# Read the semicolon-separated parking sample (path is relative to the notebook's
# working directory) and preview the first rows.
parking_df = pd.read_csv(
    "parking_sample.csv",
    delimiter=";",
)
parking_df.head()

In [None]:
import matplotlib.pyplot as plt
import random
import numpy as np

# Column 0 holds the parking duration in seconds; work in hours instead.
parking = parking_df.to_numpy(dtype=float)
parking[:, 0] = parking[:, 0] / 3600  # convert seconds to hours

# Draw all indices in ONE sample without replacement and slice it, so the train,
# validation and test sets are disjoint. Sampling each split independently lets
# rows appear in several splits, which leaks training data into evaluation.
# The seed makes the split reproducible; the split size shrinks automatically
# when the file has fewer than 30000 rows.
random.seed(42)
n_per_split = min(10000, len(parking) // 3)
shuffled_indices = random.sample(range(len(parking)), 3 * n_per_split)

train_indices = shuffled_indices[:n_per_split]
val_indices = shuffled_indices[n_per_split:2 * n_per_split]
test_indices = shuffled_indices[2 * n_per_split:]

train_sample = parking[train_indices, :]
val_sample = parking[val_indices, :]
test_sample = parking[test_indices, :]

plt.plot(train_sample[:, 2], np.log(train_sample[:, 0]), "o", markersize = 1)
plt.xlabel("Day")
plt.ylabel("log(Parking Duration)")

In [None]:
# Columns 2 and 3 are the covariates (slice end is exclusive); column 0 is the response.
X_train = torch.tensor(train_sample[:, 2:4], dtype = torch.float32)
y_train = torch.tensor(train_sample[:, 0], dtype = torch.float32)

X_val = torch.tensor(val_sample[:, 2:4], dtype = torch.float32)
y_val = torch.tensor(val_sample[:, 0], dtype = torch.float32)

X_test = torch.tensor(test_sample[:, 2:4], dtype = torch.float32)
# Must index column 0 (`[:, 0]`); `[:0]` selects zero rows and yields an empty tensor.
y_test = torch.tensor(test_sample[:, 0], dtype = torch.float32)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the covariates: the scaler statistics come from the training split
# only and are then applied unchanged to the validation and test splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled, X_val_scaled, X_test_scaled = (
    torch.tensor(scaler.transform(split), dtype = torch.float32)
    for split in (X_train, X_val, X_test)
)

In [None]:
import cProfile
import pstats

# One sub-network per covariate column.
gamma_namlss = GammaNAMLSS(X_train.shape[1])

# Profile the whole training run; the profiler is switched off even if fit() raises.
pr = cProfile.Profile()
pr.enable()
try:
    gamma_namlss.fit(X_train_scaled, y_train, X_val_scaled, y_val)
finally:
    pr.disable()

# Report the 20 entries with the largest cumulative time, with directory names stripped.
stats = pstats.Stats(pr)
stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats(20)

In [None]:
# gamma_namlss = GammaNAMLSS(X_train.shape[1])
# gamma_namlss.fit(X_train_scaled, y_train, X_val_scaled, y_val)

In [None]:
# alpha, beta = gamma_namlss.predict(X_test_scaled)
# gamma_dist = dist.Gamma(alpha, beta)

In [None]:
# from scipy.stats import gamma

# quantiles = gamma.ppf([0.025, 0.975], alpha, scale = 1/beta)
# quantiles

In [None]:
# lower = quantiles[:,0]
# upper = quantiles[:,1]

In [None]:
# import numpy as np

# sorted_indices = np.argsort(X_test[:,0])
# X_test_sorted = X_test[sorted_indices, 0]
# y_test_sorted = y_test[sorted_indices]

# plt.plot(X_test_sorted, y_test_sorted, "o", markersize = 1)
# plt.plot(X_test_sorted, lower[sorted_indices], color = "red")
# plt.plot(X_test_sorted, upper[sorted_indices], color = "red")
# plt.ylim((0, 50000))

In [None]:
# robust_gamlss = GammaNAMLSS(X_train.shape[1])
# robust_gamlss.fit(X_train_scaled, y_train, X_val_scaled, y_val, robustness_factor = torch.tensor(10))

In [None]:
# robust_alpha, robust_beta = robust_gamlss.predict(X_test_scaled)

In [None]:
# from scipy.stats import gamma

# quantiles = gamma.ppf([0.025, 0.975], robust_alpha, scale = 1/robust_beta)
# quantiles

# robust_lower = quantiles[:,0]
# robust_upper = quantiles[:,1]

In [None]:
# plt.plot(X_test_sorted, y_test_sorted, "o", markersize = 1)
# plt.plot(X_test_sorted, robust_lower[sorted_indices], color = "red")
# plt.plot(X_test_sorted, robust_upper[sorted_indices], color = "red")
# plt.ylim((0,4000))