In [41]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from statsmodels.tsa.stattools import adfuller

# Load dataset
data = pd.read_csv('../datasets/CropSDEData/METEO_DEKADS_NUTS2_NL.csv')

# Feature Selection
features = ['TAVG', 'VPRES', 'WSPD', 'RELH', 'RAD']
target = 'PREC'

# Drop missing values
data = data.dropna(subset=features + [target])

# Prepare data
X = data[features]
y = data[target]

# Ensure stationarity of target variable
if adfuller(y)[1] > 0.05:
    print("Target variable is non-stationary. Applying log transformation and differencing...")
    y = np.log1p(y).diff().dropna()
    X = X.iloc[1:]

# Align X and y
X, y = X.iloc[:len(y)], y.iloc[:len(X)]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ---- Optimized Heston Model MLE ----
def heston_log_likelihood(params, data):
    alpha, beta, kappa, theta, xi = params
    dt = 1
    log_likelihood = 0
    for t in range(1, len(data)):
        residual = data[t] - (data[t-1] + alpha * (beta - data[t-1]) * dt)
        log_likelihood += -0.5 * (residual ** 2) / (2 * theta)
    return -log_likelihood  # Negative for minimization

# Optimize Heston Parameters
initial_guess = [0.1, np.mean(y), 0.5, np.var(y), 0.5]
bounds = [(1e-5, None), (None, None), (1e-5, None), (1e-5, None), (1e-5, None)]
res_mle = minimize(heston_log_likelihood, initial_guess, args=(y.values,), method='L-BFGS-B', bounds=bounds)

# Extract and scale parameters
alpha_mle, beta_mle, kappa_mle, theta_mle, xi_mle = res_mle.x
alpha_mle /= 10
beta_mle /= 10
theta_mle /= 10
xi_mle /= 10

print("\nEstimated Heston Parameters using Maximum Likelihood Estimation (MLE):")
print(f"Alpha: {alpha_mle}, Beta: {beta_mle}, Kappa: {kappa_mle}, Theta: {theta_mle}, Xi: {xi_mle}")

# ---- Optimized Neural Network ----
class HestonNN(nn.Module):
    def __init__(self, input_size):
        super(HestonNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 1)
        self.activation = nn.SiLU()
        self.layernorm1 = nn.LayerNorm(256)
        self.layernorm2 = nn.LayerNorm(128)
        self.layernorm3 = nn.LayerNorm(64)
    
    def forward(self, x):
        x = self.activation(self.layernorm1(self.fc1(x)))
        x = self.activation(self.layernorm2(self.fc2(x)))
        x = self.activation(self.layernorm3(self.fc3(x)))
        x = self.activation(self.fc4(x))
        x = self.fc5(x)
        return x

# Initialize Neural Network
input_size = X_scaled.shape[1]
model = HestonNN(input_size)

# Optimizer & Scheduler
criterion = nn.SmoothL1Loss()  # Log-Cosh Approximation Loss
optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y.values, test_size=0.2, random_state=42)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# ---- Efficient Training Loop ----
epochs = 2000
batch_size = 512
accumulation_steps = 4

for epoch in range(epochs):
    model.train()
    permutation = torch.randperm(X_train_tensor.size(0))
    epoch_loss = 0

    for i in range(0, X_train_tensor.size(0), batch_size):
        indices = permutation[i:i + batch_size]
        batch_X, batch_y = X_train_tensor[indices], y_train_tensor[indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y) / accumulation_steps
        loss.backward()
        if (i // batch_size + 1) % accumulation_steps == 0 or i + batch_size >= X_train_tensor.size(0):
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        epoch_loss += loss.item()
    
    scheduler.step()
    
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.6f}")

# ---- Evaluate Model ----
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_tensor).numpy()

test_mse = mean_squared_error(y_test, y_pred_test)
test_r2 = r2_score(y_test, y_pred_test)

print(f"\nNeural Network Test MSE: {test_mse}")
print(f"Neural Network Test R^2 Score: {test_r2}")



Estimated Heston Parameters using Maximum Likelihood Estimation (MLE):
Alpha: 0.0974910971073342, Beta: 0.19244197130265056, Kappa: 0.5, Theta: 5088931.599398365, Xi: 0.05
Epoch [100/2000], Loss: 4.262026
Epoch [200/2000], Loss: 4.253965
Epoch [300/2000], Loss: 4.132454
Epoch [400/2000], Loss: 4.134422
Epoch [500/2000], Loss: 4.007760
Epoch [600/2000], Loss: 4.035293
Epoch [700/2000], Loss: 3.911052
Epoch [800/2000], Loss: 3.938481
Epoch [900/2000], Loss: 3.816472
Epoch [1000/2000], Loss: 3.882278
Epoch [1100/2000], Loss: 3.731159
Epoch [1200/2000], Loss: 3.806331
Epoch [1300/2000], Loss: 3.667570
Epoch [1400/2000], Loss: 3.760417
Epoch [1500/2000], Loss: 3.611341
Epoch [1600/2000], Loss: 3.679448
Epoch [1700/2000], Loss: 3.556547
Epoch [1800/2000], Loss: 3.625867
Epoch [1900/2000], Loss: 3.496677
Epoch [2000/2000], Loss: 3.570196

Neural Network Test MSE: 1.855936577224358
Neural Network Test R^2 Score: 0.28731236761592704


In [43]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from statsmodels.tsa.stattools import adfuller

# Load dataset
data = pd.read_csv('../datasets/CropSDEData/METEO_DEKADS_NUTS2_NL.csv')

# Feature Selection
features = ['TAVG', 'VPRES', 'WSPD', 'RELH', 'RAD']
target = 'PREC'

# Drop missing values
data = data.dropna(subset=features + [target])

# Prepare data
X = data[features]
y = data[target]

# Ensure stationarity of target variable
if adfuller(y)[1] > 0.05:
    print("Target variable is non-stationary. Applying log transformation and differencing...")
    y = np.log1p(y).diff().dropna()
    X = X.iloc[1:]

# Align X and y
X, y = X.iloc[:len(y)], y.iloc[:len(X)]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ---- Optimized Heston Model MLE ----
def heston_log_likelihood(params, data):
    alpha, beta, kappa, theta, xi = params
    dt = 1
    log_likelihood = 0
    for t in range(1, len(data)):
        residual = data[t] - (data[t-1] + alpha * (beta - data[t-1]) * dt)
        log_likelihood += -0.5 * (residual ** 2) / (2 * max(theta, 1e-6))  # Avoid instability
    return -log_likelihood  # Negative for minimization

# Optimize Heston Parameters
initial_guess = [0.1, np.mean(y), 0.5, np.var(y), 0.5]
bounds = [(1e-5, 1.0), (None, None), (1e-5, 1.0), (1e-6, None), (1e-5, 1.0)]
res_mle = minimize(heston_log_likelihood, initial_guess, args=(y.values,), method='L-BFGS-B', bounds=bounds)

# Extract and scale parameters
alpha_mle, beta_mle, kappa_mle, theta_mle, xi_mle = res_mle.x
alpha_mle /= 10
beta_mle /= 10
theta_mle /= 10
xi_mle /= 10

print("\nEstimated Heston Parameters using Maximum Likelihood Estimation (MLE):")
print(f"Alpha: {alpha_mle}, Beta: {beta_mle}, Kappa: {kappa_mle}, Theta: {theta_mle}, Xi: {xi_mle}")

# ---- Optimized Neural Network ----
class HestonNN(nn.Module):
    def __init__(self, input_size):
        super(HestonNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.activation = nn.SiLU()
        self.groupnorm1 = nn.GroupNorm(8, 128)
        self.groupnorm2 = nn.GroupNorm(4, 64)

    def forward(self, x):
        x = self.activation(self.groupnorm1(self.fc1(x)))
        x = self.activation(self.groupnorm2(self.fc2(x)))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

# Initialize Neural Network
input_size = X_scaled.shape[1]
model = HestonNN(input_size)

# Optimizer & Scheduler
criterion = nn.SmoothL1Loss()  # Log-Cosh Approximation
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-6, max_lr=0.01, step_size_up=100, mode='triangular2')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y.values, test_size=0.2, random_state=42)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# ---- Efficient Training Loop ----
epochs = 2000
batch_size = 256
accumulation_steps = 4

for epoch in range(epochs):
    model.train()
    permutation = torch.randperm(X_train_tensor.size(0))
    epoch_loss = 0

    for i in range(0, X_train_tensor.size(0), batch_size):
        indices = permutation[i:i + batch_size]
        batch_X, batch_y = X_train_tensor[indices], y_train_tensor[indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        
        # Log-Cosh Approximation Loss
        loss = torch.mean(torch.log(torch.cosh(outputs - batch_y + 1e-6))) / accumulation_steps
        loss.backward()

        if (i // batch_size + 1) % accumulation_steps == 0 or i + batch_size >= X_train_tensor.size(0):
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        epoch_loss += loss.item()
    
    scheduler.step()
    
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.6f}")

# ---- Evaluate Model ----
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_tensor).numpy()

test_mse = mean_squared_error(y_test, y_pred_test)
test_r2 = r2_score(y_test, y_pred_test)

print(f"\nNeural Network Test MSE: {test_mse}")
print(f"Neural Network Test R^2 Score: {test_r2}")



Estimated Heston Parameters using Maximum Likelihood Estimation (MLE):
Alpha: 0.09430472934049734, Beta: 0.20423195060507035, Kappa: 0.5, Theta: 10215239.624091689, Xi: 0.05
Epoch [100/2000], Loss: 8.603642
Epoch [200/2000], Loss: 8.362780
Epoch [300/2000], Loss: 7.914832
Epoch [400/2000], Loss: 7.449614
Epoch [500/2000], Loss: 7.436549
Epoch [600/2000], Loss: 7.365892
Epoch [700/2000], Loss: 6.951763
Epoch [800/2000], Loss: 6.609473
Epoch [900/2000], Loss: 6.621068
Epoch [1000/2000], Loss: 6.589093
Epoch [1100/2000], Loss: 6.331828
Epoch [1200/2000], Loss: 6.146723
Epoch [1300/2000], Loss: 6.177314
Epoch [1400/2000], Loss: 6.157947
Epoch [1500/2000], Loss: 6.030146
Epoch [1600/2000], Loss: 5.924968
Epoch [1700/2000], Loss: 5.947744
Epoch [1800/2000], Loss: 5.951397
Epoch [1900/2000], Loss: 5.861676
Epoch [2000/2000], Loss: 5.807232

Neural Network Test MSE: 2.0104398213452157
Neural Network Test R^2 Score: 0.22798245699320996
