In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

class MLP:
    """Fully connected feed-forward network trained by plain gradient descent.

    Hidden layers use tanh; the output layer is linear. Data is laid out
    column-wise: every array passed to ``forward``/``backward`` has shape
    ``(features, samples)``.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        """Create the layers and initialise their parameters.

        Args:
            input_size: Number of input features.
            hidden_sizes: Sequence (list, tuple, ...) with the width of each
                hidden layer, in order from input to output.
            output_size: Number of output units.
        """
        self.input_size = input_size
        # Normalise to a list so tuples and other sequences are accepted.
        self.hidden_sizes = list(hidden_sizes)
        self.output_size = output_size
        self.num_layers = len(self.hidden_sizes) + 1

        # Weights are scaled by sqrt(2 / fan_in); biases start at zero.
        self.weights = []
        self.biases = []
        sizes = [input_size, *self.hidden_sizes, output_size]
        for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
            self.weights.append(np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in))
            self.biases.append(np.zeros((fan_out, 1)))

    def forward(self, X):
        """Run a forward pass and cache intermediates for ``backward``.

        Args:
            X: Array of shape ``(input_size, n_samples)``.

        Returns:
            The linear output-layer values, shape ``(output_size, n_samples)``.
        """
        self.activations = [X]
        self.z = []
        last = self.num_layers - 1
        for i in range(self.num_layers):
            z = np.dot(self.weights[i], self.activations[i]) + self.biases[i]
            self.z.append(z)
            # tanh on hidden layers, identity on the output layer.
            a = np.tanh(z) if i < last else z
            self.activations.append(a)
        return self.activations[-1]

    def backward(self, X, y):
        """Back-propagate the error of the most recent ``forward`` call.

        ``forward`` must have been called first, because this method reads the
        cached ``self.activations``. The gradients correspond to the loss
        ``1 / (2 * m) * sum((output - y) ** 2)`` with ``m`` the sample count.

        Args:
            X: The input batch; only ``X.shape[1]`` (sample count) is used.
            y: Targets of shape ``(output_size, n_samples)``.

        Returns:
            A list of ``(dW, db)`` tuples ordered from the first layer to the
            last, matching ``self.weights`` / ``self.biases``.
        """
        m = X.shape[1]
        gradients = []
        dZ = self.activations[-1] - y
        for i in range(self.num_layers - 1, -1, -1):
            dW = (1 / m) * np.dot(dZ, self.activations[i].T)
            db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
            gradients.append((dW, db))
            if i > 0:
                dA = np.dot(self.weights[i].T, dZ)
                # activations[i] is tanh(z[i - 1]) cached by forward(), so the
                # tanh derivative needs no second tanh evaluation.
                dZ = dA * (1 - self.activations[i] ** 2)
        return gradients[::-1]

    def update_parameters(self, gradients, learning_rate):
        """Apply one in-place gradient-descent step.

        Args:
            gradients: List of ``(dW, db)`` tuples as returned by ``backward``.
            learning_rate: Step size multiplied into every gradient.
        """
        for i in range(self.num_layers):
            dW, db = gradients[i]
            self.weights[i] -= learning_rate * dW
            self.biases[i] -= learning_rate * db

# Load dataset and keep only rows where every raw column used below is present
df = pd.read_csv("../datasets/CropSDEData/METEO_DEKADS_NUTS2_NL.csv")
df = df.dropna(subset=['PREC', 'VPRES', 'WSPD', 'RELH'])

# Use estimated parameters from Merton Model
mu_hat = -0.006374441588979891      # additive drift per step
sigma_hat = 0.7458928682802284      # scale of the diffusion term
lambda_hat = 0.10250856908232264    # Poisson rate used to decide whether a jump occurs
delta_hat = 0.06891821761948404     # standard deviation of the jump size

# Compute log-returns of PREC using log1p (better stability)
df['log_PREC'] = np.log1p(df['PREC'])
df['returns'] = df['log_PREC'].diff()
# The first row has no predecessor, so its return is NaN and the row is dropped.
df = df.dropna(subset=['returns'])

# Simulate variance using Merton Jump-Diffusion Model.
# The path is built in a NumPy array and written to the frame in one step.
# The previous chained assignment `df['merton_variance'].iloc[0] = ...` does
# not update `df` under pandas Copy-on-Write, which leaves the seed value NaN
# and turns the whole simulated column into NaN.
variance_floor = 1e-6  # keeps the variance strictly positive for the sqrt
n_obs = len(df)
variance_path = np.empty(n_obs)
variance_path[0] = max(df['returns'].var(), variance_floor)

np.random.seed(42)
for t in range(1, n_obs):
    vt = variance_path[t - 1]  # already floored when it was stored
    epsilon = np.random.normal(0, 1)

    # Simulate Jump occurrence; the jump size is only drawn when a jump
    # happens, so the sequence of random draws matches the original loop.
    jump_occurred = np.random.poisson(lambda_hat) > 0
    jump_size = np.random.normal(0, delta_hat) if jump_occurred else 0

    vt_new = vt + mu_hat + sigma_hat * np.sqrt(vt) * epsilon + jump_size
    variance_path[t] = max(vt_new, variance_floor)

df['merton_variance'] = variance_path

# Feature Engineering: Add rolling mean & std deviation
# NOTE(review): the 30-row window is computed on PREC, the same column the
# target below is derived from - confirm this overlap is intended.
df['rolling_mean_PREC'] = df['PREC'].rolling(window=30).mean()
df['rolling_std_PREC'] = df['PREC'].rolling(window=30).std()
df = df.dropna(subset=['rolling_mean_PREC', 'rolling_std_PREC'])

# Define Input (X) and Target (y)
X = df[['VPRES', 'WSPD', 'RELH', 'merton_variance', 'rolling_mean_PREC', 'rolling_std_PREC']].values
y = np.log1p(df['PREC'].values).reshape(-1, 1)  # Log-transform target for stability

# Split Data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features using statistics of the training split only
X_train_mean = X_train.mean(axis=0)
X_train_std = X_train.std(axis=0)
# A constant feature has zero std; divide by 1 instead so it becomes 0, not NaN.
X_train_std = np.where(X_train_std == 0, 1.0, X_train_std)
X_train = (X_train - X_train_mean) / X_train_std
X_val = (X_val - X_train_mean) / X_train_std

# Build the network: layer widths come from the data, hidden widths are fixed
hidden_sizes = [128, 64, 32]  # Increased hidden layers
input_size = X_train.shape[1]
output_size = y_train.shape[1]
mlp = MLP(input_size, hidden_sizes, output_size)

# Optimisation schedule
num_epochs = 5000
learning_rate = 0.01
learning_rate_decay = 0.99
min_learning_rate = 0.0001

# Early-stopping bookkeeping
early_stopping_threshold = 100
best_loss = float("inf")
patience = 0

# Full-batch gradient descent with an exponentially decaying learning rate
for epoch in range(num_epochs):
    # The loss is measured on the predictions made before this epoch's update.
    outputs = mlp.forward(X_train.T)
    loss = np.mean((outputs - y_train.T) ** 2)

    gradients = mlp.backward(X_train.T, y_train.T)
    mlp.update_parameters(gradients, learning_rate)

    if not (epoch + 1) % 100:
        print(f"Epoch {epoch+1} - Loss: {loss}")

    # Count consecutive epochs without a new best loss
    improved = loss < best_loss
    if improved:
        best_loss = loss
    patience = 0 if improved else patience + 1

    if patience > early_stopping_threshold:
        print(f"Early stopping at epoch {epoch+1}")
        break

    # Shrink the step size, but never below the configured minimum
    learning_rate = learning_rate * learning_rate_decay
    if learning_rate < min_learning_rate:
        learning_rate = min_learning_rate

# Evaluate on the held-out split (log scale)
test_outputs = mlp.forward(X_val.T)
val_residual = test_outputs - y_val.T
test_loss = np.mean(val_residual ** 2)

# Undo the log1p transform so the error is reported in the original units
y_val_original = np.expm1(y_val)
test_outputs_original = np.expm1(test_outputs.T)

original_scale_residual = test_outputs_original - y_val_original
final_mse = np.mean(original_scale_residual ** 2)
print("Final Test MSE (Original Scale):", final_mse)


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['merton_variance'].iloc[0] = max(df['returns'].var(), 1e-6)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  

Epoch 100 - Loss: 0.2831600841078976
Epoch 200 - Loss: 0.26660845156980173
Epoch 300 - Loss: 0.26354253829425256
Epoch 400 - Loss: 0.262591475990305
Epoch 500 - Loss: 0.2622406512730146
Epoch 600 - Loss: 0.26196047749549994
Epoch 700 - Loss: 0.26168635150931724
Epoch 800 - Loss: 0.2614179543151859
Epoch 900 - Loss: 0.2611549885610726
Epoch 1000 - Loss: 0.2608971770149848
Epoch 1100 - Loss: 0.2606442611445147
Epoch 1200 - Loss: 0.2603959997958952
Epoch 1300 - Loss: 0.26015216796579915
Epoch 1400 - Loss: 0.2599125556595421
Epoch 1500 - Loss: 0.2596769668297695
Epoch 1600 - Loss: 0.2594452183900879
Epoch 1700 - Loss: 0.25921713929847084
Epoch 1800 - Loss: 0.2589925697056053
Epoch 1900 - Loss: 0.25877136016367197
Epoch 2000 - Loss: 0.25855337089134856
Epoch 2100 - Loss: 0.25833847109111063
Epoch 2200 - Loss: 0.2581265383151681
Epoch 2300 - Loss: 0.257917457876624
Epoch 2400 - Loss: 0.25771112230267346
Epoch 2500 - Loss: 0.25750743082687816
Epoch 2600 - Loss: 0.2573062889177561
Epoch 2700 -