### Cross Validation setup

In [15]:
from sklearn.model_selection import KFold
from src.edf import edf_normalize
from src.evaluation import evaluate_fold

def cross_validate(X, y, N=4, lambda_val=1e-3, n_splits=10, seed=44,
                   calibration_methods=("softplus", "clip")):
    """Run shuffled K-fold cross-validation once per calibration method.

    For every fold, train/test rows are selected positionally with ``.iloc``,
    features and targets are each passed through ``edf_normalize`` (train
    split first, test split second; its third return value is ignored), and
    ``evaluate_fold`` is called once per calibration method on the normalized
    split. Progress is printed as each fold/method finishes.

    Parameters
    ----------
    X, y : pandas objects supporting ``.iloc``
        Features and targets, aligned row-by-row by position.
    N, lambda_val :
        Forwarded unchanged to ``evaluate_fold``.
    n_splits : int
        Number of folds passed to ``KFold``.
    seed : int
        ``random_state`` of the shuffled ``KFold`` split.
    calibration_methods : str or iterable of str
        Values forwarded one at a time as ``calibration_method``. Defaults to
        the two methods that were previously hard-coded.

    Returns
    -------
    dict
        Maps each calibration method to the list of per-fold scores returned
        by ``evaluate_fold``, in fold order.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(calibration_methods, str):
        calibration_methods = (calibration_methods,)
    # Single source of truth for both the result keys and the inner loop;
    # dict.fromkeys drops duplicates while keeping the given order.
    methods = tuple(dict.fromkeys(calibration_methods))

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    results = {method: [] for method in methods}

    for fold, (train_ids, test_ids) in enumerate(kf.split(X), 1):
        print(f"Fold {fold}")
        X_train, X_test = X.iloc[train_ids], X.iloc[test_ids]
        y_train, y_test = y.iloc[train_ids], y.iloc[test_ids]

        # Normalization does not depend on the calibration method, so it is
        # done once per fold and shared by every method below.
        X_train_norm, X_test_norm, _ = edf_normalize(X_train, X_test)
        y_train_norm, y_test_norm, _ = edf_normalize(y_train, y_test)

        for method in methods:
            ll = evaluate_fold(
                X_train_norm, X_test_norm,
                y_train_norm, y_test_norm,
                N=N,
                lambda_val=lambda_val,
                calibration_method=method
            )
            results[method].append(ll)
            print(f"  {method}: {ll:.4f}")

    return results

In [16]:
import numpy as np

def print_results(results):
    """Print a per-method summary of cross-validation scores.

    Parameters
    ----------
    results : dict
        Maps a calibration-method name to a sequence of per-fold scores.

    For each method, in dict order, this prints a blank line, the method
    name, the per-fold scores rounded to two decimals, and their mean and
    standard deviation to four decimals. The standard deviation is NumPy's
    default for ``ndarray.std`` (``ddof=0``).
    """
    for name, fold_scores in results.items():
        scores = np.array(fold_scores)
        report = "\n".join(
            [
                f"\nCalibration: {name}",
                f"  per fold: {np.round(scores, 2)}",
                f"  mean LL : {scores.mean():.4f}",
                f"  std LL  : {scores.std():.4f}",
            ]
        )
        print(report)

### 10-fold CV: HL (heating load) as target

In [17]:
from ucimlrepo import fetch_ucirepo

# Fetch UCI repository dataset 242 (features + targets + variable metadata).
energy_efficiency = fetch_ucirepo(id=242)

# The variable table maps each raw column name to its description; use the
# descriptions as column labels for both features and targets.
var_df = energy_efficiency.variables
col_map = dict(zip(var_df["name"], var_df["description"]))

X = energy_efficiency.data.features.rename(columns=col_map)
# Keep only the heating-load column as the target (as a one-column frame).
y = energy_efficiency.data.targets.rename(columns=col_map)[["Heating Load"]]

# Combined frame is for inspection only.
df = X.join(y)
df.head()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Heating Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84


In [18]:
# Cross-validate on the heating-load target using cross_validate's defaults
# (N=4, lambda_val=1e-3, n_splits=10, seed=44).
HL_results = cross_validate(X, y)

Fold 1


  softplus: 0.7128
  clip: 0.9366
Fold 2
  softplus: 0.6903
  clip: 0.8870
Fold 3
  softplus: 0.7379
  clip: 0.9782
Fold 4
  softplus: 0.7147
  clip: 0.9222
Fold 5
  softplus: 0.7641
  clip: 0.8306
Fold 6
  softplus: 0.7560
  clip: 0.9906
Fold 7
  softplus: 0.6611
  clip: 0.8761
Fold 8
  softplus: 0.7017
  clip: 0.9250
Fold 9
  softplus: 0.7136
  clip: 0.9287
Fold 10
  softplus: 0.6483
  clip: 0.7100


In [19]:
# Per-fold scores plus mean/std for each calibration method (heating load).
print_results(HL_results)


Calibration: softplus
  per fold: [0.71 0.69 0.74 0.71 0.76 0.76 0.66 0.7  0.71 0.65]
  mean LL : 0.7100
  std LL  : 0.0354

Calibration: clip
  per fold: [0.94 0.89 0.98 0.92 0.83 0.99 0.88 0.93 0.93 0.71]
  mean LL : 0.8985
  std LL  : 0.0770


### 10-fold CV: CL (cooling load) as target

In [20]:
# Reuse the dataset object and column-name map loaded for the heating-load
# section instead of importing and downloading dataset 242 a second time.
# The raw frames on `energy_efficiency.data` were never modified (the earlier
# `rename` calls returned new frames), so both target columns are still there.
X = energy_efficiency.data.features.rename(columns=col_map)
# Keep only the cooling-load column as the target (as a one-column frame).
y = energy_efficiency.data.targets.rename(columns=col_map)[["Cooling Load"]]

# Combined frame is for inspection only.
df = X.join(y)
df.head()

Unnamed: 0,Relative Compactness,Surface Area,Wall Area,Roof Area,Overall Height,Orientation,Glazing Area,Glazing Area Distribution,Cooling Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


In [21]:
# Cross-validate on the cooling-load target using cross_validate's defaults
# (N=4, lambda_val=1e-3, n_splits=10, seed=44).
CL_results = cross_validate(X, y)

Fold 1
  softplus: 0.7337
  clip: 0.9667
Fold 2
  softplus: 0.7344
  clip: 0.9631
Fold 3
  softplus: 0.6511
  clip: 0.8762
Fold 4
  softplus: 0.7426
  clip: 0.9779
Fold 5
  softplus: 0.7457
  clip: 0.8055
Fold 6
  softplus: 0.6945
  clip: 0.9239
Fold 7
  softplus: 0.6592
  clip: 0.8790
Fold 8
  softplus: 0.6852
  clip: 0.9097
Fold 9
  softplus: 0.7111
  clip: 0.9293
Fold 10
  softplus: 0.6356
  clip: 0.8420


In [22]:
# Per-fold scores plus mean/std for each calibration method (cooling load).
print_results(CL_results)


Calibration: softplus
  per fold: [0.73 0.73 0.65 0.74 0.75 0.69 0.66 0.69 0.71 0.64]
  mean LL : 0.6993
  std LL  : 0.0385

Calibration: clip
  per fold: [0.97 0.96 0.88 0.98 0.81 0.92 0.88 0.91 0.93 0.84]
  mean LL : 0.9074
  std LL  : 0.0537
