In [None]:
# ============================================================
# LEVEL-2 STACKING (Ridge + Lasso Meta-Model)
# ============================================================

import os

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold

# Build the level-2 design matrices: one column per level-1 model
# (LightGBM, XGBoost, CatBoost), one row per sample.
# Training matrix uses the out-of-fold predictions, test matrix uses the
# corresponding test-set predictions, in the same column order.
level1_oof_preds = (oof_lgbm, oof_xgb, oof_cat)
level1_test_preds = (test_lgbm, test_xgb, test_cat)

X_stack = np.vstack(level1_oof_preds).transpose()
X_stack_test = np.vstack(level1_test_preds).transpose()

for label, matrix in (("train", X_stack), ("test", X_stack_test)):
    print(f"Shape Stacking {label}:", matrix.shape)

# Shared, seeded 5-fold splitter so both meta-models see identical folds.
kf = KFold(shuffle=True, random_state=42, n_splits=5)

# ------------------------------------------------------------
# 1) Ridge regression meta-model (very stable for stacking)
# ------------------------------------------------------------
# `y` is defined outside this cell. Converting it to a NumPy array makes the
# KFold indices strictly positional: indexing a pandas Series with an integer
# array is label-based when the index holds integers, which raises KeyError
# or selects the wrong rows if the index is not 0..n-1 (e.g. after filtering).
y_values = np.asarray(y)

oof_ridge = np.zeros(len(y_values))  # out-of-fold prediction per training row
test_ridge_folds = []  # test-set predictions from each fold's model

for fold, (tr_idx, val_idx) in enumerate(kf.split(X_stack), 1):
    X_tr, X_val = X_stack[tr_idx], X_stack[val_idx]
    y_tr, y_val = y_values[tr_idx], y_values[val_idx]

    model_ridge = Ridge(alpha=0.1)
    model_ridge.fit(X_tr, y_tr)

    # Predict the held-out rows and the full test set with this fold's model.
    oof_ridge[val_idx] = model_ridge.predict(X_val)
    test_ridge_folds.append(model_ridge.predict(X_stack_test))

    print(f"Ridge Stacking Fold {fold} MAE:",
          mean_absolute_error(y_val, oof_ridge[val_idx]))

# Overall MAE across all out-of-fold predictions.
ridge_mae = mean_absolute_error(y_values, oof_ridge)
print("\nRidge Stacking OOF MAE:", ridge_mae)

# Final test prediction = mean of the per-fold test predictions.
test_ridge = np.mean(test_ridge_folds, axis=0)

# Save stacking submission. Create the output directory first so a missing
# folder does not raise after all folds have already been trained.
os.makedirs("Submissions", exist_ok=True)
sub_stack_ridge = sample.copy()
sub_stack_ridge["Tm"] = test_ridge
sub_stack_ridge.to_csv("Submissions/submission_stack_ridge.csv", index=False)

print("Saved submission_stack_ridge.csv")


# ------------------------------------------------------------
# 2) Lasso regression meta-model (performs feature selection
#    between the level-1 models via its L1 penalty)
# ------------------------------------------------------------
# `y` is defined outside this cell. Converting it to a NumPy array makes the
# KFold indices strictly positional: indexing a pandas Series with an integer
# array is label-based when the index holds integers, which raises KeyError
# or selects the wrong rows if the index is not 0..n-1 (e.g. after filtering).
y_values = np.asarray(y)

oof_lasso = np.zeros(len(y_values))  # out-of-fold prediction per training row
test_lasso_folds = []  # test-set predictions from each fold's model

for fold, (tr_idx, val_idx) in enumerate(kf.split(X_stack), 1):
    X_tr, X_val = X_stack[tr_idx], X_stack[val_idx]
    y_tr, y_val = y_values[tr_idx], y_values[val_idx]

    model_lasso = Lasso(alpha=0.001)
    model_lasso.fit(X_tr, y_tr)

    # Predict the held-out rows and the full test set with this fold's model.
    oof_lasso[val_idx] = model_lasso.predict(X_val)
    test_lasso_folds.append(model_lasso.predict(X_stack_test))

    print(f"Lasso Stacking Fold {fold} MAE:",
          mean_absolute_error(y_val, oof_lasso[val_idx]))

# Overall MAE across all out-of-fold predictions.
lasso_mae = mean_absolute_error(y_values, oof_lasso)
print("\nLasso Stacking OOF MAE:", lasso_mae)

# Final test prediction = mean of the per-fold test predictions.
test_lasso = np.mean(test_lasso_folds, axis=0)

# Save stacking submission. Create the output directory first so a missing
# folder does not raise after all folds have already been trained.
os.makedirs("Submissions", exist_ok=True)
sub_stack_lasso = sample.copy()
sub_stack_lasso["Tm"] = test_lasso
sub_stack_lasso.to_csv("Submissions/submission_stack_lasso.csv", index=False)

print("Saved submission_stack_lasso.csv")


🔄 Optuna: Trials=54 | Best MAE=30.5479
Final Params: {'lr': 0.09378091789016234, 'leaves': 68, 'ff': 0.8941347612779904, 'bf': 0.8403947088901286, 'bfreq': 1, 'minleaf': 11, 'objective': 'regression_l1', 'metric': 'l1'}
✔️ Saved submission_rdkit_lgbm.csv

🎯 Optuna finished!
Best Validation MAE during tuning: 30.5479
Best Parameters:
  lr: 0.09378091789016234
  leaves: 68
  ff: 0.8941347612779904
  bf: 0.8403947088901286
  bfreq: 1
  minleaf: 11
  objective: regression_l1
  metric: l1

📊 Final model MAE on validation split: 17.2874
📘 MAE on full training data: 16.9838

✔️ Saved submission_rdkit_lgbm.csv

⚠️ Hinweis:
Der MAE für die erzeugte submission.csv kann NICHT berechnet werden,
weil Kaggle die echten Testlabels nicht veröffentlicht.
Nur Kaggle selbst kann die Test-MAE nach Upload auswerten.
