In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
import lightgbm as lgb
import optuna

print("Loading data...")
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
X = train.drop([f'BlendProperty{i}' for i in range(1, 11)], axis=1)
y = train[[f'BlendProperty{i}' for i in range(1, 11)]]
X_test = test.drop(['ID'], axis=1)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("Starting Level 1 training...")
model_names = [
    'Ridge', 'Lasso', 'ElasticNet', 'BayesianRidge', 'Huber',
    'RandomForest', 'ExtraTrees', 'AdaBoost', 'GradientBoost', 'Bagging',
    'KNN', 'SVR', 'XGB', 'LGBM'
]
models_oof = {name: np.zeros(y.shape) for name in model_names}
models_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in model_names}

for t in range(y.shape[1]):
    print(f"Training for target BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        print(f" Fold {fold+1}/5")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        Ridge(alpha=1.0).fit(X_tr, y_tr)
        models_oof['Ridge'][val_idx, t] = Ridge().fit(X_tr, y_tr).predict(X_val)
        models_test['Ridge'][:, t] += Ridge().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        Lasso(alpha=0.1).fit(X_tr, y_tr)
        models_oof['Lasso'][val_idx, t] = Lasso().fit(X_tr, y_tr).predict(X_val)
        models_test['Lasso'][:, t] += Lasso().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        ElasticNet(alpha=0.1).fit(X_tr, y_tr)
        models_oof['ElasticNet'][val_idx, t] = ElasticNet().fit(X_tr, y_tr).predict(X_val)
        models_test['ElasticNet'][:, t] += ElasticNet().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        BayesianRidge().fit(X_tr, y_tr)
        models_oof['BayesianRidge'][val_idx, t] = BayesianRidge().fit(X_tr, y_tr).predict(X_val)
        models_test['BayesianRidge'][:, t] += BayesianRidge().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        HuberRegressor().fit(X_tr, y_tr)
        models_oof['Huber'][val_idx, t] = HuberRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['Huber'][:, t] += HuberRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        RandomForestRegressor(n_estimators=50).fit(X_tr, y_tr)
        models_oof['RandomForest'][val_idx, t] = RandomForestRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['RandomForest'][:, t] += RandomForestRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        ExtraTreesRegressor(n_estimators=50).fit(X_tr, y_tr)
        models_oof['ExtraTrees'][val_idx, t] = ExtraTreesRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['ExtraTrees'][:, t] += ExtraTreesRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        AdaBoostRegressor().fit(X_tr, y_tr)
        models_oof['AdaBoost'][val_idx, t] = AdaBoostRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['AdaBoost'][:, t] += AdaBoostRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        GradientBoostingRegressor().fit(X_tr, y_tr)
        models_oof['GradientBoost'][val_idx, t] = GradientBoostingRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['GradientBoost'][:, t] += GradientBoostingRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        BaggingRegressor().fit(X_tr, y_tr)
        models_oof['Bagging'][val_idx, t] = BaggingRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['Bagging'][:, t] += BaggingRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        KNeighborsRegressor().fit(X_tr, y_tr)
        models_oof['KNN'][val_idx, t] = KNeighborsRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['KNN'][:, t] += KNeighborsRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        SVR().fit(X_tr, y_tr)
        models_oof['SVR'][val_idx, t] = SVR().fit(X_tr, y_tr).predict(X_val)
        models_test['SVR'][:, t] += SVR().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        xgb.XGBRegressor().fit(X_tr, y_tr)
        models_oof['XGB'][val_idx, t] = xgb.XGBRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['XGB'][:, t] += xgb.XGBRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

        lgb.LGBMRegressor().fit(X_tr, y_tr)
        models_oof['LGBM'][val_idx, t] = lgb.LGBMRegressor().fit(X_tr, y_tr).predict(X_val)
        models_test['LGBM'][:, t] += lgb.LGBMRegressor().fit(X_tr, y_tr).predict(X_test) / kf.n_splits

print("Stacking Level 1 outputs...")
stack_X = np.concatenate([models_oof[name] for name in model_names], axis=1)
stack_X_test = np.concatenate([models_test[name] for name in model_names], axis=1)

print("Starting Level 2 stacking...")
mlp_oof = np.zeros(y.shape)
mlp_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    print(f" MLP stacking for BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        mlp = MLPRegressor(hidden_layer_sizes=(512, 256, 128), activation='relu', max_iter=500)
        mlp.fit(X_tr, y_tr)
        mlp_oof[val_idx, t] = mlp.predict(X_val)
        mlp_test[:, t] += mlp.predict(stack_X_test) / kf.n_splits

print("Starting Level 3 final combiner...")
final = 0.5 * mlp_oof + 0.5 * models_oof['XGB']
final_test = 0.5 * mlp_test + 0.5 * models_test['XGB']



def objective(trial):
    w1 = trial.suggest_float("w1", 0.0, 1.0)
    w2 = 1.0 - w1
    blended = w1 * mlp_oof + w2 * models_oof['XGB']
    return np.mean(np.abs((y - blended) / y))

print("Optimizing final blend weights with Optuna...")
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

w1 = study.best_params['w1']
w2 = 1.0 - w1
print(f"Best weights: w1={w1:.4f}, w2={w2:.4f}")

final = w1 * mlp_oof + w2 * models_oof['XGB']
final_test = w1 * mlp_test + w2 * models_test['XGB']

mape = np.mean(np.abs((y - final) / y)) * 100
print(f'Optimized Stacked CV MAPE: {mape:.4f}%')

sub = pd.DataFrame(final_test, columns=[f'BlendProperty{i}' for i in range(1, 11)])
sub.insert(0, 'ID', test['ID'])
sub.to_csv('submission_final_level3.csv', index=False)
print('Optimized stacked submission saved.')



Loading data...
Starting Level 1 training...
Training for target BlendProperty1...
 Fold 1/5
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000841 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12981
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 55
[LightGBM] [Info] Start training from score -0.007867
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000635 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12981
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 55
[LightGBM] [Info] Start training from score -0.007867
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12981
[LightGBM] [Info] Number

[I 2025-07-23 01:56:07,262] A new study created in memory with name: no-name-3281147e-f154-4807-96a9-faf4c0f0c1a8
[I 2025-07-23 01:56:07,268] Trial 0 finished with value: 1.2073292144339336 and parameters: {'w1': 0.799227120213006}. Best is trial 0 with value: 1.2073292144339336.
[I 2025-07-23 01:56:07,274] Trial 1 finished with value: 1.3644432469967303 and parameters: {'w1': 0.35235159654962933}. Best is trial 0 with value: 1.2073292144339336.
[I 2025-07-23 01:56:07,277] Trial 2 finished with value: 1.559912919849576 and parameters: {'w1': 0.19368726063557373}. Best is trial 0 with value: 1.2073292144339336.
[I 2025-07-23 01:56:07,280] Trial 3 finished with value: 1.3358389849568222 and parameters: {'w1': 0.9048836206246473}. Best is trial 0 with value: 1.2073292144339336.
[I 2025-07-23 01:56:07,283] Trial 4 finished with value: 1.2091648592041933 and parameters: {'w1': 0.5557010357789028}. Best is trial 0 with value: 1.2073292144339336.
[I 2025-07-23 01:56:07,286] Trial 5 finished w

Starting Level 3 final combiner...
Optimizing final blend weights with Optuna...


[I 2025-07-23 01:56:07,441] Trial 28 finished with value: 1.2630555890654958 and parameters: {'w1': 0.8473382647573313}. Best is trial 21 with value: 1.1220946677321255.
[I 2025-07-23 01:56:07,445] Trial 29 finished with value: 1.457636596612297 and parameters: {'w1': 0.9956938145926535}. Best is trial 21 with value: 1.1220946677321255.
[I 2025-07-23 01:56:07,445] Trial 30 finished with value: 1.1215361106265742 and parameters: {'w1': 0.708787892373475}. Best is trial 30 with value: 1.1215361106265742.
[I 2025-07-23 01:56:07,459] Trial 31 finished with value: 1.1207905413050983 and parameters: {'w1': 0.7208239277948116}. Best is trial 31 with value: 1.1207905413050983.
[I 2025-07-23 01:56:07,463] Trial 32 finished with value: 1.233204035203317 and parameters: {'w1': 0.8219145273709683}. Best is trial 31 with value: 1.1207905413050983.
[I 2025-07-23 01:56:07,475] Trial 33 finished with value: 1.1716241890088 and parameters: {'w1': 0.7673159637174208}. Best is trial 31 with value: 1.1207

Best weights: w1=0.7208, w2=0.2792
Optimized Stacked CV MAPE: 112.0791%
Optimized stacked submission saved.
