We are gonna revese it

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_percentage_error
import xgboost as xgb
import lightgbm as lgb
import optuna
import torch
from tabpfn import TabPFNRegressor

print("Loading data...")
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
X = train.drop([f'BlendProperty{i}' for i in range(1, 11)], axis=1)
y = train[[f'BlendProperty{i}' for i in range(1, 11)]]
X_test = test.drop(['ID'], axis=1)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("🚀 Starting Level 1 training (MLP + TabPFN)...")

# Level 1: MLP and TabPFN as base models
level1_names = ['MLP', 'TabPFN']
level1_oof = {name: np.zeros(y.shape) for name in level1_names}
level1_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in level1_names}

for t in range(y.shape[1]):
    print(f"🎯 Training Level 1 for target BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        print(f"  Fold {fold+1}/5")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        # MLP
        mlp = MLPRegressor(hidden_layer_sizes=(512, 256, 128), activation='relu', max_iter=500, random_state=42)
        mlp.fit(X_tr, y_tr)
        level1_oof['MLP'][val_idx, t] = mlp.predict(X_val)
        level1_test['MLP'][:, t] += mlp.predict(X_test) / kf.n_splits

        # TabPFN
        tabpfn = TabPFNRegressor(device='cuda' if torch.cuda.is_available() else 'cpu')
        tabpfn.fit(X_tr, y_tr.values)
        level1_oof['TabPFN'][val_idx, t] = tabpfn.predict(X_val)
        level1_test['TabPFN'][:, t] += tabpfn.predict(X_test) / kf.n_splits

print("\n📊 Level 1 MAPE Scores:")
for name in level1_names:
    mape = mean_absolute_percentage_error(y, level1_oof[name])
    print(f"  {name}: {mape:.6f}")

print("\n🔄 Preparing Level 2 inputs from Level 1 outputs...")
stack_X = np.concatenate([level1_oof[name] for name in level1_names], axis=1)
stack_X_test = np.concatenate([level1_test[name] for name in level1_names], axis=1)

print("\n🚀 Starting Level 2 stacking (Traditional ML Models)...")

# Level 2: Traditional ML models as stacking models
level2_names = [
    'Ridge', 'Lasso', 'ElasticNet', 'BayesianRidge', 'Huber',
    'RandomForest', 'ExtraTrees', 'AdaBoost', 'GradientBoost', 'Bagging',
    'KNN', 'SVR', 'XGB', 'LGBM'
]
level2_oof = {name: np.zeros(y.shape) for name in level2_names}
level2_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in level2_names}

for t in range(y.shape[1]):
    print(f"🎯 Level 2 stacking for BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        print(f"  Fold {fold+1}/5")
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        # Ridge
        ridge = Ridge(alpha=1.0)
        ridge.fit(X_tr, y_tr)
        level2_oof['Ridge'][val_idx, t] = ridge.predict(X_val)
        level2_test['Ridge'][:, t] += ridge.predict(stack_X_test) / kf.n_splits

        # Lasso
        lasso = Lasso(alpha=0.1)
        lasso.fit(X_tr, y_tr)
        level2_oof['Lasso'][val_idx, t] = lasso.predict(X_val)
        level2_test['Lasso'][:, t] += lasso.predict(stack_X_test) / kf.n_splits

        # ElasticNet
        elastic = ElasticNet(alpha=0.1)
        elastic.fit(X_tr, y_tr)
        level2_oof['ElasticNet'][val_idx, t] = elastic.predict(X_val)
        level2_test['ElasticNet'][:, t] += elastic.predict(stack_X_test) / kf.n_splits

        # BayesianRidge
        bayesian = BayesianRidge()
        bayesian.fit(X_tr, y_tr)
        level2_oof['BayesianRidge'][val_idx, t] = bayesian.predict(X_val)
        level2_test['BayesianRidge'][:, t] += bayesian.predict(stack_X_test) / kf.n_splits

        # Huber
        huber = HuberRegressor()
        huber.fit(X_tr, y_tr)
        level2_oof['Huber'][val_idx, t] = huber.predict(X_val)
        level2_test['Huber'][:, t] += huber.predict(stack_X_test) / kf.n_splits

        # RandomForest
        rf = RandomForestRegressor(n_estimators=50, random_state=42)
        rf.fit(X_tr, y_tr)
        level2_oof['RandomForest'][val_idx, t] = rf.predict(X_val)
        level2_test['RandomForest'][:, t] += rf.predict(stack_X_test) / kf.n_splits

        # ExtraTrees
        et = ExtraTreesRegressor(n_estimators=50, random_state=42)
        et.fit(X_tr, y_tr)
        level2_oof['ExtraTrees'][val_idx, t] = et.predict(X_val)
        level2_test['ExtraTrees'][:, t] += et.predict(stack_X_test) / kf.n_splits

        # AdaBoost
        ada = AdaBoostRegressor(random_state=42)
        ada.fit(X_tr, y_tr)
        level2_oof['AdaBoost'][val_idx, t] = ada.predict(X_val)
        level2_test['AdaBoost'][:, t] += ada.predict(stack_X_test) / kf.n_splits

        # GradientBoosting
        gb = GradientBoostingRegressor(random_state=42)
        gb.fit(X_tr, y_tr)
        level2_oof['GradientBoost'][val_idx, t] = gb.predict(X_val)
        level2_test['GradientBoost'][:, t] += gb.predict(stack_X_test) / kf.n_splits

        # Bagging
        bag = BaggingRegressor(random_state=42)
        bag.fit(X_tr, y_tr)
        level2_oof['Bagging'][val_idx, t] = bag.predict(X_val)
        level2_test['Bagging'][:, t] += bag.predict(stack_X_test) / kf.n_splits

        # KNN
        knn = KNeighborsRegressor()
        knn.fit(X_tr, y_tr)
        level2_oof['KNN'][val_idx, t] = knn.predict(X_val)
        level2_test['KNN'][:, t] += knn.predict(stack_X_test) / kf.n_splits

        # SVR
        svr = SVR()
        svr.fit(X_tr, y_tr)
        level2_oof['SVR'][val_idx, t] = svr.predict(X_val)
        level2_test['SVR'][:, t] += svr.predict(stack_X_test) / kf.n_splits

        # XGBoost
        xgb_model = xgb.XGBRegressor(random_state=42)
        xgb_model.fit(X_tr, y_tr)
        level2_oof['XGB'][val_idx, t] = xgb_model.predict(X_val)
        level2_test['XGB'][:, t] += xgb_model.predict(stack_X_test) / kf.n_splits

        # LightGBM
        lgb_model = lgb.LGBMRegressor(random_state=42, verbose=-1)
        lgb_model.fit(X_tr, y_tr)
        level2_oof['LGBM'][val_idx, t] = lgb_model.predict(X_val)
        level2_test['LGBM'][:, t] += lgb_model.predict(stack_X_test) / kf.n_splits

print("\n📊 Level 2 MAPE Scores:")
for name in level2_names:
    mape = mean_absolute_percentage_error(y, level2_oof[name])
    print(f"  {name}: {mape:.6f}")

print("\n🔍 Starting Level 3 optimization...")

# Prepare Level 3 inputs - select top performers from Level 2
level3_oof = np.concatenate([
    level2_oof['XGB'], 
    level2_oof['LGBM'], 
    level2_oof['RandomForest'],
    level2_oof['ExtraTrees']
], axis=1)

level3_test = np.concatenate([
    level2_test['XGB'], 
    level2_test['LGBM'], 
    level2_test['RandomForest'],
    level2_test['ExtraTrees']
], axis=1)

def objective(trial):
    # Suggest weights for each component
    w_xgb = trial.suggest_float('w_xgb', 0.0, 1.0)
    w_lgbm = trial.suggest_float('w_lgbm', 0.0, 1.0) 
    w_rf = trial.suggest_float('w_rf', 0.0, 1.0)
    w_et = trial.suggest_float('w_et', 0.0, 1.0)
    
    # Normalize weights
    total_weight = w_xgb + w_lgbm + w_rf + w_et
    if total_weight == 0:
        return float('inf')
    
    w_xgb /= total_weight
    w_lgbm /= total_weight
    w_rf /= total_weight
    w_et /= total_weight
    
    # Create weighted ensemble
    ensemble_pred = (w_xgb * level2_oof['XGB'] + 
                    w_lgbm * level2_oof['LGBM'] + 
                    w_rf * level2_oof['RandomForest'] + 
                    w_et * level2_oof['ExtraTrees'])
    
    # Calculate MAPE
    mape = mean_absolute_percentage_error(y, ensemble_pred)
    return mape

# Optimize weights
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print(f"\n✅ Best MAPE: {study.best_value:.6f}")
print("🎯 Best weights:")
best_params = study.best_params
for param, value in best_params.items():
    print(f"  {param}: {value:.4f}")

# Normalize best weights
total_weight = sum(best_params.values())
normalized_weights = {k: v/total_weight for k, v in best_params.items()}

print("\n📊 Normalized weights:")
for param, value in normalized_weights.items():
    print(f"  {param}: {value:.4f}")

# Create final ensemble predictions
final_test = (normalized_weights['w_xgb'] * level2_test['XGB'] + 
              normalized_weights['w_lgbm'] * level2_test['LGBM'] + 
              normalized_weights['w_rf'] * level2_test['RandomForest'] + 
              normalized_weights['w_et'] * level2_test['ExtraTrees'])

# Final validation score
final_oof = (normalized_weights['w_xgb'] * level2_oof['XGB'] + 
             normalized_weights['w_lgbm'] * level2_oof['LGBM'] + 
             normalized_weights['w_rf'] * level2_oof['RandomForest'] + 
             normalized_weights['w_et'] * level2_oof['ExtraTrees'])

final_mape = mean_absolute_percentage_error(y, final_oof)
print(f"\n🎉 Final ensemble MAPE: {final_mape:.6f}")

print("\n📊 Individual target MAPE scores:")
for i in range(y.shape[1]):
    target_mape = mean_absolute_percentage_error(y.iloc[:, i], final_oof[:, i])
    print(f"  BlendProperty{i+1}: {target_mape:.6f}")

# Save submission
submission = pd.DataFrame(final_test, columns=[f'BlendProperty{i}' for i in range(1, 11)])
submission.insert(0, 'ID', test['ID'])
submission.to_csv("submission_reversed_ensemble.csv", index=False)
print("\n💾 Submission file saved as 'submission_reversed_ensemble.csv'")

print("\n🎯 Reversed Ensemble Summary:")
print(f"  Level 1: MLP + TabPFN as base models")
print(f"  Level 2: {len(level2_names)} traditional ML models stacking on Level 1 outputs") 
print(f"  Level 3: Optimized weighted ensemble of top Level 2 performers")
print(f"  Final MAPE: {final_mape:.6f}")

Loading data...
🚀 Starting Level 1 training (MLP + TabPFN)...
🎯 Training Level 1 for target BlendProperty1...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty2...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty3...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty4...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty5...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty6...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty7...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty8...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training Level 1 for target BlendProperty9...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training 

[I 2025-07-23 03:14:47,462] A new study created in memory with name: no-name-02ab7686-470c-483e-a30b-9f53bf99ed74
[I 2025-07-23 03:14:47,462] Trial 0 finished with value: 0.5594341312675966 and parameters: {'w_xgb': 0.519481090906018, 'w_lgbm': 0.16222674045768037, 'w_rf': 0.08312604633267351, 'w_et': 0.6893924306329895}. Best is trial 0 with value: 0.5594341312675966.
[I 2025-07-23 03:14:47,462] Trial 1 finished with value: 0.5816394331188701 and parameters: {'w_xgb': 0.3001512942721799, 'w_lgbm': 0.7576615790201968, 'w_rf': 0.2519779222362064, 'w_et': 0.02490959201378462}. Best is trial 0 with value: 0.5594341312675966.
[I 2025-07-23 03:14:47,470] Trial 2 finished with value: 0.5802200191740488 and parameters: {'w_xgb': 0.948304718579511, 'w_lgbm': 0.15010133137238368, 'w_rf': 0.7645435789618803, 'w_et': 0.1764361878836218}. Best is trial 0 with value: 0.5594341312675966.
[I 2025-07-23 03:14:47,473] Trial 3 finished with value: 0.5457927272684723 and parameters: {'w_xgb': 0.323465145


📊 Level 2 MAPE Scores:
  Ridge: 0.523277
  Lasso: 0.497542
  ElasticNet: 0.533716
  BayesianRidge: 0.523995
  Huber: 0.513186
  RandomForest: 0.557062
  ExtraTrees: 0.527078
  AdaBoost: 0.697516
  GradientBoost: 0.587350
  Bagging: 0.498358
  KNN: 2.241079
  SVR: 0.726280
  XGB: 0.644326
  LGBM: 0.606273

🔍 Starting Level 3 optimization...


[I 2025-07-23 03:14:47,624] Trial 17 finished with value: 0.5413981875723721 and parameters: {'w_xgb': 0.358730479024323, 'w_lgbm': 0.0247183034335219, 'w_rf': 0.6748545017825933, 'w_et': 0.8372817873352338}. Best is trial 16 with value: 0.5335159392656905.
[I 2025-07-23 03:14:47,638] Trial 18 finished with value: 0.541998599017287 and parameters: {'w_xgb': 0.38218213498320835, 'w_lgbm': 0.005438582609265125, 'w_rf': 0.706483837855704, 'w_et': 0.8451814651829908}. Best is trial 16 with value: 0.5335159392656905.
[I 2025-07-23 03:14:47,651] Trial 19 finished with value: 0.5574144020326823 and parameters: {'w_xgb': 0.6433510949229798, 'w_lgbm': 0.030929483658237545, 'w_rf': 0.6422058073620432, 'w_et': 0.622162605965395}. Best is trial 16 with value: 0.5335159392656905.
[I 2025-07-23 03:14:47,668] Trial 20 finished with value: 0.5527633784822318 and parameters: {'w_xgb': 0.6206041135834943, 'w_lgbm': 0.20846659445899965, 'w_rf': 0.8099144146215248, 'w_et': 0.7840756723700335}. Best is tri


✅ Best MAPE: 0.524831
🎯 Best weights:
  w_xgb: 0.0013
  w_lgbm: 0.0259
  w_rf: 0.2292
  w_et: 0.6203

📊 Normalized weights:
  w_xgb: 0.0015
  w_lgbm: 0.0296
  w_rf: 0.2614
  w_et: 0.7075

🎉 Final ensemble MAPE: 0.524831

📊 Individual target MAPE scores:
  BlendProperty1: 1.275035
  BlendProperty2: 0.318356
  BlendProperty3: 0.585924
  BlendProperty4: 0.412230
  BlendProperty5: 0.054391
  BlendProperty6: 0.347585
  BlendProperty7: 0.531392
  BlendProperty8: 0.487688
  BlendProperty9: 0.774070
  BlendProperty10: 0.461636

💾 Submission file saved as 'submission_reversed_ensemble.csv'

🎯 Reversed Ensemble Summary:
  Level 1: MLP + TabPFN as base models
  Level 2: 14 traditional ML models stacking on Level 1 outputs
  Level 3: Optimized weighted ensemble of top Level 2 performers
  Final MAPE: 0.524831
