In [7]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_percentage_error
import xgboost as xgb
import lightgbm as lgb
import optuna
import torch
from tabpfn import TabPFNRegressor

print("Loading data...")
train, test = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))

# The ten regression targets are the columns BlendProperty1 .. BlendProperty10;
# every other training column is kept as a feature.
target_cols = [f'BlendProperty{i}' for i in range(1, 11)]
y = train[target_cols]
X = train.drop(columns=target_cols)
# 'ID' is not a feature: it is removed here and only used again when the
# submission frame is assembled.
X_test = test.drop(columns=['ID'])

# A single shuffled, seeded 5-fold splitter is reused by the training loops
# below, so every stage sees the same row partition.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("🚀 Starting Level 1 training...")

# Registry of Level 1 base learners: display name -> zero-argument callable
# that returns a new, unfitted estimator. It is the single source of truth for
# the model roster; `model_names` is derived from it, and its insertion order
# is the order in which the per-model prediction blocks are later concatenated
# into the stacking matrix.
model_factories = {
    'Ridge': lambda: Ridge(alpha=1.0),
    'Lasso': lambda: Lasso(alpha=0.1),
    'ElasticNet': lambda: ElasticNet(alpha=0.1),
    'BayesianRidge': BayesianRidge,
    'Huber': HuberRegressor,
    'RandomForest': lambda: RandomForestRegressor(n_estimators=50, random_state=42),
    'ExtraTrees': lambda: ExtraTreesRegressor(n_estimators=50, random_state=42),
    'AdaBoost': lambda: AdaBoostRegressor(random_state=42),
    'GradientBoost': lambda: GradientBoostingRegressor(random_state=42),
    'Bagging': lambda: BaggingRegressor(random_state=42),
    'KNN': KNeighborsRegressor,
    'SVR': SVR,
    'XGB': lambda: xgb.XGBRegressor(random_state=42),
    'LGBM': lambda: lgb.LGBMRegressor(random_state=42, verbose=-1),
}
model_names = list(model_factories)

# Per-model prediction buffers, one column per target:
#   models_oof[name][i, t]  - out-of-fold prediction for training row i
#   models_test[name][j, t] - test prediction for row j, averaged over folds
models_oof = {name: np.zeros(y.shape) for name in model_names}
models_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in model_names}

for t in range(y.shape[1]):
    print(f"🎯 Training for target BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        # Fold total comes from the splitter so the message cannot drift
        # from the actual KFold configuration.
        print(f"  Fold {fold+1}/{kf.n_splits}")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr = y.iloc[tr_idx, t]

        for name, make_model in model_factories.items():
            # A fresh estimator per (target, fold) so no fitted state leaks
            # from one fit into the next.
            model = make_model()
            model.fit(X_tr, y_tr)
            # Each training row is written exactly once: in the fold where
            # it was held out.
            models_oof[name][val_idx, t] = model.predict(X_val)
            # Accumulate the fold-average of the test predictions.
            models_test[name][:, t] += model.predict(X_test) / kf.n_splits

print("\n📊 Level 1 MAPE Scores:")
# Score every base model on its out-of-fold predictions, all targets at once.
level1_scores = {
    name: mean_absolute_percentage_error(y, models_oof[name])
    for name in model_names
}
for name, score in level1_scores.items():
    print(f"  {name}: {score:.6f}")

print("\n🔄 Stacking Level 1 outputs...")
# Lay the per-model prediction matrices side by side (in `model_names` order):
# each model contributes one column per target to the Level 2 feature matrix.
stack_X = np.hstack([models_oof[name] for name in model_names])
stack_X_test = np.hstack([models_test[name] for name in model_names])

print("\n🚀 Starting Level 2 stacking (MLP + TabPFN)...")

# MLP Stacking
# Buffers mirror the Level 1 layout: out-of-fold predictions for the training
# rows and fold-averaged predictions for the test rows, one column per target.
mlp_oof = np.zeros(y.shape)
mlp_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    print(f"🧠 MLP stacking for BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        # Fold total is read from the splitter rather than hard-coded.
        print(f"    Fold {fold+1}/{kf.n_splits}")
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr = y.iloc[tr_idx, t]

        # A new network per (target, fold); the fixed random_state makes the
        # weight initialisation repeatable.
        mlp = MLPRegressor(hidden_layer_sizes=(512, 256, 128), activation='relu', max_iter=500, random_state=42)
        mlp.fit(X_tr, y_tr)
        mlp_oof[val_idx, t] = mlp.predict(X_val)
        mlp_test[:, t] += mlp.predict(stack_X_test) / kf.n_splits

# TabPFN Stacking
print("\n🔥 TabPFN stacking...")
tabpfn_oof = np.zeros(y.shape)
tabpfn_test = np.zeros((X_test.shape[0], y.shape[1]))

# Hardware availability does not change between folds, so probe it once
# instead of on every (target, fold) iteration.
tabpfn_device = 'cuda' if torch.cuda.is_available() else 'cpu'

for t in range(y.shape[1]):
    print(f"🎯 TabPFN stacking for BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        # Fold total is read from the splitter rather than hard-coded.
        print(f"    Fold {fold+1}/{kf.n_splits}")
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr = y.iloc[tr_idx, t]

        # Initialize TabPFN (a fresh regressor per target and fold)
        tabpfn = TabPFNRegressor(device=tabpfn_device)
        tabpfn.fit(X_tr, y_tr.values)

        # Held-out rows get their out-of-fold prediction; test predictions
        # are averaged across the folds.
        tabpfn_oof[val_idx, t] = tabpfn.predict(X_val)
        tabpfn_test[:, t] += tabpfn.predict(stack_X_test) / kf.n_splits

print("\n📊 Level 2 MAPE Scores:")
# Out-of-fold MAPE of each Level 2 stacker, averaged over all targets.
mlp_mape, tabpfn_mape = (
    mean_absolute_percentage_error(y, oof) for oof in (mlp_oof, tabpfn_oof)
)
for label, score in (("MLP", mlp_mape), ("TabPFN", tabpfn_mape)):
    print(f"  {label}: {score:.6f}")

print("\n🔍 Starting Level 3 optimization...")

# Prepare Level 3 inputs
# Column-wise concatenation of the four blend candidates, in the order
# XGB, LGBM, MLP, TabPFN (one block of target columns per candidate).
level3_oof = np.hstack(
    (models_oof['XGB'], models_oof['LGBM'], mlp_oof, tabpfn_oof)
)
level3_test = np.hstack(
    (models_test['XGB'], models_test['LGBM'], mlp_test, tabpfn_test)
)

def objective(trial):
    """Optuna objective: out-of-fold MAPE of a four-way weighted blend.

    Draws one raw weight in [0, 1] for each of XGB, LGBM, MLP and TabPFN,
    rescales the four so they sum to one, blends the corresponding
    out-of-fold prediction matrices and returns the MAPE against ``y``.
    Returns ``inf`` when all four raw weights are zero, because the blend
    is undefined in that case.
    """
    # Suggestion order (and the parameter names) must stay fixed: the names
    # are the keys later read back from ``study.best_params``.
    param_names = ('w_xgb', 'w_lgbm', 'w_mlp', 'w_tabpfn')
    raw_xgb, raw_lgbm, raw_mlp, raw_tabpfn = (
        trial.suggest_float(param, 0.0, 1.0) for param in param_names
    )

    norm = raw_xgb + raw_lgbm + raw_mlp + raw_tabpfn
    if norm == 0:
        return float('inf')

    # Convex combination of the out-of-fold predictions.
    blended = (
        (raw_xgb / norm) * models_oof['XGB']
        + (raw_lgbm / norm) * models_oof['LGBM']
        + (raw_mlp / norm) * mlp_oof
        + (raw_tabpfn / norm) * tabpfn_oof
    )
    return mean_absolute_percentage_error(y, blended)

def _blend(weights, xgb_pred, lgbm_pred, mlp_pred, tabpfn_pred):
    """Return the weighted sum of four prediction arrays using the w_* keys of `weights`."""
    return (weights['w_xgb'] * xgb_pred +
            weights['w_lgbm'] * lgbm_pred +
            weights['w_mlp'] * mlp_pred +
            weights['w_tabpfn'] * tabpfn_pred)


# Optimize weights
# The sampler is seeded so the weight search is repeatable from run to run,
# in line with the fixed random_state used by the models and the splitter.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

print(f"\n✅ Best MAPE: {study.best_value:.6f}")
print("🎯 Best weights:")
best_params = study.best_params
for param, value in best_params.items():
    print(f"  {param}: {value:.4f}")

# Normalize best weights
# The study stores the raw suggested values; the objective only rescales them
# internally, so the same rescaling has to be repeated before they are used.
total_weight = sum(best_params.values())
normalized_weights = {k: v/total_weight for k, v in best_params.items()}

print("\n📊 Normalized weights:")
for param, value in normalized_weights.items():
    print(f"  {param}: {value:.4f}")

# Create final ensemble predictions
final_test = _blend(normalized_weights,
                    models_test['XGB'], models_test['LGBM'], mlp_test, tabpfn_test)

# Final validation score: the same blend applied to the out-of-fold
# predictions. NOTE: the weights were tuned on these very predictions, so this
# figure is an optimistic estimate.
final_oof = _blend(normalized_weights,
                   models_oof['XGB'], models_oof['LGBM'], mlp_oof, tabpfn_oof)

final_mape = mean_absolute_percentage_error(y, final_oof)
print(f"\n🎉 Final ensemble MAPE: {final_mape:.6f}")

print("\n📊 Individual target MAPE scores:")
# Break the blended out-of-fold error down by target column.
for col_idx in range(y.shape[1]):
    per_target = mean_absolute_percentage_error(y.iloc[:, col_idx], final_oof[:, col_idx])
    print(f"  BlendProperty{col_idx+1}: {per_target:.6f}")

# Save submission
# Layout: 'ID' first, then BlendProperty1 .. BlendProperty10.
submission_columns = [f'BlendProperty{i}' for i in range(1, 11)]
submission = pd.DataFrame(final_test, columns=submission_columns)
submission.insert(0, 'ID', test['ID'])
submission.to_csv("submission_optimized_ensemble.csv", index=False)
print("\n💾 Submission file saved as 'submission_optimized_ensemble.csv'")

# Closing recap of the three ensemble levels and the final score.
print("\n🎯 Ensemble Summary:")
summary_lines = (
    f"  Level 1: {len(model_names)} base models",
    "  Level 2: MLP + TabPFN stacking",
    "  Level 3: Optimized weighted ensemble",
    f"  Final MAPE: {final_mape:.6f}",
)
for line in summary_lines:
    print(line)

Loading data...
🚀 Starting Level 1 training...
🎯 Training for target BlendProperty1...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty2...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty3...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty4...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty5...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty6...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty7...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty8...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty9...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5
🎯 Training for target BlendProperty10...
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

📊

[I 2025-07-23 02:57:50,740] A new study created in memory with name: no-name-cd8f5522-ae98-452c-ab7c-476ea950830f
[I 2025-07-23 02:57:50,746] Trial 0 finished with value: 1.0672458799116427 and parameters: {'w_xgb': 0.7441962503772418, 'w_lgbm': 0.8345495198605164, 'w_mlp': 0.9820962408124294, 'w_tabpfn': 0.12927815301654044}. Best is trial 0 with value: 1.0672458799116427.
[I 2025-07-23 02:57:50,749] Trial 1 finished with value: 1.0445995333613882 and parameters: {'w_xgb': 0.5715363862615984, 'w_lgbm': 0.7349567706425825, 'w_mlp': 0.2268260506946207, 'w_tabpfn': 0.8373104781254307}. Best is trial 1 with value: 1.0445995333613882.
[I 2025-07-23 02:57:50,752] Trial 2 finished with value: 1.3186304317788138 and parameters: {'w_xgb': 0.7238414692076094, 'w_lgbm': 0.4992369648199598, 'w_mlp': 0.016416712708327896, 'w_tabpfn': 0.12446809388530355}. Best is trial 1 with value: 1.0445995333613882.
[I 2025-07-23 02:57:50,753] Trial 3 finished with value: 0.9879936296500016 and parameters: {'w_


📊 Level 2 MAPE Scores:
  MLP: 1.546973
  TabPFN: 0.988756

🔍 Starting Level 3 optimization...


[I 2025-07-23 02:57:50,932] Trial 18 finished with value: 1.0952725930314742 and parameters: {'w_xgb': 0.10044703804480465, 'w_lgbm': 0.4021102909243912, 'w_mlp': 0.6670012471693011, 'w_tabpfn': 0.6109834210947006}. Best is trial 16 with value: 0.9767388308016169.
[I 2025-07-23 02:57:50,949] Trial 19 finished with value: 0.9851685044084448 and parameters: {'w_xgb': 0.26414860717870886, 'w_lgbm': 0.8577773168620451, 'w_mlp': 0.49829576138450715, 'w_tabpfn': 0.7375815713095109}. Best is trial 16 with value: 0.9767388308016169.
[I 2025-07-23 02:57:50,965] Trial 20 finished with value: 1.2517672346506958 and parameters: {'w_xgb': 0.6172530297765135, 'w_lgbm': 0.17140174752770082, 'w_mlp': 0.19485213466957596, 'w_tabpfn': 0.20522457756619977}. Best is trial 16 with value: 0.9767388308016169.
[I 2025-07-23 02:57:50,981] Trial 21 finished with value: 0.979609190240781 and parameters: {'w_xgb': 0.14204998063580637, 'w_lgbm': 0.6188385649098682, 'w_mlp': 0.10837292529485903, 'w_tabpfn': 0.40059


✅ Best MAPE: 0.950313
🎯 Best weights:
  w_xgb: 0.1569
  w_lgbm: 0.9728
  w_mlp: 0.1384
  w_tabpfn: 0.9736

📊 Normalized weights:
  w_xgb: 0.0700
  w_lgbm: 0.4340
  w_mlp: 0.0617
  w_tabpfn: 0.4343

🎉 Final ensemble MAPE: 0.950313

📊 Individual target MAPE scores:
  BlendProperty1: 0.954313
  BlendProperty2: 0.899590
  BlendProperty3: 1.358098
  BlendProperty4: 0.957680
  BlendProperty5: 0.114740
  BlendProperty6: 0.823275
  BlendProperty7: 1.064241
  BlendProperty8: 0.960809
  BlendProperty9: 1.672063
  BlendProperty10: 0.698321

💾 Submission file saved as 'submission_optimized_ensemble.csv'

🎯 Ensemble Summary:
  Level 1: 14 base models
  Level 2: MLP + TabPFN stacking
  Level 3: Optimized weighted ensemble
  Final MAPE: 0.950313


In [None]:
from tabpfn import TabPFNRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_percentage_error
import pandas as pd
import numpy as np
import torch

# Read the training and test tables
train, test = (pd.read_csv(path) for path in ("train.csv", "test.csv"))

# Split the training table into features and the ten BlendProperty targets;
# the test features are everything except the 'ID' column.
target_cols = [f'BlendProperty{i}' for i in range(1, 11)]
y = train.loc[:, target_cols]
X = train.drop(target_cols, axis=1)
X_test = test.drop('ID', axis=1)

# Shuffled, seeded 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Prediction buffers, one column per target: out-of-fold predictions for the
# training rows and fold-averaged predictions for the test rows.
n_targets = y.shape[1]
tabpfn_oof = np.zeros((len(y), n_targets))
tabpfn_test = np.zeros((len(X_test), n_targets))

print("🚀 Starting TabPFN multi-output regression...")

# Hardware availability does not change between folds, so probe it once
# instead of on every (target, fold) iteration.
tabpfn_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Loop over each target: TabPFNRegressor is fitted on one target column at a
# time, so the multi-output problem is handled as independent regressions.
for t in range(y.shape[1]):
    print(f"\n🎯 Training for target: BlendProperty{t+1}")

    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        # Fold total is read from the splitter rather than hard-coded.
        print(f"  Fold {fold+1}/{kf.n_splits}")
        X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_tr = y.iloc[train_idx, t]

        # Initialize and train TabPFN (a fresh regressor per target and fold)
        model = TabPFNRegressor(device=tabpfn_device)
        model.fit(X_tr.values, y_tr.values)

        # Predictions: held-out rows get their out-of-fold value, test
        # predictions are averaged across the folds.
        tabpfn_oof[val_idx, t] = model.predict(X_val.values)
        tabpfn_test[:, t] += model.predict(X_test.values) / kf.n_splits

# ---------------------------
# 🧮 Evaluate MAPE
# ---------------------------
print("\n📊 MAPE Scores:")
# Overall score: out-of-fold MAPE averaged over all targets.
overall_mape = mean_absolute_percentage_error(y, tabpfn_oof)
print(f"✅ Overall MAPE: {overall_mape:.6f}")

# Per-target breakdown of the same out-of-fold predictions.
for col_idx in range(y.shape[1]):
    per_target = mean_absolute_percentage_error(y.iloc[:, col_idx], tabpfn_oof[:, col_idx])
    print(f"  MAPE for BlendProperty{col_idx+1}: {per_target:.6f}")

# ---------------------------
# 💾 Save Submission
# ---------------------------
# Layout: 'ID' first, then BlendProperty1 .. BlendProperty10.
submission_columns = [f'BlendProperty{i}' for i in range(1, 11)]
submission = pd.DataFrame(tabpfn_test, columns=submission_columns)
submission.insert(0, 'ID', test['ID'])
submission.to_csv("submission_tabpfn.csv", index=False)
print("\n✅ Submission file saved as 'submission_tabpfn.csv'")


🚀 Starting TabPFN multi-output regression...

🎯 Training for target: BlendProperty1
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty2
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty3
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty4
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty5
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty6
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty7
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty8
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty9
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

🎯 Training for target: BlendProperty10
  Fold 1/5
  Fold 2/5
  Fold 3/5
  Fold 4/5
  Fold 5/5

📊 MAPE Scores