In [4]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import optuna

from sklearn.model_selection import KFold
from sklearn.linear_model import *
from sklearn.ensemble import *
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression
from sklearn.cross_decomposition import PLSRegression
import xgboost as xgb
import lightgbm as lgb
try:
    import catboost as cb
    CATBOOST_AVAILABLE = True
except ImportError:
    CATBOOST_AVAILABLE = False
    print("CatBoost not available, skipping...")

from tabpfn import TabPFNClassifier

print("Loading data...")
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
X = train.drop([f'BlendProperty{i}' for i in range(1, 11)], axis=1)
y = train[[f'BlendProperty{i}' for i in range(1, 11)]]
X_test = test.drop(['ID'], axis=1)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Expanded model list with more diverse algorithms
model_names = [
    # Linear Models
    'Ridge', 'Lasso', 'ElasticNet', 'BayesianRidge', 'Huber',
    'ARD', 'SGD', 'PassiveAggressive', 'RANSAC', 'TheilSen',
    'Lars', 'LarsCV', 'LassoLars', 'OrthogonalMP',
    'Ridge_alpha01', 'Ridge_alpha10', 'Lasso_alpha01', 'ElasticNet_l1_07',
    
    # Tree-based Models
    'RandomForest', 'ExtraTrees', 'DecisionTree', 'ExtraTree',
    'AdaBoost', 'GradientBoost', 'Bagging', 'HistGB',
    'XGB', 'LGBM', 'RandomForest_200', 'ExtraTrees_200',
    
    # Instance-based
    'KNN', 'KNN_uniform', 'KNN_distance', 'KNN_3', 'KNN_15',
    
    # Kernel Methods
    'SVR', 'SVR_rbf', 'SVR_poly', 'KernelRidge', 'SVR_linear',
    
    # Neural Networks
    'MLP_small', 'MLP_medium',
    
    # Other Methods
    'PLS', 'GaussianProcess'
]

# Add CatBoost if available
if CATBOOST_AVAILABLE:
    model_names.append('CatBoost')

models_oof = {name: np.zeros(y.shape) for name in model_names}
models_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in model_names}

print(f"Training {len(model_names)} Level 1 models...")

for t in range(y.shape[1]):
    print(f"Training models for target {t+1}/10...")
    
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        models = {
            # Linear Models
            'Ridge': Ridge(alpha=1.0),
            'Lasso': Lasso(alpha=0.1),
            'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5),
            'BayesianRidge': BayesianRidge(),
            'Huber': HuberRegressor(epsilon=1.35),
            'ARD': ARDRegression(),
            'SGD': SGDRegressor(random_state=42),
            'PassiveAggressive': PassiveAggressiveRegressor(random_state=42),
            'RANSAC': RANSACRegressor(random_state=42),
            'TheilSen': TheilSenRegressor(random_state=42),
            'Lars': Lars(),
            'LarsCV': LarsCV(),
            'LassoLars': LassoLars(),
            'OrthogonalMP': OrthogonalMatchingPursuit(),
            # Additional linear model variants
            'Ridge_alpha01': Ridge(alpha=0.1),
            'Ridge_alpha10': Ridge(alpha=10.0),
            'Lasso_alpha01': Lasso(alpha=0.01),
            'ElasticNet_l1_07': ElasticNet(alpha=0.1, l1_ratio=0.7),
            
            # Tree-based Models
            'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
            'ExtraTrees': ExtraTreesRegressor(n_estimators=100, random_state=42),
            'DecisionTree': DecisionTreeRegressor(random_state=42, max_depth=10),
            'ExtraTree': ExtraTreeRegressor(random_state=42, max_depth=10),
            'AdaBoost': AdaBoostRegressor(random_state=42),
            'GradientBoost': GradientBoostingRegressor(random_state=42),
            'Bagging': BaggingRegressor(random_state=42),
            'HistGB': HistGradientBoostingRegressor(random_state=42),
            'XGB': xgb.XGBRegressor(random_state=42, verbosity=0),
            'LGBM': lgb.LGBMRegressor(random_state=42, verbosity=-1),
            # Additional tree variants
            'RandomForest_200': RandomForestRegressor(n_estimators=200, random_state=42),
            'ExtraTrees_200': ExtraTreesRegressor(n_estimators=200, random_state=42),
            
            # Instance-based
            'KNN': KNeighborsRegressor(n_neighbors=5),
            'KNN_uniform': KNeighborsRegressor(n_neighbors=7, weights='uniform'),
            'KNN_distance': KNeighborsRegressor(n_neighbors=10, weights='distance'),
            'KNN_3': KNeighborsRegressor(n_neighbors=3),
            'KNN_15': KNeighborsRegressor(n_neighbors=15),
            
            # Kernel Methods
            'SVR': SVR(kernel='rbf', C=1.0),
            'SVR_rbf': SVR(kernel='rbf', C=10.0, gamma='scale'),
            'SVR_poly': SVR(kernel='poly', degree=2, C=1.0),
            'KernelRidge': KernelRidge(alpha=1.0, kernel='rbf'),
            'SVR_linear': SVR(kernel='linear', C=1.0),
            
            # Neural Networks
            'MLP_small': MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=300, random_state=42),
            'MLP_medium': MLPRegressor(hidden_layer_sizes=(128, 64, 32), max_iter=300, random_state=42),
            
            # Other Methods
            'PLS': PLSRegression(n_components=min(10, X_tr.shape[1])),
            'GaussianProcess': GaussianProcessRegressor(random_state=42)
        }
        
        # Add CatBoost if available
        if CATBOOST_AVAILABLE:
            models['CatBoost'] = cb.CatBoostRegressor(
                iterations=100, 
                random_state=42, 
                verbose=False,
                allow_writing_files=False
            )

        for name, model in models.items():
            try:
                model.fit(X_tr, y_tr)
                models_oof[name][val_idx, t] = model.predict(X_val)
                models_test[name][:, t] += model.predict(X_test) / kf.n_splits
            except Exception as e:
                print(f"Error with {name}: {e}")
                # Fill with mean as fallback
                models_oof[name][val_idx, t] = np.mean(y_tr)
                models_test[name][:, t] += np.mean(y_tr) / kf.n_splits

print("Level 1 complete. Now training MLP for Level 2 stacking...")

stack_X = np.concatenate([models_oof[name] for name in model_names], axis=1)
stack_X_test = np.concatenate([models_test[name] for name in model_names], axis=1)

mlp_oof = np.zeros(y.shape)
mlp_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        mlp = MLPRegressor(hidden_layer_sizes=(512, 256, 128), max_iter=500, random_state=42)
        mlp.fit(X_tr, y_tr)
        mlp_oof[val_idx, t] = mlp.predict(X_val)
        mlp_test[:, t] += mlp.predict(stack_X_test) / kf.n_splits

print("Training TabPFN...")

tabpfn_oof = np.zeros(y.shape)
tabpfn_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx, t], y.iloc[val_idx, t]

        try:
            clf = TabPFNClassifier()
            clf.fit(X_tr.values, y_tr.values)
            tabpfn_oof[val_idx, t] = clf.predict(X_val.values)
            tabpfn_test[:, t] += clf.predict(X_test.values) / kf.n_splits
        except:
            # Fallback if TabPFN fails
            print(f"TabPFN failed for target {t+1}, using mean")
            tabpfn_oof[val_idx, t] = np.mean(y_tr)
            tabpfn_test[:, t] += np.mean(y_tr) / kf.n_splits

print("Optimizing blend weights with Optuna...")

def objective(trial):
    # Include more models in the final blend
    w_mlp = trial.suggest_float("w_mlp", 0.0, 1.0)
    w_xgb = trial.suggest_float("w_xgb", 0.0, 1.0)
    w_lgbm = trial.suggest_float("w_lgbm", 0.0, 1.0)
    w_tab = trial.suggest_float("w_tab", 0.0, 1.0)
    w_rf = trial.suggest_float("w_rf", 0.0, 1.0)
    w_et = trial.suggest_float("w_et", 0.0, 1.0)
    w_histgb = trial.suggest_float("w_histgb", 0.0, 1.0)
    w_ridge = trial.suggest_float("w_ridge", 0.0, 1.0)
    
    weights = [w_mlp, w_xgb, w_lgbm, w_tab, w_rf, w_et, w_histgb, w_ridge]
    total = sum(weights)
    weights = [w / total for w in weights]
    
    blended = (
        weights[0] * mlp_oof +
        weights[1] * models_oof['XGB'] +
        weights[2] * models_oof['LGBM'] +
        weights[3] * tabpfn_oof +
        weights[4] * models_oof['RandomForest'] +
        weights[5] * models_oof['ExtraTrees'] +
        weights[6] * models_oof['HistGB'] +
        weights[7] * models_oof['Ridge']
    )
    return np.mean(np.abs((y - blended) / y))

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)

w = study.best_params
print(f"Best weights: {w}")

# Normalize weights
weights = [w['w_mlp'], w['w_xgb'], w['w_lgbm'], w['w_tab'], 
          w['w_rf'], w['w_et'], w['w_histgb'], w['w_ridge']]
total = sum(weights)
weights = [weight / total for weight in weights]

final = (
    weights[0] * mlp_oof +
    weights[1] * models_oof['XGB'] +
    weights[2] * models_oof['LGBM'] +
    weights[3] * tabpfn_oof +
    weights[4] * models_oof['RandomForest'] +
    weights[5] * models_oof['ExtraTrees'] +
    weights[6] * models_oof['HistGB'] +
    weights[7] * models_oof['Ridge']
)

final_test = (
    weights[0] * mlp_test +
    weights[1] * models_test['XGB'] +
    weights[2] * models_test['LGBM'] +
    weights[3] * tabpfn_test +
    weights[4] * models_test['RandomForest'] +
    weights[5] * models_test['ExtraTrees'] +
    weights[6] * models_test['HistGB'] +
    weights[7] * models_test['RandomForest']
)

mape = np.mean(np.abs((y - final) / y)) * 100
print(f'Final CV MAPE: {mape:.4f}%')

# Print model performance analysis
print("\nIndividual model performance (MAPE):")
for name in model_names:
    model_mape = np.mean(np.abs((y - models_oof[name]) / y)) * 100
    print(f"{name}: {model_mape:.4f}%")

sub = pd.DataFrame(final_test, columns=[f'BlendProperty{i}' for i in range(1, 11)])
sub.insert(0, 'ID', test['ID'])
sub.to_csv('submission_enhanced_fullstack.csv', index=False)
print("Enhanced submission saved.")

Loading data...
Training 45 Level 1 models...
Training models for target 1/10...
Training models for target 2/10...
Training models for target 3/10...
Training models for target 4/10...
Training models for target 5/10...
Training models for target 6/10...
Training models for target 7/10...
Training models for target 8/10...
Training models for target 9/10...
Training models for target 10/10...
Level 1 complete. Now training MLP for Level 2 stacking...
Training TabPFN...
TabPFN failed for target 1, using mean
TabPFN failed for target 1, using mean
TabPFN failed for target 1, using mean
TabPFN failed for target 1, using mean
TabPFN failed for target 1, using mean
TabPFN failed for target 2, using mean
TabPFN failed for target 2, using mean
TabPFN failed for target 2, using mean
TabPFN failed for target 2, using mean
TabPFN failed for target 2, using mean
TabPFN failed for target 3, using mean
TabPFN failed for target 3, using mean
TabPFN failed for target 3, using mean
TabPFN failed for 

[I 2025-07-23 02:28:18,150] A new study created in memory with name: no-name-03e4c335-e34f-4d3d-9de0-859a41e6d9eb
[I 2025-07-23 02:28:18,154] Trial 0 finished with value: 1.288933009253339 and parameters: {'w_mlp': 0.006038369487319506, 'w_xgb': 0.3121986804678084, 'w_lgbm': 0.816446742120769, 'w_tab': 0.6926461427074679, 'w_rf': 0.8329990792045698, 'w_et': 0.47164685808822393, 'w_histgb': 0.10928066310607754, 'w_ridge': 0.16635460765328425}. Best is trial 0 with value: 1.288933009253339.
[I 2025-07-23 02:28:18,156] Trial 1 finished with value: 1.3167973225707277 and parameters: {'w_mlp': 0.26272715116784295, 'w_xgb': 0.8015256368291662, 'w_lgbm': 0.2793783991024432, 'w_tab': 0.35934779899238833, 'w_rf': 0.9362766474167654, 'w_et': 0.6627431212842376, 'w_histgb': 0.8838605378901759, 'w_ridge': 0.19568403062729745}. Best is trial 0 with value: 1.288933009253339.
[I 2025-07-23 02:28:18,158] Trial 2 finished with value: 1.2300166698162363 and parameters: {'w_mlp': 0.630639457887889, 'w_xg

TabPFN failed for target 10, using mean
Optimizing blend weights with Optuna...


[I 2025-07-23 02:28:18,346] Trial 15 finished with value: 1.1030208286425676 and parameters: {'w_mlp': 0.36369477344489043, 'w_xgb': 0.42367063191123683, 'w_lgbm': 0.8071497211206954, 'w_tab': 0.7706384824001812, 'w_rf': 0.22301156847371406, 'w_et': 0.6000280371776611, 'w_histgb': 0.6819185929005513, 'w_ridge': 0.6967614262844753}. Best is trial 4 with value: 0.9971340936150337.
[I 2025-07-23 02:28:18,371] Trial 16 finished with value: 1.1600252177897 and parameters: {'w_mlp': 0.6641502263094277, 'w_xgb': 0.2313068352563496, 'w_lgbm': 0.6122155476017399, 'w_tab': 0.4578267818504398, 'w_rf': 0.13652707051888735, 'w_et': 0.980938124178497, 'w_histgb': 0.9795748550527875, 'w_ridge': 0.44471301067188146}. Best is trial 4 with value: 0.9971340936150337.
[I 2025-07-23 02:28:18,396] Trial 17 finished with value: 1.083957643011538 and parameters: {'w_mlp': 0.42200335214927565, 'w_xgb': 0.1953818616923897, 'w_lgbm': 0.8909270509502084, 'w_tab': 0.2637051634357524, 'w_rf': 0.36062960313594805, '

Best weights: {'w_mlp': 0.31808149668382757, 'w_xgb': 0.4453660156313012, 'w_lgbm': 0.04399777948713789, 'w_tab': 0.7092272864696741, 'w_rf': 0.006118058750581561, 'w_et': 0.09204984241343696, 'w_histgb': 0.9935168692465959, 'w_ridge': 0.17360617159685726}
Final CV MAPE: 92.5729%

Individual model performance (MAPE):
Ridge: 231.0334%
Lasso: 214.9103%
ElasticNet: 236.6510%
BayesianRidge: 232.2658%
Huber: 192.4173%
ARD: 224.6246%
SGD: 299.5015%
PassiveAggressive: 321.7947%
RANSAC: 314.2892%
TheilSen: 240.6690%
Lars: 795.1859%
LarsCV: 222.4960%
LassoLars: 106.1802%
OrthogonalMP: 371.8635%
Ridge_alpha01: 234.4048%
Ridge_alpha10: 273.3929%
Lasso_alpha01: 206.1706%
ElasticNet_l1_07: 230.9308%
RandomForest: 199.2291%
ExtraTrees: 224.1295%
DecisionTree: 383.9870%
ExtraTree: 388.6908%
AdaBoost: 270.7657%
GradientBoost: 124.9835%
Bagging: 250.6021%
HistGB: 127.3042%
XGB: 187.0102%
LGBM: 105.4876%
RandomForest_200: 216.5622%
ExtraTrees_200: 217.2626%
KNN: 441.3873%
KNN_uniform: 450.5450%
KNN_dist