# Last Stand

In [15]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch
  Using cached https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp310-cp310-win_amd64.whl (2449.4 MB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cu121/torchvision-0.20.1%2Bcu121-cp310-cp310-win_amd64.whl (6.1 MB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cu121/torchaudio-2.5.1%2Bcu121-cp310-cp310-win_amd64.whl (4.1 MB)
Collecting filelock (from torch)
  Using cached https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)
Collecting networkx (from torch)
  Using cached https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting jinja2 (from torch)
  Using cached https://download.pytorch.org/whl/Jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch)
  Using cached https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Collecting sympy==1.

In [16]:
pip install scikit-learn pandas numpy xgboost lightgbm catboost optuna

Collecting scikit-learn
  Using cached scikit_learn-1.7.1-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting pandas
  Using cached pandas-2.3.1-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting xgboost
  Using cached xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Collecting catboost
  Using cached catboost-1.2.8-cp310-cp310-win_amd64.whl.metadata (1.5 kB)
Collecting optuna
  Using cached optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Using cached scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Collecting python-dateutil>=2.8.2 (from pandas)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.m

In [17]:
# 🚀 Shell.ai Hackathon 2025 — Winning Blueprint Starter Notebook
# Author: ChatGPT (based on proven Kaggle gold strategies)

# =============================
# 1️⃣ Imports & Config
# =============================

print("📦 Importing libraries...")
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb
import catboost
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import optuna
import warnings
warnings.filterwarnings('ignore')

# Optional: torch for MLP stacker
import torch
import torch.nn as nn

# =============================
# 2️⃣ Load Data
# =============================

print("📂 Loading data...")
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# The ten BlendProperty columns are the regression targets; every other column
# of train.csv is used as a feature.
blend_targets = [f'BlendProperty{k}' for k in range(1, 11)]
y = train[blend_targets]
X = train.drop(columns=blend_targets)
# The ID column of test.csv is an identifier, not a feature.
X_test = test.drop(columns='ID')

print(f"✅ Train shape: {X.shape}, Test shape: {X_test.shape}, Targets shape: {y.shape}")

# =============================
# 3️⃣ Feature Engineering Example
# =============================

print("🛠️ Starting feature engineering...")


def _add_blend_features(frame, n_components=5, n_properties=10):
    """Append blend-estimate and per-property summary columns to ``frame`` in place.

    For every property ``i`` the following columns are added:

    * ``Blend_estimate_Property{i}``: the sum over components ``j`` of
      ``Component{j}_fraction * Component{j}_Property{i}``.
    * ``Property{i}_mean`` / ``_std`` / ``_min`` / ``_max``: row-wise statistics
      over the ``Component{j}_Property{i}`` columns.

    Args:
        frame: DataFrame containing the ``Component{j}_fraction`` and
            ``Component{j}_Property{i}`` columns. It is modified in place.
        n_components: Number of components ``j`` (numbered from 1).
        n_properties: Number of properties ``i`` (numbered from 1).

    Returns:
        The same ``frame`` object, for convenience.
    """
    components = range(1, n_components + 1)
    properties = range(1, n_properties + 1)

    # Weighted blend estimates (all ten are added before any summary column,
    # which keeps the column order of the original implementation).
    for i in properties:
        frame[f'Blend_estimate_Property{i}'] = sum(
            frame[f'Component{j}_fraction'] * frame[f'Component{j}_Property{i}']
            for j in components
        )

    # Statistical summaries per property, computed only over the raw
    # component columns (never over the engineered ones).
    for i in properties:
        component_values = frame[[f'Component{j}_Property{i}' for j in components]]
        frame[f'Property{i}_mean'] = component_values.mean(axis=1)
        frame[f'Property{i}_std'] = component_values.std(axis=1)
        frame[f'Property{i}_min'] = component_values.min(axis=1)
        frame[f'Property{i}_max'] = component_values.max(axis=1)

    return frame


# Train and test go through the same helper so their engineered columns
# cannot drift apart.
for _frame in (X, X_test):
    _add_blend_features(_frame)

print("✅ Feature engineering completed.")

# =============================
# 4️⃣ Cross-Validation Setup
# =============================

print("🔁 Setting up 5-fold cross-validation...")

# A single shuffled, seeded splitter is reused by the Ridge and XGBoost loops
# so both stages see the same folds.
NFOLDS = 5
kf = KFold(NFOLDS, shuffle=True, random_state=42)

# Containers for OOF & test preds
oof_preds, test_preds = [], []

# =============================
# 5️⃣ Baseline Linear Model + Residuals
# =============================

print("🔧 Training Ridge (Linear) model...")
ridge = Ridge(alpha=1.0)
# Out-of-fold predictions (one row per training sample) and fold-averaged
# test predictions, each with one column per target.
ridge_oof = np.zeros(y.shape)
ridge_test = np.zeros((X_test.shape[0], y.shape[1]))

for fold_no, (fit_rows, holdout_rows) in enumerate(kf.split(X), start=1):
    print(f"  ➤ Ridge Fold {fold_no}/{NFOLDS}")

    # All targets are fit jointly; the same estimator object is refit per fold.
    ridge.fit(X.iloc[fit_rows], y.iloc[fit_rows])
    ridge_oof[holdout_rows] = ridge.predict(X.iloc[holdout_rows])
    ridge_test += ridge.predict(X_test) / NFOLDS

# What the linear stage could not explain; the next stage is trained on this.
residuals = y - ridge_oof
print("✅ Ridge model done. Residuals calculated.")

# =============================
# 6️⃣ XGBoost on Residuals
# =============================

print("🌲 Training XGBoost models on residuals...")

# Out-of-fold and fold-averaged test predictions of the residual models,
# one column per target.
xgb_oof = np.zeros(y.shape)
xgb_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    print(f"  ▶️ Training for BlendProperty{t + 1}")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        print(f"    ➤ XGBoost Fold {fold + 1}/{NFOLDS}")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        # The target here is the Ridge residual of property t, not the raw property.
        y_tr, y_val = residuals.iloc[tr_idx, t], residuals.iloc[val_idx, t]

        # tree_method='gpu_hist' is deprecated; the supported way to train on
        # the GPU is tree_method='hist' together with device='cuda'.
        model = xgb.XGBRegressor(learning_rate=0.03, max_depth=8, n_estimators=2000,
                                 tree_method='hist', device='cuda')
        # NOTE: early stopping is intentionally not enabled. In current XGBoost
        # releases `early_stopping_rounds` is an XGBRegressor constructor
        # argument rather than a fit() argument.
        model.fit(X_tr, y_tr,
                  eval_set=[(X_val, y_val)],
                  verbose=False)

        xgb_oof[val_idx, t] = model.predict(X_val)
        xgb_test[:, t] += model.predict(X_test) / NFOLDS

print("✅ XGBoost training completed.")

# Combine linear + residual
# XGBoost was fit on the Ridge residuals, so the full prediction is the sum
# of both stages.
final_test = ridge_test + xgb_test
final_oof = ridge_oof + xgb_oof

score = mean_absolute_percentage_error(y_true=y, y_pred=final_oof)
print(f"📉 CV MAPE: {score:.6f}")

# =============================
# 7️⃣ Save Submission
# =============================

print("💾 Saving submission...")

# Name the prediction columns after the targets they predict (BlendProperty1..10),
# matching the training target columns and the stacked submission written by the
# later cell, instead of the ad-hoc 'target_i' names.
sub = pd.DataFrame(final_test, columns=[f'BlendProperty{i}' for i in range(1, 11)])
sub.insert(0, 'ID', test['ID'])
sub.to_csv('submission.csv', index=False)

print("✅ Submission saved as submission.csv")

# =============================
# 8️⃣ TODO: Expand ➡️ Add more L1 models, MLP stacker, Optuna tuning, final combiner.
# =============================

print("📌 Finished baseline pipeline. Ready to expand with more models and stacking!")


📦 Importing libraries...
📂 Loading data...
✅ Train shape: (2000, 55), Test shape: (500, 55), Targets shape: (2000, 10)
🛠️ Starting feature engineering...
✅ Feature engineering completed.
🔁 Setting up 5-fold cross-validation...
🔧 Training Ridge (Linear) model...
  ➤ Ridge Fold 1/5
  ➤ Ridge Fold 2/5
  ➤ Ridge Fold 3/5
  ➤ Ridge Fold 4/5
  ➤ Ridge Fold 5/5
✅ Ridge model done. Residuals calculated.
🌲 Training XGBoost models on residuals...
  ▶️ Training for BlendProperty1
    ➤ XGBoost Fold 1/5
    ➤ XGBoost Fold 2/5
    ➤ XGBoost Fold 3/5
    ➤ XGBoost Fold 4/5
    ➤ XGBoost Fold 5/5
  ▶️ Training for BlendProperty2
    ➤ XGBoost Fold 1/5
    ➤ XGBoost Fold 2/5
    ➤ XGBoost Fold 3/5
    ➤ XGBoost Fold 4/5
    ➤ XGBoost Fold 5/5
  ▶️ Training for BlendProperty3
    ➤ XGBoost Fold 1/5
    ➤ XGBoost Fold 2/5
    ➤ XGBoost Fold 3/5
    ➤ XGBoost Fold 4/5
    ➤ XGBoost Fold 5/5
  ▶️ Training for BlendProperty4
    ➤ XGBoost Fold 1/5
    ➤ XGBoost Fold 2/5
    ➤ XGBoost Fold 3/5
    ➤ XGBoost

In [4]:
# ✅ Explanation for the LightGBM warning
# "No further splits with positive gain" means LightGBM tried to grow a tree but found no split that improved the objective function.
# This is normal when the target has low variance, or when the data is small relative to constraints such as num_leaves, max_depth, or min_child_samples.
# It is not harmful; LightGBM's built-in warnings can be suppressed by passing verbose=-1 to the estimator.

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, Lasso
import xgboost as xgb
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

print("Loading data...")
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Split train.csv into the ten BlendProperty targets and the remaining
# feature columns; test.csv only needs its ID column removed.
target_columns = [f'BlendProperty{n}' for n in range(1, 11)]
y = train[target_columns]
X = train.drop(columns=target_columns)
X_test = test.drop(columns=['ID'])

# Seeded, shuffled 5-fold splitter reused by both stacking levels.
kf = KFold(5, shuffle=True, random_state=42)

print("Starting Level 1 training...")

# One factory per Level 1 model. `model_names` is derived from this mapping so
# that every name that is stacked and averaged downstream has actually been
# trained: an untrained entry would keep its all-zero prediction arrays and
# drag the final equal-weight average toward zero.
level1_factories = {
    'Ridge': lambda: Ridge(alpha=1.0),
    'Lasso': lambda: Lasso(alpha=0.1),
    'XGB': lambda: xgb.XGBRegressor(learning_rate=0.03, max_depth=8, tree_method='hist', device='cuda'),
    # verbose=-1 silences LightGBM's "No further splits with positive gain" warnings.
    'LGBM': lambda: lgb.LGBMRegressor(learning_rate=0.03, num_leaves=128, verbose=-1),
    'SVR': lambda: SVR(C=1.0),
}
model_names = list(level1_factories)
# Out-of-fold and fold-averaged test predictions, one column per target.
models_oof = {name: np.zeros(y.shape) for name in model_names}
models_test = {name: np.zeros((X_test.shape[0], y.shape[1])) for name in model_names}

for t in range(y.shape[1]):
    print(f"Training for target BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(X)):
        print(f" Fold {fold+1}/5")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr = y.iloc[tr_idx, t]

        # A fresh estimator is built for every (target, fold, model) triple.
        for name, make_model in level1_factories.items():
            model = make_model()
            model.fit(X_tr, y_tr)
            models_oof[name][val_idx, t] = model.predict(X_val)
            models_test[name][:, t] += model.predict(X_test) / kf.n_splits

print("Stacking Level 1 outputs...")
# Column-wise concatenation in model_names order: each model contributes one
# column per target to the meta-feature matrices.
stack_X = np.hstack([models_oof[name] for name in model_names])
stack_X_test = np.hstack([models_test[name] for name in model_names])

print("Starting Level 2 stacking...")
# Out-of-fold and fold-averaged test predictions of the MLP stacker,
# one column per target.
mlp_oof = np.zeros(y.shape)
mlp_test = np.zeros((X_test.shape[0], y.shape[1]))

for t in range(y.shape[1]):
    print(f" MLP stacking for BlendProperty{t+1}...")
    for fold, (tr_idx, val_idx) in enumerate(kf.split(stack_X)):
        X_tr, X_val = stack_X[tr_idx], stack_X[val_idx]
        y_tr = y.iloc[tr_idx, t]

        # Seed the MLP so weight initialisation and batch shuffling are
        # repeatable, matching the seeded KFold used for the splits.
        mlp = MLPRegressor(hidden_layer_sizes=(512, 256, 128), activation='relu',
                           max_iter=500, random_state=42)
        mlp.fit(X_tr, y_tr)
        mlp_oof[val_idx, t] = mlp.predict(X_val)
        mlp_test[:, t] += mlp.predict(stack_X_test) / kf.n_splits

print("Combining all levels...")
# Equal-weight average of the MLP stacker and every Level 1 model. Every entry
# of models_oof / models_test must hold real predictions: an entry left at its
# all-zero initial value would bias this average toward zero.
n_blended = 1 + len(model_names)
final = (mlp_oof + sum(models_oof.values())) / n_blended
final_test = (mlp_test + sum(models_test.values())) / n_blended

# Compute the MAPE on plain NumPy arrays. Calling np.mean on a DataFrame only
# collapses to a single scalar on pandas >= 2.0; older versions return one
# value per column, which the ':.4f' format below cannot handle.
y_true = y.to_numpy()
mape = np.mean(np.abs((y_true - final) / y_true)) * 100
print(f'Stacked CV MAPE: {mape:.4f}%')

print("Saving submission...")
sub = pd.DataFrame(final_test, columns=[f'BlendProperty{i}' for i in range(1, 11)])
sub.insert(0, 'ID', test['ID'])
sub.to_csv('submission_final.csv', index=False)
print('Enhanced stacked submission saved as submission_final.csv.')


Loading data...
Starting Level 1 training...
Training for target BlendProperty1...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty2...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty3...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty4...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty5...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty6...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty7...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty8...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty9...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Training for target BlendProperty10...
 Fold 1/5
 Fold 2/5
 Fold 3/5
 Fold 4/5
 Fold 5/5
Stacking Level 1 outputs...
Starting Level 2 stacking...
 MLP stacking for