In [9]:
# %% [1] Imports & Setup
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from itertools import product
from scipy.optimize import minimize
import warnings; warnings.filterwarnings('ignore')

results = {}  # üß† to store all F1 results


In [10]:
# %% [2] Data Loading
df = pd.read_csv("train.csv")
X = df.drop(columns=["ASI_category", "ID"])
y = df["ASI_category"].astype('category').cat.codes

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
print(f"‚úÖ Data: {X_train.shape} | Classes: {len(np.unique(y))}")

‚úÖ Data: (14522, 19) | Classes: 3


In [11]:
xgb = XGBClassifier(n_estimators=600, max_depth=8, learning_rate=0.05, random_state=42)
lgb = LGBMClassifier(n_estimators=400, max_depth=12, learning_rate=0.06, random_state=42)
rf  = RandomForestClassifier(n_estimators=500, max_depth=10, random_state=42)

for model in [xgb, lgb, rf]:
    model.fit(X_train, y_train)
print("‚úÖ Base models trained.")

# Base model F1s
results["XGBoost"] = f1_score(y_val, xgb.predict(X_val), average="macro")
results["LightGBM"] = f1_score(y_val, lgb.predict(X_val), average="macro")
results["RandomForest"] = f1_score(y_val, rf.predict(X_val), average="macro")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001447 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3847
[LightGBM] [Info] Number of data points in the train set: 14522, number of used features: 19
[LightGBM] [Info] Start training from score -1.755382
[LightGBM] [Info] Start training from score -0.355142
[LightGBM] [Info] Start training from score -2.070802
‚úÖ Base models trained.


In [12]:
# %% [4] Weighted Blending
xgb_p, lgb_p, rf_p = xgb.predict_proba(X_val), lgb.predict_proba(X_val), rf.predict_proba(X_val)
grid = np.arange(0.1, 1.1, 0.1); best_f1, best_w = 0, None
for w1, w2, w3 in product(grid, repeat=3):
    probs = (w1*xgb_p + w2*lgb_p + w3*rf_p) / (w1+w2+w3)
    f1 = f1_score(y_val, np.argmax(probs, axis=1), average="macro")
    if f1 > best_f1: best_f1, best_w = f1, (w1, w2, w3)
results["Weighted Ensemble"] = best_f1
print(f"üî• Weighted Avg F1={best_f1:.4f}, Weights={best_w}")


üî• Weighted Avg F1=0.9224, Weights=(np.float64(0.5), np.float64(0.30000000000000004), np.float64(0.9))


In [13]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
oof_preds = np.zeros((len(X_train), len(np.unique(y))))
for train_idx, val_idx in kf.split(X_train):
    for model in [XGBClassifier(), LGBMClassifier(), RandomForestClassifier()]:
        model.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
        oof_preds[val_idx] += model.predict_proba(X_train.iloc[val_idx])
oof_preds /= 3
meta_lr = LogisticRegression(max_iter=1000, multi_class='multinomial')
meta_lr.fit(oof_preds, y_train)
stack_preds = meta_lr.predict(np.mean([xgb_p, lgb_p, rf_p], axis=0))
results["OOF Stacking"] = f1_score(y_val, stack_preds, average='macro')


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001488 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3822
[LightGBM] [Info] Number of data points in the train set: 11617, number of used features: 19
[LightGBM] [Info] Start training from score -1.764838
[LightGBM] [Info] Start training from score -0.354574
[LightGBM] [Info] Start training from score -2.061103
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3825
[LightGBM] [Info] Number of data points in the train set: 11617, number of used features: 19
[LightGBM] [Info] Start training from score -1.743449
[LightGBM] [Info] Start training from score -0.360482
[LightGBM] [Info] Start training from score -2.057728
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000915 sec

In [14]:
meta_probs = meta_lr.predict_proba(np.mean([xgb_p, lgb_p, rf_p], axis=0))
conf = np.max(meta_probs, axis=1)
pseudo_idx = np.where(conf >= 0.95)[0]
if len(pseudo_idx):
    X_aug = pd.concat([X_train, X_val.iloc[pseudo_idx]])
    y_aug = np.concatenate([y_train, y_val.iloc[pseudo_idx]])
    meta_lr.fit(X_aug[X_train.columns], y_aug)
    pseudo_f1 = f1_score(y_val, meta_lr.predict(X_val), average="macro")
    results["Pseudo-Labeling"] = pseudo_f1
    print(f"üöÄ Pseudo-labels added ({len(pseudo_idx)}), F1={pseudo_f1:.4f}")
else:
    results["Pseudo-Labeling"] = results["OOF Stacking"]
    print("‚ö†Ô∏è No pseudo-labels confident enough.")


üöÄ Pseudo-labels added (3096), F1=0.5532


In [15]:
try:
    from tensorflow.keras import models, layers
    X_meta = np.hstack([xgb_p, lgb_p, rf_p])
    meta_nn = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(X_meta.shape[1],)),
        layers.Dropout(0.2),
        layers.Dense(len(np.unique(y)), activation='softmax')
    ])
    meta_nn.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    meta_nn.fit(X_meta, y_val, epochs=30, batch_size=64, verbose=0)
    nn_preds = np.argmax(meta_nn.predict(X_meta), axis=1)
    results["Neural Stacker"] = f1_score(y_val, nn_preds, average='macro')
    print("üß† Neural Stacker F1:", results["Neural Stacker"])
except Exception as e:
    results["Neural Stacker"] = np.nan
    print("üí§ Skipped NN stacking:", e)


[1m114/114[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2ms/step
üß† Neural Stacker F1: 0.9231923585751202


In [16]:
# %% [8] Bayesian Averaging
def objective(w):
    w = np.clip(w, 0, 1)
    p = (w[0]*xgb_p + w[1]*lgb_p + w[2]*rf_p) / np.sum(w)
    return -f1_score(y_val, np.argmax(p, axis=1), average="macro")

res = minimize(objective, [0.33,0.33,0.33], bounds=[(0,1)]*3)
opt_w = res.x
p = (opt_w[0]*xgb_p + opt_w[1]*lgb_p + opt_w[2]*rf_p)/np.sum(opt_w)
results["Bayesian Weighted"] = f1_score(y_val, np.argmax(p,axis=1),average='macro')

# %% [9] Knowledge Distillation
teacher_probs = (xgb_p + lgb_p + rf_p) / 3
student = LogisticRegression(max_iter=2000, multi_class='multinomial')
student.fit(X_val, np.argmax(teacher_probs, axis=1))
results["Distilled Student"] = f1_score(y_val, student.predict(X_val), average='macro')



In [17]:
leaderboard = pd.DataFrame.from_dict(results, orient='index', columns=['F1 Score']).sort_values(by='F1 Score', ascending=False)
print("\nüèÜ Ensemble Leaderboard:")
display(leaderboard.style.background_gradient(cmap='viridis'))


üèÜ Ensemble Leaderboard:


Unnamed: 0,F1 Score
Neural Stacker,0.923192
Weighted Ensemble,0.922397
LightGBM,0.921792
OOF Stacking,0.920343
Bayesian Weighted,0.919533
XGBoost,0.918236
RandomForest,0.898304
Pseudo-Labeling,0.553179
Distilled Student,0.543091
