In [32]:
import numpy as np
import pandas as pd
import time
import joblib

from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical

# Single seed handed to every estimator / splitter in this notebook that accepts one.
RANDOM_STATE = 42

print("Loading data...")
# Load the four arrays in a fixed order: train features, test features,
# train labels, test labels.
X_train, X_test, y_train, y_test = map(
    np.load,
    (
        "../data/processed/X_train.npy",
        "../data/processed/X_test.npy",
        "../data/processed/y_train.npy",
        "../data/processed/y_test.npy",
    ),
)

# Quick sanity check of what was loaded.
print("X_train shape:", X_train.shape)
print("Classes:", np.unique(y_train))

Loading data...
X_train shape: (125973, 80)
Classes: [0 1 2 3]


In [33]:
# 1. Class weights: sklearn's 'balanced' weights as the baseline, then an
#    extra manual boost for classes 2 and 3.
classes = np.unique(y_train)
weights_array = compute_class_weight('balanced', classes=classes, y=y_train)
custom_weights = {label: weight for label, weight in zip(classes, weights_array)}

# Amplify minority classes for Probe and Privilege.
# (class label, multiplier applied on top of the balanced weight)
for boosted_label, factor in ((2, 1.3), (3, 1.75)):
    custom_weights[boosted_label] = custom_weights[boosted_label] * factor

# Keras expects plain Python int keys / float values rather than NumPy scalars.
keras_weights = dict((int(label), float(weight)) for label, weight in custom_weights.items())
print(f"Using Custom Amplified Weights: {keras_weights}")

Using Custom Amplified Weights: {0: 0.46765439615104765, 1: 0.685724083872232, 2: 3.5124592484557313, 3: 52.639147564469916}


In [34]:
# 2. Fast Base Models Setup
# Keyword arguments shared by every base learner that supports class weighting.
weighted_kwargs = dict(class_weight=custom_weights, random_state=RANDOM_STATE)

knn_stack = KNeighborsClassifier(
    n_neighbors=7,
    weights='distance',
    n_jobs=-1,
)

dt_stack = DecisionTreeClassifier(
    max_depth=None,
    min_samples_split=8,
    min_samples_leaf=2,
    **weighted_kwargs,
)

lr_stack = LogisticRegression(
    max_iter=1000,
    solver='lbfgs',
    **weighted_kwargs,
)

rf_stack = RandomForestClassifier(
    n_estimators=400,
    min_samples_split=3,
    min_samples_leaf=1,
    max_features='sqrt',
    n_jobs=-1,
    **weighted_kwargs,
)

models = [knn_stack, dt_stack, lr_stack, rf_stack]

# 3. Fast OOF Generation
# Every base model contributes one block of `n_classes` probability columns.
# Train rows are filled with out-of-fold predictions; test rows receive the
# average of the predictions made by each fold's fitted model.
print("\nGenerating OOF meta-features...")
start_time = time.time()

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
n_classes = len(classes)
n_models = len(models)

X_meta_train = np.zeros((X_train.shape[0], n_models * n_classes))
X_meta_test = np.zeros((X_test.shape[0], n_models * n_classes))

for model_idx, estimator in enumerate(models):
    print(f"Training base model {model_idx + 1}/{n_models} ({type(estimator).__name__})...")

    # Columns of the meta-feature matrices owned by this estimator.
    block = slice(model_idx * n_classes, (model_idx + 1) * n_classes)
    test_proba_sum = np.zeros((X_test.shape[0], n_classes))

    for fit_rows, holdout_rows in skf.split(X_train, y_train):
        # Refit on this fold's training rows only, so the held-out rows are
        # scored by a model that never saw them.
        estimator.fit(X_train[fit_rows], y_train[fit_rows])
        X_meta_train[holdout_rows, block] = estimator.predict_proba(X_train[holdout_rows])
        test_proba_sum += estimator.predict_proba(X_test)

    X_meta_test[:, block] = test_proba_sum / skf.n_splits

print(f"OOF Feature Generation Complete in {(time.time() - start_time)/60:.2f} minutes.")


Generating OOF meta-features...
Training base model 1/4 (KNeighborsClassifier)...
Training base model 2/4 (DecisionTreeClassifier)...
Training base model 3/4 (LogisticRegression)...
Training base model 4/4 (RandomForestClassifier)...
OOF Feature Generation Complete in 2.02 minutes.


In [35]:
# 4. Keras Meta-Learner
# A small dense network trained on the stacked base-model probabilities
# (X_meta_train) to predict the final class.
print("\nTraining Meta-Learner (Keras Neural Network)...")

# Seed the Python, NumPy and TensorFlow RNGs right before the model is built so
# that weight initialisation, dropout masks and batch shuffling are the same on
# every run of this cell. Without this the sklearn models are seeded via
# RANDOM_STATE but the meta-learner is not.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-for-bit reproducibility is required.
tf.keras.utils.set_random_seed(RANDOM_STATE)

# One-hot targets. num_classes is pinned so train and test encodings always have
# the same width as the softmax layer, even if a split happens to lack the
# highest label.
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

meta_model = Sequential([
    Input(shape=(X_meta_train.shape[1],)),  # explicit Input layer instead of input_shape on the first Dense
    Dense(128, activation='relu'),
    Dropout(0.35),
    Dense(64, activation='relu'),
    Dropout(0.25),
    Dense(n_classes, activation='softmax')
])

meta_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train with the amplified class weights. `validation_split` holds out the
# trailing 20% of rows (taken before any shuffling); the threshold-tuning cell
# relies on that same slice.
meta_model.fit(
    X_meta_train,
    y_train_cat,
    epochs=35,
    batch_size=128,
    validation_split=0.2,
    class_weight=keras_weights,
    verbose=0
)
print("Meta-Learner training complete.")


Training Meta-Learner (Keras Neural Network)...
Meta-Learner training complete.


In [36]:
# 5. Initial Evaluation (Standard argmax)
# Baseline test-set score: the predicted class is simply the most probable one.
print("\n===== ULTIMATE HYBRID RESULTS (BASELINE) =====")

y_pred_proba = meta_model.predict(X_meta_test, verbose=0)
y_pred_meta = y_pred_proba.argmax(axis=1)

# Headline metrics.
acc = accuracy_score(y_test, y_pred_meta)
macro_f1 = f1_score(y_test, y_pred_meta, average='macro')
print(f"Accuracy:  {acc:.5f}")
print(f"Macro F1:  {macro_f1:.5f}")

# Per-class breakdown.
baseline_report = classification_report(y_test, y_pred_meta)
print("\nClassification Report:\n")
print(baseline_report)


===== ULTIMATE HYBRID RESULTS (BASELINE) =====
Accuracy:  0.82310
Macro F1:  0.76849

Classification Report:

              precision    recall  f1-score   support

           0       0.74      0.97      0.84      9711
           1       0.96      0.83      0.89      7458
           2       0.84      0.76      0.80      2421
           3       0.95      0.39      0.55      2954

    accuracy                           0.82     22544
   macro avg       0.87      0.73      0.77     22544
weighted avg       0.85      0.82      0.81     22544



In [37]:
# 6. Automated Threshold Finder (CLEAN VALIDATION SET METHOD)
# Sweeps a multiplier applied to the class-3 probability before argmax and keeps
# the value that maximises class-3 F1 on the Keras validation rows (never on the
# test set).
print("\n===== AUTOMATED THRESHOLD FINDER (NO DATA LEAKAGE) =====")

# Must equal the `validation_split` passed to meta_model.fit().
VAL_FRACTION = 0.2
# Index of the class whose probability is rescaled.
TARGET_CLASS = 3

# 1. Recover the exact hold-out rows Keras used during training.
#    Keras keeps the first floor(n * (1 - validation_split)) rows for training
#    and validates on everything after that, so the boundary has to be computed
#    from the training side. int(n * 0.2) can come out one row short of the real
#    validation set (25194 vs 25195 rows for n = 125973).
split_at = int(np.floor(len(X_meta_train) * (1.0 - VAL_FRACTION)))
X_meta_val = X_meta_train[split_at:]
y_val = y_train[split_at:]
val_size = len(X_meta_val)

# 2. Predict on the VALIDATION set, NOT the Test set
y_pred_proba_val = meta_model.predict(X_meta_val, verbose=0)

# Start below any attainable F1 so the first candidate (1.0, i.e. plain argmax)
# is always recorded. This keeps best_acc / best_report meaningful even when
# class-3 F1 is 0 for every multiplier.
best_class3_f1 = -1.0
best_multiplier = 1.0
best_report = None
best_acc = 0.0

# Test multipliers from 1.0 to 15.0 in steps of 0.5
for mult in np.arange(1.0, 15.5, 0.5):
    temp_proba = y_pred_proba_val.copy()

    # Boost the target class, then take the usual argmax.
    temp_proba[:, TARGET_CLASS] *= mult
    temp_pred = np.argmax(temp_proba, axis=1)

    # Ask for the target-class F1 directly: this works even when the class is
    # missing from both labels and predictions (a report-dict lookup would raise
    # KeyError) and avoids building a full report for every candidate.
    current_class3_f1 = float(
        f1_score(y_val, temp_pred, labels=[TARGET_CLASS], average=None, zero_division=0)[0]
    )

    # Strict '>' keeps the smallest multiplier among ties.
    if current_class3_f1 > best_class3_f1:
        best_class3_f1 = current_class3_f1
        best_multiplier = float(mult)  # plain float, not a NumPy scalar, for saving later
        best_report = classification_report(y_val, temp_pred, zero_division=0)
        best_acc = accuracy_score(y_val, temp_pred)

print(f"üî• Optimal Class 3 Multiplier Found: {best_multiplier}")
print(f"Validation Accuracy with this multiplier: {best_acc:.5f}")
print(f"Maximized Validation Class 3 F1: {best_class3_f1:.5f}")


===== AUTOMATED THRESHOLD FINDER (NO DATA LEAKAGE) =====
üî• Optimal Class 3 Multiplier Found: 1.0
Validation Accuracy with this multiplier: 0.99714
Maximized Validation Class 3 F1: 0.87500


In [38]:
# Persist the selected class-3 multiplier so it can be reloaded later.
import joblib

threshold_payload = {'class_3_multiplier': best_multiplier}
joblib.dump(threshold_payload, "../outputs/models/optimal_thresholds.pkl")
print(f"‚úÖ Optimal Clean Threshold ({best_multiplier}x) saved successfully!")

‚úÖ Optimal Clean Threshold (1.0x) saved successfully!


In [None]:
# NOTE: this whole cell is one triple-quoted string literal, so none of the
# statements inside it execute. Running the cell only echoes the string back as
# the cell output; no model, base-estimator list or weight dict is written.
# The text inside is the disabled "save pipeline" step: it would save
# `meta_model`, `models` and `custom_weights` under ../outputs/models/.
# NOTE(review): if this is re-enabled, be aware that each estimator in `models`
# was last fitted inside the OOF loop on a single fold's training split, not on
# the full training set - confirm that is what should be persisted.
'''# 7. Saving the Winning Pipeline
import joblib

print("\n===== SAVING WINNING PIPELINE =====")

# Save the Keras Meta-Learner
meta_model.save("../outputs/models/meta_model_ultimate.keras")
print("‚úÖ Keras Meta-Learner saved successfully!")

# Save the Base Models 
joblib.dump(models, "../outputs/models/fast_base_models.pkl")
print("‚úÖ Fast Base Models saved successfully!")

# Save your custom weights 
joblib.dump(custom_weights, "../outputs/models/ultimate_class_weights.pkl")
print("‚úÖ Custom Amplified Weights saved successfully!")


print("\nüèÜ Notebook 03 Complete. Ready for Explainability.")'''

'# 7. Saving the Winning Pipeline\nimport joblib\n\nprint("\n===== SAVING WINNING PIPELINE =====")\n\n# Save the Keras Meta-Learner\nmeta_model.save("../outputs/models/meta_model_ultimate.keras")\nprint("‚úÖ Keras Meta-Learner saved successfully!")\n\n# Save the Base Models \njoblib.dump(models, "../outputs/models/fast_base_models.pkl")\nprint("‚úÖ Fast Base Models saved successfully!")\n\n# Save your custom weights \njoblib.dump(custom_weights, "../outputs/models/ultimate_class_weights.pkl")\nprint("‚úÖ Custom Amplified Weights saved successfully!")\n\n\nprint("\nüèÜ Notebook 03 Complete. Ready for Explainability.")'