In [1]:
!pip install optuna lightgbm
# === 🧱 1. Imports ===
import pandas as pd
import numpy as np
import ast
import optuna
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score
from lightgbm import LGBMClassifier

# === 🧹 2. Load Data ===
df = pd.read_csv("/Users/lokkit/multilayerperceptron/proper_df (7).csv")
df["labels"] = df["model_family_vector"].apply(ast.literal_eval)
y = np.array(df["labels"].tolist())

# === 🧠 3. Load BERT Embeddings ===
X = np.load("/Users/lokkit/multilayerperceptron/bert_embeddings (1).npy")

# === ✂️ 4. Train/Validation/Test Split ===
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.2, random_state=42)

# === 🧪 5. Optuna Objective ===
def objective(trial):
    params = {
        "objective": "binary",
        "boosting_type": "gbdt",
        "learning_rate": trial.suggest_float("lr", 0.02, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 16, 128, log=True),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 100),
        "lambda_l2": trial.suggest_float("lambda_l2", 0.0, 5.0),
        "feature_fraction": 1.0,
        "bagging_fraction": 0.8,
        "bagging_freq": 1,
        "verbosity": -1,
        "seed": 42,
        "device_type": "cpu",  # Use "gpu" if GPU is available
    }

    clf = OneVsRestClassifier(LGBMClassifier(**params, n_estimators=300))
    clf.fit(X_train, y_train)
    y_val_pred = clf.predict(X_val)
    return f1_score(y_val, y_val_pred, average='macro')

# === 🧵 6. Run Optuna Tuning ===
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("✅ Best Macro F1:", study.best_value)
print("📌 Best Params:", study.best_params)

# === 🧠 7. Train Final Model on Full Train+Val Set ===
final_model = OneVsRestClassifier(LGBMClassifier(**study.best_params, n_estimators=300))
final_model.fit(X_trainval, y_trainval)

# === 📈 8. Evaluate on Test Set ===
y_pred = final_model.predict(X_test)
print("\n=== Raw Test Prediction Report ===")
print(classification_report(y_test, y_pred))
print("F1 Micro:", f1_score(y_test, y_pred, average='micro'))
print("F1 Macro:", f1_score(y_test, y_pred, average='macro'))
print("Exact Match:", accuracy_score(y_test, y_pred))

# === 🎯 9. Predict Probabilities for Threshold Tuning ===
y_proba = final_model.predict_proba(X_test)

def optimize_global_threshold_for_f1(y_true, y_proba):
    thresholds = np.linspace(0.1, 0.9, 101)
    best_thresholds = []
    for i in range(y_true.shape[1]):
        best_t, best_score = 0.5, -1
        for t in thresholds:
            test_thresholds = np.ones(y_true.shape[1]) * 0.5
            test_thresholds[i] = t
            y_pred = (y_proba >= test_thresholds).astype(int)
            score = np.mean([
                f1_score(y_true, y_pred, average='micro', zero_division=0),
                f1_score(y_true, y_pred, average='macro', zero_division=0)
            ])
            if score > best_score:
                best_score, best_t = score, t
        best_thresholds.append(best_t)
    return np.array(best_thresholds), best_score

# === 🔁 10. Optimize and Re-evaluate ===
best_thresholds, _ = optimize_global_threshold_for_f1(y_test, y_proba)
y_pred_optimized = (y_proba >= best_thresholds).astype(int)

print("\n=== Threshold-Optimized Report ===")
print(classification_report(y_test, y_pred_optimized))
print("F1 Micro:", f1_score(y_test, y_pred_optimized, average='micro'))
print("F1 Macro:", f1_score(y_test, y_pred_optimized, average='macro'))
print("Exact Match:", accuracy_score(y_test, y_pred_optimized))


Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
Downloading sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-04-18 06:42:46,734] A new study created in memory with name: no-name-ceeaa324-0b5f-4f22-bacd-0b6742465867
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-18 06:42:49,749] Trial 0 finished with value: 0.2824404761904762 and parameters: {'lr': 0.022428290505977496, 'num_leaves': 54, 'min_data_in_leaf': 59, 'lambda_l2': 3.9957550291677753}. Best is trial 0 with value: 0.2824404761904762.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-18 06:42:53,083] Trial 1 finished with value: 0.34657061688311686 and parameters: {'lr': 0.04736109551844632, 'num_leaves': 26, 'min_data_in_leaf': 64, 'lambda_l2': 2.831260579561918}. Best is trial 1 with value: 0.34657061688311686.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-04-18 06:42:55,522] Trial 2 finished with value: 0.3417572463768116 and parameters: {'lr': 0.04439928419352426, 'num_le

✅ Best Macro F1: 0.3613780241497633
📌 Best Params: {'lr': 0.1895530245219824, 'num_leaves': 19, 'min_data_in_leaf': 49, 'lambda_l2': 0.9966394971796855}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



=== Raw Test Prediction Report ===
              precision    recall  f1-score   support

           0       0.90      0.60      0.72        15
           1       0.67      0.29      0.40        14
           2       1.00      0.57      0.73         7
           3       0.75      0.20      0.32        15
           4       1.00      0.30      0.46        10
           5       0.00      0.00      0.00         0
           6       0.33      0.10      0.15        10
           7       0.60      0.19      0.29        16
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         5
          10       1.00      1.00      1.00         3
          11       0.82      0.56      0.67        16
          12       0.00      0.00      0.00         0
          13       0.70      0.33      0.45        21
          14       0.00      0.00      0.00         0
          15       1.00      0.35      0.52        17

   micro avg       0.80      0.35      0.49 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
