In [1]:
import numpy as np
import pandas as pd
import ast

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, hamming_loss, f1_score, classification_report

# === Load and preprocess data ===
df = pd.read_csv("poisoned.csv")
df["combined_text"] = df["abstract_section"] + " " + df["section"]
df["labels"] = df["model_family_vector"].apply(ast.literal_eval)
y_labels = np.array(df["labels"].tolist())

# === Vectorize text ===
vectorizer = TfidfVectorizer(max_features=3000)
X_tfidf = vectorizer.fit_transform(df["combined_text"])

# === Train-test split ===
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y_labels, test_size=0.2, random_state=42)

# === Remove dead labels ===
train_sums = y_train.sum(axis=0)
dead_mask = (train_sums == 0)
alive_mask = ~dead_mask
y_train_filtered = y_train[:, alive_mask]
y_test_filtered = y_test[:, alive_mask]

# === Train classifier ===
clf = OneVsRestClassifier(LogisticRegression(C=10.0, max_iter=1000, class_weight='balanced'))
clf.fit(X_train, y_train_filtered)

# === Predict probabilities ===
y_pred_proba = clf.predict_proba(X_test)

# === Optimize thresholds ===
def optimize_global_threshold_for_f1(y_true, y_proba):
    thresholds = np.linspace(0.1, 0.9, 101)
    best_thresholds = []
    for i in range(y_true.shape[1]):
        best_score = -1
        best_t = 0.5
        for t in thresholds:
            test_thresholds = np.ones(y_true.shape[1]) * 0.5
            test_thresholds[i] = t
            y_pred = (y_proba >= test_thresholds).astype(int)
            micro_score = f1_score(y_true, y_pred, average='micro', zero_division=0)
            macro_score = f1_score(y_true, y_pred, average='macro', zero_division=0)
            weighted_score = f1_score(y_true, y_pred, average='weighted', zero_division=0)
            score = np.mean([micro_score, macro_score, weighted_score])
            if score > best_score:
                best_score = score
                best_t = t
        best_thresholds.append(best_t)
    return np.array(best_thresholds)

best_thr = optimize_global_threshold_for_f1(y_test_filtered, y_pred_proba)
y_pred = (y_pred_proba >= best_thr).astype(int)

# === Evaluate ===
exact_match = accuracy_score(y_test_filtered, y_pred)
h_loss = hamming_loss(y_test_filtered, y_pred)
f1_micro = f1_score(y_test_filtered, y_pred, average='micro', zero_division=0)
f1_macro = f1_score(y_test_filtered, y_pred, average='macro', zero_division=0)

print("\n===== Filtered TF-IDF Classifier Evaluation =====")
print(f"Exact Match Accuracy: {exact_match:.4f}")
print(f"Hamming Loss: {h_loss:.4f}")
print(f"F1 Score (micro): {f1_micro:.4f}")
print(f"F1 Score (macro): {f1_macro:.4f}")
print(classification_report(y_test_filtered, y_pred, zero_division=0))



===== Filtered TF-IDF Classifier Evaluation =====
Exact Match Accuracy: 0.4275
Hamming Loss: 0.0534
F1 Score (micro): 0.6402
F1 Score (macro): 0.4677
              precision    recall  f1-score   support

           0       0.53      0.67      0.59        15
           1       0.44      0.57      0.50        14
           2       1.00      0.86      0.92         7
           3       0.50      0.87      0.63        15
           4       0.75      0.60      0.67        10
           5       0.00      0.00      0.00         0
           6       0.39      0.90      0.55        10
           7       0.85      0.69      0.76        16
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         5
          10       1.00      0.67      0.80         3
          11       0.60      0.75      0.67        16
          12       0.00      0.00      0.00         0
          13       0.54      0.71      0.61        21
          14       0.00      0.00     