In [3]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Reload the labelled data: read the seed-label and pseudo-label CSVs and stack
# them into a single frame with a fresh 0..n-1 index (ignore_index=True).
seed   = pd.read_csv('../data/seed_labels.csv', encoding='utf-8')
pseudo = pd.read_csv('../data/pseudo_labels.csv', encoding='utf-8')
df_all = pd.concat([seed, pseudo], ignore_index=True)

# Recreate a 10% hold-out split of the row index, stratified on 'label' with a
# fixed random_state so the selection is deterministic for identical input.
# NOTE(review): this only equals the training-time validation set if training
# used the same concatenation order, test_size, stratify column and
# random_state -- confirm against the training script.
# NOTE(review): the split is drawn from seed + pseudo rows, so part of the
# reference labels below may be pseudo-labels -- confirm this is intended.
from sklearn.model_selection import train_test_split
_, idx = train_test_split(df_all.index, test_size=0.1,
                          stratify=df_all['label'], random_state=42)
# Keep only the held-out rows and renumber them from 0.
val = df_all.loc[idx].reset_index(drop=True)

# Load the saved tokenizer (use_fast=False requests the non-fast tokenizer
# implementation) and the sequence-classification model from the same directory.
tok = AutoTokenizer.from_pretrained('outputs/with_pseudo/model', use_fast=False)
mdl = AutoModelForSequenceClassification.from_pretrained('outputs/with_pseudo/model')
mdl.eval()  # put the module in evaluation mode before running inference

# Predict
def predict(texts, batch_size=64):
    """Return the predicted class index for each text.

    Uses the module-level tokenizer ``tok`` and model ``mdl``. Texts are
    tokenized in batches (padded to the longest item in the batch, truncated
    to 128 tokens) and scored without gradient tracking.

    Args:
        texts: Sliceable sequence of strings (e.g. a list).
        batch_size: Number of texts tokenized and scored per forward pass.

    Returns:
        A 1-D integer NumPy array with one argmax class index per input text,
        in input order. Empty input yields an empty int64 array.
    """
    if len(texts) == 0:
        # np.array([]) would come back as float64; keep labels integral so
        # downstream metric code sees a consistent dtype.
        return np.empty(0, dtype=np.int64)

    # Run on whatever device the model currently lives on, so the function
    # keeps working if ``mdl`` has been moved to a GPU.
    device = mdl.device

    batch_preds = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tok(batch, padding=True, truncation=True,
                     max_length=128, return_tensors='pt').to(device)
        with torch.no_grad():
            logits = mdl(**inputs).logits
        # argmax over the class dimension; bring results back to host memory.
        batch_preds.append(logits.argmax(dim=1).cpu())

    # One concatenation at the end instead of extending a Python list with
    # per-element NumPy scalars.
    return torch.cat(batch_preds).numpy()

# Reference labels and model predictions for the validation rows, in row order.
y_true = val['label'].to_numpy()
y_pred = predict(val['text'].tolist())

# Compute both summaries first, then print them: the per-class
# precision/recall/F1 table (4 decimal places) and the raw confusion counts.
_val_report = classification_report(y_true, y_pred, digits=4)
_val_confusion = confusion_matrix(y_true, y_pred)

print(_val_report)
print("Confusion matrix:\n", _val_confusion)


              precision    recall  f1-score   support

           0     0.9983    0.9977    0.9980      1734
           1     0.8095    0.8500    0.8293        20

    accuracy                         0.9960      1754
   macro avg     0.9039    0.9238    0.9136      1754
weighted avg     0.9961    0.9960    0.9961      1754

Confusion matrix:
 [[1730    4]
 [   3   17]]
