In [1]:
# Cell 1 — Load saved model and run quick predictions
%run ./00_config.ipynb

import torch, numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Restore the saved tokenizer and sequence classifier from the configured directory.
model_dir = cfg.paths.model_dir
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

# Use the GPU when CUDA reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Move the weights to the chosen device, then switch to evaluation mode.
model.to(device)
model.eval()

def predict_proba(texts, max_len=None, batch_size=32):
    """Return per-label sigmoid probabilities for one or more texts.

    Args:
        texts: A single string, or an iterable of strings.
        max_len: Maximum token length used for truncation. When omitted
            (or falsy) ``cfg.train.max_len`` is used instead.
        batch_size: Number of texts tokenized and scored per forward pass.

    Returns:
        A 2-D ``np.ndarray`` with one row per input text (in input order) and
        one column per model output logit. An empty input yields an array of
        shape ``(0, model.config.num_labels)`` instead of raising.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Accept a bare string as well as any iterable (tuple, Series, generator).
    texts = [texts] if isinstance(texts, str) else list(texts)
    max_len = max_len or cfg.train.max_len
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # np.vstack([]) raises, so answer the empty case explicitly.
    # NOTE(review): float32 assumed to match the model's output dtype — confirm.
    if not texts:
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # Pad only to the longest text in the batch (still capped by
            # max_len through truncation) rather than always to max_len.
            enc = tokenizer(batch, truncation=True, padding=True,
                            max_length=max_len, return_tensors="pt").to(device)
            # Sigmoid (not softmax): each label is scored independently.
            chunks.append(torch.sigmoid(model(**enc).logits).cpu().numpy())
    return np.vstack(chunks)

import pandas as pd

# Three hand-written probes: polite disagreement, a direct insult, a neutral greeting.
samples = [
    "I completely disagree with you but let's keep it respectful.",
    "You are an absolute idiot and a disgrace.",
    "Have a nice day!"
]
probs = predict_proba(samples, max_len=128)

# One row per sample (s1, s2, ...), one column per configured label.
df = pd.DataFrame(
    probs,
    index=[f"s{pos}" for pos, _ in enumerate(samples, start=1)],
    columns=list(cfg.labels),
)
print("Per-label probabilities:\n")
display(df.round(3))


Python version: 3.13.7 (tags/v3.13.7:bcee1c3, Aug 14 2025, 14:15:11) [MSC v.1944 64 bit (AMD64)]
CUDA available: False
Running on CPU
../../data/train_data.csv
microsoft/mdeberta-v3-base
✅ Config loaded and random seed set to: 42
📂 Model directory: ../models/best
📂 Reports directory: ../reports
✅ Folder setup complete.
✅ Found: ..\..\data\train_data.csv
✅ Found: ..\..\data\test_data.csv

All required data files are present and accessible.
✅ Configuration snapshot saved at:
../reports\config_snapshot.json




Per-label probabilities:



Unnamed: 0,toxic,severe_toxic,obscene,threat,insult,identity_hate
s1,0.0,0.0,0.0,0.0,0.0,0.0
s2,0.986,0.044,0.823,0.002,0.825,0.054
s3,0.003,0.0,0.0,0.0,0.0,0.0


In [2]:
# Cell 2 — Convert probabilities to 0/1 labels with default thresholds

import numpy as np
import pandas as pd

# Every label starts at the same 0.5 cut-off; per-label values can be tuned on the validation set later.
thresholds = dict.fromkeys(cfg.labels, 0.5)

def probs_to_labels(probs: np.ndarray, thresholds: dict):
    """Turn per-label probabilities into 0/1 decisions.

    Args:
        probs: Array whose last axis is ordered like ``cfg.labels``.
        thresholds: Mapping from label name to its decision threshold.

    Returns:
        Integer array with the same shape as ``probs``; an entry is 1 where
        the probability is greater than or equal to that label's threshold.

    Raises:
        KeyError: If ``thresholds`` has no entry for one of ``cfg.labels``.
    """
    probs = np.asarray(probs)

    missing = [lab for lab in cfg.labels if lab not in thresholds]
    if missing:
        raise KeyError(f"no threshold configured for label(s): {missing}")

    # Build the thresholds in the same float dtype as the probabilities.
    # Forcing float32 made a float64 probability that is exactly equal to a
    # tuned threshold (e.g. 0.3) compare as *below* it after type promotion.
    thr_dtype = probs.dtype if np.issubdtype(probs.dtype, np.floating) else np.float64
    thr = np.array([thresholds[lab] for lab in cfg.labels], dtype=thr_dtype)
    return (probs >= thr).astype(int)

# Apply the per-label cut-offs and tabulate the decisions with the same s1..sN row names.
labels = probs_to_labels(probs, thresholds)
labels_df = pd.DataFrame(
    labels,
    index=[f"s{pos}" for pos, _ in enumerate(samples, start=1)],
    columns=list(cfg.labels),
)
print("Binary decisions (threshold=0.5):\n")
display(labels_df)


Binary decisions (threshold=0.5):



Unnamed: 0,toxic,severe_toxic,obscene,threat,insult,identity_hate
s1,0,0,0,0,0,0
s2,1,0,1,0,1,0
s3,0,0,0,0,0,0
