In [13]:
from transformers import pipeline
import torch
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Define the model checkpoint
model_name = "FacebookAI/roberta-large-mnli"

print("Loading model... this may take a minute.")

# 1. Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Load Model
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# 3. Verify Label Mapping (Crucial Step)
# We expect: 0 -> Contradiction (NTA), 2 -> Entailment (YTA)
print(f"Model Label Map: {model.config.id2label}")


Loading model... this may take a minute.


Some weights of the model checkpoint at FacebookAI/roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model Label Map: {0: 'CONTRADICTION', 1: 'NEUTRAL', 2: 'ENTAILMENT'}


In [2]:
data = pd.read_csv("data/samples/sample_20000.csv")

data = data[["title", "selftext", "link_flair_text"]]

asshole_flairs = ["asshole", 
                  "slight asshole",
                  "Asshole", 
                  "asshole (a bit)", 
                  "Obvious Asshole",
                  "Asshole (but funny/justified)", 
                  "justified asshole",
                  "huge asshole", 
                  "asshole (Kind of)",
                  "asshole (tiny bit)", 
                  "Crouching Liar; hidden asshole",
                  "Not the A-hole POO Mode",
                  "Asshole POO Mode",
                  "asshole"]

not_enough_info_flairs = ["not enough info",
                          "no assholes here",
                          "ambiguous"]

not_an_asshole_flairs = ["not the asshole",
                         "not the a-hole",
                         "Not the A-hole",
                         "Not the A-hole POO Mode",
                         "justified"]

def assign_target(flair):
    if flair in asshole_flairs:
        return 1
    elif flair in not_enough_info_flairs:
        return 2
    elif flair in not_an_asshole_flairs:
        return 0
    else:
        raise ValueError("Unexpected flair: {}".format(flair))

  data = pd.read_csv("data/samples/sample_20000.csv")


In [3]:
# Assign target labels
data["target"] = data["link_flair_text"].apply(assign_target)

# Remove ambiguous cases
data = data[data["target"]!=2]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    data[["title", "selftext"]],
    data["target"],
    test_size=0.2,
    random_state=42,
    stratify=data["target"]
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=160,
    random_state=42,
    stratify=y_train
)

X_train.shape, X_val.shape, X_test.shape

((15834, 2), (160, 2), (3999, 2))

# Modelling

In [17]:

# 2. DEFINE YOUR LABELS
# The pipeline will auto-generate hypotheses like "This text is about {label}."
candidate_labels = ["asshole", "justified", "ambiguous"]

# 3. TEST IT
post = "I broke up with my boyfriend because he screamed at me every day."

result = classifier(post, candidate_labels)

print(f"Scores: {result['scores']}")
print(f"Winner: {result['labels'][0]}")
result

Scores: [0.5896820425987244, 0.3716268837451935, 0.038691096007823944]
Winner: asshole


{'sequence': 'I broke up with my boyfriend because he screamed at me every day.',
 'labels': ['asshole', 'ambiguous', 'justified'],
 'scores': [0.5896820425987244, 0.3716268837451935, 0.038691096007823944]}

In [22]:
def predict_verdict_forced(post):
    hypothesis = "The author of this story is being an asshole."
    
    # 1. Encode
    input_ids = tokenizer.encode(
        post, 
        hypothesis, 
        return_tensors="pt",
        truncation=True,
        max_length=512
    )
    
    # 2. Get Logits
    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits[0]  # Shape: [3] -> [Contradiction, Neutral, Entailment]

    # --- THE HEAVY LIFTING FIX ---
    # RoBERTa MNLI mapping: 0=Contradiction (NTA), 1=Neutral, 2=Entailment (YTA)
    
    # We slice out only indices 0 and 2
    binary_logits = torch.tensor([logits[0], logits[2]]) 
    
    # We re-calculate softmax on just these two options
    probs = F.softmax(binary_logits, dim=0)
    
    nta_prob = probs[0].item() # Contradiction
    yta_prob = probs[1].item() # Entailment (now index 1 of our binary tensor)

    # 3. Simple Decision
    if yta_prob > nta_prob:
        return 1 # YTA
    else:
        return 0 # NTA

# --- Run your loop again with this function ---

X_val['prediction'] = X_val['selftext'].apply(predict_verdict_forced)

In [23]:
print(classification_report(y_val, X_val["prediction"], zero_division=0))

              precision    recall  f1-score   support

           0       0.78      0.79      0.78       126
           1       0.18      0.18      0.18        34

    accuracy                           0.66       160
   macro avg       0.48      0.48      0.48       160
weighted avg       0.65      0.66      0.65       160

