<a href="https://colab.research.google.com/github/ma55530/SemEval2026-CLARITY-FER/blob/main/hierarchiral/TwoLayeredBinary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Installing and importing the necessary libraries**

In [None]:
pip -q install scikit-learn torch pandas datasets transformers torchvision accelerate

In [None]:
import pandas as pd
from datasets import Dataset, load_dataset
import os
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from datasets import load_dataset
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments


# **Loading the data**

In [None]:
# Load the "ailsntua/QEvasion" dataset with `datasets.load_dataset`.
# NOTE(review): despite its name, `df` is the object returned by load_dataset
# (indexed by "train" / "test" further down), not a pandas DataFrame.
print("Loading dataset...")
df = load_dataset("ailsntua/QEvasion")

def clarity_to_label(row):
    """Attach integer targets derived from the row's ``clarity_label`` string.

    Two fields are written into ``row`` (in place) and the same object is
    returned:

    * ``label``: 0 for "Clear Reply", 1 for "Ambivalent", 2 for
      "Clear Non-Reply".
    * ``binary_label``: 0 when ``label`` is 0 or 1, otherwise 1.

    Raises:
        KeyError: if ``clarity_label`` is missing or is not one of the three
            names above.
    """
    class_names = ("Clear Reply", "Ambivalent", "Clear Non-Reply")
    name_to_id = dict(zip(class_names, range(len(class_names))))

    fine_id = name_to_id[row["clarity_label"]]
    row["label"] = fine_id
    # The coarse target is derived from the integer id, not from the raw string.
    row["binary_label"] = 1 if fine_id not in (0, 1) else 0
    return row

# Run clarity_to_label over the loaded dataset so it carries the integer
# `label` and `binary_label` fields.
df = df.map(clarity_to_label)
# 3-class integer labels of the test split.
y_test = df["test"]["label"]

# **Defining the evaluation**

In [None]:
def evaluate(y_true, y_pred):
    """Print a classification report and display the confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    report = classification_report(y_true, y_pred)
    print(report)

    ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
    plt.show()

def compute_metrics_binary(eval_pred):
    """Precision / recall / F1 for the binary stage, using 'binary' averaging.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            example is the argmax over the last axis of ``logits``.

    Returns:
        dict with the keys ``"precision"``, ``"recall"`` and ``"f1"``.
        Undefined scores are reported as 0 (``zero_division=0``).
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)

    scorers = {"precision": precision_score, "recall": recall_score, "f1": f1_score}
    return {
        name: scorer(gold, predicted, average='binary', zero_division=0)
        for name, scorer in scorers.items()
    }

def compute_metrics_fine(eval_pred):
    """Macro-averaged precision / recall / F1 for the fine-grained stage.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            example is the argmax over the last axis of ``logits``.

    Returns:
        dict with the keys ``"precision"``, ``"recall"`` and ``"f1"``.
        Undefined scores are reported as 0 (``zero_division=0``).
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)

    shared = dict(average='macro', zero_division=0)
    results = {}
    results["precision"] = precision_score(gold, predicted, **shared)
    results["recall"] = recall_score(gold, predicted, **shared)
    results["f1"] = f1_score(gold, predicted, **shared)
    return results


# **Loading the models and tokenizer, and tokenizing the data**

In [None]:


# One tokenizer is shared by both stages.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Stage 1 classifier: 2 outputs, trained on `binary_label`.
binary_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Stage 2 classifier: 2 outputs, trained only on examples whose `label` is 0 or 1.
fine_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

def tokenize(example):
    """Tokenize one example as a single "Question: ... Answer: ..." string.

    The text is padded and truncated to exactly 512 tokens, and the example's
    ``label`` and ``binary_label`` values are copied onto the tokenizer output.

    Args:
        example: Mapping with ``interview_question``, ``interview_answer``,
            ``label`` and ``binary_label`` entries.

    Returns:
        The tokenizer output with ``label`` and ``binary_label`` added.
    """
    text = "".join(
        ("Question: ", example["interview_question"], " Answer: ", example["interview_answer"])
    )
    features = tokenizer(text, truncation=True, max_length=512, padding="max_length")

    # Carry both targets along so later cells can choose which one to train on.
    for target in ("label", "binary_label"):
        features[target] = example[target]
    return features

# Raw text columns are no longer needed once the examples are tokenized.
cols_to_remove = ["interview_question", "interview_answer", "clarity_label"]

# Tokenize each split, then drop the raw text columns.
tokenized_train = df["train"].map(tokenize).remove_columns(cols_to_remove)
tokenized_test = df["test"].map(tokenize).remove_columns(cols_to_remove)

# Switch both splits to the "torch" output format (in-place on the dataset objects).
tokenized_train.set_format("torch")
tokenized_test.set_format("torch")



# **Preparing the data for training**

In [None]:
# Stage 1 data: every example, with `binary_label` exposed under the name "labels".
binary_train = tokenized_train.remove_columns(["label"]).rename_column("binary_label", "labels")
binary_test  = tokenized_test.remove_columns(["label"]).rename_column("binary_label", "labels")

# Stage 2 data: only examples whose binary target is 0 (fine label 0 or 1),
# with the fine-grained `label` exposed under the name "labels".
train_fine = tokenized_train.filter(lambda x: x["binary_label"].item() == 0 and x["label"].item() in [0,1]).remove_columns(["binary_label"])
test_fine  = tokenized_test.filter(lambda x: x["binary_label"].item() == 0 and x["label"].item() in [0,1]).remove_columns(["binary_label"])

train_fine = train_fine.rename_column("label", "labels")
test_fine  = test_fine.rename_column("label", "labels")

# "balanced" class weights over the 3-class training labels.
# NOTE(review): these weights are only printed; the trainers defined later in
# this notebook do not receive them (stage 1 is given [1.0, 1.0], stage 2 uses
# a plain Trainer).
labels_np = np.array(df["train"]["label"])
class_weights_np = compute_class_weight("balanced", classes=np.unique(labels_np), y=labels_np)
class_weights = torch.tensor(class_weights_np, dtype=torch.float)
print("Class weights:", class_weights)

# Sanity check: the fine-grained training set must contain only labels 0 and 1.
# Each value is converted to a plain int first, because the dataset is in
# "torch" format and tensors hash by identity, so set() over the raw values
# would hold one entry per row instead of the distinct label ids.
print({int(v) for v in train_fine["labels"]})

# **Defining the weighted trainer and training both stages**

In [None]:
# Set the WANDB_DISABLED environment variable before any trainer is created.
# NOTE(review): presumably this keeps Weights & Biases logging off; the
# TrainingArguments in this notebook also pass report_to="none".
os.environ["WANDB_DISABLED"] = "true"

class WeightedTrainer(Trainer):
    """Trainer whose loss is cross-entropy with optional per-class weights.

    Args:
        weights: 1-D float tensor holding one weight per class, or ``None``
            (the default) for plain, unweighted cross-entropy.
        *args, **kwargs: Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, weights=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.weights = weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute the (optionally class-weighted) cross-entropy for one batch.

        Args:
            model: The model being trained or evaluated.
            inputs: Batch dict; must contain ``"labels"``.
            return_outputs: When true, return ``(loss, outputs)``.
            **kwargs: Accepted and ignored, so extra keyword arguments passed
                by the caller do not break the override.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is set.
        """
        labels = inputs["labels"]
        outputs = model(**inputs)
        logits = outputs.logits

        # `weights` defaults to None, so it must not be dereferenced blindly.
        # The device is taken from the logits rather than from `model`, because
        # a wrapped model (e.g. nn.DataParallel) has no `.device` attribute.
        weight = None if self.weights is None else self.weights.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits, labels)

        return (loss, outputs) if return_outputs else loss



# Stage 1: fine-tune the binary classifier with uniform class weights.
# NOTE(review): the test split doubles as the evaluation set here, so the
# checkpoint restored by load_best_model_at_end is selected on test data.
trainer_binary = WeightedTrainer(
    model=binary_model,
    args=TrainingArguments(
        output_dir="binary_output",
        report_to="none",
        fp16=True,
        # Optimisation schedule.
        learning_rate=2e-5,
        weight_decay=0.01,
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        # Evaluate and checkpoint once per epoch; keep the checkpoint with the best F1.
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
    ),
    train_dataset=binary_train,
    eval_dataset=binary_test,
    compute_metrics=compute_metrics_binary,
    weights=torch.tensor([1.0, 1.0]),
)
trainer_binary.train()



In [None]:
# Stage 2: fine-tune the Clear Reply vs. Ambivalent classifier with the stock
# (unweighted) Trainer.
# NOTE(review): the filtered test split doubles as the evaluation set here, so
# the checkpoint restored by load_best_model_at_end is selected on test data.
trainer_fine = Trainer(
    model=fine_model,
    args=TrainingArguments(
        output_dir="fine_output_unweighted",
        report_to="none",
        fp16=True,
        # Optimisation schedule.
        learning_rate=2e-5,
        weight_decay=0.01,
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        # Evaluate and checkpoint once per epoch; keep the checkpoint with the best F1.
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
    ),
    train_dataset=train_fine,
    eval_dataset=test_fine,
    compute_metrics=compute_metrics_fine,
)

trainer_fine.train()


In [None]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

def classify_two_stage(example, stage1_model=None, stage2_model=None):
    """Predict the 3-class label of ONE example with the two-stage cascade.

    Stage 1 is asked first; if it predicts class 1 the result is 2
    ("Clear Non-Reply") and stage 2 is never run. Otherwise stage 2's
    prediction (0 or 1) is returned as is.

    The example may be a raw row of the tokenized dataset: entries that the
    model's ``forward`` does not accept (label columns, leftover text columns)
    are dropped, ``labels`` is dropped because no loss is needed, 1-D tensors
    get a leading batch dimension, and tensors are moved to the model's device.
    An already batched, model-ready dict of size one works as before.

    Args:
        example: Mapping from feature name to value for a single example.
        stage1_model: Binary model; defaults to the notebook-level
            ``binary_model``.
        stage2_model: Fine-grained model; defaults to the notebook-level
            ``fine_model``.

    Returns:
        int: the predicted class id (2 when stage 1 predicts class 1).

    Raises:
        RuntimeError: from ``.item()`` if the inputs hold more than one example.
    """
    import inspect  # local import: only the helper below needs it

    def _predict(model):
        """Return the argmax class id that `model` assigns to `example`."""
        accepted = inspect.signature(model.forward).parameters
        first_param = next(model.parameters(), None)

        inputs = {}
        for key in example.keys():
            if key == "labels" or key not in accepted:
                continue
            value = example[key]
            if torch.is_tensor(value):
                if value.dim() == 1:
                    value = value.unsqueeze(0)  # single example -> batch of one
                if first_param is not None:
                    value = value.to(first_param.device)
            inputs[key] = value

        # Run in eval mode so dropout cannot make the prediction stochastic,
        # then restore whatever mode the model was in.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                logits = model(**inputs).logits
        finally:
            model.train(was_training)
        return logits.argmax(dim=-1).item()

    stage1 = binary_model if stage1_model is None else stage1_model
    if _predict(stage1) == 1:
        return 2  # Clear Non-Reply

    # Resolved lazily: stage 2 is only needed when stage 1 predicts class 0.
    stage2 = fine_model if stage2_model is None else stage2_model
    return _predict(stage2)

def plot_cm(trainer, dataset, display_labels):
    """Show the confusion matrix of a trainer's predictions on a dataset.

    Args:
        trainer: Object whose ``predict(dataset)`` result exposes
            ``predictions`` (per-class scores) and ``label_ids``.
        dataset: Dataset handed to ``trainer.predict``.
        display_labels: Class names used for the matrix axes.
    """
    output = trainer.predict(dataset)
    ConfusionMatrixDisplay.from_predictions(
        output.label_ids,
        np.argmax(output.predictions, axis=-1),
        display_labels=display_labels,
    )
    plt.show()

# Confusion matrix of each stage on its own evaluation data.
plot_cm(
    trainer_binary,
    binary_test,
    display_labels=["Clear Reply / Ambivalent", "Clear Non-Reply"],
)
plot_cm(
    trainer_fine,
    test_fine,
    display_labels=["Clear Reply", "Ambivalent"],
)