sentences were taken from the book "Wonder" by R.J.Palacio

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("sentences.csv")
df.head()

Unnamed: 0,id,text,label
0,1,Dad had told me he was really proud of how I'd...,Very Positive
1,2,Mom smiled at me and her smile kind of hugged me,Very Positive
2,3,All she could see was how pretty my eyes were,Very Positive
3,4,Everyone was laughing above my head,Very Positive
4,5,I smiled even though I didn't want to let them...,Very Positive


In [3]:
label2id = {
    "Very Negative": 0,
    "Negative": 1,
    "Neutral": 2,
    "Positive": 3,
    "Very Positive": 4
}
id2label = {v: k for k, v in label2id.items()}

df["label_id"] = df["label"].map(label2id)
df.head()

Unnamed: 0,id,text,label,label_id
0,1,Dad had told me he was really proud of how I'd...,Very Positive,4
1,2,Mom smiled at me and her smile kind of hugged me,Very Positive,4
2,3,All she could see was how pretty my eyes were,Very Positive,4
3,4,Everyone was laughing above my head,Very Positive,4
4,5,I smiled even though I didn't want to let them...,Very Positive,4


In [4]:
df["label"].value_counts()

label
Positive         22
Neutral          22
Very Positive    20
Negative         19
Very Negative    17
Name: count, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

### llm

In [6]:
def to_3class(label):
    if label in ["Very Negative", "Negative"]:
        return "Negative"
    if label in ["Very Positive", "Positive"]:
        return "Positive"
    return "Neutral"

df["label_3"] = df["label"].apply(to_3class)

label2id_3 = {
    "Negative": 0,
    "Neutral": 1,
    "Positive": 2
}

df["label_3_id"] = df["label_3"].map(label2id_3)

In [7]:
print(df["label_3"].value_counts())

label_3
Positive    42
Negative    36
Neutral     22
Name: count, dtype: int64


In [8]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["label_3_id"])

In [9]:
print("Rows in test_df after split:", len(test_df))

Rows in test_df after split: 20


In [10]:
#pip install requests

In [11]:
import requests

HF_TOKEN = "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"

def query_llm(text):
    url = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-xlm-roberta-base-sentiment"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}"
    }

    response = requests.post(
        url,
        headers=headers,
        json={"inputs": text}
    )

    result = response.json()

    if isinstance(result, list):
        label = result[0][0]["label"]

        mapping = {
            "LABEL_0": "Negative",
            "LABEL_1": "Neutral",
            "LABEL_2": "Positive"
        }

        return mapping.get(label, None)

    return None


In [12]:
llm_preds = []

for sent in test_df["text"]:
    pred = query_llm(sent) 
    llm_preds.append(pred)

test_df["llm_label"] = llm_preds
test_df["llm_label_id"] = test_df["llm_label"].map(label2id)

In [13]:
test_df = test_df.dropna(subset=["llm_label_id"])

In [29]:
precision_llm = precision_score(
    test_df["label_id"],
    test_df["llm_label_id"],
    average="macro"
)

recall_llm = recall_score(
    test_df["label_id"],
    test_df["llm_label_id"],
    average="macro"
)

print("LLM Precision:", precision_llm)
print("LLM Recall:", recall_llm)

KeyError: 'llm_label_id'

### xlm roberta

In [15]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["label_id"]
)

In [17]:
model_name = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=128
    )


In [18]:
from datasets import Dataset

train_dataset = Dataset.from_pandas(
    train_df[["text", "label_id"]]
)
test_dataset = Dataset.from_pandas(
    test_df[["text", "label_id"]]
)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

train_dataset = train_dataset.rename_column("label_id", "labels")
test_dataset = test_dataset.rename_column("label_id", "labels")

train_dataset.set_format("torch")
test_dataset.set_format("torch")

Map: 100%|██████████| 80/80 [00:00<00:00, 1679.23 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 1953.93 examples/s]


In [19]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=5,
    id2label=id2label,
    label2id=label2id
)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_steps=10,
    report_to="none"
)

In [21]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)

    precision, recall, f1, _ = precision_recall_fscore_support(
        labels,
        preds,
        average="macro",
        zero_division=0
    )

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [23]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)




  trainer = Trainer(


In [24]:
trainer.train()
results = trainer.evaluate()

print(results)



Step,Training Loss
10,1.6504
20,1.6431
30,1.6028
40,1.5457
50,1.5984




{'eval_loss': 1.5856245756149292, 'eval_precision': 0.43454545454545457, 'eval_recall': 0.32999999999999996, 'eval_f1': 0.30666666666666664, 'eval_runtime': 3.7813, 'eval_samples_per_second': 5.289, 'eval_steps_per_second': 0.793, 'epoch': 5.0}
