In [None]:
# Install Libraries

!pip install transformers datasets --quiet
!pip install -U transformers datasets accelerate

In [None]:
## Import all necessary libraries

import os
import pandas as pd
import numpy as np
import transformers
import torch
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset
from datasets import ClassLabel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.metrics import confusion_matrix
# Set the W&B opt-out environment variable up front, before any training
# objects are created further down the notebook.
os.environ["WANDB_DISABLED"] = "true"
# Record the installed transformers version in the cell output.
print(transformers.__version__)

In [None]:
## Upload the dataset
# Colab-only step: relies on google.colab's upload helper.
# NOTE(review): presumably the uploaded file must be named
# bbc_news_text_complexity_summarization.csv, since that is the path the
# loading cell reads — confirm.

from google.colab import files
# `uploaded` holds whatever files.upload() returns; it is not used again in
# the cells visible here.
uploaded = files.upload()

In [None]:
## Load the dataset
# Read the comma-separated file into a pandas DataFrame and preview the
# first rows.

CSV_PATH = "bbc_news_text_complexity_summarization.csv"
df = pd.read_csv(CSV_PATH, sep=",")
df.head()

In [None]:
## Convert to HuggingFace Dataset
# Wrap the pandas frame in a `datasets.Dataset` so the later cells can use
# `.map`, `.train_test_split` and `.set_format` on it.

dataset = Dataset.from_pandas(df)
# Bare expression: display the dataset summary as the cell output.
dataset

In [None]:
## Encode label strings
# Build a ClassLabel from the distinct values of the "labels" column, then
# rewrite that column so every row stores the integer id of its label.

label_names = df["labels"].unique().tolist()
class_label = ClassLabel(names=label_names)


def encode_labels(example):
    """Swap the label string of one example for its ClassLabel integer id.

    The example is modified in place and also returned, which is the shape
    `Dataset.map` expects from a per-row function.
    """
    label_id = class_label.str2int(example["labels"])
    example["labels"] = label_id
    return example


dataset = dataset.map(encode_labels)


In [None]:
## Create the tokenizer
# Load the tokenizer from the same "distilbert-base-uncased" checkpoint name
# that the classification model cell uses.

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")


In [None]:
# Tokenize text

# Every text is truncated or padded to exactly 128 tokens; the tokenizer
# output (including input ids and attention masks) is what the later
# tensor-format cell selects for training.

def tokenize(batch):
    """Tokenize the "text" field of a batch to fixed-length (128) encodings."""
    texts = batch["text"]
    encoded = tokenizer(
        texts,
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    return encoded

# batched=True hands `tokenize` many rows at once instead of one row per call.
dataset = dataset.map(tokenize, batched=True)


In [None]:
## Train-Test Split
# 80/20 split with a fixed seed. Note that after this cell `dataset` refers
# to the split result (indexed by "train"/"test"), not the unsplit dataset.

dataset = dataset.train_test_split(seed=42, test_size=0.2)
train_ds, test_ds = dataset["train"], dataset["test"]


In [None]:
## Set tensor format
# Expose only the model inputs and the target as torch tensors, identically
# for the training and the test split.

tensor_columns = ["input_ids", "attention_mask", "labels"]
for split in (train_ds, test_ds):
    # Each split gets its own copy of the column list.
    split.set_format(type="torch", columns=list(tensor_columns))


In [None]:
## Define DistilBERT classification model
# The classification head is sized from the ClassLabel built earlier. The
# id <-> name mappings are stored in the model config so that saved
# checkpoints and downstream predictions report the real class names
# instead of generic LABEL_0, LABEL_1, ... placeholders.

num_classes = len(class_label.names)

id2label = dict(enumerate(class_label.names))
label2id = {name: idx for idx, name in id2label.items()}

model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
)


In [None]:
## Training
# Evaluate, log and checkpoint once per epoch. With 15 epochs that would
# leave 15 checkpoints in ./results, so save_total_limit caps how many are
# kept on disk (older ones are deleted as new ones are written).

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,         # keep only the most recent checkpoints
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=15,
    weight_decay=0.01,
    report_to="none",           # disable W&B
)


In [None]:
# Compute accuracy, precision, recall, and F1 (macro) for model predictions

def compute_metrics(eval_pred):
    """Turn raw model outputs into accuracy and macro-averaged P/R/F1.

    Args:
        eval_pred: a ``(logits, labels)`` pair; ``logits`` has one row per
            example and one column per class, ``labels`` holds the true
            class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision_macro``,
        ``recall_macro`` and ``f1_macro``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit in each row.
    preds = np.argmax(logits, axis=1)

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 yields the same scores as the default but without the
    # UndefinedMetricWarning raised when a class receives no predictions
    # (which can happen in early epochs).
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )

    return {
        "accuracy": accuracy,
        "precision_macro": precision,
        "recall_macro": recall,
        "f1_macro": f1
    }

In [None]:
# Trainer object
# Bundles the model, training configuration, both splits and the metric
# function. The tokenizer is passed as `processing_class`, the current name
# for what used to be the deprecated `tokenizer=` keyword; the install cell
# pulls the latest transformers, so the current keyword is used.

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics
)


In [None]:
## Train
# Runs the fine-tuning loop configured by `training_args`; evaluation on
# `test_ds` and checkpointing happen once per epoch as set there.

trainer.train()

In [18]:
# Evaluate the trained model on the Trainer's eval dataset (test_ds) and
# print the resulting metrics dict.
results = trainer.evaluate()
print(results)

{'eval_loss': 0.14586851000785828, 'eval_accuracy': 0.971830985915493, 'eval_precision_macro': 0.9718320079539117, 'eval_recall_macro': 0.9704769868229496, 'eval_f1_macro': 0.9709956723738122, 'eval_runtime': 2.0238, 'eval_samples_per_second': 210.491, 'eval_steps_per_second': 13.341, 'epoch': 15.0}


In [19]:
## Predict on test samples

preds_output = trainer.predict(test_ds)       # Predict on test set
y_true = preds_output.label_ids               # True labels
y_pred = np.argmax(preds_output.predictions, axis=1)  # Predicted labels

# Reuse compute_metrics rather than repeating the sklearn calls, so the
# numbers printed here are computed exactly the same way as the per-epoch
# evaluation metrics.
test_metrics = compute_metrics((preds_output.predictions, y_true))

accuracy = test_metrics["accuracy"]
precision = test_metrics["precision_macro"]
recall = test_metrics["recall_macro"]
f1 = test_metrics["f1_macro"]

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro): {recall:.4f}")
print(f"F1 (macro): {f1:.4f}")

Test Accuracy: 0.9718
Precision (macro): 0.9718
Recall (macro): 0.9705
F1 (macro): 0.9710
