<a href="https://colab.research.google.com/github/makifcevik/python-notebooks/blob/main/NewsClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install torch evaluate datasets transformers huggingface_hub
!pip install --upgrade fsspec datasets transformers evaluate

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Collecting fsspec
  Using cached fsspec-2025.5.0-py3-none-any.whl.metadata (11 kB)


In [1]:
import torch
import transformers

# Record the library versions this run used, as "<torch> <transformers>".
print(f"{torch.__version__} {transformers.__version__}")

2.6.0+cu124 4.52.3


In [28]:
from transformers import BertTokenizer, AutoTokenizer, AutoModelForSequenceClassification, BertForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import evaluate

In [14]:
def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into accuracy and weighted F1.

    Args:
        eval_pred: A two-item sequence ``(logits, labels)``. The predicted
            class for each row is the argmax of ``logits`` over its last axis.

    Returns:
        dict: ``{"accuracy": ..., "f1": ...}`` where ``accuracy`` comes from
        ``accuracy_score`` and ``f1`` from ``f1_score(average="weighted")``.
    """
    raw_scores, gold = eval_pred
    # Highest-scoring class along the last axis is the prediction.
    predicted = np.argmax(raw_scores, axis=-1)

    acc = accuracy_score(gold, predicted)
    weighted_f1 = f1_score(gold, predicted, average="weighted")
    return {"accuracy": acc, "f1": weighted_f1}

In [21]:
def _infer_num_labels(dataset):
    """Work out how many classes the 'label' column of ``dataset`` has.

    Uses the class count declared on the column's feature when it exposes a
    ``num_classes`` attribute. Otherwise falls back to ``max(label) + 1`` over
    the 'train' and 'test' splits, so a sample that happens to miss one class
    id still yields a classifier head wide enough for every label present.

    Raises:
        ValueError: If neither split contains any label.
    """
    declared = getattr(dataset["train"].features["label"], "num_classes", None)
    if declared is not None:
        return declared

    observed = set()
    for split in ("train", "test"):
        observed.update(dataset[split]["label"])
    if not observed:
        raise ValueError("Cannot infer num_labels: 'train' and 'test' contain no labels")
    # Assumes labels are integer class ids counted from 0 -- TODO confirm for new datasets.
    return max(observed) + 1


def train_model(model_name, dataset, num_labels=None, epochs=1, batch_size=16):
    """Fine-tune a sequence-classification checkpoint and evaluate it.

    Args:
        model_name: HF model path or custom model; passed to
            ``AutoTokenizer`` / ``AutoModelForSequenceClassification``.
        dataset: Must have 'train' and 'test' splits with 'text' and 'label'
            columns. The caller's object is not reassigned; the tokenized copy
            is local to this function.
        num_labels: Size of the classification head. Auto-detected from the
            dataset if None.
        epochs: Training epochs.
        batch_size: Per-device batch size (used for both train and eval).

    Returns:
        metrics: Dictionary of evaluation metrics from ``trainer.evaluate()``
        trainer: Trainer object for further analysis

    Raises:
        ValueError: If ``num_labels`` is None and the dataset has no labels.

    Notes:
        The Trainer is configured with ``output_dir`` under ``./results/`` (one
        directory per model name, '/' replaced by '-') and
        ``logging_dir="./logs"``.
    """
    # Imported locally so this cell does not rely on another cell having run.
    import torch

    # Auto-detect num_labels if not specified
    if num_labels is None:
        num_labels = _infer_num_labels(dataset)

    # Initialize tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        ignore_mismatched_sizes=True
    )

    def tokenize(batch):
        # Truncate only. Padding is left to DataCollatorWithPadding below so
        # each batch is padded to its own longest sequence instead of 128.
        return tokenizer(
            batch["text"],
            truncation=True,
            max_length=128
        )

    dataset = dataset.map(tokenize, batched=True)
    dataset = dataset.rename_column("label", "labels")  # HF expects 'labels'

    # Pads each batch dynamically at collation time
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=f"./results/{model_name.replace('/', '-')}",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_dir="./logs",
        logging_steps=100,
        load_best_model_at_end=True,
        report_to="none",
        optim="adamw_torch",
        # Mixed precision only when a CUDA device is present, so the function
        # also runs on CPU-only runtimes.
        fp16=torch.cuda.is_available(),
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        compute_metrics=compute_metrics,
        data_collator=data_collator,
    )

    # Training and evaluation
    trainer.train()
    metrics = trainer.evaluate()

    return metrics, trainer  # Return both metrics and trainer object

In [30]:
# Utility function to load a small portion of the dataset
def load_mini_dataset(dataset_name, sample_percent=1, seed=42):
    """Load ``dataset_name`` and keep a shuffled fraction of 'train' and 'test'.

    Args:
        dataset_name: Identifier handed to ``load_dataset``.
        sample_percent: Percentage of each split to keep; the row count is
            ``int(len(split) * sample_percent // 100)``.
        seed: Seed passed to each split's ``shuffle``.

    Returns:
        DatasetDict with 'train' and 'test' keys holding the sampled subsets.
    """
    source = load_dataset(dataset_name)

    def _sample(split_name):
        # Shuffle the split, then keep the leading `keep` rows of the result.
        split = source[split_name]
        shuffled = split.shuffle(seed=seed)
        keep = int(len(split) * sample_percent // 100)
        return shuffled.select(range(keep))

    return DatasetDict({split_name: _sample(split_name) for split_name in ("train", "test")})

In [33]:
# Different datasets: each entry keeps a shuffled 5% of the 'train' and 'test'
# splits (see load_mini_dataset).
# NOTE(review): the variable name `datasets` matches the package that
# load_dataset/DatasetDict are imported from; a later `import datasets` in this
# notebook would rebind it.
datasets = {
    "ag_news": load_mini_dataset("ag_news", sample_percent=5),
    "bbc": load_mini_dataset("SetFit/bbc-news", sample_percent=5),
    "20news": load_mini_dataset("SetFit/20_newsgroups", sample_percent=5)
}

# Models with different architectures
models = [
    "distilbert-base-uncased", # Transformer (Small)
    "huawei-noah/TinyBERT_General_4L_312D", # Transformer (Tiny)
    "roberta-base" # Transformer (Big)
]

# Fine-tune every model on every dataset for 2 epochs.
# Layout: results[dataset_name][model_name] -> metrics dict from train_model.
results = {}
for ds_name, dataset in datasets.items():
    results[ds_name] = {}
    for model_name in models:
        # Only the metrics are kept; the returned Trainer is discarded.
        metrics, _ = train_model(model_name, dataset, epochs=2)
        results[ds_name][model_name] = metrics

Repo card metadata block was not found. Setting CardData to empty.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/380 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.3197,0.336764,0.886842,0.88843
2,0.1585,0.32968,0.9,0.900782


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/380 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.5309,0.503374,0.839474,0.839212
2,0.3274,0.489921,0.847368,0.847202


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/380 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.3707,0.360785,0.881579,0.881856
2,0.1747,0.343762,0.897368,0.898524


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.542266,0.28,0.179577
2,No log,1.490176,0.4,0.313629


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.608379,0.2,0.074576
2,No log,1.606562,0.22,0.079344


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.605742,0.32,0.18124
2,No log,1.582305,0.3,0.208185


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/565 [00:00<?, ? examples/s]

Map:   0%|          | 0/376 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,2.608339,0.319149,0.229804
2,No log,2.395048,0.340426,0.261717


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/565 [00:00<?, ? examples/s]

Map:   0%|          | 0/376 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,2.992614,0.042553,0.003474
2,No log,2.977856,0.055851,0.013834


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/565 [00:00<?, ? examples/s]

Map:   0%|          | 0/376 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,2.673898,0.220745,0.159154
2,No log,2.172055,0.396277,0.350122


In [16]:
!pip install prettytable



In [34]:
from prettytable import PrettyTable

def print_results_table(results):
    """Print one PrettyTable per dataset summarising each model's eval metrics.

    Args:
        results: Nested mapping ``{dataset_name: {model_name: metrics}}``.
            From each ``metrics`` dict the keys 'eval_accuracy', 'eval_f1',
            'eval_loss' and 'eval_runtime' are read; a missing key is shown
            as 0.

    Returns:
        None. Each table is printed, followed by a line containing "\\n".
    """
    # (column header, metrics key, format spec) for every column after "Model"
    metric_columns = (
        ("Accuracy", "eval_accuracy", ".4f"),
        ("F1-Score", "eval_f1", ".4f"),
        ("Loss", "eval_loss", ".4f"),
        ("Time (s)", "eval_runtime", ".1f"),
    )
    headers = ["Model"] + [header for header, _, _ in metric_columns]

    for dataset_name, model_results in results.items():
        table = PrettyTable()
        table.title = f"{dataset_name.upper()} Performance"
        table.field_names = headers

        for model_name, metrics in model_results.items():
            # Drop any "org/" prefix and cap the label at 15 characters.
            label = model_name.rsplit('/', 1)[-1][:15]
            cells = [format(metrics.get(key, 0), spec) for _, key, spec in metric_columns]
            table.add_row([label] + cells)

        print(table)
        print("\n")

# Render one summary table per dataset from the nested `results` mapping.
print_results_table(results)

+-----------------------------------------------------------+
|                    AG_NEWS Performance                    |
+-----------------+----------+----------+--------+----------+
|      Model      | Accuracy | F1-Score |  Loss  | Time (s) |
+-----------------+----------+----------+--------+----------+
| distilbert-base |  0.9000  |  0.9008  | 0.3297 |   0.5    |
| TinyBERT_Genera |  0.8474  |  0.8472  | 0.4899 |   0.3    |
|   roberta-base  |  0.8974  |  0.8985  | 0.3438 |   0.8    |
+-----------------+----------+----------+--------+----------+


+-----------------------------------------------------------+
|                      BBC Performance                      |
+-----------------+----------+----------+--------+----------+
|      Model      | Accuracy | F1-Score |  Loss  | Time (s) |
+-----------------+----------+----------+--------+----------+
| distilbert-base |  0.4000  |  0.3136  | 1.4902 |   0.1    |
| TinyBERT_Genera |  0.2200  |  0.0793  | 1.6066 |   0.1    |
|   ro