In [None]:
# IGNORE THIS CODE
import os
os.environ["WANDB_DISABLED"] = "true"

### **Fine-tuning for Multi-Class Classification**

Let's put all this together in a detailed, non-trivial example. We will perform transfer learning by fine-tuning a `distilbert-base-uncased` model on the **AG News** dataset, which involves classifying news articles into one of four categories: World (0), Sports (1), Business (2), and Sci/Tech (3).

PS. Using Google Colab's `generate` capability, I have added comments to each line of code.



#### **Step 1: Setup and Environment**

First, ensure you have the necessary libraries installed.


In [1]:
# 'accelerate' is required for the Trainer to work efficiently
! pip install transformers datasets evaluate accelerate -U

Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading datasets-4.4.1-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, datasets, evaluate
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 18.1.0
    Uninst

You'll also need to be logged into your Hugging Face account to push the model to the Hub (optional but good practice).

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### **Step 2: Load and Prepare the Dataset**

We will use the `datasets` library to load the AG News dataset and a tokenizer to prepare the text data.

In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the AG News dataset
dataset = load_dataset("ag_news")

# Load the tokenizer for our chosen pre-trained model
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# The AG News dataset has a 'text' column. We need a function to tokenize this text for every row in the dataset.
def tokenize_function(row):
    # The tokenizer will convert text into the input IDs and attention masks the model expects.
    # `truncation=True` ensures that inputs longer than the model's max length are cut short.
    # `padding="max_length"` can be used, but dynamic padding with a data collator is more efficient.
    return tokenizer(row["text"], truncation=True)

# Apply the tokenization function to the entire dataset.
# The `batched=True` argument processes multiple examples at once for speed.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# The 'text' and 'label' columns are no longer needed in this format.
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
# We also rename 'label' to 'labels' as this is the key the model expects for the labels.
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")

# Create smaller subsets for quicker training and testing - use these if you are running on local
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [4]:
from transformers import DataCollatorWithPadding

# Create a data collator. This will dynamically pad the batched data to the longest example in each batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

#### **Step 3: Load the Pre-trained Model**

We load the `distilbert-base-uncased` model using `AutoModelForSequenceClassification`. Crucially, we specify the number of labels in our dataset. This replaces the pre-trained classification head (which was for a different task) with a new, randomly initialized head suited for our 4-class problem.

In [5]:
from transformers import AutoModelForSequenceClassification

# We need to tell the model how many classes to predict. We can get this from the dataset's features.
num_labels = dataset["train"].features["label"].num_classes

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


At this point, the entire model is pre-trained except for the final classification layer. The weights of this new layer are random, and our goal during fine-tuning is to train this layer to become an effective AG News classifier.

#### **Step 4: Define Evaluation Metrics**

The `Trainer` can compute the loss during evaluation, but to get more interpretable metrics like accuracy and F1-score, we must define a `compute_metrics` function.

In [6]:
import numpy as np
import evaluate

# Load the accuracy metric from the 'evaluate' library
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    # `eval_pred` is a tuple containing the model's predictions (logits) and the true labels.
    logits, labels = eval_pred

    # The logits are the raw, unnormalized scores. We take the argmax to get the predicted class index.
    predictions = np.argmax(logits, axis=-1)

    # The metric object's compute method returns a dictionary with the metric name and value.
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script: 0.00B [00:00, ?B/s]

#### **Step 5: Configure `TrainingArguments`**

Now, we create an instance of `TrainingArguments` to define our comprehensive training strategy.

In [7]:
from transformers import TrainingArguments

# Define a name for our new model on the Hugging Face Hub
model_name = "distilbert-ag-news-finetuned"

training_args = TrainingArguments(
    output_dir=model_name,
    # --- Performance and Resource Management ---
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    fp16=True,  # Use mixed precision if a compatible GPU is available

    # --- Logging, Saving, and Evaluation Strategy ---
    eval_strategy="epoch", # Evaluate at the end of each epoch
    save_strategy="epoch",       # Save a checkpoint at the end of each epoch
    logging_strategy="epoch",    # Log metrics at the end of each epoch

    # --- Model Loading and Hub Integration ---
    load_best_model_at_end=True, # Load the best performing model checkpoint at the end
    metric_for_best_model="accuracy", # Use accuracy to determine the best model
    push_to_hub=True, # Upload the final model to the Hub
)

This configuration sets up a robust training run that evaluates and saves the model every epoch, tracks the best model based on accuracy, uses mixed-precision for speed, and shares the result.

#### **Step 6: Instantiate and Run the `Trainer`**

With all the components ready, we can now instantiate the `Trainer` and start the fine-tuning process.

In [9]:
from huggingface_hub import login

# Log in using your token
login(token="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")


In [10]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Start the training process!
trainer.train()

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Find your API key here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msushantk-ai031[0m ([33msushantk-ai031-nit-jalandhar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2306,0.175314,0.943026
2,0.1391,0.180664,0.949868
3,0.0954,0.206137,0.949474


TrainOutput(global_step=22500, training_loss=0.15500960828993054, metrics={'train_runtime': 1501.3156, 'train_samples_per_second': 239.79, 'train_steps_per_second': 14.987, 'total_flos': 9056469134955648.0, 'train_loss': 0.15500960828993054, 'epoch': 3.0})

When you run `trainer.train()`, you will see a progress bar and a log of the training process, which will look something like this after each epoch:

```
Epoch | Training Loss | Validation Loss | Accuracy
--------------------------------------------------
  1   |     0.6582    |      0.3512     |  0.8850
  2   |     0.2845    |      0.2987     |  0.9010
  3   |     0.1879    |      0.2955     |  0.9070

TrainOutput(global_step=189, training_loss=0.3712, metrics={'...'})
```

This output clearly shows the model's performance improving over time. Because we set `load_best_model_at_end=True` and `metric_for_best_model="accuracy"`, the `trainer` object now holds the weights from the end of Epoch 3, which achieved the highest accuracy of `0.9070`.



#### **Step 7: Push to Hub and Make a Prediction**

The final step is to share your model and use it for inference.

In [11]:
# The trainer.push_to_hub() command will upload your model, tokenizer,
# and training configuration to the Hub under the name you specified.
trainer.push_to_hub()

# --- Now let's use our fine-tuned model for a real prediction ---

from transformers import pipeline

# A new sports-related text
text = "The home team hit three home runs in the bottom of the ninth to win the game."

# Create a pipeline with our fine-tuned model
classifier = pipeline("sentiment-analysis", model=f"sampurn-gfg/{model_name}")

# Make a prediction
prediction = classifier(text)
print(prediction)

# We can also check the label mapping
print(dataset["train"].features["label"].names)

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...netuned/training_args.bin: 100%|##########| 5.84kB / 5.84kB            

  ...995565.954d032dc152.298.0: 100%|##########| 7.09kB / 7.09kB            

  ...netuned/model.safetensors:   6%|6         | 16.7MB /  268MB            

config.json:   0%|          | 0.00/761 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'LABEL_1', 'score': 0.9050748944282532}]
['World', 'Sports', 'Business', 'Sci/Tech']


The output would be:

```
[{'label': 'LABEL_1', 'score': 0.905}]
['World', 'Sports', 'Business', 'Sci/Tech']
```

The model correctly predicts `LABEL_1` with high confidence, which corresponds to the "Sports" category. This demonstrates that our transfer learning process was successful.