# Fine tune a Hugging Face model on a custom dataset

In [1]:
%pip install datasets transformers torch accelerate


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Users/mihirkurdekar/PycharmProjects/model fine tuning/venv/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
!where python3

/opt/homebrew/bin/python3
/Library/Frameworks/Python.framework/Versions/3.10/bin/python3
/usr/local/bin/python3
/usr/bin/python3


In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load custom sample data
dataset = load_dataset("json", data_files="./data/sample_training_data.jsonl", split="train")

# Map completion to label
def label_map(example):
    example["label"] = 1 if example["completion"] == "positive" else 0
    return example

dataset = dataset.map(label_map)

# Split dataset
dataset = dataset.train_test_split(test_size=0.2)


In [5]:
# Load tokenizer and model
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Tokenize prompt field
def preprocess(examples):
    return tokenizer(examples["prompt"], truncation=True, padding="max_length", max_length=128)

tokenized = dataset.map(preprocess, batched=True)


Map: 100%|██████████| 4/4 [00:00<00:00, 803.51 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 615.00 examples/s]


In [7]:
# Training arguments
args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=10,
    logging_steps=1,
    save_steps=5,
)

# Trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)

# Train
trainer.train()



Step,Training Loss
1,0.8185
2,0.7393
3,0.6706
4,0.5885
5,0.624
6,0.5139
7,0.6622
8,0.4761
9,0.4183
10,0.5203




TrainOutput(global_step=20, training_loss=0.46026868745684624, metrics={'train_runtime': 2.9277, 'train_samples_per_second': 13.663, 'train_steps_per_second': 6.831, 'total_flos': 1324673986560.0, 'train_loss': 0.46026868745684624, 'epoch': 10.0})

In [8]:
# Test: Run inference on a sample prompt
test_prompt = "Classify the following review as positive or negative: 'The service was terrible.'"
device = torch.device("cpu")  # Force CPU to avoid MPS errors
model.to(device)
inputs = tokenizer(test_prompt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
inputs = {k: v.to(device) for k, v in inputs.items()}
outputs = model(**inputs)
predicted_label = outputs.logits.argmax(dim=1).item()
label_str = "positive" if predicted_label == 1 else "negative"
print(f"Test prompt: {test_prompt}")
print(f"Predicted label: {label_str}")
print(f"Predicted label: {label_str}")

Test prompt: Classify the following review as positive or negative: 'The service was terrible.'
Predicted label: negative
Predicted label: negative
