### Required Installs

In [None]:
!pip install transformers datasets evaluate accelerate peft

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: peft
Successfully installed peft-0.11.1


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import evaluate
from accelerate import Accelerator
from peft import get_peft_model, LoraConfig, TaskType

# Step 1: Create the Hugging Face Accelerate helper object
accelerator = Accelerator()

# Step 2: Fetch the IMDB movie-review dataset from the Hugging Face hub
dataset = load_dataset("imdb")

# Step 3: Load the pretrained checkpoint: tokenizer first, then the model
# with a 2-label sequence-classification head on top
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Step 4: Tokenizing the dataset
def preprocess_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens so that all
    rows end up with the same length.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    return encoded

# Apply the tokenizer to every split, one batch of examples at a time
encoded_dataset = dataset.map(preprocess_function, batched=True)

# Step 5: Setting up LoRA
# NOTE: this is plain LoRA, not QLoRA. The base model is loaded without any
# quantization, so only the low-rank adapters distinguish it from full fine-tuning.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # Sequence classification task
    inference_mode=False,
    r=8,  # Low-rank dimension
    lora_alpha=16,  # Scaling factor for low-rank updates
    lora_dropout=0.1,  # Dropout for low-rank updates
    target_modules=["q_lin", "v_lin", "k_lin", "out_lin"],  # Attention projections of DistilBERT
    # DistilBERT's classification head has two freshly initialized layers
    # (`pre_classifier` and `classifier`). PEFT freezes everything that is not an
    # adapter, so both head layers must be listed here to be trained and saved;
    # otherwise `pre_classifier` stays a frozen random projection.
    modules_to_save=["pre_classifier", "classifier"],
)
# Wrap the base model: base weights are frozen, LoRA adapters and the modules
# listed in `modules_to_save` remain trainable.
model = get_peft_model(model, peft_config)

# Step 6: Defining training arguments
training_args = TrainingArguments(
    output_dir="./results",
    # load_best_model_at_end requires the evaluation and save strategies to match
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,  # Keep at most two checkpoints on disk
    load_best_model_at_end=True,
    push_to_hub=False,
    # Mixed precision needs a CUDA device; requesting fp16 on a CPU-only machine
    # makes TrainingArguments raise, so enable it only when a GPU is present.
    fp16=torch.cuda.is_available(),
)

# Step 7: Accuracy is the evaluation metric
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(p):
    """Compute accuracy for one evaluation pass.

    `p` is expected to expose `predictions` (per-class scores) and `label_ids`
    (reference labels); the predicted class is the arg-max over the last axis.
    """
    predicted_labels = p.predictions.argmax(-1)
    scores = accuracy_metric.compute(
        predictions=predicted_labels,
        references=p.label_ids,
    )
    return scores

# Step 8: Building the Trainer
# Work on fixed-seed random subsets so training and evaluation finish quickly.
train_subset = encoded_dataset["train"].shuffle(seed=42).select(range(10000))
eval_subset = encoded_dataset["test"].shuffle(seed=42).select(range(5000))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    compute_metrics=compute_metrics,
)

# Step 9: Device placement / mixed precision
# No manual `accelerator.prepare(...)` call is needed here. Trainer creates its
# own optimizer and dataloaders and prepares them (together with the model) with
# Accelerate inside `train()` / `evaluate()`. A manual call at this point would
# receive `trainer.optimizer` while it is still None, and the objects it returns
# would never be used by `trainer.train()`.

# Step 10: Run fine-tuning
trainer.train()

# Step 11: Score the fine-tuned model on the evaluation subset and show the metrics
eval_results = trainer.evaluate()
print(eval_results)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3769,0.396927,0.8202
2,0.3552,0.37726,0.8324
3,0.5117,0.373403,0.8342


{'eval_loss': 0.3734031617641449, 'eval_accuracy': 0.8342, 'eval_runtime': 7.5586, 'eval_samples_per_second': 661.495, 'eval_steps_per_second': 41.41, 'epoch': 3.0}
