In [None]:

pip install peft


In [None]:
pip install datasets

In [None]:
pip install transformers

In [1]:
# Loading the model
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Evaluating the model
from datasets import load_dataset

dataset_name = "stanfordnlp/imdb"
imdb = load_dataset(dataset_name)

# Use a smaller subset of the dataset
small_train_dataset = imdb["train"].shuffle(seed=42).select(range(100))  # Further reduce to 100 samples for training
small_test_dataset = imdb["test"].shuffle(seed=42).select(range(20))    # Further reduce to 20 samples for testing

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

tokenized_train_dataset = small_train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = small_test_dataset.map(tokenize_function, batched=True)

# Convert datasets to PyTorch format
tokenized_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
tokenized_test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Evaluate the model
from transformers import EvalPrediction
import numpy as np
import torch
from torch.utils.data import DataLoader

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": (predictions == labels).mean()}

# Dummy evaluation function since `evaluate` method doesn't exist in transformers, 
# typically you should use `trainer.evaluate` instead.
def evaluate(model, dataset, batch_size=16):
    model.eval()
    eval_dataloader = DataLoader(dataset, batch_size=batch_size)
    all_logits = []
    all_labels = []

    for batch in eval_dataloader:
        inputs = {key: val.to(model.device) for key, val in batch.items() if key in tokenizer.model_input_names}
        labels = batch["label"].to(model.device)

        with torch.no_grad():
            outputs = model(**inputs)

        logits = outputs.logits
        all_logits.append(logits.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

    all_logits = np.concatenate(all_logits, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    return compute_metrics((all_logits, all_labels))

eval_results = evaluate(model, tokenized_test_dataset)
print(f"Initial Evaluation: {eval_results}")

# Performing Parameter-Efficient Fine-Tuning
from peft import LoraConfig, get_peft_model

# Creating a PEFT config
peft_config = LoraConfig(
    task_type="SEQUENCE_CLASSIFICATION",
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"],  # Adjust to match DistilBERT architecture
)

# Creating a PEFT model
peft_model = get_peft_model(model, peft_config)

# Training the model
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,  # Reduce batch size
    per_device_eval_batch_size=4,   # Reduce batch size
    num_train_epochs=1,             # Reduce the number of epochs
    weight_decay=0.01,
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

# Saving the trained model
peft_model.save_pretrained("./peft_model")

# Performing Inference with a PEFT Model
# Loading the model
peft_model = AutoModelForSequenceClassification.from_pretrained("./peft_model", trust_remote_code=True)

# Evaluating the model
eval_results = evaluate(peft_model, tokenized_test_dataset)
print(f"PEFT Model Evaluation: {eval_results}")


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Initial Evaluation: {'accuracy': 0.5}




  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'eval_runtime': 3.16, 'eval_samples_per_second': 6.329, 'eval_steps_per_second': 1.582, 'epoch': 1.0}
{'train_runtime': 32.0863, 'train_samples_per_second': 3.117, 'train_steps_per_second': 0.779, 'train_loss': 0.6906497955322266, 'epoch': 1.0}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PEFT Model Evaluation: {'accuracy': 0.45}
