In [1]:
!pip install transformers datasets accelerate
!pip install transformers peft torch datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

##Load SNLI Dataset and Sample Data

In [2]:
from datasets import load_dataset

snli_dataset = load_dataset("snli")

# Prepare the training, testing, and validation subsets
train_data = snli_dataset["train"].select([i for i in range(0, 550000, 550)][:1000])
test_data = snli_dataset["test"].select([i for i in range(0, 10000, 100)][:100])
val_data = snli_dataset["validation"].select([i for i in range(0, 10000, 100)][:100])

print(f"Training set size: {len(train_data)}")
print(f"Testing set size: {len(test_data)}")
print(f"Validation set size: {len(val_data)}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/412k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/413k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/19.6M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/550152 [00:00<?, ? examples/s]

Training set size: 1000
Testing set size: 100
Validation set size: 100


##Load Model and set up QLoRA Config

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType

# Load the Phi2 model and tokenizer
model_name = "microsoft/phi-2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set the padding token
tokenizer.pad_token = tokenizer.eos_token

# Set up QLoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

# Wrap the model with QLoRA for parameter-efficient fine-tuning
model = get_peft_model(model, lora_config)

print("Model and tokenizer are ready for fine-tuning with QLoRA.")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of PhiForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model and tokenizer are ready for fine-tuning with QLoRA.


##Pre-processing

In [None]:
import torch
from torch.utils.data import DataLoader
from transformers import AdamW, get_scheduler

# Prepare the dataset for training
# tokenizes the premise and hypothesis, truncating and padding to a maximum length of 128 tokens
def preprocess_function(examples):
    return tokenizer(examples['premise'], examples['hypothesis'], truncation=True, padding='max_length', max_length=128)

train_encoded = train_data.map(preprocess_function, batched=True)
val_encoded = val_data.map(preprocess_function, batched=True)

# Make sure to include 'label' in the return
train_encoded.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
val_encoded.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

train_dataloader = DataLoader(train_encoded, shuffle=True, batch_size=8)
val_dataloader = DataLoader(val_encoded, batch_size=8)

# Set up the optimizer and learning rate
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 5
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)




In [None]:
del train_encoded, val_encoded  # After loading the data into DataLoader
torch.cuda.empty_cache()  # Clear the GPU cache

##Training

In [None]:
from torch.cuda.amp import autocast

for epoch in range(num_epochs):
    model.train()
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['label'].to(model.device)

        optimizer.zero_grad()
        with autocast():  # Enable mixed precision
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

    # Save the model after each epoch
    model.save_pretrained(f"phi2_finetuned_epoch_{epoch + 1}")
    tokenizer.save_pretrained(f"phi2_finetuned_epoch_{epoch + 1}")

    print(f"Epoch {epoch + 1} completed. Model saved.")


  with autocast():  # Enable mixed precision


OR


In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch in train_dataloader:
        # Move batch elements to the correct device
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        labels = batch['label'].to(model.device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

    # Save the model after each epoch
    model.save_pretrained(f"phi2_finetuned_epoch_{epoch + 1}")
    tokenizer.save_pretrained(f"phi2_finetuned_epoch_{epoch + 1}")

    print(f"Epoch {epoch + 1} completed. Model saved.")

print("Training complete. Final model saved.")

##Evaluate both Model

In [None]:
# Load the pretrained model
from transformers import AutoModelForSequenceClassification, AutoTokenizer

pretrained_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
pretrained_model.to(model.device)

NameError: name 'model_name' is not defined

In [None]:
#Evaluate the pretrained and fine-tuned model
from sklearn.metrics import accuracy_score
import time

def evaluate_model(model, dataloader):
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(model.device)
            attention_mask = batch['attention_mask'].to(model.device)
            labels = batch['label'].to(model.device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            preds = torch.argmax(logits, dim=-1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return accuracy_score(true_labels, predictions)

# Evaluate the pretrained model
start_time = time.time()
pretrained_accuracy = evaluate_model(pretrained_model, val_dataloader)
pretrained_time = time.time() - start_time

# Evaluate the fine-tuned model
start_time = time.time()
fine_tuned_accuracy = evaluate_model(model, val_dataloader)
fine_tuned_time = time.time() - start_time

print(f"Pretrained Model Accuracy: {pretrained_accuracy} and Time Taken: {pretrained_time}")
print(f"Fine-tuned Model Accuracy: {fine_tuned_accuracy} and Time Taken: {fine_tuned_time}")

NameError: name 'pretrained_model' is not defined

In [None]:
#Total Parameters and Fine-Tuned Parameters
total_params = sum(p.numel() for p in model.parameters())
fine_tuned_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total Parameters: {total_params}")
print(f"Fine-tuned Parameters: {fine_tuned_params}")

NameError: name 'model' is not defined

##Analyzing Failure Cases

In [None]:
import torch

def analyze_failures(pretrained_model, fine_tuned_model, dataloader):
    pretrained_model.eval()
    fine_tuned_model.eval()

    corrected_cases = []  # Cases where the fine-tuned model corrects the pretrained model's failure
    uncorrected_cases = []  # Cases where the fine-tuned model fails despite the pretrained model's success
    both_failed_cases = []  # Cases where both models fail

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(fine_tuned_model.device)
            attention_mask = batch['attention_mask'].to(fine_tuned_model.device)
            labels = batch['label'].to(fine_tuned_model.device)

            # Get predictions from both models
            pretrained_outputs = pretrained_model(input_ids=input_ids, attention_mask=attention_mask)
            fine_tuned_outputs = fine_tuned_model(input_ids=input_ids, attention_mask=attention_mask)

            pretrained_preds = torch.argmax(pretrained_outputs.logits, dim=-1)
            fine_tuned_preds = torch.argmax(fine_tuned_outputs.logits, dim=-1)

            for i in range(len(labels)):
                # If pretrained model fails but fine-tuned model succeeds
                if pretrained_preds[i] != labels[i] and fine_tuned_preds[i] == labels[i]:
                    corrected_cases.append({
                        "premise": batch['premise'][i],
                        "hypothesis": batch['hypothesis'][i],
                        "pretrained_pred": pretrained_preds[i].item(),
                        "fine_tuned_pred": fine_tuned_preds[i].item(),
                        "label": labels[i].item()
                    })
                # If pretrained model succeeds but fine-tuned model fails
                elif pretrained_preds[i] == labels[i] and fine_tuned_preds[i] != labels[i]:
                    uncorrected_cases.append({
                        "premise": batch['premise'][i],
                        "hypothesis": batch['hypothesis'][i],
                        "pretrained_pred": pretrained_preds[i].item(),
                        "fine_tuned_pred": fine_tuned_preds[i].item(),
                        "label": labels[i].item()
                    })
                # If both models fail
                elif pretrained_preds[i] != labels[i] and fine_tuned_preds[i] != labels[i]:
                    both_failed_cases.append({
                        "premise": batch['premise'][i],
                        "hypothesis": batch['hypothesis'][i],
                        "pretrained_pred": pretrained_preds[i].item(),
                        "fine_tuned_pred": fine_tuned_preds[i].item(),
                        "label": labels[i].item()
                    })

    return corrected_cases, uncorrected_cases, both_failed_cases

# Analyze failure cases
corrected_failures, uncorrected_failures, both_failed = analyze_failures(pretrained_model, fine_tuned_model, val_dataloader)
print(f"Total failure cases corrected by fine-tuned model: {len(corrected_failures)}")
print(f"Total uncorrected failure cases: {len(uncorrected_failures)}")
print(f"Total cases where both models failed: {len(both_failed)}")

NameError: name 'pretrained_model' is not defined