<a href="https://colab.research.google.com/github/luknda/q-lora-bert-imdb/blob/master/GenAI_PR2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers
!pip install datasets
!pip install peft
!pip install torch
!pip install bitsandbytes

In [None]:
import torch
from transformers import BertForSequenceClassification, BertTokenizerFast, Trainer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training, AutoPeftModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# 1. Load a pre-trained model and evaluate it
def compute_metrics(pred):
    """Turn a Trainer prediction object into binary-classification metrics.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict: ``accuracy``, ``f1``, ``precision`` and ``recall``, with the
        last three computed using ``average='binary'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # The fourth element of the returned tuple (support) is not needed.
    precision, recall, f1 = precision_recall_fscore_support(
        y_true, y_pred, average='binary'
    )[:3]

    return dict(
        accuracy=accuracy_score(y_true, y_pred),
        f1=f1,
        precision=precision,
        recall=recall,
    )

def load_and_evaluate_model(model_name, dataset_name):
    """Load a pre-trained BERT classifier and score it on a test-set sample.

    No training happens here; the function only produces baseline numbers
    for the checkpoint as loaded (with a 2-label classification head).

    Args:
        model_name: Checkpoint name or path accepted by
            ``BertForSequenceClassification.from_pretrained``.
        dataset_name: Name passed to ``datasets.load_dataset``. The dataset
            must provide a ``"test"`` split with a ``"text"`` column and at
            least 2000 rows.

    Returns:
        tuple: ``(model, tokenizer, results)`` where ``results`` is the
        metrics dict returned by ``Trainer.evaluate()``.
    """
    dataset = load_dataset(dataset_name)

    # Loading model and tokenizer
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
    tokenizer = BertTokenizerFast.from_pretrained(model_name)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Subsample first, tokenize second. Shuffling with a fixed seed picks the
    # same 2000 rows either way, but this avoids tokenizing every split of
    # the dataset just to throw most of it away. No training subset is built
    # because this function never trains.
    eval_dataset = (
        dataset["test"]
        .shuffle(seed=41)
        .select(range(2000))
        .map(tokenize_function, batched=True)
    )

    # Only evaluation-relevant arguments are set. Training-only options
    # (learning rate, epochs, save/eval strategy, ...) have no effect on
    # ``evaluate()``, and the ``evaluation_strategy`` keyword in particular
    # is not accepted by every transformers release.
    training_args = TrainingArguments(
        output_dir="./results_b",
        per_device_eval_batch_size=64,
        logging_dir='./logs',
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    results = trainer.evaluate()

    return model, tokenizer, results


In [None]:
# 2. Perform lightweight fine-tuning using a pre-trained model
def fine_tune_with_lora(model, tokenizer, dataset_name, save_directory):
    """Fine-tune ``model`` with LoRA adapters and save the result.

    Trains for 5 epochs on 10,000 shuffled training rows, evaluating on
    2,000 shuffled test rows after every epoch, then writes the PEFT model
    to ``save_directory``.

    Args:
        model: Sequence-classification model to wrap with LoRA.
            NOTE(review): ``get_peft_model`` is understood to inject the
            adapter layers into this object in place -- confirm before
            reusing ``model`` as an untouched baseline afterwards.
        tokenizer: Tokenizer used to encode the ``"text"`` column.
        dataset_name: Name passed to ``datasets.load_dataset``; needs
            ``"train"`` and ``"test"`` splits with a ``"text"`` column.
        save_directory: Where ``peft_model.save_pretrained`` writes to.

    Returns:
        The trained PEFT-wrapped model. A failure while saving is reported
        on stdout and does not prevent the model from being returned.
    """
    import inspect  # local import: only needed for the version check below

    dataset = load_dataset(dataset_name)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Subsample first, tokenize second. The seeded shuffle selects the same
    # rows in the same order as before, but only the rows that are actually
    # used get tokenized (instead of every split of the dataset).
    train_dataset = (
        dataset["train"]
        .shuffle(seed=41)
        .select(range(10000))
        .map(tokenize_function, batched=True)
    )
    eval_dataset = (
        dataset["test"]
        .shuffle(seed=41)
        .select(range(2000))
        .map(tokenize_function, batched=True)
    )

    # LoRA config
    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
    )

    peft_model = get_peft_model(model, peft_config)

    peft_model.print_trainable_parameters()

    # Newer transformers releases name this option ``eval_strategy``; older
    # ones only know ``evaluation_strategy``. Use whichever the installed
    # version accepts so the notebook runs with an unpinned install.
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters:
        strategy_kwarg = "eval_strategy"
    else:
        strategy_kwarg = "evaluation_strategy"

    # Trainer stuff
    training_args = TrainingArguments(
        output_dir="./results_f",
        learning_rate=2e-5,
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        save_strategy="epoch",
        logging_dir='./logs_f',
        logging_steps=10,
        load_best_model_at_end=True,
        **{strategy_kwarg: "epoch"},
    )

    trainer = Trainer(
        model=peft_model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # Saving is best-effort: report a failure but still hand the trained
    # model back. Only the model is written here (the tokenizer is not),
    # so the messages say exactly that.
    try:
        peft_model.save_pretrained(save_directory)
        print("Model saved successfully.")

    except Exception as e:
        print(f"Error saving model: {e}")

    return peft_model

In [None]:
# 3. Perform lightweight fine-tuning (with quantization) using a pre-trained model
def fine_tune_with_lora_q(model_name, tokenizer, dataset_name, save_directory):
    """Fine-tune a 4-bit quantized BERT classifier with LoRA (QLoRA).

    Loads ``model_name`` with an NF4 / double-quantization bitsandbytes
    config, prepares it for k-bit training, attaches LoRA adapters, trains
    for 5 epochs on 10,000 shuffled training rows (evaluating on 2,000
    shuffled test rows after every epoch) and writes the PEFT model to
    ``save_directory``.

    Args:
        model_name: Checkpoint name or path accepted by
            ``BertForSequenceClassification.from_pretrained``.
        tokenizer: Tokenizer used to encode the ``"text"`` column.
        dataset_name: Name passed to ``datasets.load_dataset``; needs
            ``"train"`` and ``"test"`` splits with a ``"text"`` column.
        save_directory: Where ``peft_model.save_pretrained`` writes to.

    Returns:
        tuple: ``(peft_model, model_q)`` -- the trained PEFT-wrapped model
        and the quantized model returned by
        ``prepare_model_for_kbit_training``. A failure while saving is
        reported on stdout and does not prevent the return.
    """
    import inspect  # local import: only needed for the version check below

    dataset = load_dataset(dataset_name)

    # Q
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16  # input type is torch.float16, default seems to be torch.float32
    )

    model_q = BertForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        quantization_config=quantization_config
    )

    # Prepare quantized model for peft training
    model_q = prepare_model_for_kbit_training(model_q)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Subsample first, tokenize second. The seeded shuffle selects the same
    # rows in the same order as before, but only the rows that are actually
    # used get tokenized (instead of every split of the dataset).
    train_dataset = (
        dataset["train"]
        .shuffle(seed=41)
        .select(range(10000))
        .map(tokenize_function, batched=True)
    )
    eval_dataset = (
        dataset["test"]
        .shuffle(seed=41)
        .select(range(2000))
        .map(tokenize_function, batched=True)
    )

    # LoRA config
    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
    )

    peft_model = get_peft_model(model_q, peft_config)

    peft_model.print_trainable_parameters()

    # Newer transformers releases name this option ``eval_strategy``; older
    # ones only know ``evaluation_strategy``. Use whichever the installed
    # version accepts so the notebook runs with an unpinned install.
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters:
        strategy_kwarg = "eval_strategy"
    else:
        strategy_kwarg = "evaluation_strategy"

    # Trainer stuff
    training_args = TrainingArguments(
        output_dir="./results_fq",
        learning_rate=2e-5,
        num_train_epochs=5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        save_strategy="epoch",
        logging_dir='./logs_fq',
        logging_steps=10,
        load_best_model_at_end=True,
        **{strategy_kwarg: "epoch"},
    )

    trainer = Trainer(
        model=peft_model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # Saving is best-effort: report a failure but still hand the trained
    # model back. Only the model is written here (the tokenizer is not),
    # so the messages say exactly that.
    try:
        peft_model.save_pretrained(save_directory)
        print("Model saved successfully.")

    except Exception as e:
        print(f"Error saving model: {e}")

    return peft_model, model_q

In [None]:
def inference_lora(model_name, tokenizer, save_directory, dataset_name):
    """Reload a saved LoRA model and evaluate it on a test-set sample.

    Args:
        model_name: Unused; kept so existing callers keep working. The
            model is loaded from ``save_directory`` alone.
        tokenizer: Tokenizer used to encode the ``"text"`` column.
        save_directory: Directory previously written by
            ``peft_model.save_pretrained``.
        dataset_name: Name passed to ``datasets.load_dataset``; needs a
            ``"test"`` split with a ``"text"`` column and at least 10,000
            rows.

    Returns:
        dict: Metrics returned by ``Trainer.evaluate()``.
    """
    dataset = load_dataset(dataset_name)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Subsample first, tokenize second. The seeded shuffle selects the same
    # 10,000 test rows as before, but only those rows get tokenized. No
    # training subset is built because nothing is trained here.
    eval_dataset = (
        dataset["test"]
        .shuffle(seed=42)
        .select(range(10000))
        .map(tokenize_function, batched=True)
    )

    # The previous version also instantiated a separate base model from
    # ``model_name`` that was never used; that extra load has been dropped.
    peft_model = AutoPeftModelForSequenceClassification.from_pretrained(
        save_directory,
        num_labels=2
    )

    # Only evaluation-relevant arguments are set. Training-only options
    # have no effect on ``evaluate()``, and the ``evaluation_strategy``
    # keyword is not accepted by every transformers release.
    training_args = TrainingArguments(
        output_dir="./results_inf",
        per_device_eval_batch_size=64,
        logging_dir='./logs_inf',
        logging_steps=10,
    )

    trainer = Trainer(
        model=peft_model,
        args=training_args,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    results = trainer.evaluate()
    print("Inference done")

    return results

In [None]:
def inference_qlora(model_name, tokenizer, save_directory, dataset_name):
    """Reload a saved QLoRA model in 4-bit and evaluate it on a test sample.

    Args:
        model_name: Unused; kept so existing callers keep working. The
            model is loaded from ``save_directory`` alone.
        tokenizer: Tokenizer used to encode the ``"text"`` column.
        save_directory: Directory previously written by
            ``peft_model.save_pretrained``.
        dataset_name: Name passed to ``datasets.load_dataset``; needs a
            ``"test"`` split with a ``"text"`` column and at least 10,000
            rows.

    Returns:
        dict: Metrics returned by ``Trainer.evaluate()``.
    """
    dataset = load_dataset(dataset_name)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Subsample first, tokenize second. The seeded shuffle selects the same
    # 10,000 test rows as before, but only those rows get tokenized. No
    # training subset is built because nothing is trained here.
    eval_dataset = (
        dataset["test"]
        .shuffle(seed=42)
        .select(range(10000))
        .map(tokenize_function, batched=True)
    )

    # Q -- same 4-bit settings that were used when fine-tuning.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    # Load
    peft_model = AutoPeftModelForSequenceClassification.from_pretrained(
        save_directory,
        quantization_config=quantization_config,
        num_labels=2
    )

    # Only evaluation-relevant arguments are set. Training-only options
    # have no effect on ``evaluate()``, and the ``evaluation_strategy``
    # keyword is not accepted by every transformers release.
    training_args = TrainingArguments(
        output_dir="./results_inf_q",
        per_device_eval_batch_size=64,
        logging_dir='./logs_inf_q',
        logging_steps=10,
    )

    trainer = Trainer(
        model=peft_model,
        args=training_args,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    results = trainer.evaluate()
    print("Inference done")

    return results

In [None]:
# Main
# Driver: baseline evaluation -> LoRA fine-tune -> QLoRA fine-tune ->
# reload both saved models and evaluate them on a larger test sample.
if __name__ == "__main__":
    # Checkpoint and dataset shared by every stage below.
    model_name = "bert-base-uncased"
    dataset_name = "imdb"
    # Separate output directories so the LoRA and QLoRA models do not
    # overwrite each other.
    save_directory = "./my_fine_tuned_lora_model"
    save_directory_q = "./my_fine_tuned_lora_model_q"

    # 1. Load and evaluate bert on imdb dataset for sequence classification.
    original_model, tokenizer, original_results = load_and_evaluate_model(model_name, dataset_name)
    print("Original model performance:", original_results)

    # 2. Fine-tune bert with LoRA on imdb dataset for sequence classification, also run some evaluations
    # The model object loaded in step 1 is passed in and wrapped with LoRA here.
    fine_tuned_model = fine_tune_with_lora(original_model, tokenizer, dataset_name, save_directory)
    # No separate result dict is returned by this step; evaluation is
    # configured to run once per epoch inside the training call.

    # 3. Fine-tune bert with QLoRA on imdb dataset for sequence classification, also run some evaluations
    # This step loads its own quantized copy from model_name rather than reusing original_model.
    fine_tuned_model_q, model_q = fine_tune_with_lora_q(model_name, tokenizer, dataset_name, save_directory_q)
    # As in step 2, no separate result dict is returned here.

    # 4. Load the models, proceed with inference
    # Both models are reloaded from their save directories, so this step
    # depends on the saves in steps 2 and 3 having succeeded.
    results_lora = inference_lora(model_name, tokenizer, save_directory, dataset_name)
    print(f"LoRA results: {results_lora}\n")
    results_qlora = inference_qlora(model_name, tokenizer, save_directory_q, dataset_name)
    print(f"QLoRA results: {results_qlora}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]



Original model performance: {'eval_loss': 0.6871984601020813, 'eval_accuracy': 0.549, 'eval_f1': 0.6118760757314975, 'eval_precision': 0.5465026902382782, 'eval_recall': 0.6950146627565983, 'eval_runtime': 27.8569, 'eval_samples_per_second': 71.795, 'eval_steps_per_second': 1.149}


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

trainable params: 296,450 || all params: 109,780,228 || trainable%: 0.2700




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4194,0.397217,0.843,0.836629,0.894327,0.785924
2,0.2102,0.283883,0.892,0.891239,0.919003,0.865103
3,0.2621,0.261761,0.901,0.902846,0.906404,0.899316
4,0.3395,0.253376,0.9045,0.906327,0.909449,0.903226
5,0.221,0.254157,0.905,0.906312,0.914428,0.898338


Model and tokenizer saved successfully.


`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

trainable params: 296,450 || all params: 109,780,228 || trainable%: 0.2700




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4541,0.401846,0.8585,0.85598,0.892781,0.822092




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4541,0.401846,0.8585,0.85598,0.892781,0.822092
2,0.2172,0.263442,0.8995,0.901034,0.907738,0.894428
3,0.2485,0.253255,0.8995,0.903039,0.891429,0.914956
4,0.3511,0.245795,0.9085,0.910601,0.910156,0.911046
5,0.2014,0.244071,0.907,0.909268,0.907498,0.911046




Model and tokenizer saved successfully.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Inference done
LoRA results: {'eval_loss': 0.2469293773174286, 'eval_accuracy': 0.9018, 'eval_f1': 0.9033274266587912, 'eval_precision': 0.8888027896164278, 'eval_recall': 0.9183346677341874, 'eval_runtime': 156.1975, 'eval_samples_per_second': 64.022, 'eval_steps_per_second': 1.005}



Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Inference done
QLoRA results: {'eval_loss': 0.2410888671875, 'eval_accuracy': 0.9058, 'eval_f1': 0.9080796252927401, 'eval_precision': 0.885948210205636, 'eval_recall': 0.9313450760608487, 'eval_runtime': 54.1948, 'eval_samples_per_second': 184.52, 'eval_steps_per_second': 2.897}
