In [2]:
import torch

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig

In [3]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


Load the pretrained `flan-t5-base` tokenizer and model.

In [4]:
# Hub identifier of the checkpoint; the tokenizer is loaded from this same id.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [5]:
# Load the sequence-to-sequence model for the checkpoint named by `model_name`.
pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

Encode a sample sentence with the tokenizer, then decode it back to check the round trip.

In [6]:
# Round-trip a sample sentence through the tokenizer: text -> token ids -> text.
sentence = "What time is it, Tom?"

# return_tensors='pt' yields PyTorch tensors with a leading batch dimension,
# hence the [0] below to pick the only sequence.
sentence_encoded = tokenizer(sentence, return_tensors='pt')
sentence_token_ids = sentence_encoded["input_ids"][0]

# Special tokens are dropped so the decoded text matches the input sentence.
sentence_decoded = tokenizer.decode(sentence_token_ids, skip_special_tokens=True)

print('ENCODED SENTENCE:')
print(sentence_token_ids)
print('\nDECODED SENTENCE:')
print(sentence_decoded)

ENCODED SENTENCE:
tensor([ 363,   97,   19,   34,    6, 3059,   58,    1])

DECODED SENTENCE:
What time is it, Tom?


In [7]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Despite the name, nothing is printed here: the summary is returned as a
    string and the caller decides whether to print it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        A three-line string containing the trainable parameter count, the
        total parameter count, and the trainable share as a percentage with
        two decimals. A model without any parameters reports ``0.00%``
        instead of raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # Guard the division so an empty model yields 0% rather than an exception.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# Report the parameter counts of the freshly loaded model, before later cells
# freeze it or wrap it with adapters.
print(print_number_of_trainable_model_parameters(pretrained_model))

trainable model parameters: 247577856
all model parameters: 247577856
percentage of trainable model parameters: 100.00%


#### **Set up the PEFT/LoRA model for fine-tuning**

In [8]:
# Place the model on the device chosen earlier (GPU when available, else CPU).
pretrained_model = pretrained_model.to(device)

# Turn off gradient tracking for every weight of the base model so that none
# of them is updated during training.
for base_weight in pretrained_model.parameters():
    base_weight.requires_grad = False

In [9]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import BitsAndBytesConfig

# LoRA adapter settings for a sequence-to-sequence language model. Adapters are
# attached to the modules named "q" and "v".
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=8,  # rank of the low-rank update matrices
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q", "v"],
)

In [10]:
# Wrap the model with the adapters described by `lora_config`, then report how
# many parameters are trainable in the wrapped model.
peft_model = get_peft_model(model=pretrained_model, peft_config=lora_config)
print(print_number_of_trainable_model_parameters(model=peft_model))

trainable model parameters: 884736
all model parameters: 248462592
percentage of trainable model parameters: 0.36%


In [11]:
import json
from datasets import Dataset
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer, pipeline

In [12]:
# Load the raw prompt/response records. The file is read as UTF-8 explicitly so
# the result does not depend on the platform's default text encoding.
with open("../data/dataset_blip_large.json", encoding="utf-8") as f:
    dataset = json.load(f)

In [13]:
# Re-shape the loaded records into a column-oriented Dataset, keeping only the
# "prompt" and "response" fields of each record.
dataset_columns = ("prompt", "response")
dataset = Dataset.from_dict(
    {column: [item[column] for item in dataset] for column in dataset_columns}
)

In [14]:
def preprocess_function(example):
    """Tokenize a batch of titles and thumbnail descriptions for seq2seq training.

    Args:
        example: A batch (as passed by ``Dataset.map(..., batched=True)``) with
            a "prompt" list of video titles and a "response" list of target
            descriptions.

    Returns:
        A dict of new columns:
          * "input_ids": token ids of the instruction-wrapped titles,
            padded/truncated to 128 tokens.
          * "attention_mask": mask matching "input_ids" so padded positions
            are not attended to.
          * "labels": token ids of the responses, padded/truncated to 128
            tokens, with every padding position replaced by -100 so the
            loss ignores it.
    """
    max_length = 128
    start_prompt = "Provide a description of the YouTube thumbnail given the following video title.\n\n"
    end_prompt = "\n\nThumbnail description: "
    prompt = [start_prompt + title + end_prompt for title in example["prompt"]]

    model_inputs = tokenizer(prompt, padding="max_length", truncation=True, max_length=max_length)
    label_ids = tokenizer(
        example["response"], padding="max_length", truncation=True, max_length=max_length
    )["input_ids"]

    # Padding in the labels must not contribute to the loss: -100 is the index
    # the cross-entropy loss ignores.
    pad_token_id = tokenizer.pad_token_id
    labels = [
        [token_id if token_id != pad_token_id else -100 for token_id in sequence]
        for sequence in label_ids
    ]

    return {
        "input_ids": model_inputs["input_ids"],
        # Keep the mask; without it the padded positions would be treated as
        # real tokens once the batch is collated.
        "attention_mask": model_inputs["attention_mask"],
        "labels": labels,
    }

tokenized_dataset = dataset.map(preprocess_function, batched=True)
# The raw text columns are no longer needed once the token ids exist.
tokenized_dataset = tokenized_dataset.remove_columns(["prompt", "response"])

                                                                  

In [15]:
import time
import torch
from transformers import Trainer, TrainingArguments, Seq2SeqTrainer, Seq2SeqTrainingArguments

In [16]:
# Timestamped output directory so repeated runs do not overwrite each other.
output_dir = f'../results/peft-thumbnail-description-training-{int(time.time())}'

peft_training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    predict_with_generate=True,
    # fp16 mixed precision requires a CUDA device; enabling it unconditionally
    # makes the arguments fail on a CPU-only machine.
    fp16=torch.cuda.is_available(),
    push_to_hub=False,
    # With a per-device batch of 4 this gives an effective batch of 16 per device.
    gradient_accumulation_steps=4,
)

# Train the LoRA-wrapped model (not the bare base model) so that saving through
# the trainer writes a PEFT adapter checkpoint.
# NOTE(review): eval_dataset is the training set itself, so the reported eval
# loss says nothing about generalisation -- consider a held-out split.
peft_trainer = Seq2SeqTrainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


#### **Train the model**

In [17]:
# Destination for the trained model written after training finishes.
peft_model_path = "../models/peft-thumbnail-description-checkpoint-local"

# Release unused cached GPU memory held by PyTorch before starting the run.
torch.cuda.empty_cache()

peft_trainer.train()

# Persist the result through the trainer.
peft_trainer.save_model(output_dir=peft_model_path)

  8%|▊         | 80/1026 [07:01<1:19:24,  5.04s/it]