<a href="https://colab.research.google.com/github/manmustbecool/Experiment/blob/main/llm_finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install datasets

In [None]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
'''
A simple example of fine-tuning an LLM.
Supervised learning on (prompt, response) pairs.
Optional: fine-tuning with LoRA via peft.
Optional: partial fine-tuning (adapter-based or layer freezing); applies LoRA to specific layers.
'''

from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding

# Base checkpoint shared by the model and its tokenizer.
model_name = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


# Toy supervised data: three (prompt, response) pairs repeated 1000 times.
sample_data = [
    {"prompt": prompt, "response": response}
    for prompt, response in (
        ("wer?", "nol "),
        ("wer wer?", "nol nol nol"),
        ("wer wer wer?", "nol nol nol nol"),
    )
] * 1000

# Wrap the list of dicts in a Hugging Face Dataset.
dataset = Dataset.from_list(sample_data)

# Tokenization function for processing prompts and responses
def preprocess_function(example):
    """Tokenize (prompt, response) pairs into causal-LM training features.

    A causal language model shifts ``labels`` internally to predict the next
    token, so ``labels`` must be position-aligned with ``input_ids``.  Each
    training sequence is therefore ``prompt + response (+ EOS)``, and the
    labels are a copy of that sequence in which the prompt and padding
    positions are set to ``-100`` so the loss is computed on the response
    tokens only.

    Args:
        example: Mapping with "prompt" and "response" entries.  Each entry is
            either a single string or a list of strings (the latter is what
            ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        dict with "input_ids", "attention_mask" and "labels".  Every sequence
        is truncated/padded to exactly ``max_length`` tokens.  For a single
        (non-batched) example the values are flat lists; for a batch they are
        lists of lists.

    Raises:
        ValueError: If the tokenizer has no padding token.
    """
    max_length = 64      # Fixed sequence length; should not exceed the model's token limit
    ignore_index = -100  # Label value that the loss function skips

    prompts, responses = example["prompt"], example["response"]
    is_single = isinstance(prompts, str)
    if is_single:
        prompts, responses = [prompts], [responses]

    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        raise ValueError("Tokenizer has no pad token; set tokenizer.pad_token before preprocessing.")
    eos_id = tokenizer.eos_token_id

    features = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt, response in zip(prompts, responses):
        # The prompt keeps the tokenizer's own special tokens so that it is
        # encoded the same way as a prompt passed in at inference time.
        prompt_ids = list(tokenizer(prompt)["input_ids"])
        response_ids = list(tokenizer(response, add_special_tokens=False)["input_ids"])
        if eos_id is not None:
            # Teach the model where the response ends.
            response_ids.append(eos_id)

        input_ids = (prompt_ids + response_ids)[:max_length]
        labels = ([ignore_index] * len(prompt_ids) + response_ids)[:max_length]

        n_pad = max_length - len(input_ids)
        features["input_ids"].append(input_ids + [pad_id] * n_pad)
        features["attention_mask"].append([1] * len(input_ids) + [0] * n_pad)
        features["labels"].append(labels + [ignore_index] * n_pad)

    if is_single:
        return {key: values[0] for key, values in features.items()}
    return features

# Tokenize the whole dataset; batched=True passes lists of prompts/responses
# to preprocess_function instead of one example at a time.
tokenized_dataset = dataset.map(function=preprocess_function, batched=True)
print(tokenized_dataset)

# Collator that assembles individual examples into batches using the tokenizer.
# Preprocessing already pads every example to a fixed length.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Hyperparameters and bookkeeping options for the Trainer.
training_args = TrainingArguments(
    output_dir="./results",          # Where checkpoints and other training outputs are written
    num_train_epochs=4,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,               # Weight decay for regularization
    save_total_limit=1,              # Keep at most one saved checkpoint
    label_names=["labels"],          # PeftModel hides the base model, so the label column is named explicitly
    report_to="none",                # Disable external logging integrations (e.g. W&B)
)

# ------------------------
# Print the first 15 module names of the model. The names show how the attention
# layers are called, which is what `target_modules` below needs for partial fine-tuning.
# `named_modules()` returns a generator, which cannot be sliced with [0:15],
# so `islice` is used to take the first entries.
from itertools import islice

for name, module in islice(model.named_modules(), 15):
    print(name)

# Optional: LoRA reduces the number of trainable parameters, making fine-tuning efficient.
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # Task type: causal language modeling
    inference_mode=False,          # False because the adapter is going to be trained
    r=64,                          # Rank of the LoRA matrices (smaller is cheaper)
    lora_alpha=32,                 # Scaling factor for the LoRA update
    lora_dropout=0.1,              # Dropout rate for LoRA layers
    target_modules=["self_attn.q_proj", "self_attn.v_proj"]  # Apply LoRA only to these attention projections
)
model = get_peft_model(model, peft_config)  # Wrap the base model with the PEFT configuration
# ------------------------

# ------------------------
# Go to Runtime → Change runtime type. Select GPU from the Hardware accelerator dropdown and Click Save.
import torch

# Query CUDA once and reuse the result, so `device` is always defined
# (falling back to the CPU when no GPU is attached).
cuda_available = torch.cuda.is_available()
print("GPU Available:", cuda_available)
if cuda_available:
    print("GPU Name:", torch.cuda.get_device_name(0))

# Move the model to the selected device.
device = torch.device("cuda" if cuda_available else "cpu")
model.to(device)
# ------------------------

# Build the Trainer from the model, arguments, tokenized data and collator.
trainer = Trainer(
    data_collator=data_collator,
    train_dataset=tokenized_dataset,
    args=training_args,
    model=model,
)

# Run fine-tuning.
trainer.train()

# Save the model first, then the tokenizer, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./fine_tuned_model")

In [None]:
# Generate text with the fine-tuned model saved in ./fine_tuned_model.
from transformers import pipeline

model_name = "./fine_tuned_model"
generator = pipeline(task='text-generation', model=model_name)

# Last expression of the cell, so the notebook displays the generated text.
generator("wer wer?")

In [None]:
# Generate text with the original (not fine-tuned) model for comparison.
from transformers import pipeline

generator = pipeline(task='text-generation', model="facebook/opt-125m")

# Last expression of the cell, so the notebook displays the generated text.
generator("wer wer?")