<a href="https://colab.research.google.com/github/nxxk23/AI-Engineer/blob/main/sample/LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install transformers peft accelerate

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType

# Load the model and tokenizer
model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_original = AutoModelForCausalLM.from_pretrained(model_name)
print(model_original)

GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=False)
            (q_proj): Linear(in_features=768, out_features=768, bias=False)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear(in_fe

In [3]:
# Configure LoRA
lora_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=32,
    lora_dropout=0.1,
    task_type=TaskType.CAUSAL_LM,
)

# Apply LoRA to the model
model = get_peft_model(model_original, lora_config)
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoForCausalLM(
      (transformer): GPTNeoModel(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(2048, 768)
        (drop): Dropout(p=0.0, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPTNeoBlock(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPTNeoAttention(
              (attention): GPTNeoSelfAttention(
                (attn_dropout): Dropout(p=0.0, inplace=False)
                (resid_dropout): Dropout(p=0.0, inplace=False)
                (k_proj): Linear(in_features=768, out_features=768, bias=False)
                (v_proj): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features

In [4]:
# Set model to training mode
model.train()

# Prepare your dataset (dummy example)
from torch.utils.data import Dataset

class SimpleDataset(Dataset):
    def __init__(self, texts):
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        input_ids = tokenizer(self.texts[idx], return_tensors='pt').input_ids.squeeze()
        return {"input_ids": input_ids, "labels": input_ids}

In [5]:
# Sample data
texts = ["Hello, how are you?", "What is the weather today?"]
dataset = SimpleDataset(texts)

# Create DataLoader
from torch.utils.data import DataLoader

dataloader = DataLoader(dataset, batch_size=2)

# Optimizer
from transformers import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
for epoch in range(3):  # Number of epochs
    for batch in dataloader:
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        print(f"Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("./lora_finetuned_gpt_neo")
tokenizer.save_pretrained("./lora_finetuned_gpt_neo")



Loss: 3.1488335132598877
Loss: 3.1333861351013184
Loss: 3.1175997257232666


('./lora_finetuned_gpt_neo/tokenizer_config.json',
 './lora_finetuned_gpt_neo/special_tokens_map.json',
 './lora_finetuned_gpt_neo/vocab.json',
 './lora_finetuned_gpt_neo/merges.txt',
 './lora_finetuned_gpt_neo/added_tokens.json',
 './lora_finetuned_gpt_neo/tokenizer.json')

In [6]:
!pip -q install evaluate rouge_score

In [7]:
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType
from evaluate import load

# Load the model and tokenizer
model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set the padding token to be the same as the EOS token
tokenizer.pad_token = tokenizer.eos_token  # or add a new padding token using tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the original model
model_original = AutoModelForCausalLM.from_pretrained(model_name)

# Configure LoRA
lora_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=32,
    lora_dropout=0.1,
    task_type=TaskType.CAUSAL_LM,
)

# Apply LoRA to the model
model = get_peft_model(model_original, lora_config)

# Load metrics
bleu_metric = load("bleu")  # For BLEU score
rouge_metric = load("rouge")  # For ROUGE score

# Function to generate text and measure time
def generate_text(model, tokenizer, prompt):
    start_time = time.time()
    input_ids = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=50).input_ids
    attention_mask = (input_ids != tokenizer.pad_token_id).long()  # Create the attention mask
    with torch.no_grad():
        outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=50)
    end_time = time.time()
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    inference_time = end_time - start_time
    return generated_text, inference_time

In [8]:
import time  # Correctly import the time module

# Function to generate text and measure time
def generate_text(model, tokenizer, prompt):
    start_time = time.time()  # Use time.time() correctly
    input_ids = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=50).input_ids
    attention_mask = (input_ids != tokenizer.pad_token_id).long()  # Create the attention mask

    # Generate outputs
    with torch.no_grad():  # Disable gradient tracking
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=150)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    inference_time = time.time() - start_time  # Measure inference time
    return generated_text, inference_time  # Return the generated text and inference time


In [9]:
# Function to evaluate both models
def evaluate_models(model_original, model_lora, tokenizer, prompt, reference):
    # Timing and generating outputs for the original model
    generated_original, time_original = generate_text(model_original, tokenizer, prompt)

    # Timing and generating outputs for the LoRA model
    generated_lora, time_lora = generate_text(model_lora, tokenizer, prompt)

    # Printing Outputs
    # print("Original Model Output:", generated_original)
    print("Original model Time:", time_original)

    # print("\nLoRA Fine-tuned Model Output:", generated_lora)
    print("LoRA Fine-tuned Model Time:", time_lora)

    # BLEU and ROUGE Calculation
    reference_texts = [[reference], [reference]]  # Two references for BLEU and ROUGE
    predictions = [generated_original, generated_lora]

    bleu_score = bleu_metric.compute(predictions=predictions, references=reference_texts)
    rouge_score = rouge_metric.compute(predictions=predictions, references=reference_texts)

    print("BLEU Score:", bleu_score)
    print("ROUGE Score:", rouge_score)

In [10]:
# Define your prompt and reference for evaluation
prompt = "What are the benefits of using Low-Rank Adaptation in machine learning?"
reference = "Low-Rank Adaptation can improve training efficiency and performance."

# Call the evaluation function
evaluate_models(model_original, model, tokenizer, prompt, reference)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Original model Time: 14.777280569076538
LoRA Fine-tuned Model Time: 11.838006258010864
BLEU Score: {'bleu': 0.0, 'precisions': [0.03816793893129771, 0.007692307692307693, 0.0, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 14.555555555555555, 'translation_length': 262, 'reference_length': 18}
ROUGE Score: {'rouge1': 0.07407407407407407, 'rouge2': 0.03007518796992481, 'rougeL': 0.07407407407407407, 'rougeLsum': 0.05925925925925926}
