In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
import torch

In [2]:
# Load a small model for demonstration
model_name = "microsoft/DialoGPT-small"  # Small model for quick demo
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Add padding token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"üìä Original model parameters: {model.num_parameters():,}")

üìä Original model parameters: 124,439,808


In [3]:
# Configure LoRA - very simple setup
lora_config = LoraConfig(
    r=4,                    # Small rank for demo
    lora_alpha=8,          # 2x the rank (common practice)
    lora_dropout=0.1,      # 10% dropout
    bias="none",           # Don't train bias
    target_modules=["c_attn", "c_proj"],  # Target specific attention layers
    task_type="CAUSAL_LM"
)

In [4]:
# Apply LoRA to the model
peft_model = get_peft_model(model, lora_config)



In [5]:
# Show the difference
trainable_params = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in peft_model.parameters())
print(f"üìà Total parameters: {total_params:,}")
print(f"üéØ Trainable parameters: {trainable_params:,}")
print(f"üí° Percentage trainable: {100 * trainable_params / total_params:.2f}%")
print()

üìà Total parameters: 124,845,312
üéØ Trainable parameters: 405,504
üí° Percentage trainable: 0.32%



In [7]:
# Test generation BEFORE any training
print("üîç BEFORE Training:")
test_prompt = "Hello, how are you"
inputs = tokenizer(test_prompt, return_tensors="pt")

with torch.no_grad():
    outputs = peft_model.generate(
        **inputs,
        max_length=inputs['input_ids'].shape[1] + 10,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Prompt: '{test_prompt}'")
print(f"Response: '{response}'")
print()

üîç BEFORE Training:
Prompt: 'Hello, how are you'
Response: 'Hello, how are you? OP, this is a scam.'



In [8]:
# Show what LoRA added to the model
print("üîß LoRA Modules Added:")
peft_model.print_trainable_parameters()

print("\n" + "=" * 50)
print("üí° Key Takeaways:")
print("‚Ä¢ LoRA only trains ~0.5% of parameters")
print("‚Ä¢ Original model weights stay frozen")
print("‚Ä¢ LoRA adds small 'adapter' layers")
print("‚Ä¢ Much faster and cheaper to train")
print("‚Ä¢ Can be easily swapped or removed")

üîß LoRA Modules Added:
trainable params: 405,504 || all params: 124,845,312 || trainable%: 0.3248

üí° Key Takeaways:
‚Ä¢ LoRA only trains ~0.5% of parameters
‚Ä¢ Original model weights stay frozen
‚Ä¢ LoRA adds small 'adapter' layers
‚Ä¢ Much faster and cheaper to train
‚Ä¢ Can be easily swapped or removed
