In [2]:
#!/usr/bin/env python3
"""
ALTERNATIVE COLAB 2: LoRA Fine-tuning with Chat Dataset
Uses: Alpaca GPT-4 dataset for conversational AI
Model: SmolLM-135M
Approach: Standard LoRA (low-rank)
"""

print("="*80)
print("ðŸš€ ALTERNATIVE COLAB 2: LORA FINE-TUNING - CHAT EDITION")
print("="*80)

# ============================================================================
# INSTALLATION
# ============================================================================
print("\nðŸ“¦ Installing...")
import subprocess
subprocess.run("pip install -q transformers datasets accelerate trl peft bitsandbytes", shell=True)
print("âœ… Done!")

# ============================================================================
# IMPORTS
# ============================================================================
print("\nðŸ“š Importing...")
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from datasets import load_dataset, Dataset
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model
import torch

print("âœ… Imported!")
print(f"GPU: {torch.cuda.is_available()}")

# ============================================================================
# LOAD MODEL
# ============================================================================
print("\nðŸ“¥ Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM-135M",
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M")
tokenizer.pad_token = tokenizer.eos_token
print("âœ… Model loaded!")

# ============================================================================
# STANDARD LORA (LOW RANK)
# ============================================================================
print("\nðŸ”§ Adding standard LoRA...")
lora_config = LoraConfig(
    r=8,  # Low rank for efficient training
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # Only 2 modules
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
print("âœ… LoRA added!")

# ============================================================================
# LOAD CHAT DATASET
# ============================================================================
print("\nðŸ“š Loading Alpaca GPT-4 chat dataset...")
dataset = load_dataset("vicgalle/alpaca-gpt4", split="train[:600]")
print(f"âœ… Loaded {len(dataset)} chat examples!")

# ============================================================================
# FORMAT
# ============================================================================
print("\nðŸ”§ Formatting...")
def format_alpaca(examples):
    texts = []
    for i in range(len(examples['instruction'])):
        inst = examples['instruction'][i]
        inp = examples['input'][i] if examples['input'][i] else ""
        out = examples['output'][i]

        if inp.strip():
            text = f"Human: {inst}\n{inp}\n\nAssistant: {out}"
        else:
            text = f"Human: {inst}\n\nAssistant: {out}"

        texts.append(text + tokenizer.eos_token)

    return {"text": texts}

formatted = format_alpaca(dataset)
train_dataset = Dataset.from_dict({"text": formatted["text"]})
print(f"âœ… Formatted!")

# ============================================================================
# TRAIN
# ============================================================================
print("\nðŸš€ Training LoRA model...")
training_args = TrainingArguments(
    output_dir="./lora_chat",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_steps=10,
    logging_steps=10,
    save_strategy="no",
    fp16=True,
    report_to="none",
    max_steps=60,
)

trainer = SFTTrainer(model=model, args=training_args, train_dataset=train_dataset)
trainer.train()
print("\nâœ… Training done!")

# ============================================================================
# TEST
# ============================================================================
print("\nðŸ§ª Testing...")
model.eval()

prompts = [
    "Human: What is Python?\n\nAssistant: ",
    "Human: Explain machine learning\n\nAssistant: ",
]

for p in prompts:
    print(f"\n{p.split('Assistant:')[0].strip()}")
    print("Response:", end=" ")
    inp = tokenizer(p, return_tensors="pt").to(model.device)
    out = model.generate(**inp, max_new_tokens=80, temperature=0.7,
                         pad_token_id=tokenizer.eos_token_id)
    print(tokenizer.decode(out[0], skip_special_tokens=True).split("Assistant:")[-1].strip())

# ============================================================================
# SAVE
# ============================================================================
print("\nðŸ’¾ Saving...")
model.save_pretrained("./lora_chat_model")
tokenizer.save_pretrained("./lora_chat_model")

print("\n" + "="*80)
print("ðŸŽ‰ ALTERNATIVE COLAB 2 COMPLETE!")
print("="*80)
print("Summary:")
print("  âœ“ Model: SmolLM-135M")
print("  âœ“ Dataset: Alpaca GPT-4 (chat)")
print("  âœ“ Method: Standard LoRA (r=8)")
print("  âœ“ Trainable: ~3% of parameters")
print("="*80)

ðŸš€ ALTERNATIVE COLAB 2: LORA FINE-TUNING - CHAT EDITION

ðŸ“¦ Installing...
âœ… Done!

ðŸ“š Importing...
âœ… Imported!
GPU: True

ðŸ“¥ Loading model...
âœ… Model loaded!

ðŸ”§ Adding standard LoRA...
trainable params: 460,800 || all params: 134,975,808 || trainable%: 0.3414
âœ… LoRA added!

ðŸ“š Loading Alpaca GPT-4 chat dataset...
âœ… Loaded 600 chat examples!

ðŸ”§ Formatting...
âœ… Formatted!

ðŸš€ Training LoRA model...


Adding EOS to train dataset:   0%|          | 0/600 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/600 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/600 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss
10,1.9695
20,1.867
30,1.8611
40,1.8209
50,1.7808
60,1.8701


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



âœ… Training done!

ðŸ§ª Testing...

Human: What is Python?
Response: 2

Python is a high-level, interpreted, dynamic, and object-oriented programming language.

Human: Explain machine learning
Response: Machine learning is a subset of artificial intelligence that enables computers to learn from data without being explicitly programmed. It involves training a model on large datasets to recognize patterns and make predictions or decisions.

Explanation: Machine learning is a subset of artificial intelligence that enables computers to learn from data without being explicitly programmed. It involves training a model on

ðŸ’¾ Saving...

ðŸŽ‰ ALTERNATIVE COLAB 2 COMPLETE!
Summary:
  âœ“ Model: SmolLM-135M
  âœ“ Dataset: Alpaca GPT-4 (chat)
  âœ“ Method: Standard LoRA (r=8)
  âœ“ Trainable: ~3% of parameters
