In [2]:
# Install required libraries if not already installed:
# pip install transformers datasets peft accelerate

from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset
from peft import get_peft_model, LoraConfig

# 1. Load base model and tokenizer
model_name = "gpt2"   # you can replace with another causal LM
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure pad token exists
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 2. Define LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["c_attn"]  # GPT-2 uses 'c_attn' for QKV projection
)

# Wrap model with LoRA
model = get_peft_model(model, lora_config)

# 3. Create a tiny synthetic dataset
train_texts = ["Hello world", "LoRA fine-tuning is fun", "Transformers are powerful"]
eval_texts = ["Testing evaluation", "Another sample"]

train_dataset = Dataset.from_dict({"text": train_texts})
eval_dataset = Dataset.from_dict({"text": eval_texts})

# Tokenization function with labels
def tokenize_function(examples):
    tokens = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=64
    )
    tokens["labels"] = tokens["input_ids"].copy()  # labels required for loss
    return tokens

train_dataset = train_dataset.map(tokenize_function, batched=True)
eval_dataset = eval_dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
eval_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# 4. Training configuration
training_args = TrainingArguments(
    output_dir="./lora-finetuned-model",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    num_train_epochs=1,   # keep small for demo
    logging_steps=1,
    save_strategy="epoch",
    fp16=True
)

# 5. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

# 6. Train
trainer.train()

# 7. Save only LoRA adapter weights (few MB instead of GBs!)
model.save_pretrained("./lora-weights")

print("Training complete. LoRA weights saved in ./lora-weights")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [00:00<00:00, 32.01 examples/s]
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00, 492.35 examples/s]
  super().__init__(loader)
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


RuntimeError: 
            Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'base_model.model.lm_head.weight', 'base_model.model.transformer.wte.weight'}].
            A potential way to correctly save your model is to use `save_model`.
            More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
            

In [1]:
# Load best model weights
model.load_state_dict(torch.load(BEST_WEIGHTS_PATH, map_location=device))
model.to(device).eval()

test_loss, test_acc = evaluate(model, test_loader)
print(f"ðŸ“Œ Test Loss: {test_loss:.4f}")
print(f"ðŸ“Œ Test Accuracy: {test_acc:.4f}")

NameError: name 'model' is not defined