In [None]:
!pip install transformers datasets peft accelerate bitsandbytes


In [None]:
!pip install wandb

In [None]:
import wandb

# Never embed the API key in the notebook. Without an explicit `key`,
# wandb.login() falls back to the WANDB_API_KEY environment variable or
# previously cached credentials, and otherwise prompts for the key.
wandb.login()

In [None]:
from datasets import load_dataset
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

# Load tokenizer and model
# NOTE: there is no "Llama-3.2-8B" checkpoint on the Hub (the Llama 3.2 text
# models are 1B/3B); the 8B base model is published in the Llama 3.1 family.
model_name = "meta-llama/Llama-3.1-8B"

# The repository is gated: `token=True` (the replacement for the deprecated
# `use_auth_token`) uses the locally stored Hugging Face login token.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)

# Reuse EOS as the padding token so `padding="max_length"` can be used below.
tokenizer.pad_token = tokenizer.eos_token

# 8-bit loading is requested through a quantization config; passing
# `load_in_8bit=` directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
    token=True,
)

# PEFT's recommended preprocessing step for a quantized base model before
# adapters are attached for training.
model = prepare_model_for_kbit_training(model)

# Prepare config: rank-8 LoRA adapters on the attention query/value projections
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)


# Apply LoRA
model = get_peft_model(model, lora_config)

# Load your dataset (or use a test one)
dataset = load_dataset("Abirate/english_quotes")  # Replace with your own
def tokenize(example):
    """Tokenize the ``quote`` field and build causal-LM training labels.

    Accepts either a single example (``example["quote"]`` is a string) or a
    batch as passed by ``dataset.map(..., batched=True)`` (a list of strings).
    Each sequence is truncated/padded to exactly 128 tokens.

    Returns the tokenizer output with an added ``"labels"`` entry: a copy of
    ``input_ids`` in which every padding position (attention mask ``0``) is
    replaced by ``-100`` so the loss skips it. Because the pad token is the
    EOS token, copying ``input_ids`` verbatim would train on the padding.
    """
    ignore_index = -100  # label value ignored by the cross-entropy loss

    tokens = tokenizer(
        example["quote"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]

    def _mask_padding(ids, mask):
        # Keep real tokens, blank out positions the attention mask marks as padding.
        return [tok if keep else ignore_index for tok, keep in zip(ids, mask)]

    if input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one list of token ids per example.
        tokens["labels"] = [
            _mask_padding(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single example: a flat list of token ids.
        tokens["labels"] = _mask_padding(input_ids, attention_mask)
    return tokens

# Tokenize every split; `tokenize` receives batches of examples.
tokenized_dataset = dataset.map(tokenize, batched=True)

# Training args
training_args = TrainingArguments(
    output_dir="./lora-llama",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
    logging_steps=10,
    num_train_epochs=3,
    save_strategy="epoch",  # one checkpoint per epoch under output_dir
    fp16=True,
    learning_rate=2e-4,
    warmup_steps=100,
    # Send metrics to Weights & Biases explicitly instead of relying on the
    # library default for `report_to`.
    report_to="wandb",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    # `processing_class` is the current name of the deprecated `tokenizer` argument.
    processing_class=tokenizer,
)
trainer.train()



In [None]:
# Write the model's saved files to the local "lora-llama-7b" directory.
model.save_pretrained(save_directory="lora-llama-7b")


In [None]:
from peft import PeftModel

# Reload a fresh 8-bit base model; quantization is requested via
# `quantization_config` because the bare `load_in_8bit=` kwarg is deprecated.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
# Attach the adapter saved in "lora-llama-7b" and switch to eval mode so
# dropout is disabled during generation (`eval()` returns the module itself).
model = PeftModel.from_pretrained(base_model, "lora-llama-7b").eval()


In [None]:
# Smoke-test the adapted model with a short prompt.
input_text = "Once upon a time,"
# Move the inputs to the model's own device instead of a hard-coded "cuda",
# so this follows whatever placement `device_map="auto"` chose.
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
