In [None]:
# Colab setup: install Unsloth and the libraries it trains with.
# `--no-deps` tells pip to install only the named packages and skip their declared
# dependencies (presumably so the runtime's preinstalled packages are left alone — confirm).
# NOTE(review): only xformers is version-pinned; the other packages float to the latest release.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
# These are installed normally, i.e. together with their dependencies.
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

Load Base Model

In [None]:
from unsloth import FastLanguageModel
import torch
from google.colab import userdata

# Collect the loading options first so each one can be annotated, then load the
# Llama 3.2 3B base checkpoint and its tokenizer through Unsloth.
base_model_kwargs = dict(
    model_name = "meta-llama/Llama-3.2-3B",
    max_seq_length = 2048,
    dtype = None,                             # no explicit dtype: left to Unsloth to pick
    load_in_4bit = False,                     # do not load 4-bit quantized weights
    token = userdata.get('HF_ACCESS_TOKEN'),  # Hugging Face token read via Colab's userdata
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Turn off the tokenizer's whitespace clean-up so decoded text is left as generated.
tokenizer.clean_up_tokenization_spaces = False

Add Adapters (LoRA)

In [3]:
# Select the target layers we want to attach LoRA adapters to:
# the attention projections (q/k/v/o) and the MLP projections (gate/up/down).
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

# By default we do not want to train the embedding / output-head weights.
train_embeddings = False

if train_embeddings:
  # NOTE(review): this adds only the output head; the input embeddings
  # ("embed_tokens") would still be frozen — confirm that is the intent.
  target_modules.append("lm_head")

# Most of the values below are tunable hyperparameters; see the Unsloth docs
# for guidance before changing them.

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth", # gradient checkpointing trades compute for lower VRAM use
    random_state = 3407, # was 3047 (transposed digits); now matches TrainingArguments(seed = 3407)
    use_rslora = False,
    loftq_config = None
)

Unsloth 2025.8.9 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


Prepare Dataset

In [None]:
# End-of-sequence token string, read from the loaded tokenizer.
# Training texts generally need to end with this token so the model learns where a poem stops.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples, eos_token=None):
  """Build one training string per poem from a column-oriented batch.

  Each row is rendered as a metadata header (instruction, author, poem name,
  type, age) followed by the poem text and the end-of-sequence token. Without
  the trailing EOS token the model is never shown where a poem ends and tends
  to keep generating until it hits the token limit.

  Args:
    examples: Mapping of column name to a list of values, with the keys
      "author", "poem name", "type", "age" and "content". Falsy values
      (None, "") are replaced by a placeholder.
    eos_token: String appended to every example. When None (the default) the
      module-level EOS_TOKEN is used.

  Returns:
    A dict {"text": [...]} holding one formatted string per input row.
  """
  if eos_token is None:
    eos_token = EOS_TOKEN

  rows = zip(
      examples["author"],
      examples["poem name"],
      examples["type"],
      examples["age"],
      examples["content"],
  )

  texts = []
  for author, poem_name, poem_type, poem_age, content in rows:
    # Construct the context-rich prompt and response, terminated by EOS.
    texts.append(
        f"### Instruction: Generate a poem.\n"
        f"### Author: {author or 'Unknown Author'}\n"
        f"### Poem Name: {poem_name or 'Untitled Poem'}\n"
        f"### Type: {poem_type or 'General'}\n"
        f"### Age: {poem_age or 'Unknown Period'}\n"
        f"### Generated Poem:\n"
        f"{content or ''}"
        f"{eos_token}"
    )
  return {"text": texts}

from datasets import load_dataset
# Fetch the "train" split of the poetry dataset and, in the same expression,
# run the formatter over it in batches to produce the "text" column.
dataset = (
    load_dataset("merve/poetry", split = "train")
    .map(formatting_prompts_func, batched = True)
)

Visualize Dataset

In [None]:
# Print the first few formatted training examples as a sanity check.
preview_count = 4
for sample_number, sample in enumerate(dataset, start = 1):
  print(f"\n-----Sample {sample_number}")
  print(sample["text"])
  if sample_number == preview_count:
    break

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Pick the mixed-precision mode once: bf16 where the GPU supports it, fp16 otherwise.
# Previously only `fp16` was set, so bf16-capable GPUs requested no mixed precision at all.
use_bf16 = is_bfloat16_supported()

# Supervised fine-tuning on the pre-formatted "text" column of the dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # accumulate gradients over 4 batches per optimizer step (effective batch of 8 per device)
        num_train_epochs = 3, # Prevent overfitting by keeping epochs between 1 - 4
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none"
    ),
)

In [None]:
# Run the fine-tuning loop; the value returned by train() is kept for later inspection.
trainer_stats = trainer.train()

In [24]:
from transformers import TextStreamer

def generate_tokens(model, prompt = None, max_new_tokens = 100):
  """Stream one generation from `model` to stdout.

  Args:
    model: The model to sample from; it is switched to Unsloth's inference
      mode before generating.
    prompt: Text to condition on. When None, a prompt laid out like the
      training texts ("### Instruction: ... ### Generated Poem:") is used,
      because the model was fine-tuned on that layout.
    max_new_tokens: Upper bound on the number of generated tokens.

  Returns:
    None. The decoded text is printed by the streamer as it is produced.
  """
  if prompt is None:
    # Use the training-template headers with the formatter's placeholder values.
    # NOTE(review): assumes the dataset spells this age label "Renaissance" — confirm.
    prompt = (
        "### Instruction: Generate a poem.\n"
        "### Author: Unknown Author\n"
        "### Poem Name: Untitled Poem\n"
        "### Type: General\n"
        "### Age: Renaissance\n"
        "### Generated Poem:\n"
    )

  FastLanguageModel.for_inference(model)
  inputs = tokenizer(prompt, return_tensors = "pt").to("cuda")
  text_streamer = TextStreamer(tokenizer)
  # The streamer already prints the decoded text; the returned token-id tensor
  # is deliberately not printed (it previously cluttered the output).
  model.generate(**inputs, streamer = text_streamer, max_new_tokens = max_new_tokens)

In [None]:
# Stream several generations in a row to compare the fine-tuned model's outputs.
NUM_SAMPLES = 3
for _sample_index in range(NUM_SAMPLES):
  generate_tokens(model)

Saving Model

In [None]:
from google.colab import userdata

# Save the LoRA adapter to the Hub so it can be applied at inference time.
# The model and tokenizer are pushed with separate calls: push_to_hub's second
# positional parameter is `use_temp_dir`, so passing the tokenizer there (as
# before) did not upload it.
lora_repo_id = "lvogel123/Llama-3.2-3B-poem-god-lora"
hf_token = userdata.get('HF_ACCESS_TOKEN')

model.push_to_hub(lora_repo_id, token = hf_token)
tokenizer.push_to_hub(lora_repo_id, token = hf_token)


