In [None]:
# Pinned install. %pip (unlike !pip) installs into the environment of the running kernel.
%pip install unsloth==2025.4.7 triton==3.2.0

In [None]:
from unsloth import is_bfloat16_supported
from unsloth import FastLanguageModel

import numpy as np
import pandas as pd

import torch
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from datasets import Dataset, load_dataset

from transformers import AutoTokenizer, AutoModelForSequenceClassification

import warnings
warnings.filterwarnings("ignore")

# Loading the dataset

In [None]:
# Work on small slices of the Reuters-21578 "ModApte" configuration:
# the first 20% of the train split and the first 1% of the test split.
train_data = load_dataset(
    path="ucirvine/reuters21578",
    name="ModApte",
    split="train[:20%]",
    trust_remote_code=True,
)
test_data = load_dataset(
    path="ucirvine/reuters21578",
    name="ModApte",
    split="test[:1%]",
    trust_remote_code=True,
)
print(train_data, test_data)

In [None]:
# Peek at one training example: the article body first, then its reference title.
print(train_data[11]["text"])
print(train_data[11]["title"])

# Loading the model

In [None]:
# Sequence-length limit handed to the model loader here and to the trainer config later.
max_seq_length = 2048

# Load the 4-bit Llama 3.2 1B checkpoint together with its tokenizer.
original_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-bnb-4bit",
    dtype=None,
    load_in_4bit=True,
    max_seq_length=max_seq_length,
)

# Prepare the model input

In [None]:
# Prompt template shared by training and inference.
# The first {} receives the news body, the second {} the title (left empty at
# inference time so the model continues right after "### Title:").
# The instruction sentence used to end with a stray double quote that was sent
# to the model as part of every prompt; it has been removed.
data_prompt = (
    "You are an advanced AI system specialised in providing Reuters News title given a body text of the news. \n"
    "The title should be in capital letters and between 6 and 8 words in length. "
    "Please provide only the title as output and no other text or explanation.\n"
    "\n"
    "\n"
    "### Input text:\n"
    "{}\n"
    "\n"
    "### Title:\n"
    "{}"
)

# End-of-sequence marker appended to every training prompt.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompt_training(examples):
    """Build the full training prompt for a batch of examples.

    Args:
        examples: Batched mapping with a "text" column (news bodies) and a
            "title" column (reference titles).

    Returns:
        A dict with a single "modified_text" key: one string per example,
        made of the filled-in ``data_prompt`` followed by ``EOS_TOKEN``.
    """
    prompts = [
        data_prompt.format(body, title) + EOS_TOKEN
        for body, title in zip(examples["text"], examples["title"])
    ]
    return {"modified_text": prompts}

In [None]:
# Add the "modified_text" column (full training prompt) to every training row.
train_data = train_data.map(function=formatting_prompt_training, batched=True)

In [None]:
# Show the complete training prompt built for example 11.
print(train_data[11]['modified_text'])

# Inference prior to model fine-tuning

In [None]:
def formatting_prompt_inference(examples):
    """Build inference prompts: the template filled with the body and an empty title.

    Args:
        examples: Mapping (or dataset) whose "text" entry yields the news bodies.

    Returns:
        A list of prompt strings, one per body, each ending right after the
        "### Title:" header of ``data_prompt``.
    """
    return [data_prompt.format(body, "") for body in examples["text"]]

In [None]:
# Inference prompts (empty title slot) for every test example.
formatted_test_texts = formatting_prompt_inference(test_data)

In [None]:
# Show one inference prompt to check the template was filled as expected.
print(formatted_test_texts[1])

In [None]:
# Put the base model into Unsloth's inference mode before generating.
original_model_inference = FastLanguageModel.for_inference(original_model)

# One generated title per test prompt, produced by the model before fine-tuning.
answers = []
for test_example in formatted_test_texts:
    inputs = tokenizer([test_example], return_tensors="pt").to("cuda")
    prompt_length = inputs["input_ids"].shape[1]

    outputs = original_model_inference.generate(**inputs, max_new_tokens=50, use_cache=True)

    # Decode only the tokens generated after the prompt. Splitting the full
    # decoded text on "### Title:" picks the wrong segment whenever the model
    # repeats that marker, and skip_special_tokens drops the end-of-text token
    # without hard-coding its spelling.
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    answers.append(answer)

In [None]:
# Put the article, reference title, prompt and base-model output side by side.
df = pd.DataFrame(
    {
        "original_text": test_data["text"],
        "title": test_data["title"],
        "prompt_text": formatted_test_texts,
        "original_model_generated_title": answers,
    }
)
df

In [None]:
# Reference title vs. base-model output for test row 10.
print(df["title"].iloc[10])
print(df["original_model_generated_title"].iloc[10])

# PEFT model

In [None]:
# Attach LoRA adapters to the attention and MLP projection layers of the base model.
peft_model = FastLanguageModel.get_peft_model(
    original_model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True, # Rank-stabilized LoRA
    use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
    random_state = 32,
    loftq_config = None,
)
# print_trainable_parameters() prints its own summary and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
peft_model.print_trainable_parameters()

# Start model fine-tuning

In [None]:
import os

# Unsloth logits are empty from 2024.11 onwards; setting the environment
# variable `UNSLOTH_RETURN_LOGITS` to "1" makes the raw logits available again.
os.environ["UNSLOTH_RETURN_LOGITS"] = "1"

trainer = SFTTrainer(
    model=peft_model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    args=SFTConfig(
        # --- data handling ---
        dataset_text_field="modified_text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,
        # --- optimisation ---
        optim="adamw_8bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        warmup_steps=1,
        weight_decay=0.01,
        # --- schedule / batch size ---
        num_train_epochs=1,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        # --- precision: bf16 when the hardware supports it, fp16 otherwise ---
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        # --- bookkeeping ---
        seed=0,
        logging_steps=1,
        report_to="tensorboard",
        output_dir="./results",
    ),
)

trainer.train()

# Inference after fine-tuning

In [None]:
# Put the fine-tuned model into Unsloth's inference mode before generating.
peft_model_inference = FastLanguageModel.for_inference(peft_model)

# One generated title per test prompt, produced by the fine-tuned model.
peft_answers = []
for test_example in formatted_test_texts:
    inputs = tokenizer([test_example], return_tensors="pt").to("cuda")
    prompt_length = inputs["input_ids"].shape[1]

    outputs = peft_model_inference.generate(**inputs, max_new_tokens=50, use_cache=True)

    # Decode only the tokens generated after the prompt. Splitting the full
    # decoded text on "### Title:" picks the wrong segment whenever the model
    # repeats that marker, and skip_special_tokens drops the end-of-text token
    # without hard-coding its spelling.
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    peft_answers.append(answer)

In [None]:
# Append the fine-tuned model's titles as a new column next to the earlier results.
df = df.assign(peft_model_generated_title=peft_answers)
df

In [None]:
# Reference title, base-model output and fine-tuned output for test row 10.
print(df["title"].iloc[10])
print(df["original_model_generated_title"].iloc[10])
print(df["peft_model_generated_title"].iloc[10])

# Exercise: Rouge scores

Compute the ROUGE scores, measured against the reference titles, for the titles generated with the original model (no fine-tuning) and for the titles generated with the fine-tuned model.

In [None]:
# Pinned like the Unsloth install so the notebook stays reproducible.
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install evaluate==0.4.3 rouge_score==0.1.2

In [None]:
import evaluate
import nltk
from nltk.tokenize import sent_tokenize

# sent_tokenize needs the Punkt sentence-tokenizer data, which is not bundled
# with the nltk package; without it a fresh kernel fails with LookupError.
# Newer NLTK releases look for "punkt_tab", older ones for "punkt", so fetch both
# (nltk.download reports a missing or unreachable resource instead of raising).
nltk.download("punkt", quiet=True)
nltk.download("punkt_tab", quiet=True)

rouge_score = evaluate.load("rouge")

In [None]:
def compute_rouge_score(generated, reference):
    """Score generated texts against references with the loaded ROUGE metric.

    Each text is stripped, split into sentences and re-joined with newlines
    before being passed to ``rouge_score.compute`` with stemming enabled.

    Args:
        generated: Iterable of generated strings.
        reference: Iterable of reference strings, aligned with ``generated``.

    Returns:
        Whatever ``rouge_score.compute`` returns for these inputs.
    """

    def _one_sentence_per_line(texts):
        # Strip surrounding whitespace, then put every sentence on its own line.
        return ["\n".join(sent_tokenize(text.strip())) for text in texts]

    return rouge_score.compute(
        predictions=_one_sentence_per_line(generated),
        references=_one_sentence_per_line(reference),
        use_stemmer=True,
    )

In [None]:
# ROUGE of the base model's titles, then of the fine-tuned model's titles,
# both measured against the reference titles.
print(compute_rouge_score(df["original_model_generated_title"], df["title"]))
print(compute_rouge_score(df["peft_model_generated_title"], df["title"]))