In [None]:
# Make sure these are installed
!pip install transformers datasets evaluate sentencepiece
!pip install peft bitsandbytes # <-- NEW: Install PEFT and bitsandbytes



In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    pipeline,
    BitsAndBytesConfig # For 4-bit quantization
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    PeftModel # For loading the saved model
)

# Set before any trainer is created so that the wandb login prompt does not
# interrupt the run.
os.environ["WANDB_DISABLED"] = "true"

# Load the SAMSum dialogue/summary dataset ("knkarthick/samsum") through
# `datasets.load_dataset`. The printed DatasetDict lists the available splits
# (train / validation / test) together with their columns and row counts.
dataset = load_dataset("knkarthick/samsum")
print("Dataset loaded:")
print(dataset)

Dataset loaded:
DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 14731
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 818
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 819
    })
})


In [None]:
# Load the tokenizer that belongs to the "t5-small" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("t5-small")
# Task prefix that preprocess_function_samsum prepends to every dialogue.
prefix = "summarize: "

# Truncation limits used by preprocess_function_samsum:
# dialogues are cut to max_input_length tokens (T5's max length is 512),
# summaries to max_target_length tokens.
max_input_length = 512
max_target_length = 128

def preprocess_function_samsum(examples):
    """Tokenize one batch of SAMSum rows for seq2seq fine-tuning.

    Args:
        examples: Batch mapping as handed over by ``Dataset.map(batched=True)``.
            The ``"dialogue"`` and ``"summary"`` columns are read.

    Returns:
        The tokenizer output for the prefixed dialogues, extended with a
        ``"labels"`` entry that holds the token ids of the summaries.

    Note:
        Sequences are truncated but deliberately NOT padded here. Padding to
        ``max_length`` at this stage would put pad-token ids into ``labels``
        (so the loss would also be computed on padding) and would blow every
        input up to ``max_input_length``. Padding is left to the seq2seq data
        collator, which pads per batch and fills label padding with its
        ignore index.
    """
    # Prepare the inputs: task prefix + dialogue text.
    inputs = [prefix + doc for doc in examples["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

    # Prepare the labels from the reference summaries. `text_target` replaces
    # the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(
        text_target=examples["summary"],
        max_length=max_target_length,
        truncation=True,
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply the preprocessing to every split of the DatasetDict
# (14,731 train / 818 validation / 819 test rows, not just the train split).
# batched=True passes a batch of rows to preprocess_function_samsum per call;
# the original columns are kept next to the newly added token columns.
tokenized_dataset = dataset.map(preprocess_function_samsum, batched=True)

print("\nTokenizing complete.")
print(tokenized_dataset)

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Map:   0%|          | 0/14731 [00:00<?, ? examples/s]



Map:   0%|          | 0/818 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]


Tokenizing complete.
DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14731
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 818
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 819
    })
})


In [None]:
# Define a helper function to see the parameter reduction
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, e.g. a ``torch.nn.Module``.

    Prints a single line of the form
    ``trainable params: N || all params: M || trainable%: P``.

    bitsandbytes 4-bit weights (``Params4bit``) are stored packed, so their
    ``numel()`` is smaller than the number of logical weights. They are
    scaled back up here (the same correction PEFT applies in its own
    counter); otherwise the total is under-reported and the trainable
    percentage looks larger than it is.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        if param.__class__.__name__ == "Params4bit":
            # Two 4-bit values are packed into each byte of the storage tensor.
            num_params = num_params * 2 * param.element_size()
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params

    # Guard against a model without parameters instead of dividing by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || "
        f"trainable%: {trainable_pct:.2f}"
    )

# --- LoRA adapter settings -------------------------------------------------
# Rank-16 adapters (alpha 32, dropout 0.1, no bias training) attached to the
# modules named "q" and "v", declared as a sequence-to-sequence LM task.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# --- 4-bit quantization settings -------------------------------------------
# NF4 quantization with double quantization; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# --- Quantized base model wrapped with the adapters --------------------------
# "t5-small" is loaded with the quantization config above (device placement is
# delegated to device_map="auto") and handed straight to PEFT.
model = get_peft_model(
    AutoModelForSeq2SeqLM.from_pretrained(
        "t5-small",
        quantization_config=bnb_config,
        device_map="auto",
    ),
    lora_config,
)

# Report how small the trainable share is.
print("Model prepared with QLoRA:")
print_trainable_parameters(model)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Model prepared with QLoRA:
trainable params: 589824 || all params: 45367808 || trainable%: 1.30


In [None]:
# Training configuration: checkpoints go to ./t5-samsum-qlora-results,
# evaluation runs once per epoch, nothing is reported to external trackers.
training_args = Seq2SeqTrainingArguments(
    output_dir="./t5-samsum-qlora-results",
    eval_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    save_total_limit=3,  # keep at most three checkpoints on disk
    report_to="none",
    num_train_epochs=8,
    predict_with_generate=True,
    # Mixed-precision training.
    # NOTE(review): the quantization config elsewhere in this notebook uses
    # bfloat16 as compute dtype; confirm fp16 is the intended choice here.
    fp16=True,
)

# Collator that assembles tokenized examples into batches, padding inputs and
# labels per batch where needed.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Build the trainer around the PEFT-wrapped model.
# The tokenizer is passed as `processing_class`: the former `tokenizer=`
# keyword of the trainer is deprecated and triggers a warning on this call.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    processing_class=tokenizer,
    data_collator=data_collator,
)

  trainer = Seq2SeqTrainer(


In [None]:
# Run the fine-tuning loop configured on `trainer`; the two prints bracket
# the (long-running) call so its start and end are visible in the output.
print("Starting T5 QLoRA training on SAMSum...")
trainer.train()
print("Training finished.")

# Persist the result to ./t5-samsum-qlora-model, the directory the inference
# cell later loads the adapter from.
# NOTE(review): for a PEFT-wrapped model this presumably writes only the small
# adapter weights (plus tokenizer files), not the full base model - confirm.
trainer.save_model("./t5-samsum-qlora-model")

Starting T5 QLoRA training on SAMSum...


Epoch,Training Loss,Validation Loss
1,0.474,0.421454
2,0.4572,0.40835
3,0.4347,0.401624
4,0.4433,0.399663
5,0.4383,0.394907
6,0.4341,0.393524
7,0.4269,0.393854
8,0.4261,0.392746


Training finished.


In [None]:
# `pipeline`, `PeftModel` and `AutoModelForSeq2SeqLM` come from the import
# cell at the top of the notebook; they are not re-imported here.

print("\n--- Testing with QLoRA-fine-tuned model ---")

# Rebuild the model for inference: load the plain base checkpoint, attach the
# adapter saved in ./t5-samsum-qlora-model, then merge the adapter into the
# base weights so the result can be used without the PEFT wrapper.
base_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
model = PeftModel.from_pretrained(base_model, "./t5-samsum-qlora-model")
model = model.merge_and_unload()

# Summarization pipeline around the merged model.
summarizer_pipe = pipeline("summarization", model=model, tokenizer=tokenizer)

# Take one example from the test split (fetched once, used for both the
# dialogue and its reference summary).
test_sample = dataset["test"][10]
test_dialogue = test_sample["dialogue"]

print("\n--- Testing with a new dialogue ---")
print("\nORIGINAL DIALOGUE:")
print(test_dialogue)

# Generate the summary greedily. The length cap is given as `max_new_tokens`:
# with `max_length=100` the pipeline's default `max_new_tokens` took
# precedence (see the generation warning), so the intended cap was ignored.
summary = summarizer_pipe(test_dialogue, max_new_tokens=100, min_length=10, do_sample=False)

print("\nGENERATED SUMMARY:")
print(summary[0]['summary_text'])

print("\nACTUAL HUMAN SUMMARY:")
print(test_sample["summary"])


--- Testing with QLoRA-fine-tuned model ---


Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



--- Testing with a new dialogue ---

ORIGINAL DIALOGUE:
Wanda: Let's make a party!
Gina: Why?
Wanda: beacuse. I want some fun!
Gina: ok, what do u need?
Wanda: 1st I need too make a list
Gina: noted and then?
Wanda: well, could u take yours father car and go do groceries with me?
Gina: don't know if he'll agree
Wanda: I know, but u can ask :)
Gina: I'll try but theres no promisess
Wanda: I know, u r the best!
Gina: When u wanna go
Wanda: Friday?
Gina: ok, I'll ask

GENERATED SUMMARY:
Gina wants Wanda to make a party on Friday. Wanda will take her father's car and go do groceries with her.

ACTUAL HUMAN SUMMARY:
Wanda wants to throw a party. She asks Gina to borrow her father's car and go do groceries together. They set the date for Friday. 
