# Summarization with Transformers and PEFT
This notebook demonstrates how to use transformers for text summarization, including the use of PEFT (Parameter-Efficient Fine-Tuning) to enhance the model with LoRA (Low-Rank Adaptation).

## Install Necessary Libraries
First, we'll install the necessary libraries.


In [None]:


!pip install --upgrade pip
!pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

!pip install \
    transformers==4.27.2 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0.3.0 --quiet




## Import Libraries
We'll import the necessary libraries for loading datasets, defining models, training, and evaluation.


In [None]:

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
from peft import LoraConfig, get_peft_model, TaskType, PeftModel, PeftConfig


## Load the Dataset
Load the DialogSum dataset from Hugging Face.


In [None]:


# Hub identifier of the DialogSum dataset used throughout the notebook.
huggingface_dataset_name = "knkarthick/dialogsum"

# Later cells read the 'train', 'validation', and 'test' splits of this object.
dataset = load_dataset(path=huggingface_dataset_name)




## Load the Model and Tokenizer
Load the FLAN-T5 model and tokenizer from Hugging Face.



In [None]:


# Single checkpoint name shared by the tokenizer and the model.
model_name = 'google/flan-t5-base'

# The two loads are independent of each other; the tokenizer is fetched first.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence-to-sequence model, with bfloat16 requested as the weight dtype.
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)



## Function to Print Trainable Parameters
Define a function to print the number of trainable parameters in the model.


In [None]:


def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function does not print anything; it returns the
    report so the caller can decide how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count, and the trainable share as a percentage with two
        decimals. A model without any parameters reports ``0.00%`` instead of
        raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # Guard the division so that a parameter-free model yields 0% rather than crashing.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0

    return (f"trainable model parameters: {trainable_model_params}\n"
            f"all model parameters: {all_model_params}\n"
            f"percentage of trainable model parameters: "
            f"{trainable_percentage:.2f}%")

# Parameter counts of the base model before any adapter is attached.
baseline_parameter_report = print_number_of_trainable_model_parameters(original_model)
print(baseline_parameter_report)





## Prepare a Sample for Testing
Select a sample from the test set to evaluate the model.


In [None]:



# Test-set example used for the qualitative comparisons in this notebook.
index = 200
test_sample = dataset['test'][index]
dialogue = test_sample['dialogue']
summary = test_sample['summary']

# Same text as the triple-quoted template: a leading newline, the instruction,
# the dialogue, and a trailing "Summary:" line.
prompt = (
    "\n"
    "Summarize the following conversation.\n"
    "\n"
    f"{dialogue}\n"
    "\n"
    "Summary:\n"
)

# Zero-shot generation with the base model.
inputs = tokenizer(prompt, return_tensors='pt')
zero_shot_token_ids = original_model.generate(
    inputs["input_ids"],
    max_new_tokens=200,
)
output = tokenizer.decode(zero_shot_token_ids[0], skip_special_tokens=True)

# Separator line reused by later cells.
dash_line = '-' * 100
report_sections = [
    dash_line,
    f"INPUT PROMPT:\n{prompt}",
    dash_line,
    f"BASELINE HUMAN SUMMARY:\n{summary}\n",
    dash_line,
    f"MODEL GENERATION - ZERO SHOT:\n{output}",
]
print("\n".join(report_sections))





## Tokenize the Dataset
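Define a function that wraps each dialogue in the summarization prompt and tokenizes both the prompt and the reference summary. Only every 100th example of each split is kept so that the demo runs quickly.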



In [None]:


def tokenize_function(example):
    """Tokenize a batch of dialogues and summaries for seq2seq training.

    Intended for ``dataset.map(..., batched=True)``: ``example["dialogue"]``
    and ``example["summary"]`` are lists of strings.

    Args:
        example: Batch mapping with at least the ``"dialogue"`` and
            ``"summary"`` columns.

    Returns:
        The same mapping with two added entries:
        ``"input_ids"`` - token ids of the prompt-wrapped dialogues, padded
        and truncated to the tokenizer's maximum length;
        ``"labels"`` - token ids of the summaries, padded and truncated the
        same way, with every padding position set to ``-100``.
    """
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids

    labels = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    # Label positions equal to -100 are skipped by the model's loss. Without this
    # masking, most of the loss would come from predicting padding tokens.
    labels[labels == tokenizer.pad_token_id] = -100
    example['labels'] = labels
    return example

# Keep every 100th example of each split *before* tokenizing. The filter only
# looks at the row index, so the surviving rows are the same as when filtering
# after the map, but the tokenizer no longer processes rows that are discarded.
subsampled_dataset = dataset.filter(lambda example, index: index % 100 == 0, with_indices=True)

# Tokenize the kept rows, then drop the raw text columns that are no longer needed.
tokenized_datasets = subsampled_dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary'])

print("Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")
print(tokenized_datasets)





## Set Up Training Arguments and Train the Model
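Configure training arguments and train the model using the Trainer class. Note that `max_steps=1` limits the run to a single training step, so this cell demonstrates the workflow rather than producing a well-trained model.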



In [None]:


# Timestamped output directory so repeated runs do not overwrite each other.
run_timestamp = int(time.time())
output_dir = f'./dialogue-summary-training-{run_timestamp}'

# Full fine-tuning configuration. `max_steps` takes precedence over
# `num_train_epochs`, so this run stops after a single step.
training_args = TrainingArguments(
    output_dir=output_dir,
    max_steps=1,
    num_train_epochs=1,
    learning_rate=1e-5,
    weight_decay=0.01,
    logging_steps=1,
)

trainer = Trainer(
    args=training_args,
    model=original_model,
    eval_dataset=tokenized_datasets['validation'],
    train_dataset=tokenized_datasets['train'],
)

trainer.train()





## Evaluate the Model
Generate outputs for a sample from the test set and compare them with the baseline human summaries.



In [None]:


# The Trainer may have moved the model to an accelerator, while the tokenizer
# returns CPU tensors. Placing the input on the model's device avoids a device
# mismatch error in `generate`.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(original_model.device)

# num_beams=1 keeps generation to a single beam.
original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

print(dash_line)
print(f"BASELINE HUMAN SUMMARY:\n{summary}")
print(dash_line)
print(f"ORIGINAL MODEL:\n{original_model_text_output}")





## Load the ROUGE Metric and Compute Scores
Compute ROUGE scores to evaluate the model's performance.


In [None]:



rouge = evaluate.load('rouge')

# Score the first ten test dialogues against their human-written summaries.
dialogues = dataset['test'][:10]['dialogue']
human_baseline_summaries = dataset['test'][:10]['summary']
original_model_summaries = []

# Built once; the same settings apply to every dialogue.
rouge_generation_config = GenerationConfig(max_new_tokens=200)

# The loop uses its own `rouge_*` names on purpose. Reusing `prompt`, `dialogue`,
# `input_ids`, or `original_model_text_output` here would overwrite the values
# of the single test sample that other cells print next to `summary`.
for rouge_dialogue in dialogues:
    # NOTE(review): the indentation inside this triple-quoted string is part of
    # the prompt text; it is kept unchanged here.
    rouge_prompt = f"""
    Summarize the following conversation.

    {rouge_dialogue}

    Summary:
    """
    # Place the input on the model's device (the model may be on an accelerator).
    rouge_input_ids = tokenizer(rouge_prompt, return_tensors="pt").input_ids.to(original_model.device)
    rouge_outputs = original_model.generate(input_ids=rouge_input_ids, generation_config=rouge_generation_config)
    original_model_summaries.append(tokenizer.decode(rouge_outputs[0], skip_special_tokens=True))

# Side-by-side table of reference and generated summaries for inspection.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries))
df = pd.DataFrame(zipped_summaries, columns=['human_baseline_summaries', 'original_model_summaries'])

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True
)

print('ORIGINAL MODEL:')
print(original_model_results)





## Add LoRA Adapter Layers
Add LoRA adapter layers/parameters to the original model to be trained with PEFT.


In [None]:



# LoRA settings: adapters of rank 32 on the modules named "q" and "v",
# with no bias terms trained.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # FLAN-T5
    target_modules=["q", "v"],
    r=32,  # Rank
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the model with the adapters and report how many parameters now train.
peft_model = get_peft_model(original_model, lora_config)
print(print_number_of_trainable_model_parameters(peft_model))





## Set Up Training Arguments and Train the PEFT Model
Configure training arguments and train the PEFT model using the Trainer class.



In [None]:


# Timestamped output directory for the PEFT run.
peft_run_timestamp = int(time.time())
output_dir = f'./peft-dialogue-summary-training-{peft_run_timestamp}'

# `max_steps` takes precedence over `num_train_epochs`, so this run also stops
# after a single step.
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    max_steps=1,
    num_train_epochs=1,
    learning_rate=1e-3,  # Higher learning rate than full fine-tuning.
    auto_find_batch_size=True,
    logging_steps=1,
)

peft_trainer = Trainer(
    args=peft_training_args,
    model=peft_model,
    eval_dataset=tokenized_datasets["validation"],
    train_dataset=tokenized_datasets["train"],
)

peft_trainer.train()




## Save the Trained PEFT Model and Tokenizer
Save the trained PEFT model and tokenizer for future use.


In [None]:



# Directory that receives both the trained PEFT model and the tokenizer files.
peft_model_path = "./peft-dialogue-summary-checkpoint-local"

# Save the model held by the PEFT trainer, then the tokenizer, side by side.
trained_peft_model = peft_trainer.model
trained_peft_model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)





## Load the Trained PEFT Model
Load the trained PEFT model and tokenizer for evaluation.



In [None]:


# Reload the tokenizer and a fresh copy of the base checkpoint; the two loads
# are independent of each other.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
peft_model_base = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-base",
    torch_dtype=torch.bfloat16,
)

# Attach the saved adapter for inference only (is_trainable=False).
peft_model = PeftModel.from_pretrained(
    peft_model_base,
    "./peft-dialogue-summary-checkpoint-local",
    torch_dtype=torch.bfloat16,
    is_trainable=False,
)
print(print_number_of_trainable_model_parameters(peft_model))





## Evaluate the PEFT Model
Generate outputs for a sample from the test set using the PEFT model and compare them with the baseline human summaries and original model outputs.




In [None]:
# Rebuild the model input from the test sample that `summary` belongs to
# (`index`), rather than relying on whatever `input_ids` an earlier cell left
# behind. The generated text is then guaranteed to match the printed summary.
peft_sample_dialogue = dataset['test'][index]['dialogue']
peft_sample_prompt = f"""
Summarize the following conversation.

{peft_sample_dialogue}

Summary:
"""
# Put the input on the same device as the PEFT model's weights.
peft_model_device = next(peft_model.parameters()).device
peft_input_ids = tokenizer(peft_sample_prompt, return_tensors="pt").input_ids.to(peft_model_device)

peft_model_outputs = peft_model.generate(input_ids=peft_input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

# `original_model_text_output` is reused from the earlier evaluation cell; it is
# only comparable if it still holds the generation for this same sample.
print(dash_line)
print(f"BASELINE HUMAN SUMMARY:\n{summary}")
print(dash_line)
print(f"ORIGINAL MODEL:\n{original_model_text_output}")
print(dash_line)
print(f"PEFT MODEL:\n{peft_model_text_output}")
