# Finetune a T5 Transformer model on the California BillSum dataset


In [None]:
!pip install -q transformers datasets evaluate rouge_score accelerate bitsandbytes loralib peft

# Load BillSum Dataset from the Datasets library

In [None]:
from datasets import load_dataset

# Load only the "ca_test" split of BillSum
billsum = load_dataset("billsum", split="ca_test")

In [None]:
# Split the dataset into a train (80%) and test (20%) set.
# A fixed seed keeps the split identical across notebook re-runs, so
# evaluation scores from different runs are computed on the same test rows.
billsum = billsum.train_test_split(test_size=0.2, seed=42)

In [None]:
# Look at the data
print(billsum)

# The text column is our model input
# The summary column is our model target
# Fetch the first row and read both fields from it, rather than selecting a
# whole column just to display a single element.
first_example = billsum['train'][0]
print("ORIGINAL TEXT:\n", first_example['text'], "\n")
print("SUMMARY:\n", first_example['summary'], "\n")

In [None]:
# Preprocessing
## Load the tokenizer for the pretrained checkpoint (the model itself is loaded later)
from transformers import AutoTokenizer

# Checkpoint name used to fetch the tokenizer
model_checkpoint = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Make a PEFT Model

In [None]:
# torch supplies the compute dtype referenced by the quantization config below
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType

model_name = "t5-small"
tokenizer_name = "t5-small"

# Create a config corresponding to the PEFT method (LoRA)
peft_config = LoraConfig(
    # T5 is an encoder-decoder model loaded with AutoModelForSeq2SeqLM, so the
    # adapter must be set up for sequence-to-sequence language modelling
    # rather than causal language modelling.
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

nf4_config = BitsAndBytesConfig(
    # Load Model in 4bit precision
    load_in_4bit=True,
    # use normalized float 4 (default)
    bnb_4bit_quant_type="nf4",
    # uses a second quantization after the first one to save an additional 0.4 bits per parameter
    bnb_4bit_use_double_quant=True,
    # Format in which computations will occur
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Load the quantized base model and wrap it with the LoRA adapter in one step
model = get_peft_model(
    AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        quantization_config=nf4_config,
        device_map="auto",
    ),
    peft_config,
)

In [None]:
# Show the module structure of the wrapped model
print(model)

In [None]:
# Report how many of the wrapped model's parameters are trainable
model.print_trainable_parameters()

# Preprocessing

We need to create a preprocess function that we will apply to every instance in the dataset. The preprocess function needs to:

1. Prefix input with a prompt so T5 knows it's performing a summarisation task.

2. Use the `text_target` keyword argument when tokenizing labels

3. Truncate sequences to be no longer than the maximum length set by the `max_length` parameter

After applying the preprocessing to every instance in the dataset, we create batches of examples using a data collator function. This is because model training must be in batches.

In [None]:
def preprocess_function(
    examples,
    prefix: str = "summarize: ",
    *,
    max_input_length: int = 1024,
    max_target_length: int = 123,
):
    """Tokenize a batch of examples into model inputs and labels.

    Args:
        examples: A batch holding a list of documents under "text" and a
            list of reference summaries under "summary".
        prefix: Task prompt prepended to every document.
        max_input_length: Inputs are truncated to at most this many tokens.
        max_target_length: Summaries are truncated to at most this many tokens.

    Returns:
        The tokenizer output for the prefixed documents, with the token ids
        of the tokenized summaries stored under the "labels" key.
    """
    # Prepend the prefix to every document in the batch
    inputs = [prefix + doc for doc in examples["text"]]

    # Tokenize the inputs (padding is left to the data collator)
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

    # Tokenize the target summaries; `text_target` marks them as labels
    labels = tokenizer(
        text_target=examples["summary"],
        max_length=max_target_length,
        truncation=True,
    )

    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

In [None]:
# Run the preprocessing over both splits; batched=True hands the function
# several examples at a time instead of one by one
tokenized_billsum = billsum.map(preprocess_function, batched=True)

In [None]:
# Display the tokenized dataset (notebook cell output)
tokenized_billsum

In [None]:
# Create a batch of examples, with dynamic padding. Use the appropriate collator function
from transformers import DataCollatorForSeq2Seq

# Pass the model object defined above: the name `checkpoint` does not exist in
# this notebook, so the previous call raised a NameError.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Evaluate
We need to monitor a metric during training to see how well our model is doing. We use the `evaluate` library to load an evaluation metric; here we use ROUGE.

With the evaluation metric loaded, we must define a function that takes the model predictions and labels and computes the metric. This is usually called the `compute_metrics` function.

In [None]:
import evaluate

# Load the ROUGE metric; compute_metrics below uses it to score predictions
rouge = evaluate.load("rouge")

In [None]:
import numpy as np

def compute_metrics(eval_pred):
    """Compute ROUGE scores and the mean generated length for an evaluation run.

    Args:
        eval_pred: A pair ``(predictions, labels)`` of token-id arrays.

    Returns:
        A dict with the ROUGE results plus ``gen_len`` (mean number of
        non-padding tokens per prediction), all rounded to 4 decimals.
    """
    predictions, labels = eval_pred
    pad_id = tokenizer.pad_token_id

    # -100 is a filler value for padded positions, not a real token id, so it
    # cannot be decoded. Swap it for the pad token in BOTH arrays; previously
    # only the labels were cleaned, which left any -100 in the predictions to
    # reach batch_decode and to be counted as a generated token in gen_len.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    # Convert token ids back to text
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
    )

    # Count non-padding tokens in each prediction
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

# Train using the Trainer API

The main training steps are:

1. Define training hyperparameters using the `Seq2SeqTrainingArguments` class. At the end of each epoch, the Trainer will evaluate the ROUGE metric and save the training checkpoint.

2. Pass the training arguments to `Seq2SeqTrainer` along with the **model**, **dataset**, **tokenizer**, **data collator** and **compute metrics** function

3. Call train() to finetune the model

In [None]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

In [None]:
# Hyperparameters and run configuration for fine-tuning
training_args = Seq2SeqTrainingArguments(
    # Output location and checkpoint handling
    output_dir="billsum_summariser",
    save_total_limit=3,
    push_to_hub=False,
    # Optimisation settings
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=4,
    per_device_train_batch_size=16,
    fp16=True,
    # Evaluation: once per epoch, generating summaries for compute_metrics
    evaluation_strategy="epoch",
    per_device_eval_batch_size=16,
    predict_with_generate=True,
)

# Bundle the model, the data and the helpers into a trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_billsum["train"],
    eval_dataset=tokenized_billsum["test"],
)

# Run the fine-tuning loop
trainer.train()

In [None]:
# Save the trained model to the directory that the inference cells load from
trainer.save_model("billsum_summariser_model")

# Inference

Use the fine-tuned model for inference: load the saved adapter on top of the base model, tokenize the input text, and call `generate` directly.

In [None]:
from peft import PeftModel, PeftConfig

peft_model_id = "billsum_summariser_model"
# The saved adapter config records which base checkpoint the adapter was
# trained on; read it from there instead of hard-coding the name again.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the quantized base model, then attach the saved adapter weights
model = AutoModelForSeq2SeqLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=nf4_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
# NOTE(review): the model was loaded with device_map="auto" and a 4-bit
# quantization config — confirm that an explicit .to(device) is still needed
# and supported by the installed library versions.
device = "cuda"
model = model.to(device)
# Switch the model to evaluation mode before generating
model.eval()

In [None]:
# Example input for inference; it carries the same "summarize: " prefix that
# preprocess_function prepends to the training documents
text = """
summarize: This conceptual guide gives a brief overview of LoRA, a technique that accelerates the fine-tuning of large models while consuming less memory. To make fine-tuning more efficient, LoRA’s approach is to represent the weight updates with two smaller matrices (called update matrices) through low-rank decomposition. These new matrices can be trained to adapt to the new data while keeping the overall number of changes low. The original weight matrix remains frozen and doesn’t receive any further adjustments. To produce the final results, both the original and the adapted weights are combined.
"""

In [None]:
# Inference Pipeline using Pytorch
from transformers import AutoTokenizer

# Reload the tokenizer saved next to the fine-tuned model and encode the text
tokenizer = AutoTokenizer.from_pretrained("billsum_summariser_model")
inputs = tokenizer(text, return_tensors="pt")["input_ids"]

# Show the raw text next to its token ids
print(text)
print(inputs)

In [None]:
import torch

with torch.no_grad():
    # Sample up to 100 new tokens from the model, without tracking gradients
    outputs = model.generate(
        input_ids=inputs.to(device),
        max_new_tokens=100,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )

In [None]:
# Decode the first generated sequence of token ids back into text,
# dropping special tokens
tokenizer.decode(outputs[0], skip_special_tokens=True)