### 1. Installing needed libraries

In [0]:
%run utils/Python/storageaccount

In [0]:
!pip install --disable-pip-version-check torch
!pip install --disable-pip-version-check torchdata

In [0]:
!pip install transformers
!pip install datasets==2.18.0 --quiet
!pip install evaluate==0.4.0 \
  rouge_score==0.1.2 \
    loralib==0.1.1 \
      peft==0.3.0 --quiet

In [0]:
import datasets
print(datasets.__version__)

In [0]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

### 2. Improting required libraries, dataset of DialogueSum & Flan_t5 model

In [0]:
hugging_dataset = 'knkarthick/dialogsum'
dataset = load_dataset(hugging_dataset)

In [0]:
### Reading sample of dialogues
sample_indices = [50, 123]
dash_line = '_'.join('' for x in range(100))
for i, index in enumerate(sample_indices):
  print(dash_line)
  print('sample ', i+1)
  print(dash_line)
  print('Input dialogue:')
  print(dataset['test'][index]['dialogue'])
  print(dash_line)
  print('human summary:')
  print(dataset['test'][index]['summary'])
  print(dash_line)
  print()

In [0]:
## Reading Flan T5 model
model_name = 'google/flan-t5-base'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype = torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name) #, use_fast = True

######========================================================================================================================================
### 3. Fine tuning the model to improve the performance
#####3.1. Firstly, checking the model without any shot inference
#####3.2. Secondly, full fine tuning the model, instruct fine tuning, with all trainable parameters which is more than 247m parameters
#####3.3. Thirdly, fine tuning the model using PEFT which uses 3.5 m parameters (way less than instruct fine tuning)
######========================================================================================================================================

##### 3.1. Test the model with zero shot inference

In [0]:
index = 222
dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.
{dialogue}
Summary :
"""

dash_line = '_'.join('' for x in range(100))
inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(model.generate(inputs['input_ids'], max_new_tokens=50,)[0], skip_special_tokens=True)

print(dash_line)
print('Input prompt: \n', prompt)
print(dash_line)
print('Baseline human summary: \n', summary)
print(dash_line)
print('Model generation - with Zero shot: \n', output)

In [0]:
def print_number_of_trainable_model_parameters(model):
  trainable_model_params = 0
  all_model_params = 0
  for _,param in model.named_parameters():
    all_model_params += param.numel()
    if param.requires_grad:
      trainable_model_params += param.numel()
  return f"trianbale model parameters: {trainable_model_params}\nall model params: {all_model_params}\npercentage of trainable model parameters : {np.round(trainable_model_params/all_model_params*100,2)} %"

print(print_number_of_trainable_model_parameters(model))

##### 3.2. perform full fine tuning

In [0]:
def tokenize_function(example):
  start_prompt = 'Summarize the following conversation. \n\n'
  end_prompt = '\n\nSummary: '
  prompt = [start_prompt + dialogue + end_prompt for dialogue in example['dialogue']]
  example['input_ids'] = tokenizer(prompt, padding='max_length', truncation = True, return_tensors = 'pt').input_ids
  example['labels'] = tokenizer(example['summary'], padding='max_length', truncation = True, return_tensors = 'pt').input_ids
  return example

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary'])

In [0]:
### choosing subsample of dataset
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100==0, with_indices=True)

In [0]:
### shapes of three parts of the dataset
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")
print(tokenized_datasets)

In [0]:
### After training the model, it will turn into the instruct model which is fune tuned
output_dir = 'XXX/instruct_dialogue-summary-training-'+str(int(time.time()))

training_args = TrainingArguments(output_dir, learning_rate=1e-5, num_train_epochs=40, weight_decay=0.01, logging_steps=1, max_steps = 4)

trainer = Trainer(model, args=training_args, train_dataset=tokenized_datasets['train'], eval_dataset=tokenized_datasets['validation'])

In [0]:
trainer.train()
trainer.save_model('YYY')

Info : Instruct model is the model which is trained above. If it is saved, it can be retrieved by below cell.

In [0]:
#from transformers import AutoModelForSequenceClassification, T5ForConditionalGeneration
#instruct_model = T5ForConditionalGeneration.from_pretrained("YYY")

In [0]:
### the original model is Flan-T5 without fine tuning
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype = torch.bfloat16)
original_tokenizer = AutoTokenizer.from_pretrained(model_name)

In [0]:
### Now comparing the original model and instruct model (fine tuned)
index = 200
dialogue = dataset['test'][index]['dialogue']
human_baseline_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt, return_tensors='pt').input_ids
original_model_output = original_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_output[0], skip_special_tokens= True)

instruct_model_output = model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200, num_beams=1))
instruct_model_text_output = tokenizer.decode(instruct_model_output[0], skip_special_tokens= True)


print(dash_line)
print(f'Baseline Human Summary:',human_baseline_summary)
print(dash_line)
print(f'Original model:',original_model_text_output)
print(dash_line)
print(f'Instruct model:',instruct_model_text_output)

In [0]:
##### Evaluate the instruct model Quantatively (with ROUGE metric)
rouge = evaluate.load('rouge')

dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summary = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []

for _,dialogue in enumerate(dialogues):
  prompt = f"""
  Summarize the following conversation.
  
  {dialogue}
  
  Summary:
  """

  input_ids = tokenizer(prompt, return_tensors='pt').input_ids
  original_model_output = original_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200))
  original_model_text_output = tokenizer.decode(original_model_output[0], skip_special_tokens= True)
  original_model_summaries.append(original_model_text_output)

  instruct_model_output = model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200))
  instruct_model_text_output = tokenizer.decode(instruct_model_output[0], skip_special_tokens= True)
  instruct_model_summaries.append(instruct_model_text_output)

zipped_summaries = list(zip(human_baseline_summary, original_model_summaries, instruct_model_summaries))
df = pd.DataFrame(zipped_summaries, columns=['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,#Person1#: I need to take a dictation for you.,"#Person1#: Ms. Dawson, I need to take a dictat..."
1,In order to prevent employees from wasting tim...,#Person1#: I need to take a dictation for you.,The Office Communication Policy is now a memor...
2,Ms. Dawson takes a dictation for #Person1# abo...,#Person1#: I need to take a dictation for you.,#Person1: This is a new policy. #Person2: This...
3,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,The traffic jam is bad for the environment.
4,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,People are talking about how to avoid getting ...
5,#Person2# complains to #Person1# about the tra...,The traffic jam at the Carrefour intersection ...,#Person1: I'm glad to hear that you've finally...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorce.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting divorced.,Masha and Hero are getting married.
9,#Person1# and Brian are at the birthday party ...,"#Person1#: Happy birthday, Brian. #Person2#: T...","#Person1#: Happy Birthday, this is for you! #P..."


In [0]:
original_model_Results = rouge.compute(predictions= original_model_summaries, references = human_baseline_summary[0:len(original_model_summaries)],
                                       use_aggregator = True, use_stemmer= True,)
instruct_model_results = rouge.compute(predictions= instruct_model_summaries, references = human_baseline_summary[0:len(instruct_model_summaries)],
                                       use_aggregator = True, use_stemmer= True,)

print('Original model: ')
print(original_model_Results)
print('Instruct model: ')
print(instruct_model_results)

In [0]:
print('Absolute percentage improvement of instruct model over Human baseline')
improvement = (np.array(list(instruct_model_results.values()))) - np.array(list(original_model_Results.values()))
for key, value in zip(instruct_model_results.keys(), improvement):
  print(f'{key}:{value*100:.2f}%')

##### Conclusion : The instruct model couldn't be improved that much in comparison with original model due to limited number of epochs & steps

#####3.3. Perform Parameter Efficient Fine-Tuning (PEFT)

In [0]:
### setup the PEFT/LoRA model for fine-tuning with dimension of 32 for adapter to be trained
from peft import LoraConfig, get_peft_model, TaskType
lora_config = LoraConfig(r=32, lora_alpha=32, target_modules = ["q", "v"], lora_dropout= 0.05, bias="none", task_type=TaskType.SEQ_2_SEQ_LM)

In [0]:
### percentage of trainable parameters are way less than instruct fine tuning
peft_model = get_peft_model(original_model, lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

In [0]:
### Train PEFT Adapter
output_dir = 'XXX/peft_dialogue-summary-training-'+str(int(time.time()))
peft_training_args = TrainingArguments(output_dir, auto_find_batch_size=True, learning_rate=1e-3, num_train_epochs=5, logging_steps=1, max_steps = 1)
peft_trainer = Trainer(peft_model, args=training_args, train_dataset=tokenized_datasets['train'])

In [0]:
peft_trainer.train()
eft_trainer.save_model('ZZZ')

In [0]:
### Retrieving PEFT trained model in case of saving in top cell
###peft_model = T5ForConditionalGeneration.from_pretrained("XXX")

In [0]:
### Now comparing the original model and PEFT model
index = 200
dialogue = dataset['test'][index]['dialogue']
human_baseline_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt, return_tensors='pt').input_ids
original_model_output = original_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_output[0], skip_special_tokens= True)

peft_model_output = peft_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_output[0], skip_special_tokens= True)


print(dash_line)
print(f'Baseline Human Summary:',human_baseline_summary)
print(dash_line)
print(f'Original model:',original_model_text_output)
print(dash_line)
print(f'PEFT model:',peft_model_text_output)

In [0]:
##### Evaluate the PEFT model Quantatively (with ROUGE metric)
rouge = evaluate.load('rouge')

dialogue = dataset['test'][0:10]['dialogue']
human_baseline_summary = dataset['test'][0:10]['summary']

original_model_summaries = []
peft_model_summaries = []

for _,dialogue in enumerate(dialogues):
  prompt = f"""
  Summarize the following conversation.
  
  {dialogue}
  
  Summary:
  """

  input_ids = tokenizer(prompt, return_tensors='pt').input_ids
  original_model_output = original_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200))
  original_model_text_output = tokenizer.decode(original_model_output[0], skip_special_tokens= True)
  original_model_summaries.append(original_model_text_output)

  peft_model_output = peft_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200))
  peft_model_text_output = tokenizer.decode(peft_model_output[0], skip_special_tokens= True)
  peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summary, original_model_summaries, peft_model_summaries))
df = pd.DataFrame(zipped_summaries, columns=['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,The memo is a memo to all employees.,Memo to all employees.
1,In order to prevent employees from wasting tim...,New memo issued to all employees today is due ...,Employees who use Instant Messaging will be pl...
2,Ms. Dawson takes a dictation for #Person1# abo...,Employees who use Instant Message will be subj...,This memo will be distributed to all employees...
3,#Person2# arrives late because of traffic jam....,The traffic jam in the city is bad for the env...,Taking public transport to work is a good option.
4,#Person2# decides to follow #Person1#'s sugges...,People are worried about their commute.,The public transport system is good for the en...
5,#Person2# complains to #Person1# about the tra...,The traffic is getting worse and worse.,#Person1: I'm here! #Person1: I'm finally here...
6,#Person1# tells Kate that Masha and Hero get d...,#Person1: Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced.,#Person1#: Masha and Hero are getting divorce....
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,Brian's birthday is coming and he's a little t...,Brian's birthday is coming up.


In [0]:
original_model_Results = rouge.compute(predictions= original_model_summaries, references = human_baseline_summary[0:len(original_model_summaries)],
                                       use_aggregator = True, use_stemmer= True,)
peft_model_results = rouge.compute(predictions= peft_model_summaries, references = human_baseline_summary[0:len(peft_model_summaries)],
                                       use_aggregator = True, use_stemmer= True,)

print('Original model: ')
print(original_model_Results)
print('PEFT model: ')
print(peft_model_results)

In [0]:
print('Absolute percentage improvement of PEFT model over Human baseline')
improvement = (np.array(list(peft_model_results.values()))) - np.array(list(original_model_Results.values()))
for key, value in zip(peft_model_results.keys(), improvement):
  print(f'{key}:{value*100:.2f}%')

In [0]:
print('Absolute percentage improvement of PEFT model over Instruct model')
improvement = (np.array(list(peft_model_results.values()))) - np.array(list(instruct_model_results.values()))
for key, value in zip(peft_model_results.keys(), improvement):
  print(f'{key}:{value*100:.2f}%')

#### Conclusion: PEFT model didn't work way better than instruct model which indicates that training full model tuning is not necessary. PEFT is working faster & less memory consuming incomparison to the instruct model