In [None]:
# Install the `evaluate` package, which is imported below and later used via evaluate.load('rouge').
%pip install evaluate

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

**Load Dataset and LLM**

In [None]:
# Load the dialogue-summarisation dataset by its identifier; later cells read its
# 'train', 'validation' and 'test' splits and the 'dialogue' / 'summary' columns.
huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)
dataset

In [None]:
# One checkpoint name drives both the tokenizer and the baseline model.
model_name = 'google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
og_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

In [None]:
def num_train_paras(model):
  """Summarise how many of ``model``'s parameters are trainable.

  Args:
    model: object exposing ``named_parameters()``; every yielded parameter must
      provide ``numel()`` and a ``requires_grad`` flag.

  Returns:
    A two-line string: the trainable parameter count first, the total count second.
  """
  # Collect (size, trainable) once so both totals come from the same pass.
  sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
  all_model_paras = sum(count for count, _ in sizes)
  train_paras = sum(count for count, trainable in sizes if trainable)
  return f"trainable parameters: {train_paras}\nall model parameters: {all_model_paras}"

In [None]:
# Report the trainable / total parameter counts of the baseline model.
print(num_train_paras(og_model))

**Test Model with Zero-Shot Inference (ZSI)**

In [None]:
# Pick one test example and wrap its dialogue in the zero-shot summarisation prompt.
ind = 200
test_example = dataset['test'][ind]
dialogue, summary = test_example['dialogue'], test_example['summary']

prompt = f"""
Summarize the following conversation:

{dialogue}

Summary:
"""

In [None]:
# Tokenise the zero-shot prompt; `inputs` is kept so it can be inspected afterwards.
inputs = tokenizer(prompt, return_tensors='pt')

# Keep the ids on the same device as the model, as the later evaluation cells do,
# so this cell does not depend on where og_model currently lives.
generated_ids = og_model.generate(
    inputs["input_ids"].to(og_model.device),
    max_new_tokens=200,
)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Separator of 99 dashes, reused by the later comparison cells.
dash_line = '-' * 99
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

In [None]:
# Inspect the tokenizer output produced for the zero-shot prompt.
inputs

**Full Fine-Tuning**

**Preprocess Dataset**

In [None]:
def tokenize_fun(eg):
  """Tokenise a batch of rows into padded model inputs and training labels.

  Args:
    eg: batch mapping with a "dialogue" and a "summary" column (sequences of
      strings), as supplied by ``dataset.map(..., batched=True)``.

  Returns:
    The same mapping with "input_ids", "attention_mask" and "labels" added.
    Padded label positions are set to -100 so they are ignored by the loss.
  """
  start_prompt = 'Summarize the following conversation: \n\n'
  end_prompt = '\n\nSummary: '
  prompt = [start_prompt+dialogue+end_prompt for dialogue in eg["dialogue"]]

  model_inputs = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt")
  eg['input_ids'] = model_inputs.input_ids
  # Inputs are padded to max length; the mask lets the model skip those pad positions.
  eg['attention_mask'] = model_inputs.attention_mask

  labels = tokenizer(eg["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
  # Replace pad ids with -100 so padding does not contribute to the training loss.
  eg['labels'] = labels.masked_fill(labels == tokenizer.pad_token_id, -100)
  return eg

In [None]:
# Run tokenize_fun over the dataset in batches and display the resulting structure.
tokenized_datasets = dataset.map(tokenize_fun, batched=True)
tokenized_datasets

In [None]:
# Drop the raw id/topic/text columns, presumably leaving only the fields added by tokenize_fun.
# NOTE(review): the name is rebound to the reduced dataset, so the cell is not idempotent —
# confirm it is only run once per mapping.
tokenized_datasets = tokenized_datasets.remove_columns(['id','topic','dialogue','summary'])
tokenized_datasets

In [None]:
# Preview only: keep the rows whose index is a multiple of 100.
# The filtered result is displayed, not assigned back to tokenized_datasets.
tokenized_datasets.filter(
    lambda _row, position: position % 100 == 0,
    with_indices=True,
)

**Fine-Tune Model with Preprocessed Dataset**

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')


In [None]:
# output_dir =  f'./dialogue-summary-training-{str(int(time.time()))}'
# output_dir = '/content/drive/MyDrive/dialogue-summary-training-checkpoints'

# Checkpoints for the full fine-tuning run are written under the Kaggle working directory.
output_dir = '/kaggle/working/dialogue-summary-checkpoints'

# NOTE(review): og_model is loaded with torch_dtype=torch.bfloat16 while fp16=True is
# requested here — confirm this precision combination is intended.
training_args = TrainingArguments(
    output_dir=output_dir,
    # optimisation
    num_train_epochs=3,
    # max_steps=1,
    per_device_train_batch_size=4,
    learning_rate=1e-5,
    weight_decay=0.01,
    fp16=True,
    # checkpointing
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    # logging
    logging_steps=1,
    report_to="none",
)

# The Trainer is given og_model itself together with the tokenised train/validation splits.
trainer = Trainer(
    args=training_args,
    model=og_model,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
)

In [None]:
# checkpoint_path = Path("/content/drive/MyDrive/dialogue-summary-training-checkpoints/checkpoint-3300")

In [None]:
# trainer.train(resume_from_checkpoint=checkpoint_path)


In [None]:
# trainer.train()

In [None]:
# Step 1: Zip the final checkpoint directory
# NOTE(review): checkpoint-4674 sits under the Trainer's output_dir, but the trainer.train()
# calls above are commented out, so the folder presumably has to exist from an earlier
# run — confirm before relying on this cell.
!zip -r /kaggle/working/final_checkpoint.zip /kaggle/working/dialogue-summary-checkpoints/checkpoint-4674


In [None]:
from pathlib import Path

In [None]:
# Directory of the full fine-tuning checkpoint from which instruct_model is loaded.
checkpoint_path = Path("/kaggle/working/dialogue-summary-checkpoints/checkpoint-4674")

In [None]:
# Load the fully fine-tuned weights from the local checkpoint directory only (no Hub lookup).
# trust_remote_code is deliberately left at its default: the checkpoint sits under the
# Trainer output_dir for a stock seq2seq model, so no custom modelling code needs to be
# executed on load, and enabling it would allow code found in that directory to run.
instruct_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint_path,
    local_files_only=True,
    torch_dtype=torch.bfloat16,
)

**Evaluate Model Qualitatively (Human Evaluation)**

In [None]:
# Select the test example used for the side-by-side comparison.
ind = 200
comparison_example = dataset["test"][ind]
dialogue = comparison_example["dialogue"]
human_baseline_summary = comparison_example["summary"]


In [None]:
# Build the summarisation prompt for the currently selected dialogue.
prompt = f"""
Summarize the following conversation:

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt, return_tensors='pt').input_ids

# Baseline model: up to 200 new tokens, a single beam.
og_input_ids = input_ids.to(og_model.device)
og_generated = og_model.generate(
    input_ids=og_input_ids,
    generation_config=GenerationConfig(max_new_tokens=200, num_beams=1),
)
og_model_output = tokenizer.decode(og_generated[0], skip_special_tokens=True)

# Fine-tuned model: same decoding settings, ids moved to its own device.
inst_input_ids = input_ids.to(instruct_model.device)
inst_generated = instruct_model.generate(
    input_ids=inst_input_ids,
    generation_config=GenerationConfig(max_new_tokens=200, num_beams=1),
)
inst_model_output = tokenizer.decode(inst_generated[0], skip_special_tokens=True)

In [None]:
# Print the reference and both generated summaries, each preceded by a separator line.
print(
    dash_line,
    f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}',
    dash_line,
    f'ORIGINAL MODEL:\n{og_model_output}',
    dash_line,
    f'INSTRUCT MODEL:\n{inst_model_output}',
    sep='\n',
)


In [None]:
# torch is already imported in the notebook's import cell; repeating it here is harmless.
import torch
print("GPU Available:", torch.cuda.is_available())


**Evaluate Model Quantitatively (With ROUGE Metric)**

In [None]:
!pip install rouge_score


In [None]:
# Load the ROUGE metric object whose compute() is used for the quantitative comparisons.
rouge = evaluate.load('rouge')

In [None]:
# First ten test dialogues and their human-written reference summaries.
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

og_model_sums = []
inst_model_sums = []

for dialogue in dialogues:
    prompt = f"""
Summarize the following conversation:

{dialogue}

Summary: """
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Baseline first, then the fine-tuned model; each gets the ids on its own device.
    for model, collected in ((og_model, og_model_sums), (instruct_model, inst_model_sums)):
        generated = model.generate(
            input_ids=input_ids.to(model.device),
            generation_config=GenerationConfig(max_new_tokens=200),
        )
        collected.append(tokenizer.decode(generated[0], skip_special_tokens=True))
    

In [None]:
# Pair each human reference with the two model outputs, row by row.
zipped_summaries = list(zip(human_baseline_summaries, og_model_sums, inst_model_sums))

In [None]:
# Tabulate the references and both sets of generated summaries for side-by-side reading.
df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries'])
df

In [None]:
# Both models are scored against the same references with identical options.
rouge_options = {'use_aggregator': True, 'use_stemmer': True}

og_model_res = rouge.compute(
    predictions=og_model_sums,
    references=human_baseline_summaries,
    **rouge_options,
)

inst_model_res = rouge.compute(
    predictions=inst_model_sums,
    references=human_baseline_summaries,
    **rouge_options,
)

In [None]:
# Show the ROUGE results of the baseline and the fine-tuned model.
print("Original Model: ",og_model_res)
print("Instruct Model: ",inst_model_res)


In [None]:
# Load the summaries stored in the results CSV.
res = pd.read_csv("/kaggle/input/dialogue-summary-training-results/dialogue-summary-training-results.csv")
# Only the last expression of a cell is rendered, so show the preview explicitly.
display(res.head())

# Hand ROUGE plain Python lists, like the in-notebook evaluation does.
human_baseline_sums = res['human_baseline_summaries'].tolist()
og_model_sums = res['original_model_summaries'].tolist()
inst_model_sums = res['instruct_model_summaries'].tolist()

og_model_res = rouge.compute(
    predictions = og_model_sums,
    references = human_baseline_sums,
    use_aggregator=True,
    use_stemmer=True
)

inst_model_res = rouge.compute(
    predictions=inst_model_sums,
    references=human_baseline_sums,
    use_aggregator=True,
    use_stemmer=True,
)

In [None]:
# Show the ROUGE results computed from the summaries read from the results CSV.
print("Original Model: ",og_model_res)
print("Instruct Model: ",inst_model_res)


**Parameter-Efficient Fine-Tuning (PEFT)**

In [None]:
# tokenized_datasets = tokenized_datasets.filter(
#                 lambda example, ind: ind%100 == 0,
#                 with_indices=True
# )

In [None]:
from peft import LoraConfig, get_peft_model, TaskType

In [None]:
# LoRA adapter settings for the seq2seq task; only the modules named "q" and "v" are targeted.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"],
    r=32,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# get_peft_model injects the LoRA layers into the model it is given, so wrap a fresh copy
# of the base checkpoint; og_model stays free of adapter layers for the cells that
# still report it as the "ORIGINAL MODEL".
lora_base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
peft_model = get_peft_model(lora_base_model,
                           lora_config)

# print() renders the two-line report; a bare expression would show the escaped string repr.
print(num_train_paras(peft_model))

**Train PEFT Adapter**

In [None]:

# Adapter checkpoints go to their own directory under the Kaggle working directory.
output_dir = '/kaggle/working/peft-dialogue-summary-checkpoints'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    # optimisation
    num_train_epochs=3,
    # max_steps=1,
    per_device_train_batch_size=4,
    auto_find_batch_size=True,
    learning_rate=1e-3,
    fp16=True,
    # evaluation and checkpointing share the same 50-step cadence
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    # logging
    logging_steps=1,
    report_to="none",
)

# Trainer for the LoRA-wrapped model, using the same tokenised train/validation splits.
peft_trainer = Trainer(
    args=peft_training_args,
    model=peft_model,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
)

In [None]:
# peft_trainer.train()

In [None]:
# Evaluate the model held by peft_trainer on its configured eval_dataset (the validation split).
# NOTE(review): peft_trainer.train() is commented out above, so this presumably scores the
# adapter as initialised — confirm that is intended.
peft_trainer.evaluate()

In [None]:
from peft import PeftModel, PeftConfig

In [None]:
# Reload tokenizer and base weights, then attach the saved adapter for inference only.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
peft_model_base = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-base",
    torch_dtype=torch.bfloat16,
)

# NOTE(review): this adapter directory is only produced by the commented-out `mv` cell
# further down — confirm it exists before running.
peft_model = PeftModel.from_pretrained(
    peft_model_base,
    '/kaggle/working/peft-ckpt-48-backup',
    torch_dtype=torch.bfloat16,
    is_trainable=False,
)

In [None]:
# print() renders the two-line report, as done for og_model; a bare expression would
# show the string repr with an escaped newline instead.
print(num_train_paras(peft_model))

**Evaluate Model Qualitatively (Human Evaluation)**

In [None]:
# Select one test example and compare the three models on it.
index = 200
dialogue = dataset['test'][index]['dialogue']
baseline_human_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

# Each model receives the ids on its own device and decodes with the same settings.
original_input_ids = input_ids.to(og_model.device)
original_model_outputs = og_model.generate(input_ids=original_input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

instruct_input_ids = input_ids.to(instruct_model.device)
instruct_model_outputs = instruct_model.generate(input_ids=instruct_input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)

peft_input_ids = input_ids.to(peft_model.device)
peft_model_outputs = peft_model.generate(input_ids=peft_input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

print(dash_line)
# Print the reference selected in this cell, not a same-purpose variable left over
# from an earlier cell.
print(f'BASELINE HUMAN SUMMARY:\n{baseline_human_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'INSTRUCT MODEL:\n{instruct_model_text_output}')
print(dash_line)
print(f'PEFT MODEL: {peft_model_text_output}')

In [None]:
# !zip -r /kaggle/working/peft_final_checkpoint.zip /kaggle/working/peft-dialogue-summary-checkpoints/checkpoint-48


In [None]:
# !mv /kaggle/working/peft-dialogue-summary-checkpoints/checkpoint-48 /kaggle/working/peft-ckpt-48-backup


**Evaluate the Model Quantitatively (with ROUGE Metric)**

In [None]:
# First ten test dialogues and their human-written reference summaries.
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for dialogue in dialogues:
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    input_ids = tokenizer(prompt, return_tensors='pt').input_ids

    # Move the ids to each model's own device, as the other evaluation cells do, so
    # generation does not fail when a model is not on the CPU.
    original_model_outputs = og_model.generate(input_ids=input_ids.to(og_model.device), generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    instruct_model_outputs = instruct_model.generate(input_ids=input_ids.to(instruct_model.device), generation_config=GenerationConfig(max_new_tokens=200))
    instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(input_ids=input_ids.to(peft_model.device), generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)
    instruct_model_summaries.append(instruct_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

# One row per dialogue: reference first, then the three model outputs.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries', 'peft_model_summaries'])
df
    

In [None]:
rouge = evaluate.load('rouge')

# All three models are scored with identical options against equally long reference slices.
rouge_kwargs = {'use_aggregator': True, 'use_stemmer': True}

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    **rouge_kwargs,
)

instruct_model_results = rouge.compute(
    predictions=instruct_model_summaries,
    references=human_baseline_summaries[0:len(instruct_model_summaries)],
    **rouge_kwargs,
)

peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    **rouge_kwargs,
)

# Heading and scores on consecutive lines, one pair per model.
print('ORIGINAL MODEL:', original_model_results, sep='\n')
print('INSTRUCT MODEL:', instruct_model_results, sep='\n')
print('PEFT MODEL:', peft_model_results, sep='\n')

In [1]:
# Ask nbconvert to clear the cell outputs of __notebook__.ipynb in place.
# NOTE(review): the output recorded for this cell is nbconvert's help text rather than a
# conversion log, so the command presumably did not process a notebook — confirm the
# file name and working directory before relying on it.
!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace __notebook__.ipynb


This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr