In [19]:
from huggingface_hub import HfApi
api = HfApi()
#models = list(api.list_models())

models = api.list_models(
    #filter=ModelFilter(task="text-classification"),
    filter={"pipeline_tag": "text-summarization"},
        sort="downloads",
        direction=-1,
        limit=5
    )
modelList = list(models)
print(modelList[0].modelId)


MuntasirHossain/flan-t5-large-samsum-qlora-merged


In [18]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig

#Text Summarization
dataset_name = "knkarthick/dialogsum" #loading the dataset from huggingface
dataset = load_dataset(dataset_name)

In [19]:
from datasets import load_dataset_builder #allows inspection of dataset before downloading

data_builder = load_dataset_builder(dataset_name)

#print(data_builder.info.description)
print(data_builder.info.features)

{'id': Value(dtype='string', id=None), 'dialogue': Value(dtype='string', id=None), 'summary': Value(dtype='string', id=None), 'topic': Value(dtype='string', id=None)}


In [20]:
#Loading some sample dialogues with their baseline
dialogues = [1, 55, 80, 150]

dash_line = '*'.join('' for x in range(100))

for i, index in enumerate(dialogues):
    print(dash_line)
    print('Sample ', i + 1)
    print(dash_line)
    print('INPUT DIALOGUE:')
    print(dataset['test'][index]['dialogue'])
    print(dash_line)
    print('BASELINE HUMAN SUMMARY:')
    print(dataset['test'][index]['summary'])
    print(dash_line)
    print()

***************************************************************************************************
Sample  1
***************************************************************************************************
INPUT DIALOGUE:
#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to co

In [4]:
# Loading FLAN-T5 Base model - text-summarization model

model_name='google/flan-t5-base'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) #FLAN-T5 tokenizer



In [5]:
# Base LLM summarization without prompt engineering
for i, index in enumerate(dialogues):
    dialogue = dataset['test'][index]['dialogue']
    summary = dataset['test'][index]['summary']
    
    inputs = tokenizer(dialogue, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Sample ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

2025-03-25 21:45:38.497031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-25 21:45:38.554269: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


***************************************************************************************************
Sample  1
***************************************************************************************************
INPUT PROMPT:
#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to comm

The models performance was poor

In [6]:
#Using instruction prompt
# Zero Shot Inference: wrap the dialogue in a descriptive instruction

for i, index in enumerate(dialogues):
    dialogue = dataset['test'][index]['dialogue']
    summary = dataset['test'][index]['summary']

    prompt = f"""Can you read and summarize the conversation?

{dialogue}

Summary:
    """

    # Input constructed prompt instead of the dialogue.
    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Sample ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)    
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

***************************************************************************************************
Sample  1
***************************************************************************************************
INPUT PROMPT:
Can you read and summarize the conversation?

#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir

We can attempt different prompt instruction to see if the model summary improves

In [7]:
#Using a prompt template. The dialogue were passed into the model as prompt before the instruction

for i, index in enumerate(dialogues):
    dialogue = dataset['test'][index]['dialogue']
    summary = dataset['test'][index]['summary']
        
    prompt = f"""
Dialogue:

{dialogue}

What was going on?
"""

    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )

    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

***************************************************************************************************
Example  1
***************************************************************************************************
INPUT PROMPT:

Dialogue:

#Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Mess

In [8]:
#Trying One Shot and Few Shot Inference to improve the model's generation
#We will be giving the model some example dialogue and the baseline summary before introducing the task

##One-Shot Inference

def make_prompt(indices_full, index_to_summarize):
    prompt = ''
    for index in indices_full:
        dialogue = dataset['test'][index]['dialogue']
        summary = dataset['test'][index]['summary']
        
        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. 
        # Other models may have their own preferred stop sequence.
        prompt += f"""
Dialogue:

{dialogue}

What was going on?
{summary}


"""
    
    dialogue = dataset['test'][index_to_summarize]['dialogue']
    
    prompt += f"""
Dialogue:

{dialogue}

What was going on?
"""
        
    return prompt

indices_full = [48]
index_to_summarize = 150

one_shot_prompt = make_prompt(indices_full, index_to_summarize)

print(one_shot_prompt)


Dialogue:

#Person1#: Yeah. Just pull on this strip. Then peel off the back.
#Person2#: You might make a few enemies this way.
#Person1#: If they don't think this is fun, they're not meant to be our friends.
#Person2#: You mean your friends. I think it's cruel.
#Person1#: Yeah. But it's fun. Look at those two ugly old ladies. . . or are they men?
#Person2#: Hurry! Get a shot!. . . Hand it over!
#Person1#: I knew you'd come around. . .

What was going on?
#Person2# at first thinks #Person1#'s behaviour cruel but finally joins #Person1#.



Dialogue:

#Person1#: Taxi!
#Person2#: Where will you go, sir?
#Person1#: Friendship Hotel.
#Person2#: OK, it's not far from here.
#Person1#: I have something important to do, can you fast the speed?
#Person2#: Sure, I'll try my best. Here we are.
#Person1#: It's fast! How much should I pay you?
#Person2#: The reading on the meter is 15 yuan.
#Person1#: Here's 20 yuan, keep the change.
#Person2#: Thank you very much.

What was going on?



In [9]:
summary = dataset['test'][index_to_summarize]['summary']

inputs = tokenizer(one_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ONE SHOT:\n{output}')

***************************************************************************************************
BASELINE HUMAN SUMMARY:
#Person1# takes a taxi to the Friendship Hotel for something important.

***************************************************************************************************
MODEL GENERATION - ONE SHOT:
The taxi driver will take Person1 to Friendship Hotel.


In [10]:
#Few-Shot Inference

indices_full = [48, 51, 98, 102]
index_to_summarize = 150

few_shot_prompt = make_prompt(indices_full, index_to_summarize)

print(few_shot_prompt)


Dialogue:

#Person1#: Yeah. Just pull on this strip. Then peel off the back.
#Person2#: You might make a few enemies this way.
#Person1#: If they don't think this is fun, they're not meant to be our friends.
#Person2#: You mean your friends. I think it's cruel.
#Person1#: Yeah. But it's fun. Look at those two ugly old ladies. . . or are they men?
#Person2#: Hurry! Get a shot!. . . Hand it over!
#Person1#: I knew you'd come around. . .

What was going on?
#Person2# at first thinks #Person1#'s behaviour cruel but finally joins #Person1#.



Dialogue:

#Person1#: What does your sister look like, Mike?
#Person2#: Well, she's tall and pretty.
#Person1#: Is she like you?
#Person2#: I suppose so. We're both friendly and easy-going.
#Person1#: Is she as clever as you?
#Person2#: No, she's not as clever as me.
#Person1#: Big head!

What was going on?
Mike is describing his sister to #Person1#.



Dialogue:

#Person1#: Is this the workshop to prepare for an interview?
#Person2#: This is the int

In [11]:
#passing the prompts into the model to perform few shot propmting

summary = dataset['test'][index_to_summarize]['summary']

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')

Token indices sequence length is longer than the specified maximum sequence length for this model (956 > 512). Running this sequence through the model will result in indexing errors


***************************************************************************************************
BASELINE HUMAN SUMMARY:
#Person1# takes a taxi to the Friendship Hotel for something important.

***************************************************************************************************
MODEL GENERATION - FEW SHOT:
The taxi driver will pick up Person1 at Friendship Hotel at 20 yuan.


Experimenting with different number of input prompt shows no significant result from one-shot inference

We will use some configuration parameters to influence the output
available parameters can be found in [huggingface documentation](https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig)

In [12]:
# Temperature controls the randomness inthe output
generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.70)

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        generation_config=generation_config,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')

***************************************************************************************************
BASELINE HUMAN SUMMARY:
#Person1# takes a taxi to the Friendship Hotel for something important.

***************************************************************************************************
MODEL GENERATION - FEW SHOT:
The taxi meter reading is 15 yuan, so the driver will have to give 20 yuan.


**Fine-Tunning our model**

In [21]:
from transformers import TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [22]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [7]:
# We will reload the FLAN-T5 base model but the small version due to compute 
# by setting torch_dtype=torch.bfloat16

model_name='google/flan-t5-base'

original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [23]:
# Examining the number of trainable model parameters

def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 247577856
all model parameters: 247577856
percentage of trainable model parameters: 100.00%


In [24]:
# Preprocess the dataset by converting the dialog-summary (prompt-reponse) pairs into explicit instructions
# The convert the prompt-response into tokens and pull out their input_ids (1 per token)

def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    
    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])

In [18]:
#tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

In [25]:
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")

print(tokenized_datasets)

Shapes of the datasets:
Training: (12460, 2)
Validation: (500, 2)
Test: (1500, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 500
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1500
    })
})


In [None]:
# We train the model using the hugging face trainer class. 
# pass the preprocessed dataset with reference to the originalmodel

output_dir = f'/home/tale2@ad.umbc.edu/LLM/text_summarization/flan-t5-finetuned-{str(int(time.time()))}'

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_steps=50,
    save_steps=30,
    max_steps=2000,
    evaluation_strategy="steps",
    bf16=True,   
    fp16=False  
)

trainer = Trainer(
    model=original_model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    #tokenizer=tokenizer
)

In [None]:
trainer.train()

trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)


Step,Training Loss,Validation Loss
50,10.6189,4.994411
100,4.8505,4.30998
150,4.1922,3.424272
200,3.5511,2.381094
250,3.0023,1.845306
300,2.614,1.398491
350,2.3097,1.112099
400,2.059,0.897564
450,1.8647,0.762956
500,1.6635,0.66186


  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
  torch.cpu.

In [26]:
model_path = "/home/tale2@ad.umbc.edu/LLM/text_summarization/flan-t5-finetuned-1743735222"
fine_tuned_model = AutoModelForSeq2SeqLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)

In [31]:
# Qualitative evaluation

index = 150
dialogue = dataset['test'][index]['dialogue']
human_baseline_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

fine_tuned_model_outputs = fine_tuned_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
fine_tuned_model_text_output = tokenizer.decode(fine_tuned_model_outputs[0], skip_special_tokens=True)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'FINE-TUNE MODEL:\n{fine_tuned_model_text_output}')

***************************************************************************************************
BASELINE HUMAN SUMMARY:
#Person1# takes a taxi to the Friendship Hotel for something important.
***************************************************************************************************
ORIGINAL MODEL:
The taxi will pick you up at the Friendship Hotel at 20 yuan.
***************************************************************************************************
FINE-TUNE MODEL:
The taxi will pick you up at the Friendship Hotel at 20 yuan.
