In [1]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer, DataCollatorWithPadding
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Getting the same dataset again
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

See what this dialogues contain

In [24]:
print(f"dialogue: {dataset['test'][1]['dialogue']}")
print(f"Human Summary: {dataset['test'][1]['summary']}")

dialogue: #Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.
#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with th

## Load the model

In [26]:
model_name='google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)

Now it's time to explore how well the base LLM summarizes a dialogue without any prompt engineering. **Prompt engineering** is an act of a human changing the **prompt** (input) to improve the response for a given task.

In [27]:
dash_line = '-'.join('' for x in range(100))
dialog = dataset['test'][1]['dialogue']
summary = dataset['test'][1]['summary']

inputs_to_encoder = tokenizer(dialog, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=200,
    )[0], 
    skip_special_tokens=True
)
print(f"dialogue: {dataset['test'][1]['dialogue']}")
print(dash_line)
print(f"Human Summary: {dataset['test'][1]['summary']}")
print(dash_line)
print(f"Model Summary - without prompt engineering : {output_from_decoder}")


dialogue: #Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.
#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with th

# Zero Shot inference with instruction prompt

In [28]:
dash_line = '-'.join('' for x in range(100))
dialog = dataset['test'][1]['dialogue']
summary = dataset['test'][1]['summary']

instruction_prompt = f"""
Summarize the following conversation.

{dialog}

Summary:
    """

inputs_to_encoder = tokenizer(instruction_prompt, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=200,
    )[0], 
    skip_special_tokens=True
)
print(f"dialogue: {dataset['test'][1]['dialogue']}")
print(dash_line)
print(f"Human Summary: {dataset['test'][1]['summary']}")
print(dash_line)
print(f"Model Summary - Zero Shot : {output_from_decoder}")

dialogue: #Person1#: Ms. Dawson, I need you to take a dictation for me.
#Person2#: Yes, sir...
#Person1#: This should go out as an intra-office memorandum to all employees by this afternoon. Are you ready?
#Person2#: Yes, sir. Go ahead.
#Person1#: Attention all staff... Effective immediately, all office communications are restricted to email correspondence and official memos. The use of Instant Message programs by employees during working hours is strictly prohibited.
#Person2#: Sir, does this apply to intra-office communications only? Or will it also restrict external communications?
#Person1#: It should apply to all communications, not only in this office between employees, but also any outside communications.
#Person2#: But sir, many employees use Instant Messaging to communicate with their clients.
#Person1#: They will just have to change their communication methods. I don't want any - one using Instant Messaging in this office. It wastes too much time! Now, please continue with th

This is much better but still a lot of nuance

## One Shot Inference
It's a way of providing LLMs with one example of prompt-response pair that match your task to give LLM some context. For example:

In [44]:
def make_prompt(index_for_prompt, index_to_summarise):
    prompt = ''
    for index in index_for_prompt:
        dialog = dataset['test'][index_for_prompt]['dialogue']
        summary = dataset['test'][index_for_prompt]['summary']
        
        prompt = f"""
        Dialogue:
        
        {dialog}
        
        What was going on?
        {summary}
        
            """ 

    dialogue = dataset['test'][index_to_summarise]['dialogue']
    
    prompt += f"""
    Dialogue:
    
    {dialogue}
    
    What was going on?
    """

    return prompt


Construct One shot prompt

In [45]:
one_shot_prompt = make_prompt([40], 200)
print(one_shot_prompt)


        Dialogue:

        ["#Person1#: What time is it, Tom?\n#Person2#: Just a minute. It's ten to nine by my watch.\n#Person1#: Is it? I had no idea it was so late. I must be off now.\n#Person2#: What's the hurry?\n#Person1#: I must catch the nine-thirty train.\n#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there."]

        What was going on?
        ['#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.']

            
    Dialogue:

    #Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd 

In [48]:
dash_line = '-'.join('' for x in range(100))


inputs_to_encoder = tokenizer(one_shot_prompt, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=200,
    )[0], 
    skip_special_tokens=True
)

print(f"Human Summary: {dataset['test'][200]['summary']}")
print(dash_line)
print(f"Model Summary - One Shot Inference : {output_from_decoder}")

Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Model Summary - One Shot Inference : #Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to add a CD-ROM drive.


# Few Shot Inference

It's a way of providing LLMs with a few examples of prompt-response pairs typically 2-5 of a task within the prompt to help it understand the pattern before performing on new inputs. For example:

In the previous example, we gave model one example with index 40. In this example, we will provide LLMs with 3 examples with index 40, 50,  and 60.

Construct Few shot prompt

In [49]:
few_shot_prompt = make_prompt([40,50,60], 200)
print(few_shot_prompt)


        Dialogue:

        ["#Person1#: What time is it, Tom?\n#Person2#: Just a minute. It's ten to nine by my watch.\n#Person1#: Is it? I had no idea it was so late. I must be off now.\n#Person2#: What's the hurry?\n#Person1#: I must catch the nine-thirty train.\n#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.", "#Person1#: Yeah. Just pull on this strip. Then peel off the back.\n#Person2#: You might make a few enemies this way.\n#Person1#: If they don't think this is fun, they're not meant to be our friends.\n#Person2#: You mean your friends. I think it's cruel.\n#Person1#: Yeah. But it's fun. Look at those two ugly old ladies. . . or are they men?\n#Person2#: Hurry! Get a shot!. . . Hand it over!\n#Person1#: I knew you'd come around. . .", "#Person1#: Hey, Frank. I heard you got a new job.\n#Person2#: Yeah, Judy. I will be working for the Post Office. It's not a bad job.\n#Person1#: Is it true that you ha

In [51]:
dash_line = '-'.join('' for x in range(100))


inputs_to_encoder = tokenizer(few_shot_prompt, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=200,
    )[0], 
    skip_special_tokens=True
)

print(f"Human Summary: {dataset['test'][200]['summary']}")
print(dash_line)
print(f"Model Summary - Few Shot Inference : {output_from_decoder}")

Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Model Summary - Few Shot Inference : #Person1 wants to upgrade his system and hardware.


## Experiment GenerateConfig parameters for Inference

### Adjust max_new_tokens

In this example, I am reducing the max_new_tokens value from 200 to 50

In [52]:
dash_line = '-'.join('' for x in range(100))


inputs_to_encoder = tokenizer(few_shot_prompt, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=50,
    )[0], 
    skip_special_tokens=True
)

print(f"Human Summary: {dataset['test'][200]['summary']}")
print(dash_line)
print(f"Model Summary - Few Shot Inference : {output_from_decoder}")

Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Model Summary - Few Shot Inference : #Person1 wants to upgrade his system and hardware.


### Add temperature parameter

temperature is a floating-point value (typically between 0 and 1, but sometimes up to 2), used during sampling from the model’s probability distribution over words/tokens.

Lower temperature (e.g. 0 – 0.3)
→ More deterministic, conservative output
→ Picks the most likely tokens
→ Good for facts, coding, structured tasks
→ Reproducible results

Medium temperature (e.g. 0.5 – 0.7)
→ Balanced between randomness and coherence
→ Suitable for brainstorming or more open-ended tasks

Higher temperature (e.g. 0.8 – 1.0 or higher)
→ More diverse and creative output
→ Can produce surprising or novel responses
→ Risk of hallucinations or incoherence increases

In [53]:
dash_line = '-'.join('' for x in range(100))


inputs_to_encoder = tokenizer(few_shot_prompt, return_tensors="pt")
output_from_decoder = tokenizer.decode(
    model.generate(
        inputs_to_encoder["input_ids"], 
        max_new_tokens=50, do_sample=True, temperature=0.1
    )[0], 
    skip_special_tokens=True
)

print(f"Human Summary: {dataset['test'][200]['summary']}")
print(dash_line)
print(f"Model Summary - Few Shot Inference : {output_from_decoder}")

Human Summary: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
Model Summary - Few Shot Inference : #Person1 recommends upgrading the system, adding a painting program, adding a computer and a CD-ROM drive.
