In [5]:
# %pip install torchdata==0.5.1
%pip install datasets==2.11.0

Collecting datasets==2.11.0
  Using cached datasets-2.11.0-py3-none-any.whl (468 kB)
Collecting dill<0.3.7,>=0.3.0
  Using cached dill-0.3.6-py3-none-any.whl (110 kB)
Collecting responses<0.19
  Using cached responses-0.18.0-py3-none-any.whl (38 kB)
Collecting multiprocess
  Using cached multiprocess-0.70.15-py37-none-any.whl (116 kB)
Collecting pyarrow>=8.0.0
  Using cached pyarrow-12.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.1 MB)
Collecting multiprocess
  Using cached multiprocess-0.70.14-py37-none-any.whl (115 kB)
Installing collected packages: pyarrow, dill, responses, multiprocess, datasets
Successfully installed datasets-2.11.0 dill-0.3.6 multiprocess-0.70.14 pyarrow-12.0.1 responses-0.18.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to

In [2]:
from datasets import load_dataset, Dataset, DatasetDict, load_metric
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
import json
import os

In [3]:
# Number N of the "summary<N>" field to read from each test-file record;
# transform_test_file exposes that field under the plain "summary" key as well.
TEST_SUMMARY_ID=1
def transform_single_dialogsumm_file(file):
    """Load one DialogSum JSON-lines file into a ``Dataset``.

    Each non-blank line of ``file`` is parsed as a JSON object. Only the
    ``fname``, ``summary`` and ``dialogue`` keys are kept; any other keys in a
    record are ignored.

    Args:
        file: Path to a ``.jsonl`` file (one JSON object per line).

    Returns:
        The result of ``Dataset.from_dict`` on a mapping with the columns
        ``fname``, ``summary`` and ``dialogue``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    columns = {"fname": [], "summary": [], "dialogue": []}
    # The context manager guarantees the handle is closed (the previous version
    # leaked it); UTF-8 is stated explicitly so parsing does not depend on the
    # platform's default encoding.
    with open(file, "r", encoding="utf-8") as handle:
        # Stream line by line instead of materialising the whole file.
        for line in handle:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which would otherwise make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)
            for key, value in record.items():
                if key in columns:
                    columns[key].append(value)
    return Dataset.from_dict(columns)

def transform_test_file(file, summary_id=None):
    """Load the DialogSum test JSON-lines file into a ``Dataset``.

    Test records carry numbered reference summaries under keys of the form
    ``summary<N>``. The selected one is kept under its original key and is also
    exposed under the plain ``summary`` key.

    Args:
        file: Path to the test ``.jsonl`` file (one JSON object per line).
        summary_id: Number N of the ``summary<N>`` field to use. Defaults to
            the module-level ``TEST_SUMMARY_ID``.

    Returns:
        The result of ``Dataset.from_dict`` on a mapping with the columns
        ``fname``, ``summary<N>``, ``dialogue`` and ``summary``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if summary_id is None:
        # Looked up at call time, so the module constant keeps working as before.
        summary_id = TEST_SUMMARY_ID
    summary_key = "summary%d" % summary_id

    columns = {"fname": [], summary_key: [], "dialogue": []}
    # The context manager closes the handle (previously leaked); UTF-8 is
    # explicit so parsing does not depend on the platform default encoding.
    with open(file, "r", encoding="utf-8") as handle:
        for line in handle:
            # Skip blank lines, which json.loads cannot parse.
            if not line.strip():
                continue
            record = json.loads(line)
            for key, value in record.items():
                if key in columns:
                    columns[key].append(value)

    # Expose the chosen reference summary under the same key the train and
    # validation splits use.
    columns["summary"] = columns[summary_key]
    return Dataset.from_dict(columns)

def transform_dialogsumm_to_huggingface_dataset(train,validation,test):
    """Bundle the three DialogSum JSONL files into one ``DatasetDict``.

    Args:
        train: Path to the training file.
        validation: Path to the validation (dev) file.
        test: Path to the test file.

    Returns:
        A ``DatasetDict`` with the keys ``train``, ``validation`` and ``test``.
        The first two are loaded with ``transform_single_dialogsumm_file`` and
        the last with ``transform_test_file``.
    """
    splits = {}
    splits["train"] = transform_single_dialogsumm_file(train)
    splits["validation"] = transform_single_dialogsumm_file(validation)
    splits["test"] = transform_test_file(test)
    return DatasetDict(splits)

In [4]:
# Build the train/validation/test collection from the local DialogSum JSONL files.
dataset = transform_dialogsumm_to_huggingface_dataset(
    "DialogSum_Data/dialogsum.train.jsonl",
    "DialogSum_Data/dialogsum.dev.jsonl",
    "DialogSum_Data/dialogsum.test.jsonl",
)

In [5]:
# Positions in the test split used as running examples by the cells below.
example_indices = [40,200]

# Horizontal rule for the printed reports: 99 dashes.
dash_line = '-' * 99

# Show each example's dialogue next to its human-written summary.
for position, index in enumerate(example_indices, start=1):
    record = dataset['test'][index]
    print('Example ', position)
    print(
        dash_line,
        '***** Input Dialogue *****',
        dash_line,
        record['dialogue'],
        dash_line,
        '***** Human Summary *****',
        dash_line,
        record['summary'],
        dash_line,
        sep='\n',
    )

Example  1
---------------------------------------------------------------------------------------------------
***** Input Dialogue *****
---------------------------------------------------------------------------------------------------
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 
---------------------------------------------------------------------------------------------------
***** Human Summary *****
---------------------------------------------------------------------------------------------------
#Person1# wants to change the broken pendant i

In [6]:
# Checkpoint identifier; the tokenizer cell below loads from the same name.
model_name = 'google/flan-t5-base'
# Load the pretrained sequence-to-sequence model for that checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

Downloading config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [7]:
    # Load the tokenizer that belongs to the same checkpoint as the model,
    # requesting the fast implementation.
    # NOTE(review): this top-level cell is indented; IPython tolerates a uniformly
    # indented cell, but the lines should be dedented if exported to a .py script.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

Downloading tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [8]:
# Round-trip a sample sentence through the tokenizer: text -> integer token IDs
# (the form the model consumes) -> text again.
sentence = "what time is it, Tom?"

sentence_encoded = tokenizer(sentence, return_tensors='pt')
token_ids = sentence_encoded["input_ids"][0]

# skip_special_tokens drops markers such as the end-of-sequence token when decoding.
sentence_decoded = tokenizer.decode(token_ids, skip_special_tokens=True)

print("encoded version: ", token_ids)
print("decoded version: ", sentence_decoded)

encoded version:  tensor([ 125,   97,   19,   34,    6, 3059,   58,    1])
decoded version:  what time is it, Tom?


# Running the model with and without prompt engineering

### Without prompt engineering


In [10]:
# Baseline: feed the raw dialogue to the model with no instruction around it.
for index in example_indices:
    record = dataset['test'][index]
    dialog = record['dialogue']
    summary = record['summary']

    encoded = tokenizer(dialog, return_tensors='pt')
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=50)
    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # One report per example: input, human reference, model output.
    print(
        f'Input Prompt:\n{dialog}',
        dash_line,
        f'Human Summary:\n{summary}',
        dash_line,
        f"Model's summary:{output}",
        dash_line,
        dash_line,
        sep='\n',
    )

Input Prompt:
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 
---------------------------------------------------------------------------------------------------
Human Summary:
#Person1# wants to change the broken pendant in #Person2#'s shop.
---------------------------------------------------------------------------------------------------
Model's summary:#Person1#: Thank you very much.
---------------------------------------------------------------------------------------------------
-------------------------------------------------------------------

### With prompt engineering - Zero Shot inference

In [12]:
# Zero-shot: wrap the dialogue in an explicit "Summarize ..." instruction.
for index in example_indices:
    record = dataset['test'][index]
    dialog = record['dialogue']
    summary = record['summary']

    prompt = f"""
            Summarize the following conversation.
            {dialog}
            
            Summary:
            """
    # The tokenizer now receives the full prompt, not the bare dialogue.
    encoded = tokenizer(prompt, return_tensors='pt')
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=50)
    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # One report per example: input, human reference, model output.
    print(
        f'Input Prompt:\n{dialog}',
        dash_line,
        f'Human Summary:\n{summary}',
        dash_line,
        f"Model's summary:{output}",
        dash_line,
        dash_line,
        sep='\n',
    )

Input Prompt:
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 
---------------------------------------------------------------------------------------------------
Human Summary:
#Person1# wants to change the broken pendant in #Person2#'s shop.
---------------------------------------------------------------------------------------------------
Model's summary:The pendant is broken.
---------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------

Zero-shot inference with this prompt does not produce a much better summary. Let's try a different zero-shot prompt to see how changing the wording of the prompt affects the result.

In [14]:
# Zero-shot, alternative wording: label the dialogue and ask "What was going on?".
for index in example_indices:
    record = dataset['test'][index]
    dialog = record['dialogue']
    summary = record['summary']

    prompt = f"""
            dialogue:
            {dialog}
            
            What was going on?
            """
    # The tokenizer now receives the full prompt, not the bare dialogue.
    encoded = tokenizer(prompt, return_tensors='pt')
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=50)
    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # One report per example: input, human reference, model output.
    print(
        f'Input Prompt:\n{dialog}',
        dash_line,
        f'Human Summary:\n{summary}',
        dash_line,
        f"Model's summary:{output}",
        dash_line,
        dash_line,
        sep='\n',
    )

Input Prompt:
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 
---------------------------------------------------------------------------------------------------
Human Summary:
#Person1# wants to change the broken pendant in #Person2#'s shop.
---------------------------------------------------------------------------------------------------
Model's summary:The pendant is broken and Person1 wants to change it.
---------------------------------------------------------------------------------------------------
---------------------------------------------

## The model's summary is a little better

# In-Context Learning via one shot or few shot learning

In [16]:
def make_prompt(example_indices_full, example_index_to_summarize):
    """Assemble a one-/few-shot prompt from the test split of ``dataset``.

    Args:
        example_indices_full: Test-split indices of the solved examples
            (dialogue plus its reference summary) placed first in the prompt.
        example_index_to_summarize: Test-split index of the dialogue the model
            is asked to summarize; its summary is left out.

    Returns:
        The prompt string: one block per solved example, followed by the final
        dialogue and the open question "What was going on?".
    """
    test_split = dataset['test']

    shots = []
    for shot_index in example_indices_full:
        shot_dialogue = test_split[shot_index]['dialogue']
        shot_summary = test_split[shot_index]['summary']
        # The blank lines after each summary separate the shots; the original
        # author notes this stop sequence matters for FLAN-T5 and that other
        # models may prefer a different one.
        shots.append(
            f"\nDialogue:\n{shot_dialogue}\n\nWhat was going on? \n{shot_summary}\n\n\n\n"
        )

    target_dialogue = test_split[example_index_to_summarize]['dialogue']
    question = f"\nDialogue:\n{target_dialogue}\n\nWhat was going on?\n"
    return ''.join(shots) + question

### Construct the prompt to perform inference
# one shot inference

In [17]:
# One-shot setup: a single solved example (test index 40) precedes the
# dialogue to be summarized (test index 200).
example_indices_full = [40]
example_index_to_summarize = 200
one_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)
# Print the assembled prompt so its layout can be inspected.
print(one_shot_prompt)


Dialogue:
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 

What was going on? 
#Person1# wants to change the broken pendant in #Person2#'s shop.




Dialogue:
#Person1#: Oh, I'm starving. It's my first time to China. And I'd like to try some real Chinese cuisine. What would you recommend?
#Person2#: Well, depends. You see, there are eight famous Chinese food cuisines, for instance, Sichuan cuisine and Hunan cuisine.
#Person1#: There're all spicy or hot of heard.
#Person2#: That's right. If you have hot dishes, you can try some.
#Person1#: I cannot hav

In [36]:
# Human reference for the dialogue that the one-shot prompt asks about.
summary = dataset['test'][example_index_to_summarize]['summary']

# Generate from the one-shot prompt.
encoded = tokenizer(one_shot_prompt, return_tensors='pt')
generated_ids = model.generate(encoded['input_ids'], max_new_tokens=50)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Reference and model output, one after the other.
print(
    dash_line,
    'Human Summary: ',
    summary,
    dash_line,
    'Model Summary: ',
    output,
    sep='\n',
)

---------------------------------------------------------------------------------------------------
Human Summary: 
It's #Person1#'s first time to China and #Person1# wants some Chinese cuisine. #Person2# recommends some but it's too far and #Person1# is starving. Then #Person2# suggests a nearby Quanjude restaurant and its Beijing roast duck. #Person1# will go there.
---------------------------------------------------------------------------------------------------
Model Summary: 
#Person1 is looking for a restaurant in Beijing. It's a Cantonese restaurant. It's near the hotel.


### One-shot inference made the summary a little better. Next, we try few-shot inference to see whether it improves the model's performance further.

# Few Shot Inference

In [37]:
# Few-shot setup: three solved examples (test indices 40, 80, 120) precede the
# same dialogue as in the one-shot run (test index 200).
example_indices_full = [40,80, 120]
example_index_to_summarize = 200

few_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)
# Print the assembled prompt so its layout can be inspected.
print(few_shot_prompt)


Dialogue:
#Person1#: Hello, I bought the pendant in your shop, just before. 
#Person2#: Yes. Thank you very much. 
#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. 
#Person2#: Oh, is it? 
#Person1#: Would you change it to a new one? 
#Person2#: Yes, certainly. You have the receipt? 
#Person1#: Yes, I do. 
#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. 
#Person1#: Thank you so much. 

What was going on? 
#Person1# wants to change the broken pendant in #Person2#'s shop.




Dialogue:
#Person1#: Hello. Is this ABC Rent-a-car Company?
#Person2#: Yes, speaking. May I help you?
#Person1#: This morning we rented a car and we are on the way to Niagara Falls. I'm afraid we have a car accident near the border.
#Person2#: That's too bad. What kind of accident is it? Are you all right?
#Person1#: I'm all right, but my friend is seriously injured. Will you call an ambulance and the pol

In [39]:
# Human reference for the dialogue that the few-shot prompt asks about.
summary = dataset['test'][example_index_to_summarize]['summary']

# Generate from the few-shot prompt.
encoded = tokenizer(few_shot_prompt, return_tensors='pt')
generated_ids = model.generate(encoded['input_ids'], max_new_tokens=50)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Reference and model output, one after the other.
print(
    dash_line,
    'Human Summary: ',
    summary,
    dash_line,
    'Model Summary: ',
    output,
    sep='\n',
)

---------------------------------------------------------------------------------------------------
Human Summary: 
It's #Person1#'s first time to China and #Person1# wants some Chinese cuisine. #Person2# recommends some but it's too far and #Person1# is starving. Then #Person2# suggests a nearby Quanjude restaurant and its Beijing roast duck. #Person1# will go there.
---------------------------------------------------------------------------------------------------
Model Summary: 
The first time to China, Person1 is looking for Chinese cuisine.


### We can see that few-shot inference does not necessarily do better than one-shot inference.
Some people add more shots (4, 5, 6, or more) to improve the model's performance, but in the instructor's experience more than 6 shots does not help much. Here, too, using more than one shot did not help the model do better.

# Generative Configuration parameters for Inference
We can change the configuration parameters to get a different output from the LLM. So far, we have only used max_new_tokens, which defines the maximum number of tokens to generate. A full list of available parameters can be found in the <a href="https://huggingface.co/docs/transformers/en/main_classes/text_generation" >Hugging Face Generation documentation</a>.

A convenient way of organizing the configuration parameters is to use GenerationConfig class.

We can, for example, set do_sample=True to activate sampling-based decoding strategies, which influence how the next token is chosen from the probability distribution over the entire vocabulary, and adjust the output temperature and other parameters (such as top_k and top_p).

<b>The temperature parameter</b>: values closer to 0 give more conservative results, while values closer to 1 (and above) give wilder and more bizarre results.

In [54]:
# Decoding settings for this run. The commented-out lines are the alternatives
# that were tried, with the author's verdict on each; enable exactly one.
# The do_sample=True variants are stochastic, so their output varies between
# runs unless a random seed is fixed first.
# generation_config = GenerationConfig(max_new_tokens=50)
generation_config = GenerationConfig(max_new_tokens=50,min_new_tokens=20)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True) # worse result
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1) #worse
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5) # No
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0) # No
inputs = tokenizer(few_shot_prompt, return_tensors='pt')
# Pass the config by keyword so the call does not depend on the positional
# order of generate()'s parameters.
model_out = model.generate(inputs['input_ids'], generation_config=generation_config)
output = tokenizer.decode(model_out[0], skip_special_tokens=True)

# `summary` is the human reference assigned in the few-shot inference cell;
# that cell must have been run before this one.
print(dash_line)
print('Human Summary: ')
print(summary)
print(dash_line)
print('Model Summary: ')
print(output)

---------------------------------------------------------------------------------------------------
Human Summary: 
It's #Person1#'s first time to China and #Person1# wants some Chinese cuisine. #Person2# recommends some but it's too far and #Person1# is starving. Then #Person2# suggests a nearby Quanjude restaurant and its Beijing roast duck. #Person1# will go there.
---------------------------------------------------------------------------------------------------
Model Summary: 
The first time to China, Person1 is looking for Chinese cuisine.
