In [8]:
!pip install datasets transformers evaluate

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

huggingface_dataset_name = "SetFit/bbc-news"

dataset = load_dataset(huggingface_dataset_name)

example_indices = [40, 200]

dash_line = '-' * 100

for i, index in enumerate(example_indices):
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print('INPUT News:')
    print(dataset['test'][index]['text'])
    print(dash_line)
    print(dash_line)
    print()

----------------------------------------------------------------------------------------------------
Example  1
----------------------------------------------------------------------------------------------------
INPUT News:
stars pay tribute to actor davis hollywood stars including spike lee  burt reynolds and oscar nominee alan alda have paid tribute to actor ossie davis at a funeral in new york.  veteran star ossie davis  a well-known civil rights activist  died in miami at the age of 87 on 4 february 2005. friends and family  including actress ruby dee his wife of 56 years  gathered at the riverside church on saturday. also present at the service was former us president bill clinton and singer harry belafonte  who gave the eulogy.  he would have been a very good president of the united states   said mr clinton.  like most of you here  he gave more to me than i gave to him.   the 87-year-old was found dead last weekend in his hotel room in florida  where he was making a film. police

In [9]:
model_name='google/flan-t5-base'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [12]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

sentence = "lopez misses uk charity premiere jennifer lopez cancelled an appearance at the uk charity premiere of her new movie saying she was too ill to fly.  the actress and singer dropped out at the last minute and has now cancelled all european promotion of the film shall we dance  and her new album. she said:  i very much wanted to be in london but unfortunately i m not well. at the advice of my doctors i m unable to travel.  co-star richard gere attended the event held in aid of the tsunami appeal. thousands braved the cold weather to see the stars in london s leicester square. the red carpet boasted waltzing dancers in honour of the film s ballroom dancing theme. the film s director peter chelsom said he was disappointed that lopez did not attend.   it s a shame. i know it s true that she s not well because she has also cancelled her promotional tour. i ve heard she has swollen glands.  gere  55  greeted the crowd and signed autographs  accompanied by his wife carey lowell. other stars who turned out on the night included honor blackman  strictly come dancing presenter tess daly and actress anita dobson. lopez issues a statement apologising for her absence.  i m so proud of shall we dance and was looking forward to visiting london   she said.  this film was a labour of love for me  and i want to thank everyone involved in bringing it to you  from the cast  to the film director  to the crew.  lopez appeared at the grammy awards on sunday  singing a duet with her third husband marc anthony."

sentence_encoded = tokenizer(sentence, return_tensors='pt')

sentence_decoded = tokenizer.decode(
        sentence_encoded["input_ids"][0], 
        skip_special_tokens=True
    )

print('ENCODED SENTENCE:')
print(sentence_encoded["input_ids"][0])
print('\nDECODED SENTENCE:')
print(sentence_decoded)

for i, index in enumerate(example_indices):
    dialogue = dataset['test'][index]['text']
    #summary = dataset['test'][index]['']
    
    inputs = tokenizer(dialogue, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_line)
    print(dash_line)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')
for i, index in enumerate(example_indices):
    dialogue = dataset['test'][index]['text']
    #summary = dataset['test'][index]['summary']

    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
    """

    # Input constructed prompt instead of the dialogue.
    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )
    
    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    #print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)    
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')    

Token indices sequence length is longer than the specified maximum sequence length for this model (563 > 512). Running this sequence through the model will result in indexing errors


ENCODED SENTENCE:
tensor([    3, 17696,   172,  3041,    15,     7,     3,  1598,  7813, 13539,
          528,    29,    29,    99,    49,     3, 17696,   172, 18454,    46,
         3179,    44,     8,     3,  1598,  7813, 13539,    13,   160,   126,
         1974,  2145,   255,    47,   396,     3,  1092,    12,  3971,     5,
            8, 15676,    11,  7634,  6292,    91,    44,     8,   336,  1962,
           11,    65,   230, 18454,    66, 14864,  5546,    13,     8,   814,
         1522,    62,  2595,    11,   160,   126,  2306,     5,   255,   243,
           10,     3,    23,   182,   231,  1114,    12,    36,    16,     3,
           40,   106,  2029,    68, 12050,     3,    23,     3,    51,    59,
          168,     5,    44,     8,  1867,    13,    82,  6659,     3,    23,
            3,    51,     3,  6319,    12,  1111,     5,   576,    18,  3624,
         2354,   986,   873,    60,  5526,     8,   605,  1213,    16,  3052,
           13,     8, 28583,  3958,     5,  29

In [13]:
for i, index in enumerate(example_indices):
    dialogue = dataset['test'][index]['text']
    #summary = dataset['test'][index]['summary']
        
    prompt = f"""
Dialogue:

{dialogue}

What was going on?
"""

    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"], 
            max_new_tokens=50,
        )[0], 
        skip_special_tokens=True
    )

    print(dash_line)
    print('Example ', i + 1)
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    #print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
    print(dash_line)
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

----------------------------------------------------------------------------------------------------
Example  1
----------------------------------------------------------------------------------------------------
INPUT PROMPT:

Dialogue:

stars pay tribute to actor davis hollywood stars including spike lee  burt reynolds and oscar nominee alan alda have paid tribute to actor ossie davis at a funeral in new york.  veteran star ossie davis  a well-known civil rights activist  died in miami at the age of 87 on 4 february 2005. friends and family  including actress ruby dee his wife of 56 years  gathered at the riverside church on saturday. also present at the service was former us president bill clinton and singer harry belafonte  who gave the eulogy.  he would have been a very good president of the united states   said mr clinton.  like most of you here  he gave more to me than i gave to him.   the 87-year-old was found dead last weekend in his hotel room in florida  where he was making 

In [27]:
def make_prompt(example_indices_full, example_index_to_summarize):
    prompt = ''
    for index in example_indices_full:
        dialogue = dataset['test'][index]['text']
        summary = dataset['test'][index]['text']
        
        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.
        prompt += f"""
news:

{dialogue}

What was going on?
{summary}


"""
    
    dialogue = dataset['test'][example_index_to_summarize]['text']
    
    prompt += f"""
news:

{dialogue}

What was going on?
{summary}
"""
        
    return prompt

In [28]:
example_indices_full = [40]
example_index_to_summarize = 200

one_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)

print(one_shot_prompt)


news:

stars pay tribute to actor davis hollywood stars including spike lee  burt reynolds and oscar nominee alan alda have paid tribute to actor ossie davis at a funeral in new york.  veteran star ossie davis  a well-known civil rights activist  died in miami at the age of 87 on 4 february 2005. friends and family  including actress ruby dee his wife of 56 years  gathered at the riverside church on saturday. also present at the service was former us president bill clinton and singer harry belafonte  who gave the eulogy.  he would have been a very good president of the united states   said mr clinton.  like most of you here  he gave more to me than i gave to him.   the 87-year-old was found dead last weekend in his hotel room in florida  where he was making a film. police said that he appeared to have died of natural causes. davis made his acting debut in 1950 in no way out starring sidney poiter. he frequently collaborated with director spike lee  starring in seven lee films includin

In [29]:
example_indices_full = [40, 80, 120]
example_index_to_summarize = 200

few_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)

print(few_shot_prompt)


news:

stars pay tribute to actor davis hollywood stars including spike lee  burt reynolds and oscar nominee alan alda have paid tribute to actor ossie davis at a funeral in new york.  veteran star ossie davis  a well-known civil rights activist  died in miami at the age of 87 on 4 february 2005. friends and family  including actress ruby dee his wife of 56 years  gathered at the riverside church on saturday. also present at the service was former us president bill clinton and singer harry belafonte  who gave the eulogy.  he would have been a very good president of the united states   said mr clinton.  like most of you here  he gave more to me than i gave to him.   the 87-year-old was found dead last weekend in his hotel room in florida  where he was making a film. police said that he appeared to have died of natural causes. davis made his acting debut in 1950 in no way out starring sidney poiter. he frequently collaborated with director spike lee  starring in seven lee films includin

In [31]:
generation_config = GenerationConfig(max_new_tokens=50)
# generation_config = GenerationConfig(max_new_tokens=10)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)

inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        generation_config=generation_config,
    )[0], 
    skip_special_tokens=True
)

print(dash_line)
print(f'MODEL GENERATION - FEW SHOT:\n{output}')
print(dash_line)

----------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT:
What was going on? stock market eyes japan recovery martina navratilova has defended her decision to prolong her tennis career at the age of 48. navratilova who made a comeback after retiring in
----------------------------------------------------------------------------------------------------
