# Bonus point (beam search)

In [334]:
# beam search
# Referenced from machinelearningmastery.com
def beam_search_decoder(data, k):
    sequences = [[list(), 0.0]]
    # walk over each step in sequence
    for row in data:
        all_candidates = list()
        # expand each current candidate
        for i in range(len(sequences)):
            seq, score = sequences[i]
            for j in range(len(row)):
                candidate = [seq + [j], score - log(row[j])]
                all_candidates.append(candidate)
        # order all candidates by score
        ordered = sorted(all_candidates, key=lambda tup:tup[1])
        # select k best
        sequences = ordered[:k]
    return sequences

# Task 1

In [335]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    LogitsProcessorList,
    MinLengthLogitsProcessor,
    StoppingCriteriaList,
    MaxLengthCriteria,
)
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

model.generation_config.pad_token_id = model.generation_config.eos_token_id

maxLength = 30

prompt = "Today I believe we can finally"
k = 20
p = 0.95

# return Pytorch tensor
inputs = tokenizer(prompt, return_tensors="pt")
input_ids = inputs.input_ids

In [336]:
# Greedy Search
greedy_output = model.generate(
    input_ids, 
    max_new_tokens=maxLength, 
    return_dict_in_generate=True, 
    output_scores=True
)

# Beam Search
beam_output = model.generate(
    input_ids, 
    max_new_tokens=maxLength, 
    num_beams=5, 
    no_repeat_ngram_size=2, 
    early_stopping=True,
    return_dict_in_generate=True, 
    output_scores=True
)

# Top-K
# set top_k to 20
top_k_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_new_tokens=maxLength, 
    top_k=k,
    return_dict_in_generate=True, 
    output_scores=True
)

# Top-P (nuclear sampling)

# set top_k = 20 and set top_p = 0.95
top_p_output = model.generate(
    input_ids,
    do_sample=True, 
    max_new_tokens=maxLength, 
    top_k=k, 
    top_p=p, 
    return_dict_in_generate=True, 
    output_scores=True
)



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

### Print outputs

In [337]:
print(f'Greedy Search',tokenizer.batch_decode(greedy_output[0], skip_special_tokens=True))
print(f'\nBeam Search',tokenizer.batch_decode(beam_output[0], skip_special_tokens=True))
print(f'\nTop-K',tokenizer.batch_decode(top_k_output[0], skip_special_tokens=True))
print(f'\nTop-P',tokenizer.batch_decode(top_p_output[0], skip_special_tokens=True))

Greedy Search ['Today I believe we can finally get to the point where we can make a difference in the lives of the people of the United States of America.\n\nI believe that we can']

Beam Search ['Today I believe we can finally get to the bottom of this issue.\n\n"We need to find a way to make sure that we don\'t get into a situation where we']

Top-K ['Today I believe we can finally make America a more prosperous and secure world without further complacency and insecurity."\n\n"Our nation\'s progress is not only dependent on the contributions']

Top-P ['Today I believe we can finally reach the point where the best is yet to come."\n\nI was very happy to see the news that the UK government and the UK government are']


### Generate Scores

In [338]:
input_length = inputs.input_ids.shape[1]
# Greedy
greedy_output_scores = model.compute_transition_scores(
    greedy_output.sequences, greedy_output.scores, normalize_logits=True
)
greedy_output_tokens = greedy_output.sequences[:, input_length:]

# Beam
beam_output_scores = model.compute_transition_scores(
    beam_output.sequences, beam_output.scores, normalize_logits=True
)
beam_output_tokens = beam_output.sequences[:, input_length:]

# Top-K
top_k_output_scores = model.compute_transition_scores(
    top_k_output.sequences, top_k_output.scores, normalize_logits=True
)
top_k_output_tokens = top_k_output.sequences[:, input_length:]

# Top-P
top_p_output_scores = model.compute_transition_scores(
    top_p_output.sequences, top_p_output.scores, normalize_logits=True
)
top_p_output_tokens = top_p_output.sequences[:, input_length:]

# for tok, score in zip(greedy_output_tokens[0], greedy_output_scores[0]):
#     # | token | token string | logits | probability
#     print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")


In [339]:
# https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig
def calculate_perplexity_and_likelihood(scores):

    likelihood = 0
    perplexity = 0  
    probabilities = []
    
    # Logit is normalized already    
    for score in scores[0]:
        
        logit = score.numpy()
        
        # Based on the document, since logit is normalized prob is simply np.exp(logit)
        prob = np.exp(logit)
        probabilities.append(prob)
        
    likelihood = np.sum(np.log(probabilities))
    perplexity = np.exp(-likelihood / len(probabilities))
    
    return perplexity, likelihood

In [340]:
# calculate the perplexity and likelihood for the greedy output
greedy_perplexity, greedy_likelihood = calculate_perplexity_and_likelihood(greedy_output_scores)
print(f"Greedy perplexity: {greedy_perplexity:.3f}")
print(f"Greedy likelihood: {greedy_likelihood:.2f}")

# calculate the perplexity and likelihood for the beam output
beam_perplexity, beam_likelihood = calculate_perplexity_and_likelihood(beam_output_scores)
print(f"Beam perplexity: {beam_perplexity:.3f}")
print(f"Beam likelihood: {beam_likelihood:.2f}")

# calculate the perplexity and likelihood for the top-k output
top_k_perplexity, top_k_likelihood = calculate_perplexity_and_likelihood(top_k_output_scores)
print(f"Top-k perplexity: {top_k_perplexity:.3f}")
print(f"Top-k likelihood: {top_k_likelihood:.2f}")

# calculate the perplexity and likelihood for the top-p output
top_p_perplexity, top_p_likelihood = calculate_perplexity_and_likelihood(top_p_output_scores)
print(f"Top-p perplexity: {top_p_perplexity:.3f}")
print(f"Top-p likelihood: {top_p_likelihood:.2f}")

Greedy perplexity: 3.841
Greedy likelihood: -40.37
Beam perplexity: 461.831
Beam likelihood: -184.06
Top-k perplexity: 8.427
Top-k likelihood: -63.94
Top-p perplexity: 5.215
Top-p likelihood: -49.55


# Task 2

### Samsum: https://huggingface.co/datasets/samsum


In [10]:
# To load_dataset("samsum")
# !pip install py7zr

In [11]:
from datasets import load_dataset

dataset = load_dataset("samsum")

Found cached dataset samsum (C:/Users/Jeonghoon Kim/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e)


  0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("philschmid/bart-large-cnn-samsum")

model = AutoModelForSeq2SeqLM.from_pretrained("philschmid/bart-large-cnn-samsum")

In [14]:
import pandas as pd

# create an empty DataFrame with 5 columns
df = pd.DataFrame(columns=['prompt', 'greedy_search', 'beam_search', 'top_k', 'top_p'])


# maxLength to 150 to assure full text
maxLength = 150
train = False

if (train):
    n = 50
else:
    n = 0

# Create summary from first 50 with test set 
print('Please... Be patient...\nThis takes a while...')
for i in range(n):
    prompt = dataset['test'][i]['dialogue']
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids

    # Greedy Search
    greedy_output = model.generate(
        input_ids, 
        max_new_tokens=maxLength, 
        return_dict_in_generate=True, 
        output_scores=True
    )

    # Beam Search
    beam_output = model.generate(
        input_ids, 
        max_new_tokens=maxLength, 
        num_beams=5, 
        no_repeat_ngram_size=2, 
        early_stopping=True,
        return_dict_in_generate=True, 
        output_scores=True
    )

    # Top-K
    # set top_k to 20
    top_k_output = model.generate(
        input_ids, 
        do_sample=True, 
        max_new_tokens=maxLength, 
        top_k=k,
        return_dict_in_generate=True, 
        output_scores=True
    )

    # Top-P (nuclear sampling)
    # set top_k = 20 and set top_p = 0.95
    top_p_output = model.generate(
        input_ids,
        do_sample=True, 
        max_new_tokens=maxLength, 
        top_k=k, 
        top_p=p, 
        return_dict_in_generate=True, 
        output_scores=True
    )
    
    # convert list to string
    greedy = ' '.join(tokenizer.batch_decode(greedy_output[0], skip_special_tokens=True))
    beam = ' '.join(tokenizer.batch_decode(beam_output[0], skip_special_tokens=True))
    top_k = ' '.join(tokenizer.batch_decode(top_k_output[0], skip_special_tokens=True))
    top_p = ' '.join(tokenizer.batch_decode(top_p_output[0], skip_special_tokens=True))
    
    row = {'prompt': prompt, 'greedy_search': greedy, 'beam_search': beam, 'top_k': top_k, 'top_p': top_p}
    df = df.append(row, ignore_index=True)
    
# save to csv file
if (train):
    df.to_csv('output.csv', encoding='utf-8-sig', index=False)
else:
    pass

print('\ndf saved!')

Please... be patient...
This takes a while...

df saved!


# Task 3-1

### content overlap metrics: BLEU
### model-based metrics:  BERT score


In [59]:
# !pip install bert_score

In [79]:
print(dataset['test']['summary'][0])
print('--------------------------------------------------')
print(df['greedy_search'][0])

Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.
--------------------------------------------------
Hannah is looking for Betty's number. Larry called her last time they were at the park together. Hannah doesn't know Larry very well. Amanda suggests Hannah to text him. Hannah agrees to text Larry instead.   ...  .  


In [61]:
import pandas as pd
import evaluate

df = pd.read_csv('output.csv', encoding='utf-8-sig')

# Get 50 references
references = dataset['test']['summary'][:50]

# get all predictions
greedy_predictions = list(df['greedy_search'])
beam_predictions = list(df['beam_search'])
top_k_predictions =  list(df['top_k'])
top_p_predictions =  list(df['top_p'])

## Bert Score

In [62]:
# load evaluate metrics
bertscore = evaluate.load("bertscore")

greedy_bert = bertscore.compute(predictions=greedy_predictions, references=references, lang="en")
beam_bert = bertscore.compute(predictions=beam_predictions, references=references, lang="en")
top_k_bert = bertscore.compute(predictions=top_k_predictions, references=references, lang="en")
top_p_bert = bertscore.compute(predictions=top_p_predictions, references=references, lang="en")

## Blue

In [102]:
from nltk.tokenize import word_tokenize

# load evaluate metrics
bleu = evaluate.load("bleu")

greedy_list = []
beam_list = []
top_k_list = []
top_p_list = []

for i in range(len(greedy_predictions)):
    greedy_blue = bleu.compute(predictions=greedy_predictions[i:i+1], references=references[i:i+1])
    beam_blue = bleu.compute(predictions=list(beam_predictions[i:i+1]), references=references[i:i+1])
    top_k_blue = bleu.compute(predictions=list(top_k_predictions[i:i+1]), references=references[i:i+1])
    top_p_blue = bleu.compute(predictions=list(top_p_predictions[i:i+1]), references=references[i:i+1])
    
    greedy_list.append(greedy_blue['bleu'])
    beam_list.append(beam_blue['bleu'])
    top_k_list.append(top_k_blue['bleu'])
    top_p_list.append(top_p_blue['bleu'])


In [103]:
greedy_list

[0.0,
 0.0,
 0.0,
 0.0,
 0.22306552953519643,
 0.26675945012732133,
 0.0,
 0.12952741640890278,
 0.10369816700638208,
 0.21672328066436025,
 0.1605459569752503,
 0.153876603091887,
 0.08621434964845645,
 0.06819851521871377,
 0.11966751775324048,
 0.0,
 0.15702128402250726,
 0.21866340640321882,
 0.04465922625585483,
 0.11437790076957295,
 0.0,
 0.1840040987758381,
 0.17303353843686656,
 0.07906552205912001,
 0.1363970304374275,
 0.11336958836647044,
 0.0,
 0.0,
 0.0,
 0.25976620289907654,
 0.0,
 0.0,
 0.0,
 0.0824186035805035,
 0.2965879078152069,
 0.0,
 0.06455672843053721,
 0.0,
 0.0,
 0.3381677279168117,
 0.0,
 0.0,
 0.1806702354428209,
 0.0,
 0.09473323932047992,
 0.09212480089422209,
 0.1365091799434783,
 0.07786956590083421,
 0.0,
 0.0]

### Add Scores to df

In [104]:
df['greedy_bert_score'] = pd.Series(greedy_bert['f1'])
df['beam_bert_score'] = pd.Series(beam_bert['f1'])
df['top_k_bert_score'] = pd.Series(top_k_bert['f1'])
df['top_p_bert_score'] = pd.Series(top_p_bert['f1'])

df['greedy_bleu'] = pd.Series(greedy_list)
df['beam_bleu'] = pd.Series(beam_list)
df['top_k_bleu'] = pd.Series(top_k_list)
df['top_p_bleu'] = pd.Series(top_p_list)

In [106]:
savePath ='outputWithAutomaticEvaluation.csv'
df.to_csv(savePath, encoding='utf-8-sig', index=False)
print('df was saved to "', savePath, '"')

df was saved to " outputWithScores.csv "


### Compute Average of each evaluation