# Import Libraries

In [None]:
import torch
import string
import numpy as np
import pandas as pd
import os
from transformers import (
    BertTokenizer, BertForMaskedLM,
    XLNetTokenizer, XLNetLMHeadModel,
    XLMRobertaTokenizer, XLMRobertaForMaskedLM,
    BartTokenizer, BartForConditionalGeneration,
    ElectraTokenizer, ElectraForMaskedLM,
    RobertaTokenizer, RobertaForMaskedLM,
    GPT2Tokenizer, GPT2LMHeadModel
)

from IPython.display import clear_output

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. The bare name on the last line displays the choice.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

# Load Models

In [None]:
# Checkpoint identifiers handed to ``from_pretrained``, keyed by their
# position (0..6) in the tuple below.
MODELS = dict(enumerate((
    "bert-base-cased",
    "xlnet-base-cased",
    "xlm-roberta-base",
    "facebook/bart-large",
    "google/electra-small-generator",
    "roberta-base",
    "gpt2",
)))

In [None]:
# BERT: load the tokenizer and the masked-LM model for MODELS[0], then move
# the model to the selected device and switch it to evaluation mode.
bert_tokenizer = BertTokenizer.from_pretrained(MODELS[0])
bert_model = BertForMaskedLM.from_pretrained(MODELS[0])
bert_model = bert_model.to(device)
bert_model = bert_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# XLNet: load the tokenizer and the LM-head model for MODELS[1], then move
# the model to the selected device and switch it to evaluation mode.
xlnet_tokenizer = XLNetTokenizer.from_pretrained(MODELS[1])
xlnet_model = XLNetLMHeadModel.from_pretrained(MODELS[1])
xlnet_model = xlnet_model.to(device)
xlnet_model = xlnet_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# XLM-RoBERTa: load the tokenizer and the masked-LM model for MODELS[2], then
# move the model to the selected device and switch it to evaluation mode.
xlmroberta_tokenizer = XLMRobertaTokenizer.from_pretrained(MODELS[2])
xlmroberta_model = XLMRobertaForMaskedLM.from_pretrained(MODELS[2])
xlmroberta_model = xlmroberta_model.to(device)
xlmroberta_model = xlmroberta_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# BART: load the tokenizer and the conditional-generation model for
# MODELS[3], then move the model to the selected device and switch it to
# evaluation mode.
bart_tokenizer = BartTokenizer.from_pretrained(MODELS[3])
bart_model = BartForConditionalGeneration.from_pretrained(MODELS[3])
bart_model = bart_model.to(device)
bart_model = bart_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# ELECTRA: load the tokenizer and the masked-LM model for MODELS[4], then
# move the model to the selected device and switch it to evaluation mode.
electra_tokenizer = ElectraTokenizer.from_pretrained(MODELS[4])
electra_model = ElectraForMaskedLM.from_pretrained(MODELS[4])
electra_model = electra_model.to(device)
electra_model = electra_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# RoBERTa: load the tokenizer and the masked-LM model for MODELS[5], then
# move the model to the selected device and switch it to evaluation mode.
roberta_tokenizer = RobertaTokenizer.from_pretrained(MODELS[5])
roberta_model = RobertaForMaskedLM.from_pretrained(MODELS[5])
roberta_model = roberta_model.to(device)
roberta_model = roberta_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# GPT-2: load the tokenizer and the LM-head model for MODELS[6], then move
# the model to the selected device and switch it to evaluation mode.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained(MODELS[6])
gpt2_model = GPT2LMHeadModel.from_pretrained(MODELS[6])
gpt2_model = gpt2_model.to(device)
gpt2_model = gpt2_model.eval()
# Wipe whatever the loading calls printed into the cell output.
clear_output()

In [None]:
# Prompt passed to every decoding call in the cells below.
sentence = "Deep learning is"

# Greedy Search Decoding

In [None]:
def greedy_search_decoding(
    model_name: str,
    model,
    tokenizer,
    sentence: str,
    n_steps: int,
    choices_per_step: int,
) -> pd.DataFrame:
    """Greedily extend ``sentence`` one token at a time and log every step.

    At each step the model is run on all tokens accumulated so far, the
    next-token distribution is read from the last position of the logits, and
    the single most probable token is appended to the sequence.

    Args:
        model_name: Label written to the "Model" column of every row.
        model: A ``torch.nn.Module`` with at least one parameter.
            ``model(input_ids=...)`` must return an object whose ``logits``
            can be indexed as ``[0, -1, :]``.
        tokenizer: Callable as ``tokenizer(text, return_tensors="pt")`` and
            providing ``decode(ids, skip_special_tokens=True)``.
        sentence: Prompt text to continue.
        n_steps: Number of tokens to generate (one table row each).
        choices_per_step: How many of the most probable next tokens to report
            per step; capped at the vocabulary size.

    Returns:
        A DataFrame with one row per step and the columns "Model", "Input"
        (the text fed to the model at that step, i.e. *before* the chosen
        token is appended) and "Choice 1" .. "Choice N", each formatted as
        ``"<token> (<probability>%)"``.
    """
    # Put the inputs wherever the model's weights live instead of relying on
    # a notebook-level global.
    model_device = next(model.parameters()).device
    input_ids = tokenizer(sentence, return_tensors="pt")["input_ids"].to(model_device)

    iterations = []
    # Inference only: skip autograd bookkeeping so no graph is kept per step.
    with torch.no_grad():
        for _ in range(n_steps):
            output = model(input_ids=input_ids)
            next_token_probs = torch.softmax(output.logits[0, -1, :], dim=-1)

            # Only the head of the ranking is needed, so use top-k rather than
            # sorting the whole vocabulary. At least one token is always
            # requested because the best one is appended below.
            vocab_size = next_token_probs.shape[-1]
            k = min(max(choices_per_step, 1), vocab_size)
            top_probs, top_ids = torch.topk(next_token_probs, k)

            iteration = {
                "Model": model_name,
                "Input": tokenizer.decode(input_ids[0], skip_special_tokens=True),
            }
            for rank in range(min(choices_per_step, k)):
                token_text = tokenizer.decode(top_ids[rank], skip_special_tokens=True)
                percent = 100 * top_probs[rank].item()
                iteration[f"Choice {rank + 1}"] = f"{token_text} ({percent:.2f}%)"
            iterations.append(iteration)

            # Greedy step: commit to the most probable token.
            input_ids = torch.cat([input_ids, top_ids[0].view(1, 1)], dim=-1)

    return pd.DataFrame(iterations)

### BERT

In [None]:
# Five greedy steps with BERT, reporting the five likeliest tokens per step.
greedy_bert_df = greedy_search_decoding(
    model_name="BERT",
    model=bert_model,
    tokenizer=bert_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_bert_df

### XLNET

In [None]:
# Five greedy steps with XLNet, reporting the five likeliest tokens per step.
greedy_xlnet_df = greedy_search_decoding(
    model_name="XLNET",
    model=xlnet_model,
    tokenizer=xlnet_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_xlnet_df

### XLM-RoBERTa

In [None]:
# Five greedy steps with XLM-RoBERTa, reporting the five likeliest tokens per
# step.
greedy_xlmroberta_df = greedy_search_decoding(
    model_name="XLM-RoBERTa",
    model=xlmroberta_model,
    tokenizer=xlmroberta_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_xlmroberta_df

### BART

In [None]:
# Five greedy steps with BART, reporting the five likeliest tokens per step.
greedy_bart_df = greedy_search_decoding(
    model_name="BART",
    model=bart_model,
    tokenizer=bart_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_bart_df

### ELECTRA

In [None]:
# Five greedy steps with ELECTRA, reporting the five likeliest tokens per
# step.
greedy_electra_df = greedy_search_decoding(
    model_name="ELECTRA",
    model=electra_model,
    tokenizer=electra_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_electra_df

### RoBERTa

In [None]:
# Five greedy steps with RoBERTa, reporting the five likeliest tokens per
# step.
greedy_roberta_df = greedy_search_decoding(
    model_name="RoBERTa",
    model=roberta_model,
    tokenizer=roberta_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_roberta_df

### GPT2

In [None]:
# Five greedy steps with GPT-2, reporting the five likeliest tokens per step.
greedy_gpt2_df = greedy_search_decoding(
    model_name="GPT2",
    model=gpt2_model,
    tokenizer=gpt2_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
)
greedy_gpt2_df

# Beam Search Decoding 

In [None]:
def beam_search_decoding(
    model_name: str,
    model,
    tokenizer,
    sentence: str,
    n_steps: int = 5,
    choices_per_step: int = 5,
    beam_width: int = 5,
) -> pd.DataFrame:
    """Continue ``sentence`` with beam search and log the best beam per step.

    Every hypothesis in the beam is extended with each of its ``beam_width``
    most probable next tokens; the ``beam_width`` extensions with the highest
    cumulative log-probability survive to the next step. The first token id
    that the tokenizer yields for each ``string.punctuation`` character has
    its log-probability lowered by 100, which effectively bans it.

    Args:
        model_name: Label written to the "Model" column of every row.
        model: A ``torch.nn.Module`` with at least one parameter.
            ``model(input_ids=...)`` must return an object whose ``logits``
            can be indexed as ``[0, -1, :]``.
        tokenizer: Callable as ``tokenizer(text, return_tensors="pt")`` and
            providing ``encode(text, add_special_tokens=False)`` and
            ``decode(ids, skip_special_tokens=True)``.
        sentence: Prompt text to continue.
        n_steps: Number of tokens to generate (one table row each).
        choices_per_step: How many of the most probable next tokens to report
            per step; capped at the vocabulary size.
        beam_width: Number of hypotheses kept after each step.

    Returns:
        A DataFrame with one row per step and the columns "Model", "Input"
        (the best hypothesis *after* that step's token was appended) and
        "Choice 1" .. "Choice N": the top tokens of the distribution that
        produced the best hypothesis, formatted ``"<token> (<probability>%)"``
        with the punctuation penalty already applied.

    Raises:
        ValueError: If ``beam_width`` is smaller than 1.
    """
    if beam_width < 1:
        raise ValueError("beam_width must be at least 1")

    # Put the inputs wherever the model's weights live instead of relying on
    # a notebook-level global.
    model_device = next(model.parameters()).device
    input_ids = tokenizer(sentence, return_tensors="pt")["input_ids"].to(model_device)

    # The penalised ids do not change between steps, so compute them once.
    # NOTE(review): only the first piece of each encoding is used; for
    # tokenizers that emit a leading word-boundary piece this may not be the
    # punctuation mark itself - confirm per tokenizer.
    punct_ids = set()
    for char in string.punctuation:
        pieces = tokenizer.encode(char, add_special_tokens=False)
        if pieces:
            punct_ids.add(pieces[0])
    punct_index = torch.tensor(sorted(punct_ids), dtype=torch.long, device=model_device)

    iterations = []
    beam = [(input_ids, 0.0)]

    # Inference only: skip autograd bookkeeping so no graph is kept per step.
    with torch.no_grad():
        for _ in range(n_steps):
            candidates = []
            for hypothesis_ids, hypothesis_score in beam:
                output = model(input_ids=hypothesis_ids)
                log_probs = torch.log_softmax(output.logits[0, -1, :], dim=-1)
                if punct_index.numel():
                    log_probs[punct_index] -= 100

                # Fetch enough entries for both expansion and reporting.
                k = min(max(beam_width, choices_per_step), log_probs.shape[-1])
                top_log_probs, top_ids = torch.topk(log_probs, k)

                # Expand this hypothesis with every one of its best tokens.
                for rank in range(min(beam_width, k)):
                    extended_ids = torch.cat(
                        [hypothesis_ids, top_ids[rank].view(1, 1)], dim=-1
                    )
                    extended_score = hypothesis_score + top_log_probs[rank].item()
                    # Carry the parent's ranking along so the winning
                    # hypothesis can be reported with its own choices.
                    candidates.append(
                        (extended_ids, extended_score, top_ids, top_log_probs)
                    )

            candidates.sort(key=lambda candidate: candidate[1], reverse=True)
            survivors = candidates[:beam_width]
            beam = [(ids, score) for ids, score, _, _ in survivors]

            best_ids, _, best_top_ids, best_top_log_probs = survivors[0]
            iteration = {
                "Model": model_name,
                "Input": tokenizer.decode(best_ids[0], skip_special_tokens=True),
            }
            for rank in range(min(choices_per_step, best_top_ids.shape[0])):
                token_text = tokenizer.decode(best_top_ids[rank], skip_special_tokens=True)
                percent = 100 * torch.exp(best_top_log_probs[rank]).item()
                iteration[f"Choice {rank + 1}"] = f"{token_text} ({percent:.2f}%)"
            iterations.append(iteration)

    return pd.DataFrame(iterations)

### BERT

In [None]:
# Five beam-search steps with BERT: beam width 5, five reported choices.
beam_bert_df = beam_search_decoding(
    model_name="BERT",
    model=bert_model,
    tokenizer=bert_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_bert_df

### XLNET

In [None]:
# Five beam-search steps with XLNet: beam width 5, five reported choices.
beam_xlnet_df = beam_search_decoding(
    model_name="XLNET",
    model=xlnet_model,
    tokenizer=xlnet_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_xlnet_df

### XLM-RoBERTa 

In [None]:
# Five beam-search steps with XLM-RoBERTa: beam width 5, five reported
# choices.
beam_xlmroberta_df = beam_search_decoding(
    model_name="XLM-RoBERTa",
    model=xlmroberta_model,
    tokenizer=xlmroberta_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_xlmroberta_df

### BART

In [None]:
# Five beam-search steps with BART: beam width 5, five reported choices.
beam_bart_df = beam_search_decoding(
    model_name="BART",
    model=bart_model,
    tokenizer=bart_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_bart_df

### ELECTRA

In [None]:
# Five beam-search steps with ELECTRA: beam width 5, five reported choices.
beam_electra_df = beam_search_decoding(
    model_name="ELECTRA",
    model=electra_model,
    tokenizer=electra_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_electra_df

### RoBERTa

In [None]:
# Five beam-search steps with RoBERTa: beam width 5, five reported choices.
beam_roberta_df = beam_search_decoding(
    model_name="RoBERTa",
    model=roberta_model,
    tokenizer=roberta_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=5,
)
beam_roberta_df

### GPT2

In [None]:
# Five beam-search steps with GPT-2, five reported choices.
# NOTE(review): beam_width is 8 here while every other model in this notebook
# uses 5 - confirm this is intentional before comparing results.
beam_gpt2_df = beam_search_decoding(
    model_name="GPT2",
    model=gpt2_model,
    tokenizer=gpt2_tokenizer,
    sentence=sentence,
    n_steps=5,
    choices_per_step=5,
    beam_width=8,
)
beam_gpt2_df

# Comparison

### Greedy Search Results

In [None]:
# Summarise greedy search: one row per model, taken from the final decoding
# step of each per-model table. ``iloc[-1]`` selects the last row by position,
# so the summary keeps working if the number of decoding steps changes
# (a hard-coded row label would raise KeyError).
# Each table's "Input" column is re-labelled "Output" here.
greedy_df = pd.DataFrame(
    [
        {"Model": frame.iloc[-1]["Model"], "Output": frame.iloc[-1]["Input"]}
        for frame in (
            greedy_bert_df,
            greedy_xlnet_df,
            greedy_xlmroberta_df,
            greedy_bart_df,
            greedy_electra_df,
            greedy_roberta_df,
            greedy_gpt2_df,
        )
    ],
    columns=["Model", "Output"],
)

In [None]:
# Display the greedy-search comparison table.
greedy_df

### Beam Search Results

In [None]:
# Summarise beam search: one row per model, taken from the final decoding
# step of each per-model table. ``iloc[-1]`` selects the last row by position,
# so the summary keeps working if the number of decoding steps changes
# (a hard-coded row label would raise KeyError).
# Each table's "Input" column is re-labelled "Output" here.
beam_df = pd.DataFrame(
    [
        {"Model": frame.iloc[-1]["Model"], "Output": frame.iloc[-1]["Input"]}
        for frame in (
            beam_bert_df,
            beam_xlnet_df,
            beam_xlmroberta_df,
            beam_bart_df,
            beam_electra_df,
            beam_roberta_df,
            beam_gpt2_df,
        )
    ],
    columns=["Model", "Output"],
)

In [None]:
# Display the beam-search comparison table.
beam_df