In [20]:
from torch.utils.data import Dataset
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import torch
import os
import re
import matplotlib.pyplot as plt
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
from rouge_score import rouge_scorer
import math
device = "cuda" if torch.cuda.is_available() else "cpu"
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import random_split

In [21]:
# Define a path to save the trained model
MODEL_PATH = "chatbot_model2.pt"
BEST_MODEL_PATH = "best_chatbot_model.pt"

class ChatData(Dataset):
    def __init__(self, path:str, tokenizer):
        self.data = pd.read_csv('Questions_Canada.csv', encoding='unicode_escape')
        self.X = []

        for idx, row in self.data.iterrows():
            question = str(row['question'])  # Convert to string
            answer = str(row['answer'])      # Convert to string
            self.X.append('<startofstring> ' + question + ' <bot>: ' + answer + ' <endofstring')

        self.X_encoded = tokenizer(self.X, max_length=40, truncation=True, padding="max_length", return_tensors="pt")
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx]
        }

# Calculate BLEU score
def calculate_bleu_score(candidate_responses, reference_responses):
    bleu_scores = []
    smoothing_function = SmoothingFunction().method1  # Using smoothing function

    for candidate_response, reference_response in zip(candidate_responses, reference_responses):
        candidate_tokens = candidate_response.split()  # Tokenize candidate response
        reference_tokens = reference_response.split()  # Tokenize reference response
        bleu_score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing_function)
        bleu_scores.append(bleu_score)

    return bleu_scores

# Calculate ROUGE score
def calculate_rouge_score(candidate_responses, reference_responses):
    rouge_scorer_instance = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge_scores = []

    for candidate_response, reference_response in zip(candidate_responses, reference_responses):
        rouge_score = rouge_scorer_instance.score(candidate_response, reference_response)
        rouge_scores.append(rouge_score)

    return rouge_scores

# Evaluate perplexity
def calculate_perplexity(model, tokenizer, dataloader, device):
    total_loss = 0
    num_batches = 0

    with torch.no_grad():
        for data in tqdm(dataloader, desc="Calculating Perplexity"):
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            output = model(input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = output.loss
            total_loss += loss.item()
            num_batches += 1

    perplexity = math.exp(total_loss / num_batches)
    return perplexity

def train(chatData, model, optimizer, device, BEST_MODEL_PATH, MODEL_PATH, reference_responses, epochs=20, patience=10):
    loss_values = []  # To store loss values
    bleu_scores = []  # To store BLEU scores
    rouge_scores = []  # To store ROUGE scores

    best_loss = float('inf')
    early_stopping_counter = 0

    for i in tqdm(range(epochs)):
        total_loss = 0
        candidate_responses = []

        for data in tqdm(chatData, desc=f"Epoch {i + 1}/{epochs}"):
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            optimizer.zero_grad()
            output = model(input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = output.loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            # Generate responses during training
            generated_response = model.generate(input_ids, attention_mask=attention_mask, max_length=60, num_return_sequences=1, no_repeat_ngram_size=2)
            response = tokenizer.decode(generated_response[0], skip_special_tokens=True)
            
            # preprocessing to fetch only answer part from generate output
            response = re.split('<bot>:', response)[-1].strip()
            response = re.split("<end", response, 1)[0].strip()
            candidate_responses.append(response)

        # Calculate BLEU scores for the epoch
        epoch_bleu_scores = calculate_bleu_score(candidate_responses, reference_responses)
        average_bleu_score = sum(epoch_bleu_scores) / len(epoch_bleu_scores)
        bleu_scores.append(average_bleu_score)

        # Calculate ROUGE scores for the epoch
        epoch_rouge_scores = calculate_rouge_score(candidate_responses, reference_responses)
        average_rouge_score = {
            'rouge1': sum(score['rouge1'].fmeasure for score in epoch_rouge_scores) / len(epoch_rouge_scores),
            'rouge2': sum(score['rouge2'].fmeasure for score in epoch_rouge_scores) / len(epoch_rouge_scores),
            'rougeL': sum(score['rougeL'].fmeasure for score in epoch_rouge_scores) / len(epoch_rouge_scores),
        }
        rouge_scores.append(average_rouge_score)

        # Log and print the loss for the epoch
        print(f"Epoch {i + 1}/{epochs} - Loss: {total_loss:.4f} - BLEU Score: {average_bleu_score:.4f} - ROUGE Score: {average_rouge_score}")

        # Append the loss values to the list for plotting
        loss_values.append(total_loss)

        # Save the model if it has the best training loss
        if total_loss < best_loss:
            best_loss = total_loss
            model.save_pretrained(BEST_MODEL_PATH)
            tokenizer.save_pretrained(BEST_MODEL_PATH)

            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        # Evaluate perplexity after each epoch
        perplexity = calculate_perplexity(model, tokenizer, chatData, device)
        print(f"Perplexity after epoch {i + 1}: {perplexity}")

        # Early stopping
        if early_stopping_counter >= patience:
            print(f"Early stopping after {patience} epochs without improvement.")
            break

    # Plot loss, BLEU scores, and ROUGE scores
    epochs_range = range(1, len(loss_values) + 1)
    plt.figure(figsize=(18, 5))
    
    plt.subplot(1, 3, 1)
    plt.plot(epochs_range, loss_values, label='Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training Loss')

    plt.subplot(1, 3, 2)
    plt.plot(epochs_range, bleu_scores, label='BLEU Score', color='orange')
    plt.xlabel('Epoch')
    plt.ylabel('BLEU Score')
    plt.legend()
    plt.title('BLEU Scores')

    plt.subplot(1, 3, 3)
    plt.plot(epochs_range, [score['rouge1'] for score in rouge_scores], label='ROUGE-1 Score', color='green')
    plt.plot(epochs_range, [score['rouge2'] for score in rouge_scores], label='ROUGE-2 Score', color='blue')
    plt.plot(epochs_range, [score['rougeL'] for score in rouge_scores], label='ROUGE-L Score', color='purple')
    plt.xlabel('Epoch')
    plt.ylabel('ROUGE Score')
    plt.legend()
    plt.title('ROUGE Scores')

    plt.tight_layout()
    plt.show()

    print("Training completed.")
# Load or initialize the model
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({"pad_token": "<pad>",
                            "bos_token": "<startofstring>",
                            "eos_token": "<endofstring>"})
tokenizer.add_tokens(["<bot>:"])

if os.path.exists(MODEL_PATH):
    model = GPT2LMHeadModel.from_pretrained(MODEL_PATH)
else:
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.resize_token_embeddings(len(tokenizer))
#     model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=50261)

model = model.to(device)
chatData = ChatData("Questions_Canada.csv", tokenizer)
chatData = DataLoader(chatData, batch_size=4)
model.train()



optimizer = Adam(model.parameters(), lr=1e-3)

# Define reference responses for BLEU score calculation
reference_responses = [
    "Sure. A separate application form must be submitted in order to apply for your child."
]

print("Training ...")
train(chatData, model, optimizer, device, BEST_MODEL_PATH, MODEL_PATH, reference_responses)

You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embeding dimension will be 50261. This might induce some performance reduction as *Tensor Cores* will not be available. For more details  about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc


Training ...


  0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 1/20 - Loss: 397.6006 - BLEU Score: 0.0000 - ROUGE Score: {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 1: 303.54755342247023


Epoch 2/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 2/20 - Loss: 139.7923 - BLEU Score: 0.0021 - ROUGE Score: {'rouge1': 0.2222222222222222, 'rouge2': 0.0, 'rougeL': 0.1111111111111111}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 2: 128.52634243449978


Epoch 3/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 3/20 - Loss: 113.7716 - BLEU Score: 0.0116 - ROUGE Score: {'rouge1': 0.09090909090909091, 'rouge2': 0.0, 'rougeL': 0.09090909090909091}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 3: 32.45004562409555


Epoch 4/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 4/20 - Loss: 81.1315 - BLEU Score: 0.0021 - ROUGE Score: {'rouge1': 0.11764705882352941, 'rouge2': 0.0, 'rougeL': 0.11764705882352941}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 4: 10.628435965588286


Epoch 5/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 5/20 - Loss: 61.5833 - BLEU Score: 0.0000 - ROUGE Score: {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 5: 5.930998699599301


Epoch 6/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 6/20 - Loss: 46.2582 - BLEU Score: 0.0131 - ROUGE Score: {'rouge1': 0.08, 'rouge2': 0.0, 'rougeL': 0.08}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 6: 3.496980789119191


Epoch 7/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 7/20 - Loss: 33.6520 - BLEU Score: 0.0128 - ROUGE Score: {'rouge1': 0.08695652173913045, 'rouge2': 0.0, 'rougeL': 0.08695652173913045}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 7: 2.6551572325687602


Epoch 8/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 8/20 - Loss: 25.7363 - BLEU Score: 0.0000 - ROUGE Score: {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 8: 2.1928656719481814


Epoch 9/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 9/20 - Loss: 21.1861 - BLEU Score: 0.0000 - ROUGE Score: {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 9: 1.840569774583591


Epoch 10/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Epoch 10/20 - Loss: 16.0306 - BLEU Score: 0.0000 - ROUGE Score: {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}


Calculating Perplexity:   0%|          | 0/26 [00:00<?, ?it/s]

Perplexity after epoch 10: 1.665327727807432


Epoch 11/20:   0%|          | 0/26 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


KeyboardInterrupt: 