
Skyrim NPC Chatbot
---------------------
This notebook trains three Skyrim-themed chatbot models based on in-game dialogue:

1. BiLSTM Chatbot (with Keras Tokenizer & padded sequences)
2. GloVe + BiLSTM Chatbot (with pre-trained GloVe 300D embeddings)
3. DistilGPT2 Chatbot (fine-tuned using HuggingFace Transformers)

Features:
- Auto-saves models and tokenizer to Google Drive
- Handles preprocessing, tokenization, and padding
- Includes real-time chat interface for interaction
- GPT2 responses are sampled with top-k and top-p filtering
- Dataset format: Player Input ↔ NPC Response (CSV)

Data path: `/content/drive/MyDrive/chatbot/dataset_final.csv`

Model save path: `/content/drive/MyDrive/chatbot/`

Usage:
- Run each cell to preprocess, train, and chat
- Use `exit` or `quit` to leave the chat loop


Github: https://github.com/mohanchandrass/NPC-Chatbot-for-Skyrim-using-Bidirectional-LSTMs


# **1. Setup and Imports**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
import keras.backend as K
import keras.losses as losses
import os
import pandas as pd
import re
import pickle
from google.colab import drive


# **2. Mount Google Drive for Auto-Saving**

In [None]:
# Mount Google Drive at /content/drive; every artifact below is written under it.
drive.mount('/content/drive')

# Directory that receives the BiLSTM model files and the pickled tokenizer.
drive_save_path = "/content/drive/MyDrive/chatbot/BiLstm"
os.makedirs(drive_save_path, exist_ok=True)

# Switch the global Keras dtype policy to mixed_float16 before any layer is created.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **3. Data Preprocessing**

In [None]:
def preprocess_text(text):
    """Normalize one dialogue line before tokenization.

    The text is lower-cased, every character that is not an ASCII letter,
    a digit or whitespace is removed, and each run of whitespace (spaces,
    tabs, newlines) is collapsed into a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with no leading or trailing whitespace.
    """
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text.lower())
    # The notebook's Tokenizer is built with filters='', so tabs/newlines left
    # in the text would end up inside tokens; normalize them to single spaces.
    return " ".join(text.split())

# **5. Load Skyrim Dialogue Dataset**

In [None]:
def load_skyrim_dialogue_dataset(file_path):
    """Load the dialogue CSV and return cleaned player/NPC text pairs.

    Args:
        file_path: Path to a CSV file with the columns 'Player Input' and
            'NPC Response'.

    Returns:
        A tuple ``(player_inputs, npc_responses)`` of two equally long lists
        of strings, each passed through ``preprocess_text``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If either required column is missing from the CSV.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    df = pd.read_csv(file_path)

    required_columns = ['Player Input', 'NPC Response']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s) in {file_path}: {missing_columns}")

    # astype(str) would turn a missing cell into the literal word "nan", which
    # would then be learned as dialogue; drop incomplete pairs instead.
    pairs = df.dropna(subset=required_columns)

    player_inputs = pairs['Player Input'].astype(str).apply(preprocess_text).tolist()
    npc_responses = pairs['NPC Response'].astype(str).apply(preprocess_text).tolist()

    return player_inputs, npc_responses

# Location of the dialogue CSV on the mounted Drive.
file_path = "/content/drive/MyDrive/chatbot/dataset_final.csv"
# Two parallel lists of cleaned strings: player prompts and NPC replies.
player_inputs, npc_responses = load_skyrim_dialogue_dataset(file_path)

# **6. Tokenization and Padding**

In [None]:
# Build one vocabulary shared by prompts and replies. filters='' turns off the
# tokenizer's own character filtering (the text was already cleaned by
# preprocess_text); '<OOV>' is the token used for out-of-vocabulary words.
tokenizer = Tokenizer(filters='', oov_token='<OOV>')
tokenizer.fit_on_texts(player_inputs + npc_responses)
# +1 leaves room for index 0, which appears as the padding value in the padded arrays.
vocab_size = len(tokenizer.word_index) + 1


# Persist the fitted tokenizer next to the model files.
tokenizer_path = os.path.join(drive_save_path, "tokenizer.pkl")
with open(tokenizer_path, "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Fixed sequence length used for both prompts and replies.
max_len = 50
input_sequences = tokenizer.texts_to_sequences(player_inputs)
response_sequences = tokenizer.texts_to_sequences(npc_responses)


# padding='post' appends the padding after the real tokens.
input_padded = pad_sequences(input_sequences, maxlen=max_len, padding='post')
response_padded = pad_sequences(response_sequences, maxlen=max_len, padding='post')


print(f"Input padded shape: {input_padded.shape}")
print(f"Response padded shape: {response_padded.shape}")
print(f"Vocab size: {vocab_size}")


Input padded shape: (50000, 50)
Response padded shape: (50000, 50)
Vocab size: 6086


# **7. Train-Test Split**

In [None]:
# Hold out 20% of the (prompt, reply) pairs for validation; random_state fixes the shuffle.
X_train, X_val, y_train, y_val = train_test_split(input_padded, response_padded, test_size=0.2, random_state=42)


In [None]:
# Sanity-check the training arrays: shapes, target dtype and one padded target row.
print(X_train.shape)
print(y_train.shape)
print("Shape of y_train:", y_train.shape)
print("Data type of y_train:", y_train.dtype)
print("Sample y_train values:\n", y_train[0])

(40000, 50)
(40000, 50)
Shape of y_train: (40000, 50)
Data type of y_train: int32
Sample y_train values:
 [ 25  34   2 470 702 752  29   2 294   4 769 129   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0]


# **8. Model Architecture**

In [None]:
def perplexity(y_true, y_pred):
    """Keras metric: exponential of the mean sparse categorical cross-entropy.

    Args:
        y_true: Integer token ids of the target batch.
        y_pred: Predicted probabilities over the vocabulary for each position.

    Returns:
        A scalar tensor equal to ``exp(mean(cross_entropy))``. Every position
        contributes to the mean; padded positions are not masked out.
    """
    # The notebook enables the mixed_float16 policy, so predictions can arrive
    # as float16; do the log/exp arithmetic in float32.
    y_pred = tf.cast(y_pred, tf.float32)
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    return tf.exp(tf.reduce_mean(cross_entropy))


In [None]:
# Hyperparameters of the BiLSTM chatbot.
embedding_dim = 256
lstm_units = 512
dropout_rate = 0.3

# Two stacked bidirectional LSTMs that emit, for every one of the max_len
# positions, a probability distribution over the vocabulary.
model = Sequential([
    # Explicit input spec (replaces the deprecated `input_length` argument of Embedding).
    tf.keras.Input(shape=(max_len,), dtype='int32'),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    Dropout(dropout_rate),
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    Dropout(dropout_rate),
    # The global policy is mixed_float16; keep the softmax output in float32 so
    # the loss and metrics are computed on full-precision probabilities.
    Dense(vocab_size, activation='softmax', dtype='float32')
])

optimizer = Adam(learning_rate=0.001)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', perplexity]
)



# **9. Callback for Model and Tokenizer Saving**

In [None]:
class SaveModelAndTokenizerCallback(Callback):
    """Keras callback that re-saves the tokenizer and the model after every epoch.

    Both files are written under the notebook-level ``drive_save_path`` with
    fixed names, so each epoch overwrites the previous snapshot.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Pickle the global ``tokenizer``, save ``self.model`` and print both paths."""
        tok_file = os.path.join(drive_save_path, "tokenizer.pkl")
        # The tokenizer is written before the model.
        with open(tok_file, "wb") as out:
            pickle.dump(tokenizer, out, protocol=pickle.HIGHEST_PROTOCOL)

        model_file = os.path.join(drive_save_path, "skyrim_chatbot_latest.keras")
        self.model.save(model_file)

        # `epoch` is zero-based, hence the +1 in the message.
        print(f"✅ Model and Tokenizer saved after epoch {epoch+1} at {model_file} and {tok_file}")


# **10. Training**

In [None]:
# Training callbacks:
#  - stop when val_loss has not improved for 8 epochs and restore the best weights,
#  - keep the best full model (not just weights) on Drive,
#  - overwrite the "latest" model and tokenizer after every epoch.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
    ModelCheckpoint(os.path.join(drive_save_path, "skyrim_chatbot_best.keras"), save_best_only=True, save_weights_only=False),
    SaveModelAndTokenizerCallback()
]

try:
    epochs = 100
    batch_size = 64

    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=callbacks
    )


    # Save the model as it stands once fit() returns.
    final_model_path = os.path.join(drive_save_path, "skyrim_chatbot_final.keras")
    model.save(final_model_path)
    print(f"✅ Model training complete! Best model saved as {final_model_path}")

except KeyboardInterrupt:
    # Manual interruption of the cell: keep whatever has been trained so far.
    print("\n⚠️ Training interrupted! Saving latest model before exiting...")
    latest_model_path = os.path.join(drive_save_path, "skyrim_chatbot_latest.keras")
    model.save(latest_model_path)
    print(f"✅ Latest model saved as {latest_model_path}")


Epoch 1/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.8199 - loss: 1.5610 - perplexity: 116.4468✅ Model and Tokenizer saved after epoch 1 at /content/drive/MyDrive/chatbot/BiLstm/skyrim_chatbot_latest.keras and /content/drive/MyDrive/chatbot/BiLstm/tokenizer.pkl
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 112ms/step - accuracy: 0.8200 - loss: 1.5605 - perplexity: 116.2937 - val_accuracy: 0.8345 - val_loss: 1.1331 - val_perplexity: 3.1123
Epoch 2/100
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - accuracy: 0.8355 - loss: 1.1104 - perplexity: 3.0437✅ Model and Tokenizer saved after epoch 2 at /content/drive/MyDrive/chatbot/BiLstm/skyrim_chatbot_latest.keras and /content/drive/MyDrive/chatbot/BiLstm/tokenizer.pkl
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 102ms/step - accuracy: 0.8355 - loss: 1.1104 - perplexity: 3.0436 - val_accuracy: 0.8391 - val_loss: 1.0370 - v

In [None]:
# Print the layer-by-layer structure and parameter counts of the BiLSTM model.
model.summary()

# **11. Response Generation Function**

In [None]:
def generate_response(player_input, model, tokenizer, max_sequence_length):
    """Generate an NPC reply for one player utterance.

    The input is tokenized and post-padded to ``max_sequence_length``, the
    model predicts one token per position, and the most likely token at each
    position is decoded back to text.

    Args:
        player_input: The player's (already cleaned) text.
        model: Object with ``predict(batch, verbose=...)`` returning per-position
            vocabulary probabilities.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``,
            ``sequences_to_texts``, ``word_index`` and ``oov_token``.
        max_sequence_length: Length the input is padded to.

    Returns:
        The decoded reply, or a fixed fallback sentence when nothing other
        than padding / out-of-vocabulary tokens was predicted.
    """
    input_seq = tokenizer.texts_to_sequences([player_input])
    input_seq = pad_sequences(input_seq, maxlen=max_sequence_length, padding='post')

    # verbose=0 keeps the predict progress bar out of the chat transcript.
    probabilities = model.predict(input_seq, verbose=0)
    predicted_ids = np.argmax(probabilities, axis=-1)[0]

    # Index 0 is padding; decoding it yields a long tail of "<OOV>" tokens in
    # the reply, so padding and the OOV id are dropped before decoding.
    skip_ids = {0}
    oov_token = getattr(tokenizer, "oov_token", None)
    if oov_token is not None and oov_token in tokenizer.word_index:
        skip_ids.add(tokenizer.word_index[oov_token])
    kept_ids = [int(token_id) for token_id in predicted_ids if int(token_id) not in skip_ids]

    response = tokenizer.sequences_to_texts([kept_ids])[0].strip() if kept_ids else ""

    if not response:
        return "I am not sure, traveler. Perhaps the answer lies elsewhere."

    return response


# **12. Chatbot Interaction Loop**

In [None]:
# Interactive chat with the BiLSTM model; 'exit' or 'quit' ends the loop.
try:
    print("Welcome to the Elder Scrolls NPC Chatbot! Type 'exit' or 'quit' to end the conversation.")
    while True:
        # Input is trimmed and lower-cased before it is handed to the model.
        user_input = input("You: ").strip().lower()
        if user_input in ["exit", "quit"]:
            print("NPC: Farewell, traveler. May the blessings of the Tribunal be with you!")
            break

        response = generate_response(user_input, model, tokenizer, max_len)
        print(f"NPC: {response}")

except KeyboardInterrupt:
    # Interrupting the cell also saves the current model to the "latest" file.
    print("\n⚠️ Chatbot interrupted! Saving latest model before exiting...")
    latest_model_path = os.path.join(drive_save_path, "skyrim_chatbot_latest.keras")
    model.save(latest_model_path)
    print(f"✅ Latest model saved as {latest_model_path}")
    print("NPC: Farewell, traveler. May the blessings of the Tribunal be with you!")


Welcome to the Elder Scrolls NPC Chatbot! Type 'exit' or 'quit' to end the conversation.
You: hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step
NPC: greetings traveler how can i assist you today <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV>
You: hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
NPC: hello there what brings you here <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV>
You: what is skyrim
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
NPC: skyrim is the northern province of tamriel ho

# **Glove+LSTM**

*Run sections 1–7 (through the train-test split) and section 9 (the save callback, which the training cell below reuses) before continuing.*

# **1. Mount Google Drive for Auto-Saving**

In [None]:
# Mount Google Drive at /content/drive (same mount point as the first section).
drive.mount('/content/drive')

# From here on drive_save_path points at the GloVe model's own directory;
# code that reads this global afterwards will write there.
drive_save_path = "/content/drive/MyDrive/chatbot/Glove+LSTM"
os.makedirs(drive_save_path, exist_ok=True)

# Switch the global Keras dtype policy to mixed_float16 before any layer is created.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

Set `glove_path` to the location of your pre-trained GloVe embedding file (the cells below expect 300-dimensional vectors).

In [None]:
# Text file of pre-trained GloVe vectors on the mounted Drive (one token per line).
glove_path = "/content/drive/MyDrive/chatbot/glove.6B.300d.txt"

In [None]:
def perplexity(y_true, y_pred):
    """Keras metric: exponential of the mean sparse categorical cross-entropy.

    Args:
        y_true: Integer token ids of the target batch.
        y_pred: Predicted probabilities over the vocabulary for each position.

    Returns:
        A scalar tensor equal to ``exp(mean(cross_entropy))``. Every position
        contributes to the mean; padded positions are not masked out.
    """
    # The notebook enables the mixed_float16 policy, so predictions can arrive
    # as float16; do the log/exp arithmetic in float32.
    y_pred = tf.cast(y_pred, tf.float32)
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    return tf.exp(tf.reduce_mean(cross_entropy))

In [None]:
# Map each GloVe token to its vector.
embedding_index = {}
embedding_dim = 300

with open(glove_path, encoding='utf8') as f:
    for line in f:
        # Split from the right so that a token which itself contains spaces
        # stays in one piece; the last `embedding_dim` fields are the vector.
        values = line.rstrip().rsplit(' ', embedding_dim)
        if len(values) != embedding_dim + 1:
            # Blank or malformed line, or a vector of a different size.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs

if not embedding_index:
    raise ValueError(
        f"No {embedding_dim}-dimensional vectors could be read from {glove_path}"
    )

# Row i holds the vector of the word with tokenizer index i; rows stay zero for
# index 0 and for words that have no GloVe vector.
embedding_matrix = np.zeros((vocab_size, embedding_dim), dtype='float32')

for word, i in tokenizer.word_index.items():
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [None]:

# Hyperparameters of the GloVe + BiLSTM chatbot (embedding_dim comes from the GloVe cell).
lstm_units = 512
dropout_rate = 0.3

model2 = Sequential([
    # Explicit input spec (replaces the deprecated `input_length` argument of Embedding).
    tf.keras.Input(shape=(max_len,), dtype='int32'),
    Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        # Load the pre-trained vectors through an initializer rather than the
        # legacy `weights=` keyword, and keep them frozen during training.
        embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
        trainable=False
    ),
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    Dropout(dropout_rate),
    Bidirectional(LSTM(lstm_units, return_sequences=True)),
    Dropout(dropout_rate),
    # The global policy is mixed_float16; keep the softmax output in float32 so
    # the loss and metrics are computed on full-precision probabilities.
    Dense(vocab_size, activation='softmax', dtype='float32')
])


optimizer = Adam(learning_rate=0.001)

model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', perplexity]
)




In [None]:
# Print the layer-by-layer structure and parameter counts of the GloVe model.
model2.summary()

In [None]:
# Same callback set-up as the first model, with a GloVe-specific "best" file name.
# SaveModelAndTokenizerCallback reads drive_save_path and tokenizer from the
# notebook's globals and writes "skyrim_chatbot_latest.keras" there.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
    ModelCheckpoint(os.path.join(drive_save_path, "skyrim_chatbot_glove_best.keras"), save_best_only=True, save_weights_only=False),
    SaveModelAndTokenizerCallback()
]

try:
    epochs = 50
    batch_size = 128

    history = model2.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=callbacks
    )

    # Save the model as it stands once fit() returns.
    final_model_path = os.path.join(drive_save_path, "skyrim_chatbot_glove_final.keras")
    model2.save(final_model_path)
    print(f"✅ Model training complete! Best model saved as {final_model_path}")

except KeyboardInterrupt:
    # Manual interruption of the cell: keep whatever has been trained so far.
    print("\n⚠️ Training interrupted! Saving latest model before exiting...")
    latest_model_path = os.path.join(drive_save_path, "skyrim_chatbot_glove_latest.keras")
    model2.save(latest_model_path)
    print(f"✅ Latest model saved as {latest_model_path}")


Epoch 1/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - accuracy: 0.8338 - loss: 1.1910✅ Model and Tokenizer saved after epoch 1 at /content/drive/MyDrive/chatbot/skyrim_chatbot_latest.keras and /content/drive/MyDrive/chatbot/tokenizer.pkl
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 182ms/step - accuracy: 0.8338 - loss: 1.1909 - val_accuracy: 0.8350 - val_loss: 1.1374
Epoch 2/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - accuracy: 0.8367 - loss: 1.1138✅ Model and Tokenizer saved after epoch 2 at /content/drive/MyDrive/chatbot/skyrim_chatbot_latest.keras and /content/drive/MyDrive/chatbot/tokenizer.pkl
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 167ms/step - accuracy: 0.8367 - loss: 1.1137 - val_accuracy: 0.8383 - val_loss: 1.0761
Epoch 3/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - accuracy: 0.8392 - loss: 1.0530✅ Model and Tokenizer s

NameError: name 'model' is not defined

# **Response Generation Function (THESE 2 CELLS SAME AS MODEL 1)**

In [None]:
def generate_response(player_input, model2, tokenizer, max_sequence_length):
    """Generate an NPC reply for one player utterance.

    The input is tokenized and post-padded to ``max_sequence_length``, the
    model predicts one token per position, and the most likely token at each
    position is decoded back to text.

    Args:
        player_input: The player's (already cleaned) text.
        model2: Object with ``predict(batch, verbose=...)`` returning
            per-position vocabulary probabilities.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``,
            ``sequences_to_texts``, ``word_index`` and ``oov_token``.
        max_sequence_length: Length the input is padded to.

    Returns:
        The decoded reply, or a fixed fallback sentence when nothing other
        than padding / out-of-vocabulary tokens was predicted.
    """
    input_seq = tokenizer.texts_to_sequences([player_input])
    input_seq = pad_sequences(input_seq, maxlen=max_sequence_length, padding='post')

    # verbose=0 keeps the predict progress bar out of the chat transcript.
    probabilities = model2.predict(input_seq, verbose=0)
    predicted_ids = np.argmax(probabilities, axis=-1)[0]

    # Index 0 is padding; decoding it yields a long tail of "<OOV>" tokens in
    # the reply, so padding and the OOV id are dropped before decoding.
    skip_ids = {0}
    oov_token = getattr(tokenizer, "oov_token", None)
    if oov_token is not None and oov_token in tokenizer.word_index:
        skip_ids.add(tokenizer.word_index[oov_token])
    kept_ids = [int(token_id) for token_id in predicted_ids if int(token_id) not in skip_ids]

    response = tokenizer.sequences_to_texts([kept_ids])[0].strip() if kept_ids else ""

    if not response:
        return "I am not sure, traveler. Perhaps the answer lies elsewhere."

    return response


# **Chatbot Interaction Loop**

In [None]:
# Interactive chat with the GloVe + BiLSTM model; 'exit' or 'quit' ends the loop.
try:
    print("Welcome to the Elder Scrolls NPC Chatbot! Type 'exit' or 'quit' to end the conversation.")
    while True:
        # Input is trimmed and lower-cased before it is handed to the model.
        user_input = input("You: ").strip().lower()
        if user_input in ["exit", "quit"]:
            print("NPC: Farewell, traveler. May the blessings of the Tribunal be with you!")
            break

        # This section's model is `model2`; `model` belongs to the first
        # section and is not defined when only the shared cells were run.
        response = generate_response(user_input, model2, tokenizer, max_len)
        print(f"NPC: {response}")

except KeyboardInterrupt:
    # Interrupting the cell also saves the current GloVe model to the "latest" file.
    print("\n⚠️ Chatbot interrupted! Saving latest model before exiting...")
    latest_model_path = os.path.join(drive_save_path, "skyrim_chatbot_latest.keras")
    model2.save(latest_model_path)
    print(f"✅ Latest model saved as {latest_model_path}")
    print("NPC: Farewell, traveler. May the blessings of the Tribunal be with you!")


## **DistilGPT2**

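*This section is standalone: install the dependencies first, then run all of the cells in this section in order. The dataset and output paths are on Google Drive, so Drive must already be mounted.*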

In [None]:
# Install the Hugging Face libraries used in this section, then upgrade
# transformers to its newest release. No versions are pinned.
!pip install transformers datasets
!pip install -U transformers

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [None]:
from datasets import Dataset
import pandas as pd

# Load dataset
file_path = "/content/drive/MyDrive/chatbot/dataset_final.csv"
df = pd.read_csv(file_path)

# Drop incomplete pairs first: astype(str) below would otherwise turn a missing
# cell into the literal word "nan" inside the training text.
df = df.dropna(subset=['Player Input', 'NPC Response']).copy()

# Clean text
df['Player Input'] = df['Player Input'].astype(str).str.lower().str.strip()
df['NPC Response'] = df['NPC Response'].astype(str).str.lower().str.strip()

# Combine input and response (for causal language modeling):
# each training example is "<player input> <npc response>" joined by one space.
conversations = [
    {"text": f"{inp} {res}"}
    for inp, res in zip(df['Player Input'], df['NPC Response'])
]

# Convert to HuggingFace dataset
dataset = Dataset.from_list(conversations)

In [None]:
from transformers import AutoTokenizer

# NOTE: this rebinds the notebook-level name `tokenizer`, which the earlier
# sections use for the Keras tokenizer.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
# Reuse the end-of-sequence token as the padding token so padding="max_length" can be used below.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
def tokenize(example):
    """Tokenize dataset text and attach causal-LM labels.

    Uses the notebook-level ``tokenizer`` with truncation and padding to a
    fixed length of 128 tokens.

    Args:
        example: Mapping with a "text" entry holding either one string or a
            list of strings (batched mapping).

    Returns:
        The tokenizer output with an added "labels" entry: a copy of
        "input_ids" in which every padded position is replaced by -100, so
        that those positions are ignored by a loss using ignore_index=-100.
    """
    tokens = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=128
    )

    def _mask_padding(ids, mask):
        # The pad token is the same as the EOS token, so padding must be
        # recognised through the attention mask, not through the token id.
        return [token_id if keep else -100 for token_id, keep in zip(ids, mask)]

    if isinstance(example["text"], str):
        tokens["labels"] = _mask_padding(tokens["input_ids"], tokens["attention_mask"])
    else:
        tokens["labels"] = [
            _mask_padding(ids, mask)
            for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
        ]
    return tokens

# Tokenize every example (one example per call; `batched` is not set).
tokenized_dataset = dataset.map(tokenize)
# Expose only the model inputs, as torch tensors.
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [None]:
import torch
import math

def compute_metrics(p):
    """Compute next-token cross-entropy loss and perplexity for an evaluation run.

    Args:
        p: A pair ``(predictions, labels)``. ``predictions`` is either the
            logits of shape (batch, sequence, vocab) or a tuple whose first
            element is the logits. Both may be NumPy arrays or torch tensors.

    Returns:
        A dict with the float entries "loss" and "perplexity" (``exp(loss)``).
        Positions labelled -100 are ignored; if every position is ignored the
        loss is NaN.
    """
    predictions, labels = p

    logits = predictions[0] if isinstance(predictions, tuple) else predictions

    # The inputs can be NumPy arrays, which have no .contiguous()/.view()/.size();
    # convert them so the tensor operations below work for both kinds of input.
    logits = torch.as_tensor(logits, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.long)

    # Causal LM shift: the logits at position t are scored against the token at t+1.
    shift_logits = logits[:, :-1, :]
    shift_labels = labels[:, 1:]

    loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100)
    loss = loss_fct(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1)
    )

    # torch.exp yields inf on overflow rather than raising, so no try/except is needed.
    perplexity = torch.exp(loss)

    return {
        "loss": loss.item(),
        "perplexity": perplexity.item()
    }


In [None]:
from transformers import GPT2LMHeadModel, Trainer, TrainingArguments, TrainerCallback
import os

# Load the pre-trained DistilGPT2 weights to fine-tune.
# NOTE: this rebinds the notebook-level name `model` used by the BiLSTM section.
model = GPT2LMHeadModel.from_pretrained("distilgpt2")


class SaveModelAndTokenizerCallback(TrainerCallback):
    """Trainer callback that overwrites a saved model + tokenizer after every epoch.

    Args:
        tokenizer: Tokenizer whose ``save_pretrained`` is called next to the model.
        save_dir: Directory the model and tokenizer are written to. Defaults
            to the Drive folder this notebook uses for GPT-2 output.
    """

    def __init__(self, tokenizer, save_dir="/content/drive/MyDrive/chatbot/gpt2_output1"):
        self.tokenizer = tokenizer
        self.save_dir = save_dir

    def on_train_begin(self, args, state, control, **kwargs):
        """Nothing to do when training starts."""
        pass

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Save the model and tokenizer into ``save_dir`` and report the epoch."""
        save_dir = self.save_dir

        if model is None:
            # Without a model there is nothing to write; report it instead of
            # failing on None.save_pretrained in the middle of training.
            print(f"⚠️ No model passed to callback; skipping save after epoch {state.epoch}")
            return

        os.makedirs(save_dir, exist_ok=True)

        model.save_pretrained(save_dir)
        self.tokenizer.save_pretrained(save_dir)

        print(f"✅ Model and Tokenizer overwritten at {save_dir} after epoch {state.epoch}")

# Callback instance bound to the GPT-2 tokenizer loaded above.
save_callback = SaveModelAndTokenizerCallback(tokenizer=tokenizer)

# Trainer checkpoints go to the same Drive folder the callback writes to.
# NOTE(review): no evaluation strategy is set here, so `eval_steps` may have
# no effect — confirm against the TrainingArguments defaults.
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/chatbot/gpt2_output1",
    per_device_train_batch_size=2,
    num_train_epochs=1, # CHANGE EPOCHS BASED ON UR HARDWARE SAME FOR BATCH SIZE
    logging_dir='./logs',
    save_total_limit=2,
    save_strategy="epoch",
    eval_steps=100,
    logging_steps=50,
    report_to="none"
)

# The evaluation dataset is the same object as the training dataset; there is
# no held-out split in this section.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    callbacks=[save_callback],
    compute_metrics=compute_metrics
)




In [None]:
# Fine-tune DistilGPT2 with the Trainer configured above.
trainer.train()

# Save the fine-tuned model and its tokenizer side by side in one folder.
model.save_pretrained("/content/drive/MyDrive/chatbot/distilgpt2_chatbot")
tokenizer.save_pretrained("/content/drive/MyDrive/chatbot/distilgpt2_chatbot")


Step,Training Loss
50,2.2922
100,0.5457
150,0.4883
200,0.4657
250,0.4512
300,0.4354
350,0.4285
400,0.4205
450,0.4119
500,0.3951


('/content/drive/MyDrive/chatbot/distilgpt2_chatbot/tokenizer_config.json',
 '/content/drive/MyDrive/chatbot/distilgpt2_chatbot/special_tokens_map.json',
 '/content/drive/MyDrive/chatbot/distilgpt2_chatbot/vocab.json',
 '/content/drive/MyDrive/chatbot/distilgpt2_chatbot/merges.txt',
 '/content/drive/MyDrive/chatbot/distilgpt2_chatbot/added_tokens.json',
 '/content/drive/MyDrive/chatbot/distilgpt2_chatbot/tokenizer.json')

# **Response Generation Function and Chatbot Loop**

In [None]:
import torch

def gpt2_generate_response(input_text, model, tokenizer):
    """Sample a reply to ``input_text`` from a causal language model.

    Args:
        input_text: The player's message.
        model: Model exposing ``eval()``, ``device`` and ``generate(...)``.
        tokenizer: Tokenizer that is callable with ``return_tensors='pt'`` and
            provides ``decode`` and ``eos_token_id``.

    Returns:
        The newly generated text only (the prompt is not echoed), with the
        literal "<|startoftext|>" marker removed and whitespace stripped.
    """
    model.eval()

    prompt = f"<|startoftext|>{input_text}<|endoftext|>"
    encoded = tokenizer(prompt, return_tensors='pt')
    input_ids = encoded["input_ids"].to(model.device)
    # The pad token id passed below is the EOS id and the prompt itself ends
    # with EOS, so supply the attention mask explicitly instead of leaving it
    # to be inferred.
    attention_mask = encoded["attention_mask"].to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=128,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            top_k=50,
            top_p=0.95,
            do_sample=True
        )

    # Cut the prompt off by token count. Slicing the decoded string by
    # len(input_text) is wrong because the decoded text also contains the
    # "<|startoftext|>" marker in front of the input.
    new_tokens = output[0][input_ids.shape[-1]:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # "<|startoftext|>" is not removed by skip_special_tokens (it shows up in
    # the sample transcript), so strip it by hand.
    response = generated_text.replace("<|startoftext|>", "").strip()

    return response


In [None]:
# Interactive chat with the fine-tuned GPT-2 model; 'exit' or 'quit' (any case) ends the loop.
print("Welcome to the Skyrim GPT-2 Chatbot! Type 'exit' to leave.")
while True:
    user_input = input("You: ")
    if user_input.lower() in ['exit', 'quit']:
        print("NPC: Farewell, Dovahkiin.")
        break

    # The raw input (not lower-cased or stripped) is passed to the generator.
    response = gpt2_generate_response(user_input, model, tokenizer)
    print(f"NPC: {response}")


Welcome to the Skyrim GPT-2 Chatbot! Type 'exit' to leave.
You: hello
NPC: rtoftext|>hello<|startoftext|>what do you need?
You: hi
NPC: startoftext|>hi<|startoftext|>the snow elves are the descendants of the first snow elves. their homeland is vast, but they remain mysterious.
You: i didnt ask that
NPC: didnt ask that<|startoftext|>yeah, and i’ll do
You: quit
NPC: Farewell, Dovahkiin.
