In [None]:
import pandas as pd

# Load the script CSV and discard the metadata columns in a single chain.
data = pd.read_csv("/Game_of_Thrones_Script.csv").drop(
    columns=['Release Date', 'Season', 'Episode', 'Episode Title', 'Name']
)

# Keep only the dialogue text: skip missing rows, then cap the corpus at the
# first 5000 remaining sentences.
sentences = data['Sentence'].dropna().head(5000)


In [None]:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
from google.colab import files
import io

# Encode every sentence into token ids with the pretrained cased BERT
# vocabulary. add_special_tokens=True asks the tokenizer to include its
# special tokens in each encoded sequence.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
input_sequences = [tokenizer.encode(s, add_special_tokens=True) for s in sentences]

# Token count per sentence, read off the sequences encoded above instead of
# running the tokenizer over the whole corpus a second time.
sentence_lengths = [len(seq) for seq in input_sequences]
# Fixed sequence length used for padding further down. It is hard-coded.
# NOTE(review): presumably picked by inspecting sentence_lengths - confirm.
max_sequence_len = 52

# Number of entries in the tokenizer vocabulary; sizes the embedding table and
# the softmax output layer of the model.
vocab_size = len(tokenizer.vocab)

# Next-token prediction pairs: the input is each sequence without its last
# token, the target is the same sequence shifted one position to the left.
# Both are brought to a common length of max_sequence_len - 1 with padding
# placed in front ('pre').
x = pad_sequences(
    [token_ids[:-1] for token_ids in input_sequences],
    maxlen=max_sequence_len - 1,
    padding='pre',
)
y = pad_sequences(
    [token_ids[1:] for token_ids in input_sequences],
    maxlen=max_sequence_len - 1,
    padding='pre',
)

# Hold out 20% of the samples, then cut that hold-out in half, giving an
# 80/10/10 train/validation/test split. The fixed random_state keeps the
# split repeatable.
x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.2, random_state=123
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=123
)


# Token-level language model: 10-dimensional embeddings feed a 128-unit LSTM
# that emits an output at every timestep (return_sequences=True), and a
# softmax layer turns each of those outputs into a distribution over the
# vocabulary.
model = Sequential([
    Embedding(vocab_size, 10, input_length=max_sequence_len - 1),
    LSTM(128, return_sequences=True),
    Dense(vocab_size, activation='softmax'),
])

# The sparse loss takes integer token ids as targets, so the labels do not
# need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# Training hyperparameters.
epochs = 20
batch_size = 8

# Train on the training split and track the validation split after each epoch.
# The integer targets get a trailing axis of size 1 via np.expand_dims before
# being handed to Keras.
history = model.fit(
    x_train,
    np.expand_dims(y_train, -1),
    validation_data=(x_val, np.expand_dims(y_val, -1)),
    epochs=epochs,
    batch_size=batch_size,
)

# Score the held-out test split with the model trained above. `model` is the
# only network defined in this notebook, so it is the object to evaluate.
loss, accuracy = model.evaluate(x_test, np.expand_dims(y_test, -1))
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

def _plot_history_curve(training_history, train_key, val_key, title, y_label):
    """Draw one train-vs-validation curve on its own figure and display it.

    Args:
        training_history: object whose ``.history`` mapping holds per-epoch
            metric lists (here, the value returned by ``model.fit``).
        train_key: key of the training metric in ``.history``.
        val_key: key of the matching validation metric in ``.history``.
        title: title shown above the chart.
        y_label: label for the y axis.
    """
    # An explicit figure per chart: every chart gets the same size and never
    # depends on which figure pyplot currently considers "active".
    fig, ax = plt.subplots(figsize=(18, 4))
    ax.plot(training_history.history[train_key])
    ax.plot(training_history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


# Plot training & validation accuracy values
_plot_history_curve(history, 'accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy')

# Plot training & validation loss values
_plot_history_curve(history, 'loss', 'val_loss', 'Model loss', 'Loss')

In [None]:

# One wide figure with two panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(18, 4))

# Left panel: accuracy per epoch. The legend call supplies its own entry
# names for the two curves.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.legend(['Train', 'Validation'], loc='upper left')

# Right panel: loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(['Train', 'Validation'], loc='upper left')

# Write the figure to disk before showing it, then hand the saved PNG to the
# Colab download helper.
fig.tight_layout()
fig.savefig("LSTM Accuracy and Loss Graph.png")
plt.show()
files.download("LSTM Accuracy and Loss Graph.png")

In [None]:
# Persist the trained network to an HDF5 file, then pass that same file to the
# Colab download helper.
for export_step in (model.save, files.download):
    export_step("lstm_model.h5")