In [None]:
# Import packages
import pandas as pd
import numpy as np
import tensorflow as tf
import random as python_random
# Seed every random number generator the notebook touches (NumPy, TensorFlow,
# and Python's built-in `random`) with the same value so reruns are repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
python_random.seed(SEED)

In [None]:
# Load the sentences (first CSV column, no header row) and tokenise each one
# on single spaces, giving a list of word lists.
sentences = [
    line.split(" ")
    for line in pd.read_csv(
        "/content/drive/My Drive/Colab Notebooks/LSTM/sentences.csv", header=None
    ).iloc[:, 0]
]
# Load the target labels (first CSV column, no header row) as a pandas Series.
labels = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/LSTM/labels2.csv", header=None
).iloc[:, 0]

In [None]:
# Map every word of every sentence to the 0-based row index at which that word
# first appears in sign_labels.csv.
word_labels = pd.read_csv("/content/drive/My Drive/Colab Notebooks/LSTM/sign_labels.csv", header = None).values

# Build the word -> row-index lookup once instead of rescanning the whole
# vocabulary array for every single word. `setdefault` keeps the first row in
# which a value occurs, matching what `np.where(word_labels == x)[0][0]` returns.
word_to_index = {}
for row_number, row in enumerate(word_labels):
    for cell in row:
        word_to_index.setdefault(cell, row_number)

sentence_index = []
for sentence in sentences:
    try:
        sentence_index.append([word_to_index[word] for word in sentence])
    except KeyError as err:
        # Name the offending word rather than failing with an opaque lookup error.
        raise ValueError(
            f"word {err.args[0]!r} from sentence {sentence!r} not found in sign_labels.csv"
        ) from err

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Pad (or truncate) every index sequence at its end so that all rows have
# exactly 4 entries. pad_sequences fills with 0 by default.
# NOTE(review): sentence_index holds 0-based row indices, so the word stored in
# row 0 is indistinguishable from padding — confirm whether that is intended.
X = pad_sequences(
    sentence_index, maxlen= 4, padding="post", truncating="post"
)

# Integer-encode the labels, then one-hot encode them for the softmax output.
# `labels` is already a 1-D Series, so `to_numpy()` replaces the deprecated
# `Series.ravel()`; `fit_transform` is equivalent to `fit` followed by `transform`.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(labels.to_numpy())
y = to_categorical(encoded_Y)

In [None]:
# Split the dataset into training and testing set
from sklearn.model_selection import train_test_split
# Hold out 20% of the shuffled samples for evaluation (fixed random_state).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# The feature arrays are handed to Keras as tensors; the targets stay as returned
# by train_test_split.
X_train, X_test = (tf.convert_to_tensor(split) for split in (X_train, X_test))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Activation
from tensorflow.keras import optimizers

# Define the model 
def create_model(vocab_size=276, num_classes=2421, sequence_length=4,
                 embedding_dim=32, lstm_units=512, dropout=0.1,
                 learning_rate=3e-4):
  """Build and compile the Embedding -> LSTM -> softmax classifier.

  Every argument defaults to the value that used to be hard-coded, so calling
  `create_model()` with no arguments builds exactly the same network as before.

  Args:
    vocab_size: Number of distinct input indices (Embedding `input_dim`).
      Must be larger than the biggest index present in the input sequences.
    num_classes: Width of the output layer. Must match the number of columns
      of the one-hot encoded targets used with `categorical_crossentropy`.
    sequence_length: Length of each (padded) input sequence.
    embedding_dim: Size of each embedding vector.
    lstm_units: Number of units in the LSTM layer.
    dropout: Dropout rate passed to the LSTM layer.
    learning_rate: Learning rate of the Adam optimizer.

  Returns:
    A compiled `Sequential` model that reports accuracy as its metric.
  """
  model = Sequential()
  model.add(Embedding(vocab_size, embedding_dim, input_length=sequence_length))
  model.add(LSTM(lstm_units, dropout=dropout))
  # Dense + separate softmax activation yields class probabilities.
  model.add(Dense(num_classes))
  model.add(Activation('softmax'))
  optimizer = optimizers.Adam(learning_rate=learning_rate)
  model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
  return model

In [None]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Wrap the Keras model builder in a scikit-learn style estimator and train it
# for 10 epochs with mini-batches of 10, printing progress.
estimator = KerasClassifier(
    build_fn=create_model,
    verbose=1,
    batch_size=10,
    epochs=10,
)
# The held-out split is supplied as validation data so the val_* metrics are
# recorded each epoch; the return value of fit() is kept for plotting later.
history = estimator.fit(X_train, y_train, validation_data=(X_test, y_test))

In [None]:
from sklearn.model_selection import KFold, cross_val_score
# 5-fold cross-validation over the full dataset (shuffled, fixed random_state);
# print the mean of the per-fold scores.
kfold = KFold(shuffle=True, n_splits=5, random_state=42)
cv = cross_val_score(estimator, X, y, cv=kfold)
print(np.mean(cv))

In [None]:
import matplotlib.pyplot as plt
def _plot_history_metric(run_history, metric):
    """Draw the per-epoch training and validation curves for one recorded metric.

    Reads `run_history.history[metric]` and `run_history.history['val_' + metric]`
    and shows them in a new figure.
    """
    plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    for series_name in (metric, 'val_' + metric):
        plt.plot(run_history.history[series_name])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


# Show which metrics were recorded during training.
print(history.history.keys())
# One figure for accuracy, then one for loss.
_plot_history_metric(history, 'accuracy')
_plot_history_metric(history, 'loss')