In [None]:
# Mount Google Drive inside the Colab runtime at '/content/drive'; the data
# files read by the later cells are addressed through this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from scipy.io import loadmat
import pandas as pd
import numpy as np
import random as python_random
import tensorflow as tf
# Seed the three random number generators used in this notebook (Python's
# `random`, NumPy and TensorFlow) with the same fixed value so that repeated
# runs start from the same random state.
python_random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Read the CSI measurements stored under the 'csid_lab' key of the .mat file.
csi = loadmat('/content/drive/My Drive/Colab Notebooks/dataset/dataset_lab_276_dl.mat')['csid_lab']
# Describe every CSI value by its magnitude (np.abs) and its phase angle
# (np.angle) and put the two parts next to each other along axis 1.
csi_tensor = np.concatenate((np.abs(csi), np.angle(csi)), axis=1)
# Move axis 3 to the front while keeping the relative order of the other
# axes, so that later cells can pick one recording with csi_tensor[i].
# NOTE(review): presumably axis 3 of the raw data is the recording index —
# confirm against the dataset description.
csi_tensor = np.moveaxis(csi_tensor, 3, 0)
del csi

In [None]:
# Group the recordings by word (276 words, 20 instances per word).
# The recordings of word `index` are read from rows index, index + 276,
# index + 2*276, ..., index + 19*276 of csi_tensor. They are stacked in that
# order into one array of shape (20, 200, 60, 3) per word.
# A single np.stack per word replaces the former chain of 19 successive
# np.concatenate calls (which re-copied the growing array each time) and no
# longer needs a counter that shadowed the builtin `round`.
words_csi = [
    np.stack(
        [csi_tensor[index + 276 * instance] for instance in range(20)]
    ).reshape((20, 200, 60, 3))  # reshape doubles as a check of the per-recording shape
    for index in range(276)
]
del csi_tensor

In [None]:
# Read the sentences (first column of sentences.csv, one sentence per row)
# and the table of known words (sign_labels.csv).
raw_sentences = pd.read_csv("/content/drive/My Drive/Colab Notebooks/ConvLSTM/sentences.csv", header = None).iloc[:, 0]
vocabulary = pd.read_csv("/content/drive/My Drive/Colab Notebooks/ConvLSTM/sign_labels.csv", header = None).values
# Split each sentence on single spaces and replace every word by the 0-based
# row index of its first match in the word table. A word that is missing
# from the table makes the lookup fail with an IndexError.
sentence_index = [
    [np.where(vocabulary == token)[0][0] for token in text.split(" ")]
    for text in raw_sentences
]
del raw_sentences, vocabulary

In [None]:
# Only the first 896 sentences are used; the original author notes that, for
# technical reasons, the whole data set cannot be taken.
# NOTE(review): presumably these first 896 entries are exactly the 3-word
# sentences — confirm against sentences.csv.
#
# For every kept sentence and every one of the 20 instances, stack the CSI
# recording of each word (same instance number for all words of the sentence)
# into one array of shape (number_of_words, 200, 60, 3). The result lists the
# 20 instances of the first sentence, then those of the second, and so on.
sentences_csi = [
    np.stack([words_csi[word][instance] for word in sentence])
    for sentence in sentence_index[:896]
    for instance in range(20)
]
del sentence_index, words_csi

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Make the sequences all have the same length i.e. 3 (shorter ones are
# zero-padded at the end, longer ones are cut at the end).
# pad_sequences converts its output to int32 unless told otherwise, which
# would silently truncate the fractional part of the CSI magnitude and phase
# values. Request a float dtype explicitly; float32 keeps the memory
# footprint identical to the int32 default.
X = pad_sequences(
    sentences_csi, maxlen=3, dtype="float32", padding="post", truncating="post"
)
# shape(X) = (17920, 3, 200, 60, 3)
del sentences_csi

In [None]:
# Read the labels of the first 896 sentences (one row of labels.csv each).
label_rows = pd.read_csv("/content/drive/My Drive/Colab Notebooks/ConvLSTM/labels.csv", header = None).values.tolist()[:896]
# Every sentence was expanded into 20 consecutive instances, so each label
# row is repeated 20 times in a row to stay aligned with the inputs.
sent_labels = [row for row in label_rows for repeat in range(20)]
del label_rows

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
# Map every sentence label to an integer class id, then expand the ids into
# one-hot rows for use with the categorical cross-entropy loss.
y = to_categorical(LabelEncoder().fit(sent_labels).transform(sent_labels))
del sent_labels

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Activation, Flatten, Dense, AveragePooling2D, BatchNormalization
# NOTE(review): this scikit-learn wrapper module is deprecated in recent
# TensorFlow releases — confirm it exists in the TensorFlow version in use.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier


def baseline_model():
    """Build and compile the ConvLSTM sentence classifier.

    The network takes inputs of shape (3, 200, 60, 3) and consists of, in
    order: a ConvLSTM2D layer (8 filters, 3x3 kernel, ReLU), batch
    normalization, 3x3 average pooling, flattening, a dense layer with 180
    units and a softmax activation.
    NOTE(review): 180 is presumably the number of distinct sentence labels —
    confirm that it matches the width of the one-hot targets.

    Returns:
        A Sequential model compiled with categorical cross-entropy loss, the
        'SGD' optimizer and accuracy as the reported metric.
    """
    layer_stack = [
        ConvLSTM2D(8, (3, 3), input_shape=(3, 200, 60, 3), activation='relu'),
        BatchNormalization(),
        AveragePooling2D(pool_size=(3, 3)),
        Flatten(),
        Dense(180),
        Activation('softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


# scikit-learn style wrapper: each fit builds a fresh model through
# baseline_model and trains it for 10 epochs with batches of 10 samples.
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=10,
    batch_size=10,
    verbose=1,
)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples; the fixed random_state makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)
# The full arrays are no longer needed once the split parts exist.
del X, y
# Train on the 80% part; the held-out 20% is only used to report validation
# metrics after every epoch.
history = estimator.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
)

In [None]:
import matplotlib.pyplot as plt
# list all data in history
print(history.history.keys())
# Summarize the training history: one figure for accuracy, then one for
# loss, each showing the training curve and the validation curve per epoch.
for metric in ('accuracy', 'loss'):
    plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # the second curve ('test') is the 'val_*' series of the history
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
del metric

In [None]:
from sklearn.model_selection import KFold, cross_val_score
# 5-fold cross-validation on the training part only: the samples are
# shuffled with a fixed seed, the estimator is evaluated once per fold, and
# the mean of the five fold scores is printed.
kfold = KFold(shuffle=True, n_splits=5, random_state=42)
crossval = cross_val_score(estimator, X_train, y_train, cv=kfold)
print(np.mean(crossval))