# 1. Import Dependencies

In [54]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [55]:
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError, CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall, Precision, Accuracy, categorical_accuracy
from tensorflow.keras.utils import to_categorical

In [56]:
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay

In [57]:
# Set the seed value for experiment reproducibility.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# 2. Exploratory Data Analysis

# 2.1 Load Audio List

### WORD LIST

In [58]:

file_path = {
    "words": os.path.join("data", "words"),
    "list": os.path.join("data", "list"),
    "background_noise": os.path.join("data", "_background_noise_"),
    "presentation": os.path.join("data", "presentation")
}

# word_list = os.listdir(file_path["words"])
word_list = ["forward","backward","stop","go","on","of"]
words_exception = ['bed', 'bird', 'cat', 'dog', 'happy', 'house', 'learn', 'left', 'marvin', 'sheila', 'visual', 'wow']

### SPLITTED DATA

In [59]:
data_list = os.listdir(file_path["list"])
list_dict = {}
for _list in data_list:
    list_dict[_list.replace(".txt","")] = list(map(lambda x : x.replace("\n",""), open(os.path.join(file_path["list"], _list)).readlines()))

In [60]:
list_dict.keys()

dict_keys(['testing_list', 'training_list', 'validation_list'])

In [61]:
print("Keseluruhan Data Training :", len(list_dict["training_list"]))
print("Keseluruhan Data Testing:", len(list_dict["testing_list"]))
print("Keseluruhan Data Validation :", len(list_dict["validation_list"]))

Keseluruhan Data Training : 84843
Keseluruhan Data Testing: 11005
Keseluruhan Data Validation : 9981


### Define Limited Number of Training, Testing, and Validation Data

In [62]:
def limited_word_file_list(word_file_list:list, n:int):
    new_list = []
    word_count = 0
    curr_word = ""  
    for i in word_file_list:
        word, file = i.split('/')
        if word_count < n:
            new_list.append(i)
        if curr_word != word and curr_word != "": 
            new_list.append(i)
            word_count = 0
        curr_word = word
        word_count += 1
    return new_list


In [63]:
for key in list_dict:
    _list = list_dict[key]
    res = []
    for word in word_list:
        res.extend(list(filter(lambda x: word in x, _list)))
    list_dict[key] = res

In [64]:
training_files = limited_word_file_list(list_dict["training_list"], 1200)
testing_files = limited_word_file_list(list_dict["testing_list"], 200)
validation_files = limited_word_file_list(list_dict["validation_list"], 200)

## 2.2 Create Dataframe

In [65]:
limited_files = [testing_files, training_files, validation_files]

In [66]:
label_encoder = LabelEncoder()

df_list = pd.DataFrame()

for i in range(len(list_dict.keys())):
    curr_df = pd.DataFrame(limited_files[i])
    curr_df = curr_df.join(pd.Series(limited_files[i]).str.split("/",expand=True), lsuffix='_caller', rsuffix='_other')
    curr_df.rename(columns={"0_caller":"path"},inplace=True)
    curr_df.rename(columns={"0_other":"word"},inplace=True)
    curr_df.rename(columns={1:"file"},inplace=True)
    curr_df["usage"] = list(list_dict.keys())[i].replace("_list","")
    # Label word
    curr_df["label"] = curr_df["word"]
    curr_df["label"] = label_encoder.fit_transform(curr_df["label"])
    
    df_list = df_list.append(curr_df)
    
df_list
    

  df_list = df_list.append(curr_df)
  df_list = df_list.append(curr_df)
  df_list = df_list.append(curr_df)


Unnamed: 0,path,word,file,usage,label
0,forward/bb05582b_nohash_3.wav,forward,bb05582b_nohash_3.wav,testing,1
1,forward/f2e59fea_nohash_3.wav,forward,f2e59fea_nohash_3.wav,testing,1
2,forward/c9b5ff26_nohash_4.wav,forward,c9b5ff26_nohash_4.wav,testing,1
3,forward/837a0f64_nohash_1.wav,forward,837a0f64_nohash_1.wav,testing,1
4,forward/e49428d9_nohash_0.wav,forward,e49428d9_nohash_0.wav,testing,1
...,...,...,...,...,...
1296,off/ccea893d_nohash_1.wav,off,ccea893d_nohash_1.wav,validation,3
1297,off/c4cfbe43_nohash_1.wav,off,c4cfbe43_nohash_1.wav,validation,3
1298,off/0ea9c8ce_nohash_3.wav,off,0ea9c8ce_nohash_3.wav,validation,3
1299,off/4e6902d0_nohash_0.wav,off,4e6902d0_nohash_0.wav,validation,3


In [67]:
df = df_list[df_list['word'] != 'one']

In [68]:
df["usage"].value_counts()

training      7200
testing       1122
validation    1101
Name: usage, dtype: int64

In [69]:
pd.pivot_table(df, columns="usage",index="word",values="file",aggfunc="count")

usage,testing,training,validation
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
backward,166,1200,154
forward,155,1200,146
go,200,1200,200
off,200,1200,200
on,200,1200,200
stop,201,1200,201


In [70]:
# Define file path in the dataframe
def define_file_path(x):
    word, file_name = x.split("/")
    return os.path.join("data","words",word,file_name)

df["path"] = df["path"].map(define_file_path)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["path"] = df["path"].map(define_file_path)


## 2.3 Load WAV Function

In [71]:
def load_wav(filename):
    try :
        # Load encoded wav file
        file_contents = tf.io.read_file(filename)
        # Decode wav (tensors by channels) 
        wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
        # Removes trailing axis
        wav = tf.squeeze(wav, axis=-1)
        return wav
    except : pass

# 3. Preprocessing

## 3.1 Convert to Spectogram

In [72]:
def preprocess(file_path, label): 
    wav = load_wav(file_path)
    wav = wav[:16000]
    zero_padding = tf.zeros([16000] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav],0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram, label

## 3.2 Create DAta Partition

In [73]:
df_training = df[(df["usage"] == "training")].copy()
df_testing = df[(df["usage"] == "testing")].copy()
df_validation = df[(df["usage"] == "validation")].copy()

In [74]:
y_train = LabelBinarizer().fit_transform(df_training["word"])
y_test = LabelBinarizer().fit_transform(df_testing["word"])
y_validation = LabelBinarizer().fit_transform(df_validation["word"])

In [75]:
tf_training = tf.data.Dataset.from_tensor_slices((df_training["path"].values, y_train))
tf_testing = tf.data.Dataset.from_tensor_slices((df_testing["path"].values, y_test))
tf_validation = tf.data.Dataset.from_tensor_slices((df_validation["path"].values, y_validation))

## 3.3 Tensorflow Pipeline

In [76]:
SHUFFLE_BUFFER = 2000
BATCH_SIZE = 64

In [77]:
tf_training = tf_training.map(preprocess)
tf_training = tf_training.shuffle(SHUFFLE_BUFFER)
tf_training = tf_training.cache()
tf_training = tf_training.batch(BATCH_SIZE, drop_remainder=True)
tf_training = tf_training.prefetch(8)

In [78]:
tf_testing = tf_testing.map(preprocess)
tf_testing = tf_testing.shuffle(SHUFFLE_BUFFER)
tf_testing = tf_testing.cache()
tf_testing = tf_testing.batch(BATCH_SIZE, drop_remainder=True)
tf_testing = tf_testing.prefetch(8)

In [79]:
tf_validation = tf_validation.map(preprocess)
tf_validation = tf_validation.shuffle(SHUFFLE_BUFFER)
tf_validation = tf_validation.cache()
tf_validation = tf_validation.batch(BATCH_SIZE, drop_remainder=True)
tf_validation = tf_validation.prefetch(8)

In [80]:
samples, labels = tf_training.as_numpy_iterator().next()

# 4. Machine Learning Model

## 4.1 Create LSTM Model

In [81]:
input_shape = samples.shape[1:3]
print("input shape:", input_shape)

input shape: (491, 257)


In [82]:
model = Sequential()
model.add(layers.LSTM(128, return_sequences=True, input_shape=input_shape))
model.add(layers.LSTM(64, return_sequences=True))
model.add(layers.Dropout(0.5))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(6, activation="softmax"))

In [83]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 491, 128)          197632    
                                                                 
 lstm_3 (LSTM)               (None, 491, 64)           49408     
                                                                 
 dropout_1 (Dropout)         (None, 491, 64)           0         
                                                                 
 flatten_1 (Flatten)         (None, 31424)             0         
                                                                 
 dense_2 (Dense)             (None, 64)                2011200   
                                                                 
 dense_3 (Dense)             (None, 6)                 390       
                                                                 
Total params: 2,258,630
Trainable params: 2,258,630
No

In [84]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss=CategoricalCrossentropy(), 
              metrics=[Recall(),Precision()])

In [85]:
# EPOCHS = 4

# hist = model.fit(
#     tf_training, 
#     epochs=EPOCHS, 
#     validation_data=tf_validation, 
#     callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2)
# )

In [86]:
# tf.saved_model.save(model, "LSTM_1")

In [87]:
LOAD_MODEL_NAME = "LSTM_1"
LOAD_MODEL_PATH = os.path.join("model",LOAD_MODEL_NAME)
loaded_model = tf.saved_model.load(LOAD_MODEL_PATH)

In [97]:
actual = []
predicted = []
for tf_data in tf_testing:
    X, y = tf_data
    # Assuming 'lstm_input1' and 'lstm_input2' are the expected input names in the signature
    # Prepare input data for prediction
    input_tensor_X = tf.convert_to_tensor(X, dtype=tf.float32)
    
    input_tensor_y= tf.convert_to_tensor(y)
    input_tensor_y = tf.cast(input_tensor_y, dtype=tf.float32)
    input_tensor_y = input_tensor_y * tf.ones([64, 491, 257, 6])
    
    # Concatenate input tensors along the desired axis
    input_tensor = tf.concat([input_tensor_X, input_tensor_y], axis=0)
    
    ypred = loaded_model.signatures['serving_default'](lstm_input=input_tensor)
    y_pred = np.argmax(ypred, axis=1)
    y_test = np.argmax(y, axis=1)
    
    predicted.append(y_pred)
    actual.append(y_test)


InvalidArgumentError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [64,6] vs. [64,491,257,6] [Op:Mul]

In [None]:
actual = np.array(actual).flatten()
predicted = np.array(predicted).flatten()

In [None]:
cm = confusion_matrix(actual, predicted)
cm_display = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels=word_list)

cm_display.plot()
plt.title("Confusion Matrix ")
plt.show()