In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd 'drive/MyDrive/ColabNotebooks/HDA'

/content/drive/MyDrive/ColabNotebooks/HDA


In [4]:
!pip install tensorflow-io
!pip install psutil
!pip install tensorflow
!pip install keras-tuner
!pip install einops

Collecting tensorflow-io
  Downloading tensorflow_io-0.36.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.36.0


In [21]:
import threading
import psutil
import random
import time
import subprocess
import sys
import seaborn as sns
import pandas as pd
import os
import numpy as np
from config import PREPROCESSING_PATH ,DATASET_SPLIT_PATH
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from scipy.io import wavfile
import matplotlib.pyplot as plt


from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras_tuner import BayesianOptimization, HyperModel
from einops.layers.tensorflow import Rearrange
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix,accuracy_score

In [None]:
# sys.path.append(PREPROCESSING_PATH)

In [5]:
import preprocessing_tf
import evaluation

In [6]:
random.seed(42)
tf.random.set_seed(42)

In [7]:

print("Number of GPU:", len(tf.config.list_physical_devices('GPU')))
print("Version:", tf.__version__)

Number of GPU: 0
Version: 2.15.0


# Dataset Loading

To improve training efficiency, the dataset is pre-loaded into runtime, avoiding slow read speeds from the drive. It's already divided into train, validation, and test folders.

In [8]:
%cd /content/drive/MyDrive/ColabNotebooks/HDA

/content/drive/MyDrive/ColabNotebooks/HDA


In [None]:
!unzip project_data_split.zip -d /content/data

In [10]:
DATASET_SPLIT_PATH = "/content/data/project_data_split"

# Create train and validation dataset

Construct dataframes that includes the file paths and the corresponding spoken command (label) for each audio sample. The dataset comprises audio samples of 25 keywords: `backward`, `down`, `eight`, `five`, `follow`, `forward`, `four`, `go`, `learn`, `left`, `nine`, `no`, `off`, `on`, `one`, `right`, `seven`, `six`, `stop`, `three`, `two`, `up`, `visual`, `yes`, `zero`. Additionally, it contains 10 words `bed`,`bird`,`cat`, `dog`,`happy`,`house`,`marvin`,`sheila`,`tree`,`wow` that the model should not recognize as keywords.

To facilitate model training and evaluation, the labels are appropriately mapped: labels corresponding to the 25 keywords are retained in their original form, signifying that these are the commands the model is expected to recognize. Conversely, the labels for the 10 non-keyword words are mapped to a single class named "unknown". This approach consolidates these distinct non-keyword labels into a single category, simplifying the model's task by reducing the classification scope to the keywords and an "unknown" class for any non-keyword utterances.

In [11]:
train_df = preprocessing_tf.get_file_list(os.path.join(DATASET_SPLIT_PATH,"train"))
val_df = preprocessing_tf.get_file_list(os.path.join(DATASET_SPLIT_PATH,"validation"))

In [12]:
train_df.head()

Unnamed: 0,filepath,label,mapped_label
0,/content/data/project_data_split/train/down/20...,down,down
1,/content/data/project_data_split/train/down/a2...,down,down
2,/content/data/project_data_split/train/down/1d...,down,down
3,/content/data/project_data_split/train/down/a8...,down,down
4,/content/data/project_data_split/train/down/3a...,down,down


In [13]:
file_paths = tf.constant(train_df['filepath'].values)
labels = tf.constant(train_df['mapped_label'].values)

In [14]:
# Create a StringLookup layer
#label_lookup = label_lookup = tf.keras.layers.StringLookup(num_oov_indices=0)
label_lookup = tf.keras.layers.StringLookup(num_oov_indices=0)
label_lookup.adapt(labels)
# Transform labels into numeric
numeric_labels = label_lookup(labels)

# Create a TensorFlow dataset
train_dataset = tf.data.Dataset.from_tensor_slices((file_paths, numeric_labels))

In [15]:
file_paths_val = tf.constant(val_df['filepath'].values)
labels_val = tf.constant(val_df['mapped_label'].values)
numeric_labels_val = label_lookup(labels_val)
validation_dataset = tf.data.Dataset.from_tensor_slices((file_paths_val, numeric_labels_val))

# Preprocessing the Datasets

The training and validation datasets undergo preprocessing through our established pipeline. For the baseline model, the preprocessing involves only two steps: padding the data to ensure uniformity in size, which is essential for the model's input requirements, and converting the audio files into spectrograms.

In [17]:
train_spectrogram_ds = train_dataset.map(lambda fp, lbl: preprocessing_tf.preprocess_map_new(fp, lbl,noise=True, mfcc=True),
                               num_parallel_calls=tf.data.AUTOTUNE)
train_spectrogram_ds = train_spectrogram_ds.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)

val_spectrogram_ds = validation_dataset.map(lambda fp, lbl: preprocessing_tf.preprocess_map_new(fp, lbl,noise=True, mfcc=True),
                               num_parallel_calls=tf.data.AUTOTUNE)

val_spectrogram_ds= val_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)

Padding shape: (16000,)
/content/data/project_data_split/_background_noise_/doing_the_dishes.wav
Noisy shape: (16000,)
log_mel_spectrogram shape: (98, 40)
Mfcc shape: (98, 40, 1)
Padding shape: (16000,)
/content/data/project_data_split/_background_noise_/exercise_bike.wav
Noisy shape: (16000,)
log_mel_spectrogram shape: (98, 40)
Mfcc shape: (98, 40, 1)


In [18]:
batch_size = 32
train_spectrogram_ds = train_spectrogram_ds.batch(batch_size)
val_spectrogram_ds = val_spectrogram_ds.batch(batch_size)

In [19]:
input_shape =train_spectrogram_ds.element_spec[0].shape[1:]
print('Input shape:', input_shape)
num_labels = len(label_lookup.get_vocabulary())
print('Number of labels:', num_labels)

Input shape: (98, 40, 1)
Number of labels: 26


# CNN Model

In [27]:
def create_cnn_tstride8_model(hp):
    num_labels = 26

    model = tf.keras.Sequential([
        layers.Conv2D(filters=hp.Int('filters', min_value=64, max_value=256, step=32),
                      kernel_size=(16, 8),
                      strides=(8, 1),
                      activation='relu',
                      input_shape=input_shape),
        layers.MaxPooling2D(pool_size=(3, 1), strides=(1, 1)),
        layers.Flatten(),
        layers.Dense(units=hp.Int('units', min_value=32, max_value=256, step=32), activation='relu'),
        layers.Dense(num_labels, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [28]:

tuner = BayesianOptimization(
    create_cnn_tstride8_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='bayesian_optimization',
    project_name='cnn_tstride8'
)


tuner.search(x=train_spectrogram_ds,
             validation_data=val_spectrogram_ds,
             epochs=10,
             callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])


best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best hyperparameters: {best_hps}")


best_model = tuner.hypermodel.build(best_hps)


best_model.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])


best_model.summary()


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
192               |192               |filters
160               |160               |units

Epoch 1/10


  _, data = wavfile.read(file_path)




KeyboardInterrupt: 

# Evaluation

In [None]:
test_df = preprocessing_tf.get_file_list(os.path.join(DATASET_SPLIT_PATH,"test"))
file_paths_test = tf.constant(test_df['filepath'].values)
labels_test = tf.constant(test_df['mapped_label'].values)
numeric_labels_test = label_lookup(labels_test)
test_dataset = tf.data.Dataset.from_tensor_slices((file_paths_test, numeric_labels_test))


In [None]:
test_spectrogram_ds = test_dataset.map(lambda fp, lbl: preprocessing_tf.preprocess_map_new(fp, lbl, noise=True),
                               num_parallel_calls=tf.data.AUTOTUNE)

Padding shape: (16000,)
/content/data/project_data_split/_background_noise_/running_tap.wav
Noisy shape: (16000,)
Signal shape: (16000,)
Spectrogram shape: (124, 129, 1)


In [None]:
batch_size = 32
test_spectrogram_ds = test_spectrogram_ds.batch(batch_size)

In [None]:
stop_event = threading.Event()
log_file_path = 'gpu_log_files/gpu_usage_baseline_model_test.txt'
interval = 10

In [None]:
thread = threading.Thread(target=evaluation.log_gpu_usage, args=(log_file_path, stop_event, interval), daemon=True)
thread.start()

In [None]:
start_time = time.time()
predictions = best_model.predict(test_spectrogram_ds)
end_time = time.time()
#cpu_usage_test = psutil.cpu_percent()
cpu_usage_test = evaluation.get_system_ram_usage()
stop_event.set()
total_time_test = end_time - start_time
print(f"Total testing time: {total_time_test} seconds")

In [None]:
df_gpu_test = evaluation.get_gpu_usage(log_file_path)

In [None]:
true_labels = np.concatenate([y for x, y in test_spectrogram_ds], axis=0)
metrics_df = evaluation.get_error_metrics("CNN Model", true_labels, predictions)
metrics_df.head()

In [None]:
evaluation.plot_confusion_matrix(true_labels,predictions,label_lookup)

# Summary

In [None]:
mean_train = df_gpu_train['Memory Usage (MiB)'].mean()
mean_test = df_gpu_test['Memory Usage (MiB)'].mean()
max_train = df_gpu_train['Memory Usage (MiB)'].max()
max_test = df_gpu_test['Memory Usage (MiB)'].max()
total_params = model.count_params()

In [None]:
print(f"Training time: {total_time_train:.3f} seconds")
print(f"Average GPU usage train: {mean_train:.3f} MiB")
print(f"Maximum GPU usage train: {max_train:.3f} MiB")
print(f"CPU usage train: {cpu_usage_train:.3f} GB")

print(f"Testing time: {total_time_test:.3f} seconds")
print(f"Average GPU usage test: {mean_test:.3f} MiB")
print(f"Maximum GPU usage test: {max_test:.3f} MiB")
print(f"CPU usage test: {cpu_usage_test:.3f} GB")

print(f"Number of parameters: {total_params}")

#Number of model parameters

metrics_df.head()

Training time: 164.835 seconds
Average GPU usage train: 338.217 MiB
Maximum GPU usage train: 381.000 MiB
CPU usage train: 11.402 GB
Testing time: 53.298 seconds
Average GPU usage test: 381.000 MiB
Maximum GPU usage test: 381.000 MiB
CPU usage test: 11.330 GB
Number of parameters: 1627933


Unnamed: 0,Model Name,Precision,Recall,F1 Score,Accuracy,Cross-Entropy Loss
0,Baseline Model,0.548313,0.266889,0.241465,0.338119,2.509869


#References
