### Training


In [1]:
%load_ext tensorboard

import random
import shutil
import os
import json
import numpy as np

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from sklearn.model_selection import train_test_split


2023-12-19 22:05:18.218788: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
DATA_PATH = "data.json"

VALIDATION_SPLIT = 0.2 # percentage of dataset
TEST_SPLIT = 0.1 # percentage of dataset

NUM_SESSION_GROUPS = 5
LOGDIR = "logs/hparam_tuning/"
CHECKPOINT_DIR = "logs/checkpoint"
NUM_EPOCHS = 20
BATCH_SIZE = 32


In [3]:
# load training data from json file

with open(DATA_PATH, "r") as f:
    data = json.load(f)

x = np.array(data["mfcc"])
y = np.array(data["labels"])


In [4]:
# split data into train, validation and test sets

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=TEST_SPLIT)
x_train, x_validation, y_train, y_validation = train_test_split(x_train, y_train, test_size=VALIDATION_SPLIT)

# add an axis to input sets to match the shape CNN expects (last axis is like channel in color images)
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
x_validation = x_validation[..., np.newaxis]


In [5]:
# choose hyperparameters to tune

# HP_CONV_LAYERS = hp.HParam("conv_layers", hp.IntInterval(1, 3))
# HP_CONV_KERNEL_SIZE = hp.HParam("conv_kernel_size", hp.Discrete([3, 5]))
# HP_POOL_SIZE = hp.HParam("conv_pool_size", hp.Discrete([2, 3]))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([32, 64]))
HP_DENSE_LAYERS = hp.HParam("dense_layers", hp.IntInterval(1, 3))
HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.2, 0.3))
HP_OPTIMIZER = hp.HParam("optimizer", hp.Discrete(["adam", "sgd"]))

HPARAMS = [
    # HP_CONV_LAYERS,
    # HP_CONV_KERNEL_SIZE,
    # HP_POOL_SIZE,
    HP_NUM_UNITS,
    HP_DENSE_LAYERS,
    HP_DROPOUT,
    HP_OPTIMIZER
]

METRICS = [
    hp.Metric(
        "epoch_accuracy",
        group="validation",
        display_name="accuracy (val)",
    ),
    hp.Metric(
        "epoch_loss",
        group="validation",
        display_name="loss (val)",
    ),
    hp.Metric(
        "batch_accuracy",
        group="train",
        display_name="accuracy (train)",
    ),
    hp.Metric(
        "batch_loss",
        group="train",
        display_name="loss (train)",
    ),
]


In [6]:
def create_model(hparams, seed):
    rng = random.Random(seed)

    INPUT_SHAPE = (x_train.shape[1], x_train.shape[2], 1)

    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=INPUT_SHAPE))

   #  conv_filters = 8
   #  for _ in range(hparams[HP_CONV_LAYERS]):
   #     model.add(
   #        tf.keras.layers.Conv2D(
   #           filters=conv_filters,
   #          #  kernel_size=hparams[HP_CONV_KERNEL_SIZE],
   #           kernel_size=3,
   #           padding="same",
   #           activation="relu"
   #        )
   #     )
   #     model.add(
   #        tf.keras.layers.MaxPooling2D(
   #           pool_size=hparams[HP_POOL_SIZE],
   #           strides=hparams[HP_POOL_SIZE]-1,
   #           padding="same"
   #        )
   #     )
   #     model.add(tf.keras.layers.BatchNormalization())
   #     conv_filters *= 2
    
    # 1st conv layer
    model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_SHAPE))
    model.add(tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.BatchNormalization())

    # 2nd conv layer
    model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.BatchNormalization())

    # 3rd conv layer\n",
    model.add(tf.keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.BatchNormalization())
    
    # flatten output and feed it into dense layer
    model.add(tf.keras.layers.Flatten())

    for _ in range(hparams[HP_DENSE_LAYERS]):
        model.add(tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation="relu"))

    model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT], seed=rng.random()))

    # output layer
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(
       optimizer=hparams[HP_OPTIMIZER],
       loss='sparse_categorical_crossentropy',
       metrics=['accuracy']
    )

    return model


In [7]:
def run(base_logdir, session_id, hparams):
    model = create_model(hparams=hparams, seed=session_id)
    logdir = os.path.join(base_logdir, session_id)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=logdir,
        histogram_freq=1
    )

    hparams_callback = hp.KerasCallback(logdir, hparams)

    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=CHECKPOINT_DIR,
        save_weights_only=True,
        monitor="val_accuracy",
        mode="max",
        save_best_only=True,
        verbose=1
    )

    model.fit(
        x=x_train,
        y=y_train,
        epochs=NUM_EPOCHS,
        batch_size=BATCH_SIZE,
        shuffle=False,
        validation_data=(x_validation, y_validation),
        callbacks=[tensorboard_callback, hparams_callback, checkpoint_callback]
    )

    return model


In [8]:
def run_all(logdir, verbose=True):
    rng = random.Random(0)

    with tf.summary.create_file_writer(logdir).as_default():
        hp.hparams_config(hparams=HPARAMS, metrics=METRICS)

    sessions_per_group = 2
    num_sessions = NUM_SESSION_GROUPS * sessions_per_group
    session_index = 0  # across all session groups
    for group_index in range(NUM_SESSION_GROUPS):
        hparams = {h: h.domain.sample_uniform(rng) for h in HPARAMS}
        hparams_string = str(hparams)
        for repeat_index in range(sessions_per_group):
            session_id = str(session_index)
            session_index += 1
            if verbose:
                print(
                    "--- Running training session %d/%d"
                    % (session_index, num_sessions)
                )
                print(hparams_string)
                print("--- repeat #: %d" % (repeat_index + 1))
            model = run(
                base_logdir=logdir,
                session_id=session_id,
                hparams=hparams,
            )
    
    return model


In [9]:
# main

np.random.seed(0)
logdir = LOGDIR
shutil.rmtree(logdir, ignore_errors=True)
print(f"Saving output to {logdir}")
model = run_all(logdir=logdir, verbose=True)
print(f"Done. Output saved to {logdir}")


Saving output to logs/hparam_tuning/
--- Running training session 1/10
{HParam(name='num_units', domain=Discrete([32, 64]), display_name=None, description=None): 64, HParam(name='dense_layers', domain=IntInterval(1, 3), display_name=None, description=None): 2, HParam(name='dropout', domain=RealInterval(0.2, 0.3), display_name=None, description=None): 0.20404843781807777, HParam(name='optimizer', domain=Discrete(['adam', 'sgd']), display_name=None, description=None): 'sgd'}
--- repeat #: 1


2023-12-19 22:06:06.052055: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-19 22:06:06.339710: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-19 22:06:06.340168: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-19 22:06:06.342135: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-19 22:06:06.342670: I tensorflow/compile

Epoch 1/20


2023-12-19 22:06:08.663415: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8907
2023-12-19 22:06:08.984835: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-19 22:06:09.287086: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f9507c75190 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-19 22:06:09.287222: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 970, Compute Capability 5.2
2023-12-19 22:06:09.598412: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-19 22:06:09.684428: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 1: val_accuracy improved from -inf to 0.32130, saving model to logs/checkpoint
Epoch 2/20
Epoch 2: val_accuracy improved from 0.32130 to 0.49722, saving model to logs/checkpoint
Epoch 3/20
Epoch 3: val_accuracy improved from 0.49722 to 0.52222, saving model to logs/checkpoint
Epoch 4/20
Epoch 4: val_accuracy improved from 0.52222 to 0.54537, saving model to logs/checkpoint
Epoch 5/20
Epoch 5: val_accuracy improved from 0.54537 to 0.56019, saving model to logs/checkpoint
Epoch 6/20
Epoch 6: val_accuracy improved from 0.56019 to 0.64630, saving model to logs/checkpoint
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.64630
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.64630
Epoch 9/20
Epoch 9: val_accuracy improved from 0.64630 to 0.68981, saving model to logs/checkpoint
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.68981
Epoch 11/20
Epoch 11: val_accuracy improved from 0.68981 to 0.70000, saving model to logs/checkpoint
Epoch 12/20
Epoch 12: val_accura

2023-12-19 22:08:29.554541: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


Epoch 1: val_accuracy improved from -inf to 0.30741, saving model to logs/checkpoint
Epoch 2/20
Epoch 2: val_accuracy improved from 0.30741 to 0.53241, saving model to logs/checkpoint
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.53241
Epoch 4/20
Epoch 4: val_accuracy improved from 0.53241 to 0.59815, saving model to logs/checkpoint
Epoch 5/20
Epoch 5: val_accuracy improved from 0.59815 to 0.63796, saving model to logs/checkpoint
Epoch 6/20
Epoch 6: val_accuracy improved from 0.63796 to 0.65926, saving model to logs/checkpoint
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.65926
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.65926
Epoch 9/20
Epoch 9: val_accuracy did not improve from 0.65926
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.65926
Epoch 11/20
Epoch 11: val_accuracy did not improve from 0.65926
Epoch 12/20
Epoch 12: val_accuracy improved from 0.65926 to 0.70093, saving model to logs/checkpoint
Epoch 13/20
Epoch 13: val_accuracy did not

In [10]:
%tensorboard --logdir logs/hparam_tuning

In [None]:
# load best model from checkpoint

model.load_weights(CHECKPOINT_DIR)