In [1]:
import numpy as np
import tensorflow as tf

import os

In [38]:
SPECTROGRAM_TRAIN_TFRECORD_FILENAME = "spectrogram_train.tfrecord"
SPECTROGRAM_VALID_TFRECORD_FILENAME = "spectrogram_valid.tfrecord"
DATA_DIR = "data"
BUFFER_SIZE = 64
BATCH_SIZE = 64
SPECTROGRAM_SHAPE = (85, 129)

# Load Record

In [49]:
train_dataset = tf.data.TFRecordDataset([os.path.join(DATA_DIR, SPECTROGRAM_TRAIN_TFRECORD_FILENAME)])
valid_dataset = tf.data.TFRecordDataset([os.path.join(DATA_DIR, SPECTROGRAM_VALID_TFRECORD_FILENAME)])

In [50]:
feature_description = {
    'spectrogram': tf.io.FixedLenFeature([], tf.string, default_value=''),
    'label': tf.io.FixedLenFeature([], tf.float32, default_value=0.0)
}

def _parse_function(example_proto):
    example = tf.io.parse_single_example(example_proto, feature_description)
    spectrogram = tf.io.parse_tensor(example['spectrogram'], out_type=tf.float32)
    spectrogram = tf.ensure_shape(spectrogram, SPECTROGRAM_SHAPE)
    label = tf.ensure_shape(example['label'], ())
    return spectrogram, label

In [51]:
train_dataset = train_dataset.map(_parse_function)
valid_dataset = valid_dataset.map(_parse_function)

In [52]:
for i in train_dataset.take(1):
    print(i)

(<tf.Tensor: shape=(85, 129), dtype=float32, numpy=
array([[5.50424635e-01, 3.23084682e-01, 7.39609897e-01, ...,
        8.44748065e-05, 1.00848323e-04, 7.20620155e-05],
       [8.08760464e-01, 5.43129921e-01, 9.12065446e-01, ...,
        4.87966863e-05, 6.39859863e-05, 1.21831894e-04],
       [4.52644348e-01, 1.53675258e+00, 5.54100394e-01, ...,
        2.00303148e-05, 2.59448389e-05, 7.03334808e-05],
       ...,
       [6.99571013e-01, 8.03465366e-01, 7.09879398e-01, ...,
        7.33799025e-05, 7.61434203e-05, 9.62615013e-06],
       [3.28246504e-03, 1.66637897e+00, 2.48969793e+00, ...,
        1.12396221e-04, 1.19734090e-04, 1.23858452e-04],
       [1.18247461e+00, 2.05399251e+00, 1.59880507e+00, ...,
        8.77474740e-05, 2.21014307e-05, 2.74181366e-05]], dtype=float32)>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)


# Preparation

In [53]:
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(BUFFER_SIZE).cache()
valid_dataset = valid_dataset.batch(BATCH_SIZE).prefetch(BUFFER_SIZE).cache()

# First Model

Model parameters copied from tutorial: https://www.tensorflow.org/tutorials/audio/simple_audio

In [67]:
norm_layer = tf.keras.layers.experimental.preprocessing.Normalization()
norm_layer.adapt(train_dataset.map(lambda x, _: x))
model1 = tf.keras.models.Sequential([
    tf.keras.Input(shape=SPECTROGRAM_SHAPE),
    tf.keras.layers.Reshape((SPECTROGRAM_SHAPE[0], SPECTROGRAM_SHAPE[1], 1)),
    tf.keras.layers.experimental.preprocessing.Resizing(32, 32),
    norm_layer,
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2)
])
model1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_3 (Reshape)          (None, 85, 129, 1)        0         
_________________________________________________________________
resizing_6 (Resizing)        (None, 32, 32, 1)         0         
_________________________________________________________________
normalization_8 (Normalizati (None, 32, 32, 129)       259       
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 30, 30, 32)        37184     
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 14, 14, 64)       

In [74]:
model1.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
               metrics=['accuracy'])

In [75]:
model1.fit(train_dataset, epochs=10, validation_data=valid_dataset, callbacks=tf.keras.callbacks.EarlyStopping(patience=2))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2a01f255790>