In [1]:
import os

import numpy as np
import tensorflow as tf

In [2]:
SPECTROGRAM_TRAIN_TFRECORD_FILENAME = "spectrogram_train.tfrecord"
SPECTROGRAM_VALID_TFRECORD_FILENAME = "spectrogram_valid.tfrecord"
DATA_DIR = "data"
BUFFER_SIZE = 64
BATCH_SIZE = 64
SPECTROGRAM_SHAPE = (85, 129)

In [3]:
!nvidia-smi

Tue May 11 10:40:04 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce GTX 106...  Off  | 00000000:04:00.0 Off |                  N/A |
| 32%   32C    P8     6W / 120W |      0MiB /  6078MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [4]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Load Record

In [5]:
train_dataset = tf.data.TFRecordDataset([os.path.join(DATA_DIR, SPECTROGRAM_TRAIN_TFRECORD_FILENAME)])
valid_dataset = tf.data.TFRecordDataset([os.path.join(DATA_DIR, SPECTROGRAM_VALID_TFRECORD_FILENAME)])

In [6]:
feature_description = {
    'spectrogram': tf.io.FixedLenFeature([], tf.string, default_value=''),
    'label': tf.io.FixedLenFeature([], tf.float32, default_value=0.0)
}

def _parse_function(example_proto):
    example = tf.io.parse_single_example(example_proto, feature_description)
    spectrogram = tf.io.parse_tensor(example['spectrogram'], out_type=tf.float32)
    spectrogram = tf.ensure_shape(spectrogram, SPECTROGRAM_SHAPE)
    label = tf.ensure_shape(example['label'], ())
    return spectrogram, label

In [7]:
train_dataset = train_dataset.map(_parse_function)
valid_dataset = valid_dataset.map(_parse_function)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [8]:
for i in train_dataset.take(1):
    print(i)

(<tf.Tensor: shape=(85, 129), dtype=float32, numpy=
array([[3.79017234e-01, 1.37462819e+00, 3.20527291e+00, ...,
        2.03023610e-05, 4.91823666e-05, 1.59546733e-04],
       [1.34324759e-01, 1.31664729e+00, 3.49691677e+00, ...,
        1.01808109e-04, 1.20156576e-04, 9.34302807e-05],
       [7.57609010e-02, 1.05255270e+00, 2.61501384e+00, ...,
        7.29629246e-05, 3.64155385e-05, 2.37822533e-05],
       ...,
       [3.68746817e-01, 1.46235037e+00, 3.51829767e+00, ...,
        4.68326034e-05, 5.54631079e-05, 7.57128000e-05],
       [6.25966415e-02, 9.80049014e-01, 2.85418606e+00, ...,
        8.71873126e-05, 1.01852413e-04, 8.16807151e-05],
       [3.01045269e-01, 1.19051707e+00, 2.84571457e+00, ...,
        4.19902790e-05, 1.61160388e-05, 2.91764736e-05]], dtype=float32)>, <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)


# Preparation

In [9]:
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(BUFFER_SIZE).cache()
valid_dataset = valid_dataset.batch(BATCH_SIZE).prefetch(BUFFER_SIZE).cache()

# First Model

Model parameters copied from tutorial: https://www.tensorflow.org/tutorials/audio/simple_audio

In [10]:
norm_layer = tf.keras.layers.experimental.preprocessing.Normalization()
norm_layer.adapt(train_dataset.map(lambda x, _: x))
model1 = tf.keras.models.Sequential([
    tf.keras.Input(shape=SPECTROGRAM_SHAPE),
    tf.keras.layers.Reshape((SPECTROGRAM_SHAPE[0], SPECTROGRAM_SHAPE[1], 1)),
    tf.keras.layers.experimental.preprocessing.Resizing(32, 32),
    norm_layer,
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2)
])
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 85, 129, 1)        0         
_________________________________________________________________
resizing (Resizing)          (None, 32, 32, 1)         0         
_________________________________________________________________
normalization (Normalization (None, 32, 32, 129)       259       
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        37184     
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 64)        0

In [11]:
model1.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
               metrics=['accuracy'])

In [12]:
model1.fit(train_dataset, epochs=20, validation_data=valid_dataset, callbacks=tf.keras.callbacks.EarlyStopping(patience=2))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


<tensorflow.python.keras.callbacks.History at 0x7fdf640cf6d0>