In [4]:
import numpy as np
import tensorflow as tf
import keras
from datasets import Dataset, load_dataset
import librosa
from keras import layers, models

print("Tensorflow version:", tf.__version__)
print("Keras version:", keras.__version__)
print("Numpy version:", np.__version__)

  from .autonotebook import tqdm as notebook_tqdm


Tensorflow version: 2.16.2
Keras version: 3.8.0
Numpy version: 1.26.4


In [5]:
dataset = load_dataset("google/speech_commands", "v0.01")
# Overview of the dataset
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'speaker_id', 'utterance_id'],
        num_rows: 51093
    })
    validation: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'speaker_id', 'utterance_id'],
        num_rows: 6799
    })
    test: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'speaker_id', 'utterance_id'],
        num_rows: 3081
    })
})


In [6]:
train_dataset = dataset['train']
validation_dataset = dataset['validation']
test_dataset = dataset['test']

In [7]:
# Function to get a 10% subset
def get_subset(dataset_split, fraction=0.1):
    return dataset_split.train_test_split(test_size=1 - fraction)["train"]

# Creating 10% subsets
train_subset = get_subset(train_dataset)
validation_subset = get_subset(validation_dataset)
test_subset = get_subset(test_dataset)

In [8]:

# Check available devices
print("Available devices:")
for device in tf.config.list_physical_devices():
    print(device)

Available devices:
PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [9]:

def preprocess_audio(set):

    audio_array = set['audio']['array']
    sampling_rate = set['audio']['sampling_rate']

    mel_spectogram = librosa.feature.melspectrogram(y=audio_array, sr=sampling_rate, n_mels=128)

    log_mel_spectogram = librosa.power_to_db(mel_spectogram)

    log_mel_spectogram = np.expand_dims(log_mel_spectogram, axis=-1)

    return {'audio': log_mel_spectogram}

train_dataset = train_subset.map(preprocess_audio)
validation_dataset = validation_subset.map(preprocess_audio)
test_dataset = test_subset.map(preprocess_audio)

Map: 100%|██████████| 5109/5109 [00:34<00:00, 146.43 examples/s]
Map: 100%|██████████| 679/679 [00:04<00:00, 158.58 examples/s]
Map: 100%|██████████| 308/308 [00:02<00:00, 146.18 examples/s]


In [10]:
# A simple CNN model
model = models.Sequential([
    layers.InputLayer(shape=(128, 32, 1)),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(30, activation='softmax') # Number of possible commands
])

# Compiling
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

2025-03-17 14:07:12.098808: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-03-17 14:07:12.099522: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-03-17 14:07:12.099546: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-03-17 14:07:12.099940: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-03-17 14:07:12.099961: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:
def audio_generator(dataset):
    for sample in dataset:
        audio_features = sample['audio']
        label = sample['label']
        
        # Convert audio_features to a numpy array (if it's not already)
        audio_features = np.array(audio_features)
        
        # Ensure the audio features have the shape (128, 32, 1)
        # Pad or truncate if necessary (this assumes the audio data is 2D, with shape (128, n_features, 1))
        if audio_features.shape[1] < 32:
            # Pad the sequence if it's shorter than expected
            pad_width = 32 - audio_features.shape[1]
            audio_features = np.pad(audio_features, ((0, 0), (0, pad_width), (0, 0)), mode='constant')
        elif audio_features.shape[1] > 32:
            # Truncate the sequence if it's longer than expected
            audio_features = audio_features[:, :32, :]
        
        # Yield the audio features and label
        yield audio_features, label

def convert_to_tf_dataset(dataset):
    # Create a TensorFlow Dataset from the generator
    tf_dataset = tf.data.Dataset.from_generator(
        lambda: audio_generator(dataset), 
        output_signature=(
            tf.TensorSpec(shape=(128, 32, 1), dtype=tf.float32),  # The expected shape of the audio data
            tf.TensorSpec(shape=(), dtype=tf.int64)  # Adjust dtype according to your label type (e.g., tf.int64 for class labels)
        )
    )
    return tf_dataset


# Convert the train, validation, and test datasets
train_tf_dataset = convert_to_tf_dataset(train_dataset)
validation_tf_dataset = convert_to_tf_dataset(validation_dataset)
test_tf_dataset = convert_to_tf_dataset(test_dataset)


In [12]:
for sample in test_tf_dataset.take(1):
    print(sample)

(<tf.Tensor: shape=(128, 32, 1), dtype=float32, numpy=
array([[[-70.249626],
        [-70.249626],
        [-70.249626],
        ...,
        [-70.249626],
        [-70.249626],
        [-70.249626]],

       [[-70.249626],
        [-70.249626],
        [-70.249626],
        ...,
        [-69.6542  ],
        [-67.13712 ],
        [-66.26893 ]],

       [[-67.78666 ],
        [-64.45067 ],
        [-63.874172],
        ...,
        [-64.14173 ],
        [-64.32198 ],
        [-62.54308 ]],

       ...,

       [[-70.249626],
        [-70.249626],
        [-70.249626],
        ...,
        [-70.249626],
        [-70.249626],
        [-70.249626]],

       [[-70.249626],
        [-70.249626],
        [-70.249626],
        ...,
        [-70.249626],
        [-70.249626],
        [-70.249626]],

       [[-70.249626],
        [-70.249626],
        [-70.249626],
        ...,
        [-70.249626],
        [-70.249626],
        [-70.249626]]], dtype=float32)>, <tf.Tensor: shape=(), dtype=int64

2025-03-17 14:07:12.516871: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [13]:
print(model.input_shape)

(None, 128, 32, 1)


In [14]:
BATCH_SIZE = 128

train_tf_dataset = train_tf_dataset.batch(128).prefetch(tf.data.AUTOTUNE)
validation_tf_dataset = validation_tf_dataset.batch(128).prefetch(tf.data.AUTOTUNE)
test_tf_dataset = test_tf_dataset.batch(128).prefetch(tf.data.AUTOTUNE)

train_subset_size = train_subset.num_rows
validation_subset_size = validation_subset.num_rows

steps_per_epoch = train_subset_size // 128  # 128 is the batch size
validation_steps = validation_subset_size // 128


In [15]:
print("Fitting the model now!!!")

model.fit(
    train_tf_dataset,  
    epochs=10,
    steps_per_epoch=steps_per_epoch,  
    validation_data=validation_tf_dataset, 
    validation_steps=validation_steps,  
)

# Evaluate on the test dataset
test_loss, test_accuracy = model.evaluate(test_tf_dataset)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Fitting the model now!!!
Epoch 1/10


2025-03-17 14:07:12.780431: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.0353 - loss: 32.1573 - val_accuracy: 0.0250 - val_loss: 3.3999
Epoch 2/10
[1m 1/39[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 220ms/step - accuracy: 0.0513 - loss: 3.3960

2025-03-17 14:07:58.324940: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:07:58.324959: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16045564654368671250
2025-03-17 14:07:58.324963: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/Add_12/_34]]
2025-03-17 14:07:58.324968: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14322534746514809295
2025-03-17 14:07:58.324971: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9207247282923676287
2025-03-17 14:07:58.324975: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15731468

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 147ms/step - accuracy: 0.0513 - loss: 3.3960 - val_accuracy: 0.0250 - val_loss: 3.3998
Epoch 3/10
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - accuracy: 0.0530 - loss: 3.3995 - val_accuracy: 0.0609 - val_loss: 4.1865
Epoch 4/10
[1m 1/39[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 111ms/step - accuracy: 0.0598 - loss: 4.0642

2025-03-17 14:08:50.308300: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:08:50.308318: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/Add_14/_36]]
2025-03-17 14:08:50.308335: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15621652735144034728
2025-03-17 14:08:50.308347: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15989270496028941490
2025-03-17 14:08:50.308355: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16045564654368671250
2025-03-17 14:08:50.308360: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 1432253

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 135ms/step - accuracy: 0.0598 - loss: 4.0642 - val_accuracy: 0.0375 - val_loss: 4.9048
Epoch 5/10
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - accuracy: 0.0725 - loss: 5.6590 - val_accuracy: 0.1266 - val_loss: 3.2999
Epoch 6/10
[1m 1/39[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 202ms/step - accuracy: 0.1282 - loss: 3.2315

2025-03-17 14:09:42.229779: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:09:42.229793: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_22]]
2025-03-17 14:09:42.229818: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 278702807721696980
2025-03-17 14:09:42.229845: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 18403876145751363687
2025-03-17 14:09:42.229855: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14322534746514809295
2025-03-17 14:09:42.229860: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 10755408142139655893
2025-03-

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 136ms/step - accuracy: 0.1282 - loss: 3.2315 - val_accuracy: 0.1281 - val_loss: 3.2962
Epoch 7/10
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 1s/step - accuracy: 0.1376 - loss: 3.4066 - val_accuracy: 0.1766 - val_loss: 4.2478
Epoch 8/10
[1m 1/39[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 115ms/step - accuracy: 0.1453 - loss: 3.9368

2025-03-17 14:10:33.520082: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:10:33.520096: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/Add_12/_34]]
2025-03-17 14:10:33.520107: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15989270496028941490
2025-03-17 14:10:33.520115: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16045564654368671250
2025-03-17 14:10:33.520121: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 278702807721696980
2025-03-17 14:10:33.520127: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 184038761

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 129ms/step - accuracy: 0.1453 - loss: 3.9368 - val_accuracy: 0.1500 - val_loss: 4.2072
Epoch 9/10
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 1s/step - accuracy: 0.1749 - loss: 3.7493 - val_accuracy: 0.0406 - val_loss: 15.1059
Epoch 10/10
[1m 1/39[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 100ms/step - accuracy: 0.0684 - loss: 16.4932

2025-03-17 14:11:21.689620: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:11:21.689636: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 15989270496028941490
2025-03-17 14:11:21.689640: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16045564654368671250
2025-03-17 14:11:21.689644: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 278702807721696980
2025-03-17 14:11:21.689648: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14322534746514809295
2025-03-17 14:11:21.689650: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/A

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 128ms/step - accuracy: 0.0684 - loss: 16.4932 - val_accuracy: 0.0422 - val_loss: 12.7958
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 676ms/step - accuracy: 0.0423 - loss: 12.9299
Test Loss: 12.984161376953125, Test Accuracy: 0.0357142873108387


2025-03-17 14:11:29.089448: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-17 14:11:29.089466: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_6]]
2025-03-17 14:11:29.089474: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7058018453906899510
2025-03-17 14:11:29.089479: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8994084161323031664
2025-03-17 14:11:29.089492: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16305376571388013926
