**Import the necessary libraries**

In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

**Set the seed value for experiment reproducibility.**

In [3]:
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

**Download and extract the dataset from Tensorflow's Mini Speech Commands dataset**

In [4]:
DATASET_PATH = '/content/mini_speech_commands'
#Since im using colab for this, the path is set like this,
#feel free to change the path if you're running on your own PC.
if not os.path.exists(DATASET_PATH):
    tf.keras.utils.get_file(
        'mini_speech_commands.zip',
        origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
        extract=True,
        cache_dir='/content/',
        cache_subdir='.')

In [5]:
commands = os.listdir(DATASET_PATH)
commands = [cmd for cmd in commands if os.path.isdir(os.path.join(DATASET_PATH, cmd))]

**Preprocess the audio data**

In [6]:
def preprocess_audio(audio, labels):
    audio = tf.squeeze(audio, axis=-1)
    return audio, labels

**Load and preprocess the training and validation datasets**

In [7]:
train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(
    directory=DATASET_PATH,
    batch_size=64,
    validation_split=0.2,
    seed=0,
    output_sequence_length=16000,
    subset='both')

train_ds = train_ds.map(preprocess_audio, tf.data.AUTOTUNE)
val_ds = val_ds.map(preprocess_audio, tf.data.AUTOTUNE)

Found 8000 files belonging to 8 classes.
Using 6400 files for training.
Using 1600 files for validation.


**Now time to define the model architecture**

*   We want the inputs in shapes of 16000 items.
*   Activation is set to 'relu'



In [8]:
input_shape = (16000,)
num_labels = len(commands)

model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Reshape((16000, 1)),
    layers.Conv1D(32, 3, activation='relu'),
    layers.Conv1D(64, 3, activation='relu'),
    layers.MaxPooling1D(),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_labels),
])

**Compiling and then training the model.**

In [9]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [10]:
EPOCHS = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: early stopping


**Evaluate the model**

*   Finally we are going to evaluate our created model and see the test results.
*   This includes the Loss and Accuracy of our model.





In [11]:
test_ds = val_ds.take(1)
test_results = model.evaluate(test_ds, return_dict=True)
print("Test Loss:", test_results['loss'])
print("Test Accuracy:", test_results['accuracy'])

Test Loss: 1.8547708988189697
Test Accuracy: 0.34375


**Export the model**

Now we can export our created model and use it elsewhere.

In [12]:
model.save("/content/sound_classification_model")