In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Pin the NumPy and TensorFlow random seeds so repeated runs are comparable.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Directory the rest of the notebook reads the dataset from
DATASET_PATH = '/content/mini_speech_commands'

# Fetch and unpack the archive only when that directory is not already present
if not os.path.exists(DATASET_PATH):
    tf.keras.utils.get_file(
        fname='mini_speech_commands.zip',
        origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
        cache_dir='/content/',
        cache_subdir='.',
        extract=True)

# Define the commands
# Each sub-directory of DATASET_PATH is one command; plain files are skipped.
# The list is sorted because os.listdir() returns entries in arbitrary order,
# whereas audio_dataset_from_directory assigns inferred label indices in
# alphanumeric order of the class directory names. Sorting keeps commands[i]
# aligned with label i.
commands = sorted(
    entry for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)

# Per-batch preprocessing applied to the audio datasets
def preprocess_audio(audio, labels):
    """Return ``(audio, labels)`` with the last axis of ``audio`` removed.

    ``tf.squeeze(..., axis=-1)`` only succeeds when that axis has size 1.
    NOTE(review): presumably the axis is the channel dimension of mono
    clips -- confirm against the dataset.

    Args:
        audio: Tensor whose trailing axis has size 1.
        labels: Passed through unchanged.

    Returns:
        A tuple of the squeezed audio tensor and the untouched labels.
    """
    squeezed = tf.squeeze(audio, axis=-1)
    return squeezed, labels

# Build the training and validation pipelines from the class sub-directories
# of DATASET_PATH: 20% of the files go to validation, batches hold 64 clips,
# and output_sequence_length is fixed at 16000.
train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(
    directory=DATASET_PATH,
    subset='both',
    validation_split=0.2,
    seed=0,
    batch_size=64,
    output_sequence_length=16000)

# Apply preprocess_audio to every batch, letting tf.data choose the parallelism
train_ds = train_ds.map(preprocess_audio, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(preprocess_audio, num_parallel_calls=tf.data.AUTOTUNE)

# Model: raw waveform in, one logit per command out
input_shape = (16000,)
num_labels = len(commands)

model = models.Sequential([
    layers.Input(shape=input_shape),
    # Append a length-1 axis so the 1-D convolutions receive (steps, channels).
    layers.Reshape(target_shape=(16000, 1)),
    layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(),
    layers.Dropout(rate=0.25),
    layers.Flatten(),
    layers.Dense(units=128, activation='relu'),
    layers.Dropout(rate=0.5),
    # No activation here: the loss below is configured with from_logits=True.
    layers.Dense(units=num_labels),
])

# Configure training: Adam with default settings, sparse categorical
# cross-entropy computed on logits, and accuracy as the reported metric.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Train the model
EPOCHS = 10

# Split the validation batches in half *before* training. Early stopping then
# monitors one half, and the reported test metrics come from the other half.
# Previously the "test" set was a single batch taken from the very data that
# early stopping monitored.
# NOTE(review): audio_dataset_from_directory shuffles by default, so which
# clips land in each shard may differ between passes -- confirm whether a
# fixed, file-level split is required.
test_ds = val_ds.shard(num_shards=2, index=0)
holdout_val_ds = val_ds.shard(num_shards=2, index=1)

history = model.fit(
    train_ds,
    validation_data=holdout_val_ds,
    epochs=EPOCHS,
    # `callbacks` is documented as a list of callback instances.
    callbacks=[tf.keras.callbacks.EarlyStopping(verbose=1, patience=2)],
)

# Evaluate the model on the batches that were held out from validation
test_results = model.evaluate(test_ds, return_dict=True)
print("Test Loss:", test_results['loss'])
print("Test Accuracy:", test_results['accuracy'])

# Save the model
# The path carries an explicit `.keras` suffix: Keras 3's Model.save() raises
# a ValueError for paths without a `.keras` or `.h5` extension, and older
# tf.keras releases accept the suffix as well.
model.save("/content/sound_classification_model.keras")

print("Model training and evaluation completed.")


Found 8000 files belonging to 8 classes.
Using 6400 files for training.
Using 1600 files for validation.
Epoch 1/10