# Bird Sound Recognition with TensorFlow and ResNet50

The first thing you’d want to do is prepare your environment by importing relevant libraries and dependencies.



In [20]:
import numpy as np
import tensorflow as tf
import itertools
import os 
import shutil
import random
import pathlib
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

# Set the seed value for experiment reproducibility.
# Python's built-in `random` module is imported above, so it is seeded
# together with NumPy and TensorFlow; otherwise any use of it would stay
# non-deterministic between runs.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

print(f"TensorFlow version {tf.__version__}")

TensorFlow version 2.15.0


# Prepare dataset

In [21]:
# Locations of the training and held-out test image folders.
# To run on the other prepared dataset, swap both paths to the
# 'AY/step8-mfcc-resize-C/train' and 'AY/step8-mfcc-resize-C/test' folders.
data_dir, test_dir = (
    'AY/step5-stft-resize-C/train',
    'AY/step5-stft-resize-C/test',
)

Split the training dataset into training and validation subsets, then split the validation subset in half to obtain a test subset.

In [22]:
BATCH_SIZE = 32

# A single call with subset="both" returns the training and validation
# datasets together; 20% of the images are reserved for validation.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="both",
    seed=seed,
    image_size=(224, 224),
    batch_size=BATCH_SIZE,
)

# Divide the validation batches into two shards: shard 0 becomes the test
# set and shard 1 stays as the validation set. The right-hand side is
# evaluated before `val_ds` is rebound, so both shards come from the
# original validation dataset.
test_ds, val_ds = (
    val_ds.shard(num_shards=2, index=0),
    val_ds.shard(num_shards=2, index=1),
)

# Class names indexed by integer label.
label_names = np.array(train_ds.class_names)

# Tally how many training samples carry each label.
label_counts = {}
for _, batch_labels in train_ds:
    for class_index in batch_labels.numpy():
        class_name = label_names[class_index]
        label_counts[class_name] = label_counts.get(class_name, 0) + 1

for class_name, n_samples in label_counts.items():
    print(f"Label '{class_name}': {n_samples} samples")

NotFoundError: Could not find directory AY/step5-stft-resize-C/train

Take one batch and verify that your dataset is structured correctly and that the images and labels have the shapes you expect.

In [None]:
# Pull a single batch and report the shape of its image and label tensors.
for example_images, example_labels in train_ds.take(1):
  for batch_tensor in (example_images, example_labels):
    print(batch_tensor.shape)

Show a few images

In [None]:
# Display a grid of sample images from the first training batch, each
# titled with its class name.
rows, cols = 2, 5

plt.figure(figsize=(10, 4))
for images, labels in train_ds.take(1):
    # Never index past the end of the batch: a small dataset can yield a
    # first batch with fewer images than the grid has cells.
    n_shown = min(rows * cols, images.shape[0])
    for i in range(n_shown):
        ax = plt.subplot(rows, cols, i + 1)
        # Cast to uint8 so imshow renders the pixel values as 0-255 colours.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(label_names[labels[i]])
        plt.axis("off")

plt.tight_layout()
plt.show()

# Build and modify the ResNet50 model


In [None]:
# One output unit per class found in the training directory, so the head
# always matches the dataset instead of relying on a hard-coded count.
NUM_CLASSES = len(label_names)
IMG_SIZE = (224, 224)

# Load the ResNet50 model, excluding the top layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))

# Freeze the base model so only the new classification head is trained
base_model.trainable = False

# The ImageNet weights expect inputs that went through ResNet50's own
# `preprocess_input`. Applying it inside the model means the datasets can
# keep feeding raw 0-255 RGB images, and every fit/evaluate/predict call
# gets the same preprocessing.
inputs = tf.keras.Input(shape=(*IMG_SIZE, 3))
x = preprocess_input(inputs)
# training=False keeps the frozen backbone in inference mode.
x = base_model(x, training=False)

# Add custom layers
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
output = Dense(NUM_CLASSES, activation='softmax')(x)

# Create the final model
new_model = Model(inputs=inputs, outputs=output)

# Compile the model (labels are integer class ids, hence the sparse loss/metric)
new_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])


In [None]:
# train model
import time

EPOCHS = 10

# Stop once the validation loss has not improved for two consecutive
# epochs, and roll the model back to the weights of the best epoch so the
# later evaluation cells do not measure the weights from after the best
# epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=2,
    verbose=1,
    restore_best_weights=True,
)

start = time.monotonic()

history = new_model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[early_stopping],
)

training_time = time.monotonic() - start
print(f"Total training time: {training_time:.2f} seconds")

In [None]:
# Plot the training and validation curves recorded by `fit`, side by side:
# loss on the left, accuracy (as a percentage) on the right.
# `plt` and `np` come from the import cell at the top of the notebook.
metrics = history.history
epochs_run = history.epoch

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(16, 6))

# Loss plot
loss_ax.plot(epochs_run, metrics['loss'])
loss_ax.plot(epochs_run, metrics['val_loss'])
loss_ax.legend(['loss', 'val_loss'])
# Anchor the y-axis at zero while keeping the automatically chosen top.
loss_ax.set_ylim([0, max(loss_ax.get_ylim())])
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss [SparseCategoricalCrossentropy]')

# Accuracy plot
train_acc_pct = 100 * np.array(metrics['sparse_categorical_accuracy'])
val_acc_pct = 100 * np.array(metrics['val_sparse_categorical_accuracy'])
acc_ax.plot(epochs_run, train_acc_pct)
acc_ax.plot(epochs_run, val_acc_pct)
acc_ax.legend(['sparse_categorical_accuracy', 'val_sparse_categorical_accuracy'])
acc_ax.set_ylim([0, 100])
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Sparse Categorical Accuracy [%]')

fig.tight_layout()
plt.show()

Evaluate the model performance

In [None]:
# Score the trained model on the test shard; `return_dict=True` gives the
# metrics keyed by name.
results = new_model.evaluate(test_ds, return_dict=True)
test_accuracy = results['sparse_categorical_accuracy']
print("Test Sparse Categorical Accuracy:", test_accuracy)

In [None]:
# NOTE(review): labels and predictions are gathered in two separate passes
# over `test_ds`, so they only line up if the dataset yields its batches in
# the same order on every pass — confirm this holds for `test_ds`.

# True labels, concatenated across batches into one tensor.
y_true = tf.concat([batch_labels for _, batch_labels in test_ds], axis=0)

# Class probabilities for every test image, reduced to the most likely class.
y_pred = new_model.predict(test_ds)
y_pred_classes = tf.argmax(y_pred, axis=1)

## Compute confusion matrix


In [None]:
import seaborn as sns

# Fix the matrix size to the number of known classes. Without `num_classes`
# the size is inferred from the largest label present, so a class missing
# from this test shard would shrink the matrix and misalign the tick labels.
confusion_mtx = tf.math.confusion_matrix(
    y_true, y_pred_classes, num_classes=len(label_names)
)

# Rows are true labels, columns are predicted labels.
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_mtx, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_names, yticklabels=label_names)
plt.xlabel('Predicted')
plt.ylabel('Label')
plt.savefig('stft_predictions_resnet.png')

In [None]:
from sklearn.metrics import classification_report

# Pass the full list of class ids explicitly. When `labels` is omitted the
# report only covers classes that occur in y_true/y_pred, and it raises a
# ValueError if that count differs from len(target_names) — which happens
# whenever a class is missing from this test shard.
class_ids = list(range(len(label_names)))
print(classification_report(y_true, y_pred_classes,
                            labels=class_ids, target_names=label_names))


In [None]:
# Evaluate on the separate held-out test directory.
# shuffle=False is essential here: by default the dataset is shuffled and
# re-ordered on every pass, so the predictions from `predict()` and the
# labels collected in a second pass would not refer to the same images,
# making the per-class report below meaningless.
# NOTE(review): this assumes the class sub-folders under `test_dir` match
# those under `data_dir`, so integer labels mean the same thing as in
# training — confirm.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=test_dir,
    shuffle=False,
    image_size=(224, 224),  # Must match the model's input size
)

new_model.evaluate(test_ds, return_dict=True)

# Predicted class id per image, in dataset order.
my_pred = new_model.predict(test_ds, steps=len(test_ds))
my_pred = tf.argmax(my_pred, axis=1)

# True class id per image, in the same dataset order.
my_true = tf.concat(list(test_ds.map(lambda s, lab: lab)), axis=0)

In [None]:
# Count how many held-out test samples belong to each integer class id.
unique, counts = np.unique(my_true, return_counts=True)
print({class_id: n_samples for class_id, n_samples in zip(unique, counts)})

In [None]:
from sklearn.metrics import classification_report

# Pass the full list of class ids explicitly. When `labels` is omitted the
# report only covers classes that occur in my_true/my_pred, and it raises a
# ValueError if that count differs from len(target_names).
holdout_class_ids = list(range(len(label_names)))
print(classification_report(my_true, my_pred,
                            labels=holdout_class_ids, target_names=label_names))