In [1]:
import os
import pickle
import librosa
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# configuration
DATA_DIR = "data_split"
SAMPLE_RATE = 16000
EMBEDDING_SIZE = 1024  # YAMNet output size

In [3]:
# load YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

In [4]:
# SUPPORT FUNCTIONS
# feature extraction using YAMNet
def extract_yamnet_embedding(file_path):
    waveform, sr = librosa.load(file_path, sr=SAMPLE_RATE)
    waveform = waveform.astype(np.float32)
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return np.mean(embeddings.numpy(), axis=0)  # Shape: (1024,)

# load data
def load_dataset(data_dir):
    X, y = [], []
    for label in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for file in os.listdir(class_dir):
            if not file.endswith(".wav"):
                continue
            try:
                file_path = os.path.join(class_dir, file)
                features = extract_yamnet_embedding(file_path)
                X.append(features)
                y.append(label)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return np.array(X), np.array(y)

In [5]:
# DATA PREPARATION
# Load dataset
X, y = load_dataset(DATA_DIR)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_onehot = to_categorical(y_encoded)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [33]:
## MODEL DEFINITION
# Define Classifier Model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(EMBEDDING_SIZE,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define EarlyStopping callback
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [34]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                    epochs=50, batch_size=32, verbose=1, callbacks=[early_stop])

Epoch 1/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5884 - loss: 0.8533 - val_accuracy: 0.7000 - val_loss: 0.5678
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6955 - loss: 0.5871 - val_accuracy: 0.7417 - val_loss: 0.5160
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7807 - loss: 0.5008 - val_accuracy: 0.7333 - val_loss: 0.4744
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7622 - loss: 0.4819 - val_accuracy: 0.7750 - val_loss: 0.4353
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8111 - loss: 0.3852 - val_accuracy: 0.8500 - val_loss: 0.3706
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8300 - loss: 0.3853 - val_accuracy: 0.7917 - val_loss: 0.4230
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━━

In [6]:
# EVALUATION
# load the model
model = tf.keras.models.load_model('model/yamnet_faucet_model.h5')
# load label encoder
with open('model/yamnet_label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Classification Report
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

print(classification_report(y_true_labels, y_pred_labels, target_names=label_encoder.classes_))

# Confusion Matrix
cm = confusion_matrix(y_true_labels, y_pred_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')

# Save the plot instead of showing it
os.makedirs('model', exist_ok=True)
plt.savefig('model/confusion_matrix.png', bbox_inches='tight')
plt.close()  # Close the figure to free up memory

# Normalized Confusion Matrix
cm_normalized = cm.astype('float') / cm.sum(axis=0)[np.newaxis, :]
plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Normalized Confusion Matrix')
plt.savefig('model/normalized_confusion_matrix.png', bbox_inches='tight')
plt.close()  # Close the figure to free up memory




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
              precision    recall  f1-score   support

      faucet       0.87      0.77      0.82        52
       noise       0.84      0.91      0.88        69

    accuracy                           0.85       121
   macro avg       0.85      0.84      0.85       121
weighted avg       0.85      0.85      0.85       121



In [36]:
# save the model and label encoder
model.save("model/yamnet_faucet_model.h5")
with open("model/yamnet_label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)


