In [63]:
import os
import pickle
import librosa
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping

In [64]:
# configuration
DATA_DIR = "data_split"
SAMPLE_RATE = 16000
EMBEDDING_SIZE = 1024  # YAMNet output size

In [65]:
# load YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

In [66]:
# SUPPORT FUNCTIONS
# feature extraction using YAMNet
def extract_yamnet_embedding(file_path):
    waveform, sr = librosa.load(file_path, sr=SAMPLE_RATE)
    waveform = waveform.astype(np.float32)
    scores, embeddings, spectrogram = yamnet_model(waveform)
    return np.mean(embeddings.numpy(), axis=0)  # Shape: (1024,)

# load data
def load_dataset(data_dir):
    X, y = [], []
    for label in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for file in os.listdir(class_dir):
            if not file.endswith(".wav"):
                continue
            try:
                file_path = os.path.join(class_dir, file)
                features = extract_yamnet_embedding(file_path)
                X.append(features)
                y.append(label)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return np.array(X), np.array(y)

In [67]:
# DATA PREPARATION
# Load dataset
X, y = load_dataset(DATA_DIR)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_onehot = to_categorical(y_encoded)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [68]:
## MODEL DEFINITION
# Define Classifier Model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(EMBEDDING_SIZE,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define EarlyStopping callback
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [69]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                    epochs=50, batch_size=32, verbose=1, callbacks=[early_stop])

Epoch 1/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7004 - loss: 0.5926 - val_accuracy: 0.8500 - val_loss: 0.4311
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8006 - loss: 0.4749 - val_accuracy: 0.8500 - val_loss: 0.3390
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8503 - loss: 0.3584 - val_accuracy: 0.8500 - val_loss: 0.3158
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9063 - loss: 0.2692 - val_accuracy: 0.8667 - val_loss: 0.3114
Epoch 5/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9166 - loss: 0.2251 - val_accuracy: 0.8833 - val_loss: 0.2729
Epoch 6/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9458 - loss: 0.2279 - val_accuracy: 0.8500 - val_loss: 0.3181
Epoch 7/50
[1m15/15[0m [32m━━━━━━━━━━

In [70]:
# EVALUATION
# Classification Report
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

print(classification_report(y_true_labels, y_pred_labels, target_names=label_encoder.classes_))

# Confusion Matrix
cm = confusion_matrix(y_true_labels, y_pred_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')

# Save the plot instead of showing it
os.makedirs('model', exist_ok=True)
plt.savefig('model/confusion_matrix.png', bbox_inches='tight')
plt.close()  # Close the figure to free up memory

# Normalized Confusion Matrix
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Normalized Confusion Matrix')
plt.savefig('model/normalized_confusion_matrix.png', bbox_inches='tight')
plt.close()  # Close the figure to free up memory


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
              precision    recall  f1-score   support

      faucet       0.93      0.84      0.88        31
       noise       0.84      0.93      0.89        29

    accuracy                           0.88        60
   macro avg       0.89      0.88      0.88        60
weighted avg       0.89      0.88      0.88        60



In [71]:
# save the model and label encoder
model.save("model/yamnet_faucet_model.h5")
with open("model/yamnet_label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)


