In [1]:
import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from IPython.display import Audio
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')


2025-04-29 21:09:46.867811: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745960987.092778      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745960987.153046      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Paths
DATA_PATH = '/kaggle/input/urbansound8k/'
METADATA_PATH = '/kaggle/input/urbansound8k/UrbanSound8K.csv'

# Metadata loading
metadata = pd.read_csv(METADATA_PATH)


In [3]:


def find_file(file_name, base_dir='/kaggle/input/urbansound8k/'):
    for folder in range(1, 11):
        file_path = os.path.join(base_dir, f"fold{folder}", file_name)
        if os.path.exists(file_path):
            return file_path
    return None



In [4]:


samples_to_consider = 22050 * 4



In [5]:
def extract_features(file_path, fixed_length=168):
    """Extract Mel Spectrogram with a consistent shape (168, 168)."""
    try:
        audio, sr = librosa.load(file_path, sr=22050, mono=True)
        n_fft = min(2048, len(audio))  # Ensure valid n_fft

        # **Extract Log-Mel Spectrogram**
        mel_spectrogram = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=n_fft, n_mels=168, fmax=8000
        )
        mel_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

        # **Fix shape to (168, 168)**
        if mel_db.shape[1] > fixed_length:
            mel_db = mel_db[:, :fixed_length]  # Trim
        else:
            mel_db = np.pad(mel_db, ((0, 0), (0, fixed_length - mel_db.shape[1])), mode='constant')  # Pad

        return mel_db  # Return spectrogram only

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return None  # Handle errors gracefully

In [6]:
X, y = [], []
for index, row in tqdm(metadata.iterrows(), total=len(metadata)):
    file_path = find_file(row['slice_file_name'], DATA_PATH)
    if file_path:
        features = extract_features(file_path)
        if features is not None:
            X.append(features)
            y.append(row['classID'])

X = np.array(X, dtype=np.float32)
X = np.expand_dims(X, axis=-1)
y = to_categorical(y, num_classes=len(np.unique(y)))
X = (X - np.mean(X)) / np.std(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


100%|██████████| 8732/8732 [06:48<00:00, 21.37it/s]


In [7]:
# AlexNet architecture
inputs = layers.Input(shape=(168, 168, 1))
x = layers.Conv2D(96, (11, 11), strides=4, activation='relu')(inputs)
x = layers.MaxPooling2D((3, 3), strides=2)(x)
x = layers.Conv2D(256, (5, 5), padding='same', activation='relu')(x)
x = layers.MaxPooling2D((3, 3), strides=2)(x)
x = layers.Conv2D(384, (3, 3), padding='same', activation='relu')(x)
x = layers.Conv2D(384, (3, 3), padding='same', activation='relu')(x)
x = layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x)
x = layers.MaxPooling2D((3, 3), strides=2)(x)
x = layers.Flatten()(x)
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(y.shape[1], activation='softmax')(x)

model = models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

2025-04-29 21:19:38.300220: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [8]:
# Predict
prediction = model.predict(X)
predicted_class = np.argmax(prediction)

[1m273/273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 292ms/step


In [9]:
predicted_class

62716

In [None]:


# ✅ Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)

# ✅ Train Model
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr])



Epoch 1/30
[1m175/219[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m1:01[0m 1s/step - accuracy: 0.2093 - loss: 2.2527

In [None]:


# Evaluate on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc*100:.2f}%")
print(f"Test Loss: {test_loss*100:.2f}%")

# Access training and validation accuracy from the history object
train_acc = history.history['accuracy'][-1]  # Last epoch training accuracy
val_acc = history.history['val_accuracy'][-1]  # Last epoch validation accuracy

print(f"Train Accuracy: {train_acc*100:.2f}%")
print(f"Validation Accuracy: {val_acc*100:.2f}%")



In [None]:
# Plot Training History
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title("Accuracy Over Epochs")

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title("Loss Over Epochs")

plt.show()

In [None]:


from sklearn.metrics import confusion_matrix

# Predict class probabilities
y_pred_probs = model.predict(X_test)  # Ensure X_test has the correct shape

# Convert probabilities to class indices
y_pred_classes = np.argmax(y_pred_probs, axis=1)  # ✅ Convert to predicted class labels
y_test_classes = np.argmax(y_test, axis=1)        # ✅ Convert y_test from one-hot to labels

# Compute confusion matrix
conf_matrix = confusion_matrix(y_test_classes, y_pred_classes)

print("Confusion Matrix:")
print(conf_matrix)



In [None]:


import seaborn as sns
import matplotlib.pyplot as plt

# Define class names if available
class_names = ["Class 0", "Class 1", "Class 2", "..."]  # Update with actual class names

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix Heatmap")
plt.show()



In [None]:
model.save(r"/kaggle/working/model/Urban_Sound8k_Alex Net.keras")

In [None]:
model.save(r"/kaggle/working/model/Urban_Sound8k_Alex Net.h5")

In [None]:
from sklearn.metrics import classification_report
import numpy as np
# Make predictions
y_pred = model.predict(X_test)

# Convert predictions to label format
if y_pred.ndim > 1 and y_pred.shape[1] > 1:
    y_pred_labels = np.argmax(y_pred, axis=1)
else:
    y_pred_labels = (y_pred > 0.5).astype(int)


# Convert one-hot encoded y_test to label format
if y_test.ndim > 1 and y_test.shape[1] > 1:
    y_test_labels = np.argmax(y_test, axis=1)
else:
    y_test_labels = y_test

# Convert predictions to label format
if y_pred.ndim > 1 and y_pred.shape[1] > 1:
    y_pred_labels = np.argmax(y_pred, axis=1)
else:
    y_pred_labels = y_pred

# Print classification report
report = classification_report(y_test_labels, y_pred_labels)
print("Classification Report:\n", report)

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt

# Binarize the labels for multi-class
lb = LabelBinarizer()
y_test_bin = lb.fit_transform(y_test)

# Get the predicted probabilities for each class
y_pred_prob = model.predict(X_test)

# Plot ROC curve for each class
plt.figure(figsize=(10, 8))

for i in range(y_test_bin.shape[1]):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_pred_prob[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'Class {i} (AUC = {roc_auc:.2f})')

# Plot the diagonal (random classifier)
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')

plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()