In [1]:
import os

import arrow
import librosa
import numpy as np
import pandas as pd
from keras.layers import Activation
from keras.layers import Conv1D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import MaxPooling1D
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


2024-04-06 16:30:08.826675: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-06 16:30:08.826852: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-06 16:30:09.020925: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
def load_data(data_path, metadata_path, target_sr=22050, n_mfcc=40):
    """Build one time-averaged MFCC feature vector per clip listed in a metadata CSV.

    Parameters
    ----------
    data_path : str
        Directory containing the ``fold<N>`` sub-directories with the audio files.
    metadata_path : str
        CSV file providing (at least) the columns ``fold``, ``slice_file_name``
        and ``class``.
    target_sr : int, optional
        Sampling rate handed to ``librosa.load`` and ``librosa.feature.mfcc``.
    n_mfcc : int, optional
        Number of MFCC coefficients requested per clip.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``. ``features`` has shape ``(n_clips, n_mfcc)`` and
        ``labels`` holds the matching ``class`` values, both in CSV row order.
    """
    metadata = pd.read_csv(metadata_path)

    features = []
    labels = []
    # Only three columns are needed, so iterate over them directly instead of
    # materialising a Series per row with iterrows().
    columns = zip(metadata['fold'], metadata['slice_file_name'], metadata['class'])
    for fold, file_name, label in columns:
        file_path = os.path.join(data_path, 'fold{}'.format(fold), '{}'.format(file_name))
        audio, _ = librosa.load(file_path, sr=target_sr)
        mfccs = librosa.feature.mfcc(y=audio, sr=target_sr, n_mfcc=n_mfcc)
        # Collapse the frame axis: one mean value per coefficient.
        features.append(np.mean(mfccs.T, axis=0))
        labels.append(label)

    # The reshape is a no-op for non-empty input and keeps the result 2-D
    # (0, n_mfcc) when the metadata file lists no clips.
    return np.array(features).reshape(-1, n_mfcc), np.array(labels)

In [3]:
data_path = '/kaggle/input/urbansound8k'
metadata_path = '/kaggle/input/urbansound8k/UrbanSound8K.csv'
features, labels = load_data(data_path=data_path, metadata_path=metadata_path)

# Turn the string class names into integer ids, then into one-hot rows.
# `le` is kept so that predictions can later be mapped back to class names.
le = LabelEncoder()
le.fit(labels)
labels_encoded = le.transform(labels)
labels_onehot = to_categorical(labels_encoded)




In [4]:
# Hold out 20% of the samples as a test set. The split is stratified on the
# one-hot labels and uses a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels_onehot,
    test_size=0.2,
    random_state=42,
    stratify=labels_onehot,
)


In [5]:
# Every sample is a vector of X_train.shape[1] averaged MFCC values; Conv1D
# consumes it as a sequence of that length with a single channel.
input_shape = (X_train.shape[1], 1)

model = Sequential([
    # Declare the input with an explicit Input layer: recent Keras versions warn
    # when `input_shape=` is passed to the first layer of a Sequential model.
    Input(shape=input_shape),
    Conv1D(64, 3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    Conv1D(128, 3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # One softmax output per class known to the label encoder.
    Dense(len(le.classes_), activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  super().__init__(


In [6]:
# Append a trailing axis of size 1 so the feature matrices match the
# (length, 1) input shape the model was built with. The stray argument-less
# print() that only emitted a blank line has been dropped.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)




In [7]:
def make_predictions(model, le, file_path):
    """Classify a single audio file and return the decoded label.

    The file is loaded at 22050 Hz, reduced to 40 MFCC values averaged over
    frames, and passed to ``model.predict`` as a batch of one sample.

    Parameters
    ----------
    model : object exposing ``predict``
        Model that receives an array of shape ``(1, 40, 1)``.
    le : object exposing ``inverse_transform``
        Encoder used to map the winning index back to a label.
    file_path : str
        Path of the audio file to classify.

    Returns
    -------
    The first element of ``le.inverse_transform`` applied to the arg-max index.
    """
    waveform, _ = librosa.load(file_path, sr=22050)
    coefficients = librosa.feature.mfcc(y=waveform, sr=22050, n_mfcc=40)
    mean_coefficients = np.mean(coefficients.T, axis=0)

    # Batch of one sample with a single trailing channel axis.
    batch = mean_coefficients.reshape(1, mean_coefficients.shape[0], 1)
    scores = model.predict(batch)

    best_index = np.argmax(scores, axis=-1)
    decoded = le.inverse_transform(best_index)
    return decoded[0]


In [8]:
# Snapshot of the model weights as they are before any call to fit().
initial_weights = model.get_weights()

# Sample clips paired with a descriptive label.
# NOTE(review): the label strings are carried along but never compared with
# the predictions in this cell.
test_files = [
    ('/kaggle/input/urbansound8k/fold1/101415-3-0-2.wav', 'Dog bark'),
    ('/kaggle/input/urbansound8k/fold1/101415-3-0-3.wav', 'Dog bark'),
    ('/kaggle/input/urbansound8k/fold1/102305-6-0-0.wav', 'Gun shots'),
    ('/kaggle/input/urbansound8k/fold1/103074-7-0-2.wav', 'Jack hammer'),
    ('/kaggle/input/urbansound8k/fold1/103074-7-4-3.wav', 'Jack hammer')
]

# Baseline: what the untrained model answers for each clip, keyed by file path.
old_predictions = {}
for file_path, true_label in test_files:
    old_predictions[file_path] = make_predictions(model, le, file_path)


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 265ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step


In [9]:
# Training hyper-parameters, named so they are easy to find and tune.
BATCH_SIZE = 256
EPOCHS = 100

# Time the whole fit; subtracting the two timestamps prints the elapsed duration.
time_start = arrow.now()
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    # The held-out split is passed as validation data, so `history` records
    # per-epoch validation metrics alongside the training ones.
    validation_data=(X_test, y_test),
    verbose=0,
)
print('{} done training'.format(arrow.now() - time_start))

0:02:31.622663 done training


In [10]:
# Make predictions AFTER training: classify the same sample clips again so the
# results can be compared with `old_predictions`.
# `test_files` is the list already defined for the pre-training predictions;
# it is reused here rather than repeated.
new_predictions = {}
for file_path, true_label in test_files:
    new_predictions[file_path] = make_predictions(model, le, file_path)


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step


In [11]:
# Predictions recorded before the model was trained (file path -> predicted class name).
print(old_predictions)

{'/kaggle/input/urbansound8k/fold1/101415-3-0-2.wav': 'children_playing', '/kaggle/input/urbansound8k/fold1/101415-3-0-3.wav': 'children_playing', '/kaggle/input/urbansound8k/fold1/102305-6-0-0.wav': 'children_playing', '/kaggle/input/urbansound8k/fold1/103074-7-0-2.wav': 'children_playing', '/kaggle/input/urbansound8k/fold1/103074-7-4-3.wav': 'children_playing'}


In [12]:
# Predictions recorded after the model was trained (file path -> predicted class name).
print(new_predictions)

{'/kaggle/input/urbansound8k/fold1/101415-3-0-2.wav': 'dog_bark', '/kaggle/input/urbansound8k/fold1/101415-3-0-3.wav': 'dog_bark', '/kaggle/input/urbansound8k/fold1/102305-6-0-0.wav': 'gun_shot', '/kaggle/input/urbansound8k/fold1/103074-7-0-2.wav': 'jackhammer', '/kaggle/input/urbansound8k/fold1/103074-7-4-3.wav': 'jackhammer'}
