In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D

# Preprocessing the Data Set

In [16]:
def load_data(data_path, metadata_path, target_sr=22050, n_mfcc=40):
    """Extract one mean-MFCC feature vector per audio clip listed in a metadata CSV.

    Each metadata row is resolved to ``<data_path>/fold<fold>/<slice_file_name>``,
    loaded with librosa at ``target_sr`` and summarised as the average over
    time of its ``n_mfcc`` MFCC coefficients.

    Parameters
    ----------
    data_path : str
        Directory that contains the ``fold<N>`` sub-directories.
    metadata_path : str
        CSV file with (at least) the columns ``fold``, ``slice_file_name``
        and ``class``.
    target_sr : int, optional
        Sample rate passed to ``librosa.load`` and ``librosa.feature.mfcc``
        (default 22050).
    n_mfcc : int, optional
        Number of MFCC coefficients computed per clip (default 40).

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` with one feature row and one class label for
        every clip that was processed successfully.

    Warns
    -----
    UserWarning
        Once for every clip that could not be loaded or processed; such clips
        are skipped rather than aborting the whole run.
    """
    features = []
    labels = []

    metadata = pd.read_csv(metadata_path)

    for _, row in metadata.iterrows():
        file_path = os.path.join(data_path, f"fold{row['fold']}", f"{row['slice_file_name']}")

        try:
            # Load the audio file and resample it
            audio, _sample_rate = librosa.load(file_path, sr=target_sr)

            # Extract MFCC features and average them over the time axis
            mfccs = librosa.feature.mfcc(y=audio, sr=target_sr, n_mfcc=n_mfcc)
            mfccs_scaled = np.mean(mfccs.T, axis=0)
        except Exception as exc:
            # Stay best-effort (one unreadable clip must not abort the run),
            # but report the skip instead of hiding it. Catching Exception
            # rather than using a bare except lets KeyboardInterrupt through.
            warnings.warn(f"Skipping {file_path}: {exc!r}", stacklevel=2)
            continue

        # Append features and labels only for clips that were processed
        features.append(mfccs_scaled)
        labels.append(row['class'])

    print('All features extracted.')

    return np.array(features), np.array(labels)

# Import the UrbanSound8K Data Set

In [8]:
# Print the kernel's current working directory.
print(os.getcwd())

/content


In [17]:
# Dataset root and the metadata CSV that lists the clips to load.
data_path = "/content/UrbanSound8K"
metadata_path = "/content/UrbanSound8K/UrbanSound8KFold8.csv"

# One feature vector and one string label per successfully processed clip.
features, labels = load_data(data_path, metadata_path)

# Encode labels: strings -> integer ids -> one-hot rows.
le = LabelEncoder()
le.fit(labels)
labels_encoded = le.transform(labels)
labels_onehot = to_categorical(labels_encoded)

  audio, sample_rate = librosa.load(file_path, sr = target_sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  audio, sample_rate = librosa.load(file_path, sr = target_sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [19]:
# Sanity check: the number of feature rows and the number of labels, one per line.
print(len(features), len(labels), sep="\n")

774
774


In [21]:
# Show the distinct class names present in the loaded labels.
print(np.unique(labels))

['air_conditioner' 'car_horn' 'children_playing' 'dog_bark' 'drilling'
 'engine_idling' 'gun_shot' 'jackhammer' 'siren' 'street_music']


In [25]:
# Hold out 20% of the clips for testing, stratified on the one-hot labels,
# with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels_onehot,
    test_size=0.2,
    random_state=42,
    stratify=labels_onehot,
)

In [26]:
# Per-sample input shape for the first Conv1D layer: (features per sample, 1).
input_shape = (X_train.shape[1], 1)

In [30]:
# Build 1D CNN model: two Conv1D/MaxPooling1D/Dropout stages followed by a
# dense classifier head.
model = Sequential([
    Conv1D(64, 3, padding='same', activation='relu', input_shape=input_shape),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    Conv1D(128, 3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # One softmax unit per class known to the label encoder.
    Dense(len(le.classes_), activation='softmax'),
])

In [31]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot rows), accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [32]:
# Add a trailing axis of length 1 so each sample matches the model's
# (features, 1) input shape.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [33]:
# Confirm the reshaped train/test array shapes, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(619, 40, 1)
(155, 40, 1)


In [34]:
def make_predictions(model, le, file_path, target_sr=22050, n_mfcc=40):
    """Predict the class label of a single audio file.

    The clip is loaded with librosa at ``target_sr``, reduced to the
    time-average of its ``n_mfcc`` MFCC coefficients, and passed to the model
    as a batch of one sample with shape ``(1, n_mfcc, 1)``.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(array)`` that returns per-class scores
        with the class axis last.
    le : object
        Label encoder exposing ``inverse_transform``; maps the predicted
        class index back to its label.
    file_path : str
        Path of the audio file to classify.
    target_sr : int, optional
        Sample rate passed to librosa (default 22050).
    n_mfcc : int, optional
        Number of MFCC coefficients (default 40). It has to match the number
        of features the model was built for.

    Returns
    -------
    The decoded label of the highest-scoring class.
    """
    # The sample rate returned by librosa.load is not needed.
    audio, _ = librosa.load(file_path, sr=target_sr)
    mfccs = librosa.feature.mfcc(y=audio, sr=target_sr, n_mfcc=n_mfcc)
    mfccs_scaled = np.mean(mfccs.T, axis=0)

    # Batch of one sample with a trailing axis of length 1.
    features = mfccs_scaled.reshape(1, mfccs_scaled.shape[0], 1)

    predicted_vector = model.predict(features)
    predicted_class_index = np.argmax(predicted_vector, axis=-1)
    return le.inverse_transform(predicted_class_index)[0]

In [37]:
# Snapshot the weights of the model before it is trained.
initial_weights = model.get_weights()

# (file path, true label) pairs used to compare predictions before and after
# training.
# NOTE(review): these clips sit under the same fold8 directory that the
# training data is read from, so they may also be in the training split;
# confirm before treating the comparison as a held-out evaluation.
test_files = [
    ("/content/UrbanSound8K/fold8/113202-5-0-2.wav", "engine_idling"),
    ("/content/UrbanSound8K/fold8/155313-3-0-0.wav", "dog_bark"),
    ("/content/UrbanSound8K/fold8/162433-6-1-0.wav", "gun_shot"),
    ("/content/UrbanSound8K/fold8/36429-2-0-7.wav", "children_playing"),
    ("/content/UrbanSound8K/fold8/39967-9-0-56.wav", "street_music"),
]

# Predictions made before training, keyed by file path.
old_predictions = {
    path: make_predictions(model, le, path)
    for path, _ in test_files
}



In [40]:
# Training hyperparameters.
batch_size = 32
epochs = 10

# Train the model; (X_test, y_test) is passed as the validation data.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e6982a767a0>

In [44]:
# Make predictions after training, keyed by file path.
new_predictions = {
    path: make_predictions(model, le, path)
    for path, _ in test_files
}



In [55]:
# Empty results table with one column per item to compare.
df = pd.DataFrame(columns = ['Filename', 'True Label', 'Old Prediction', 'New Prediction'])

In [56]:
# Split the (path, label) pairs into two parallel lists.
file_paths_list = [path for path, _ in test_files]
true_labels_list = [label for _, label in test_files]

# Fill the corresponding table columns.
df['Filename'] = file_paths_list
df['True Label'] = true_labels_list

In [74]:
# Write each file's pre-training prediction into its row of the table.
for row_count, file in enumerate(df['Filename']):
  df.at[row_count, 'Old Prediction'] = old_predictions[file]

In [75]:
# Write each file's post-training prediction into its row of the table.
for row_count, file in enumerate(df['Filename']):
  df.at[row_count, 'New Prediction'] = new_predictions[file]

In [76]:
# Render the comparison table of true labels and old/new predictions.
display(df)

Unnamed: 0,Filename,True Label,Old Prediction,New Prediction
0,/content/UrbanSound8K/fold8/113202-5-0-2.wav,engine_idling,drilling,engine_idling
1,/content/UrbanSound8K/fold8/155313-3-0-0.wav,dog_bark,jackhammer,dog_bark
2,/content/UrbanSound8K/fold8/162433-6-1-0.wav,gun_shot,drilling,gun_shot
3,/content/UrbanSound8K/fold8/36429-2-0-7.wav,children_playing,jackhammer,children_playing
4,/content/UrbanSound8K/fold8/39967-9-0-56.wav,street_music,drilling,street_music
