# Import required libraries

In [1]:
import os
import librosa
from tqdm import tqdm
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D
from keras.models import Sequential
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import BatchNormalization

# Feature extraction from audio data

In [18]:
num_folders = 10
all_audios_features_array = []
class_IDs = []
base_folder_path = r"C:\Users\icham\OneDrive\Desktop\dataset\urbansoundclasification\UrbanSound8K\UrbanSound8K\audio"
n_frames = 100  # Choose a fixed number of frames

for i in tqdm(range(1, num_folders + 1)):
    audio_folder_path = base_folder_path + "\\fold" + str(i)
    audio_files = [f for f in os.listdir(audio_folder_path) if os.path.isfile(os.path.join(audio_folder_path, f))]
    folder_audio_feature_arr = []  # Separate array for each folder
    class_id = []

    print(f"Processing folder {i}...")

    for filename in audio_files:
        file_path = os.path.join(audio_folder_path, filename)
        y, sr = librosa.load(file_path)

        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # Pad or truncate along the time axis (columns)
        mfccs = np.pad(mfccs, ((0, 0), (0, max(0, n_frames - mfccs.shape[1]))), mode='constant')

        folder_audio_feature_arr.append(mfccs)
        class_id.append(int(filename.split('-')[1]))  # Assuming the class ID is encoded in the file name

    # Pad or truncate along the time axis (columns) for all files in the folder
    max_frames = max(arr.shape[1] for arr in folder_audio_feature_arr)
    folder_audio_feature_arr = np.array([np.pad(arr, ((0, 0), (0, max_frames - arr.shape[1])), mode='constant') for arr in folder_audio_feature_arr])

    class_id = np.array(class_id)
    class_IDs.append(class_id)
    
    all_audios_features_array.append(folder_audio_feature_arr)
    print(f"Finished processing folder {i}")

# Convert the outer list to a list of NumPy arrays
all_audios_features_array = np.array(all_audios_features_array, dtype=object)

# Print information about the generated arrays
print("Number of folders processed:", len(all_audios_features_array))
print("Shape of the first folder's feature array:", all_audios_features_array[0].shape)
print("Number of classes in each folder:", [len(np.unique(class_ids)) for class_ids in class_IDs])


  0%|                                                                                                                       | 0/10 [00:00<?, ?it/s]

Processing folder 1...


 10%|███████████                                                                                                    | 1/10 [00:13<01:58, 13.18s/it]

Finished processing folder 1
Processing folder 2...


 20%|██████████████████████▏                                                                                        | 2/10 [00:25<01:42, 12.84s/it]

Finished processing folder 2
Processing folder 3...


 30%|█████████████████████████████████▎                                                                             | 3/10 [00:38<01:30, 12.94s/it]

Finished processing folder 3
Processing folder 4...


 40%|████████████████████████████████████████████▍                                                                  | 4/10 [00:53<01:22, 13.68s/it]

Finished processing folder 4
Processing folder 5...


 50%|███████████████████████████████████████████████████████▌                                                       | 5/10 [01:07<01:08, 13.70s/it]

Finished processing folder 5
Processing folder 6...


 60%|██████████████████████████████████████████████████████████████████▌                                            | 6/10 [01:18<00:51, 12.84s/it]

Finished processing folder 6
Processing folder 7...


 70%|█████████████████████████████████████████████████████████████████████████████▋                                 | 7/10 [01:30<00:37, 12.64s/it]

Finished processing folder 7
Processing folder 8...


 80%|████████████████████████████████████████████████████████████████████████████████████████▊                      | 8/10 [01:42<00:24, 12.39s/it]

Finished processing folder 8
Processing folder 9...


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████▉           | 9/10 [01:54<00:12, 12.21s/it]

Finished processing folder 9
Processing folder 10...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:07<00:00, 12.72s/it]

Finished processing folder 10
Number of folders processed: 10
Shape of the first folder's feature array: (873, 13, 173)
Number of classes in each folder: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]





# one-hot encode

In [17]:
from tensorflow.keras.utils import to_categorical
class_IDs_one_hot = []
for folder in class_IDs:
    one_hot_encoded = to_categorical(folder,num_classes = 10)
    class_IDs_one_hot.append(one_hot_encoded)

In [18]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Build model

In [19]:
# Create the Sequential model
model = Sequential()

# Convolutional layers
model.add(Conv2D(filters=32, kernel_size=5, padding="same", activation='relu'))
model.add(MaxPooling2D(strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Conv2D(filters=64, kernel_size=5, padding="same", activation='relu'))
model.add(MaxPooling2D(strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Conv2D(filters=128, kernel_size=3, padding="same", activation='relu'))
model.add(MaxPooling2D(strides=(2, 2), padding="same"))
model.add(BatchNormalization())

# Flatten layer
model.add(Flatten())

# Dense layers
model.add(Dense(units=2048, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(rate=0.5))

model.add(Dense(units=10))

# compile it

In [20]:
model.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

#  Fit model

In [23]:
from keras.callbacks import EarlyStopping

def cnn(x_train, y_train, x_test, y_test):
    for i in range(len(x_train)): 
        original_shape_train = x_train[i].shape
        original_shape_test = x_test[0].shape
        x_train_data = np.reshape(x_train[i], (*original_shape_train, 1))
        y_train_data = y_train[i]
        x_test_data = np.reshape(x_test[0], (*original_shape_test, 1))
        y_test_data = y_test[0]
        
        # Define the EarlyStopping callback
        early_stopping = EarlyStopping(monitor='val_loss', patience=3)  # Stop if validation loss does not improve for 3 epochs
        
        # Fit the model with EarlyStopping callback
        model.fit(x_train_data, y_train_data, epochs=10, batch_size=30, 
                  validation_data=(x_test_data, y_test_data), callbacks=[early_stopping])

Train on 873 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Train on 888 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Train on 925 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Train on 990 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Train on 936 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Train on 823 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Train on 838 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Train on 806 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Train on 816 samples, validate on 837 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


# Creating train & test

In [None]:
# in each iteration a new folder is assigned to testing and others are used for training

for i in range(num_folders):
    x_train = []
    y_train = []
    x_test = []
    y_test = []
    for j in range(num_folders):
           if (i == j):
                x_test.append(all_audios_features_array[i])
                y_test.append(class_IDs_one_hot[i])
           if  (i!=j):
             x_train.append(all_audios_features_array[j])
             y_train.append(class_IDs_one_hot[j])   
    cnn(x_train,y_train,x_test,y_test)         


# Evaluating model

In [24]:
def evaluate_model(model, x_test, y_test):
    """
    Evaluate the trained model on the test data.

    Args:
        model: The trained model.
        x_test: List of test features.
        y_test: List of true labels for the test data.

    Returns:
        float: Test loss.
        float: Test accuracy.
    """
    test_losses = []
    test_accuracies = []
    
    for i in range(len(x_test)):
        # Reshape the test data if necessary
        x_test_data = np.reshape(x_test[i], (*x_test[i].shape, 1))
        
        # Evaluate the model on the test data
        test_loss, test_accuracy = model.evaluate(x_test_data, y_test[i], verbose=0)
        
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
    
    avg_test_loss = np.mean(test_losses)
    avg_test_accuracy = np.mean(test_accuracies)
    
    return avg_test_loss, avg_test_accuracy


In [25]:
test_loss, test_accuracy = evaluate_model(model, x_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 1.5442957902324954
Test Accuracy: 0.89988035
