In [2]:
# Standard library
import glob
import os

# Scientific stack, audio and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pylab
import seaborn as sns
import librosa
import librosa.display
import torchaudio
from skimage.transform import resize
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder

# Deep learning. The `keras` imports stay after the `tensorflow.keras` ones so
# that names imported from both (ImageDataGenerator) keep their original binding.
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras
from keras import optimizers
from keras.applications import VGG19, ResNet152, VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

%matplotlib inline

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [3]:
### Define the path to  audio dataset
path = "/kaggle/input/environmental-sound-classification-50/audio/audio/44100/"

# Lists to store audio data and labels
audio_data = []
labels = []

# Iterate through audio files and extract features
for (root, dirs, files) in os.walk(path, topdown=True):
    for file in files:
        if file.endswith(".wav"):
            src = os.path.join(root, file)
            target = (file.split("-")[-1])[:-4]  # Extract target label from path
            audio, sr = librosa.load(src, sr=44100)
            mel_feat = librosa.feature.melspectrogram(y=audio, sr=sr)
            power = librosa.power_to_db(mel_feat)
            power_resized = resize(power, (224, 224))  # Resize to a consistent shape
            audio_data.append(np.stack([power_resized] * 3, axis=-1))  # Convert to 3 channels
            labels.append(target)

# Convert lists to numpy arrays
audio_data_array = np.array(audio_data)
labels_array = np.array(labels)

In [5]:
# Convert labels to categorical format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels_array)
num_classes = len(label_encoder.classes_)
one_hot_labels = to_categorical(encoded_labels, num_classes=num_classes)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(audio_data_array, one_hot_labels, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42)


In [6]:
vgg19 = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
vgg19_model = Sequential()
vgg19_model.add(vgg19)
vgg19_model.add(layers.Dense(512, activation='relu'))
vgg19_model.add(layers.Dropout(0.5))
vgg19_model.add(layers.Dense(256, activation='relu'))
vgg19_model.add(layers.Flatten())
vgg19_model.add(layers.Dense(224, activation='relu'))
vgg19_model.add(layers.Dropout(0.5))
vgg19_model.add(layers.Dense(num_classes, activation='softmax'))

for layer in vgg19.layers:  
    layer.trainable = False

vgg19_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
vgg19_model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [None]:
loss, accuracy = vgg19_model.evaluate(X_test, y_test)


In [7]:
resnet152 = ResNet152(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
resnet152_model = Sequential()
resnet152_model.add(resnet152)
resnet152_model.add(layers.Dense(512, activation='relu'))
resnet152_model.add(layers.Dropout(0.5))
resnet152_model.add(layers.Dense(256, activation='relu'))
resnet152_model.add(layers.Flatten())
resnet152_model.add(layers.Dense(224, activation='relu'))
resnet152_model.add(layers.Dropout(0.5))
resnet152_model.add(layers.Dense(num_classes, activation='softmax'))

for layer in resnet152.layers:  
    layer.trainable = False

resnet152_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
resnet152_model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))


In [None]:
loss, accuracy = resnet152_model.evaluate(X_test, y_test)


In [None]:

# ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

model_CNN = Sequential()

# Layer 1: 2D convolutional layer
model_CNN.add(Conv2D(24, kernel_size=(6, 6), strides=(1, 1), input_shape=(224, 224, 3)))
model_CNN.add(Activation('relu'))

# Layer 2: Batch normalization layer
model_CNN.add(BatchNormalization())

# Layer 3: 2D convolutional layer with LeakyReLU
model_CNN.add(Conv2D(24, kernel_size=(6, 6), strides=(1, 1)))
model_CNN.add(LeakyReLU(alpha=0.1))

# Layer 4: Batch normalization layer
model_CNN.add(BatchNormalization())

# Layer 5: 2D convolutional layer with LeakyReLU
model_CNN.add(Conv2D(48, kernel_size=(5, 5), strides=(2, 2)))
model_CNN.add(LeakyReLU(alpha=0.1))

# Layer 6: Batch normalization layer
model_CNN.add(BatchNormalization())

# Layer 7: 2D convolutional layer with LeakyReLU
model_CNN.add(Conv2D(48, kernel_size=(5, 5), strides=(2, 2)))
model_CNN.add(LeakyReLU(alpha=0.1))

# Layer 8: Batch normalization layer
model_CNN.add(BatchNormalization())

# Layer 9: 2D convolutional layer with LeakyReLU
model_CNN.add(Conv2D(64, kernel_size=(4, 4), strides=(1, 1)))
model_CNN.add(LeakyReLU(alpha=0.1))

# Layer 10: Batch normalization layer
model_CNN.add(BatchNormalization())

# Layer 11: 2D convolutional layer with LeakyReLU
model_CNN.add(Conv2D(64, kernel_size=(4, 4), strides=(1, 1)))
model_CNN.add(LeakyReLU(alpha=0.1))

# Layer 12: Batch normalization layer
model_CNN.add(BatchNormalization())

model_CNN.add(Flatten())
model_CNN.add(Dense(200))
model_CNN.add(Dropout(0.25))
model_CNN.add(Dense(50, activation='softmax'))

# Compile the model
model_CNN.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
batch_size = 32
epochs = 150

train_datagen = datagen.flow(X_train, y_train, batch_size=batch_size)

history = model_CNN.fit(train_datagen, 
                        steps_per_epoch=len(X_train) // batch_size, 
                        epochs=epochs, 
                        validation_data=(X_val, y_val))


In [None]:
loss, accuracy = model_CNN.evaluate(X_test, y_test)


In [8]:
from tensorflow.keras.applications import EfficientNetB0

# Load the EfficientNetB0 model with pre-trained weights
effnet_b0 = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

effnet_b0_model = Sequential()

# Add the EfficientNetB0 base model
effnet_b0_model.add(effnet_b0)

# Add custom fully connected layers
effnet_b0_model.add(layers.Dense(512, activation='relu'))
effnet_b0_model.add(layers.Dropout(0.5))
effnet_b0_model.add(layers.Dense(256, activation='relu'))
effnet_b0_model.add(layers.Flatten())
effnet_b0_model.add(layers.Dense(224, activation='relu'))
effnet_b0_model.add(layers.Dropout(0.5))
effnet_b0_model.add(layers.Dense(num_classes, activation='softmax'))

for layer in effnet_b0.layers:
    layer.trainable = False

effnet_b0_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [None]:
effnet_b0_model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [None]:
loss, accuracy = effnet_b0_model.evaluate(X_test, y_test)


In [9]:
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
vgg16_model = Sequential()
vgg16_model.add(vgg16)
vgg16_model.add(layers.Dense(512, activation='relu'))
vgg16_model.add(layers.Dropout(0.5))
vgg16_model.add(layers.Dense(256, activation='relu'))
vgg16_model.add(layers.Flatten())
vgg16_model.add(layers.Dense(224, activation='relu'))
vgg16_model.add(layers.Dropout(0.5))
vgg16_model.add(layers.Dense(num_classes, activation='softmax'))

for layer in vgg16.layers:  
    layer.trainable = False

vgg16_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
vgg16_model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [None]:
loss, accuracy = vgg16_model.evaluate(X_test, y_test)


In [None]:
models = [model_CNN, vgg16_model, vgg19_model, resnet152_model, effnet_b0_model]
model_names = ["model_CNN", "vgg16_model", "vgg19_model", "resnet152_model", "effnet_b0_model"]

test_data = [X_test, X_test, X_test, X_test, X_test]

model_metrics = []

for i, model in enumerate(models):
    print(f"Metrics for {model_names[i]}:")
    metrics = evaluate_model_graphical(model, test_data[i], y_test)
    model_metrics.append([model_names[i]] + list(metrics))

columns = ["model_name", "accuracy", "precision", "recall", "f1_score", "mean_squared_error", "specificity", "sensitivity", "prevalence"]
df = pd.DataFrame(model_metrics, columns=columns)

selected_columns = ["model_name", "specificity", "sensitivity", "prevalence"]
df_selected = df[selected_columns]

plt.figure(figsize=(12, 6))
sns.heatmap(df_selected.set_index("model_name"), annot=True, cmap="YlGnBu", fmt=".4f")
plt.title("Model Comparison: Metrics")

# Save the table as a PNG image
plt.savefig("model_metrics_table.png")
plt.show()