In [40]:
import pandas as pd
import numpy as np
import re
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import librosa
import librosa.display
import matplotlib.pyplot as plt


In [41]:
# Load the pre-computed feature table that the rest of the notebook works on.
df = pd.read_csv(filepath_or_buffer='urbansounds_features.csv')

In [42]:
def extract_mel_spectrogram(file_path, n_mels=128):
    """Load an audio file and return its log-scaled mel spectrogram.

    Parameters
    ----------
    file_path : str or path-like
        Location of the audio file; handed unchanged to ``librosa.load``.
    n_mels : int, optional
        Number of mel bands requested from ``librosa.feature.melspectrogram``
        (default 128, the value that was previously hard-coded).

    Returns
    -------
    The array produced by ``librosa.power_to_db`` for the mel spectrogram,
    expressed in dB relative to the spectrogram's own maximum (``ref=np.max``).
    """
    # sr=None keeps the sampling rate stored in the file instead of resampling.
    y, sr = librosa.load(file_path, sr=None)
    # The signal must be passed by keyword: recent librosa releases made the
    # arguments of melspectrogram keyword-only, so a positional `y` raises
    # TypeError there. The keyword form works on older releases as well.
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return log_mel_spectrogram

# The loaded feature table does not necessarily contain the path of each audio
# clip (running this unguarded raised KeyError: 'FilePath'), so spectrograms
# are only computed when the column is really there.
if 'FilePath' in df.columns:
    df['MelSpectrogram'] = df['FilePath'].apply(extract_mel_spectrogram)
else:
    print("Column 'FilePath' not found in df; skipping mel-spectrogram extraction.")

### Check the range of values in each column

In [None]:
# Keep only the extremes from the summary statistics so that the range of
# every described column can be read at a glance.
summary_stats = df.describe()
column_intervals = summary_stats.loc[['min', 'max']]
column_intervals

In [None]:
# Show which dtype each column of df currently has.
column_dtypes = df.dtypes
print(column_dtypes)

In [None]:
# Number of missing entries per column (isna is the same check as isnull).
df.isna().sum()

### CNN

In [None]:
def calculate_mean_from_string(string):
    """Return the mean of all numbers found in a stringified array.

    Parameters
    ----------
    string : str
        Text containing numbers, e.g. ``"[-1.5e-03  2.\\n  3.25]"``. Line
        breaks may occur between numbers.
        For convenience a non-string cell is also accepted: it is converted
        with ``float()`` when possible and mapped to NaN otherwise.

    Returns
    -------
    float
        Arithmetic mean of the parsed numbers, or NaN when no number is found.
    """
    if not isinstance(string, str):
        # Already-numeric or missing cell: nothing to parse.
        try:
            return float(string)
        except (TypeError, ValueError):
            return np.nan

    # Replace line breaks with a space (not ''), so the last number of one
    # line can never be glued to the first number of the next line.
    cleaned_string = string.replace('\n', ' ')
    # Optional sign, then either "12", "12.", "12.5" or ".5", then an optional
    # exponent. The previous pattern lost the sign of integers / "-1." style
    # values and split "1.5e-05" into the two numbers 1.5 and 05.
    numbers = re.findall(
        r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", cleaned_string
    )
    if not numbers:
        # np.mean([]) would also give NaN but emits a RuntimeWarning.
        return np.nan
    array = np.array(numbers, dtype=float)
    mean_value = np.mean(array)
    return mean_value


# Turn every column into a plain number:
#   * 'Label'  -> the integer found in the second '-'-separated field
#   * others   -> mean of the numbers contained in the stringified cell
# Columns that are already numeric are left untouched. Checking with
# is_numeric_dtype (rather than comparing against float/int) covers every
# numeric width, and makes the cell safe to run a second time.
for column in df.columns:
    already_numeric = pd.api.types.is_numeric_dtype(df[column])
    if column == 'Label':
        if not already_numeric:
            df[column] = df[column].str.split('-').str[1].astype(int)
    elif not already_numeric:
        df[column] = df[column].apply(calculate_mean_from_string)

In [None]:
# Classification: separate the feature matrix from the target column.
X = df.drop('Label', axis=1)
y = df['Label']

# Split the data into training and testing sets.
# stratify=y keeps the class proportions equal in both parts; with imbalanced
# classes a purely random split can leave a class under-represented (or absent)
# in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Class balance of the training labels prior to oversampling
print("Class distribution before SMOTE:")
counts_before_smote = pd.Series(y_train).value_counts()
print(counts_before_smote)

In [None]:
# Balance the training set with SMOTE (seeded so the result is repeatable).
# Only the training part is resampled; the test split is left as it is.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_resampled, y_train_resampled = resampled

In [None]:
# Class balance of the training labels once SMOTE has been applied
print("\nClass distribution after SMOTE:")
counts_after_smote = pd.Series(y_train_resampled).value_counts()
print(counts_after_smote)

In [None]:
# Standardize the features. The scaler learns its statistics from the
# (resampled) training data only and is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train_resampled)
X_train_scaled = scaler.transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Map the class labels to consecutive integer codes. The mapping is learned
# from the training labels and reused for the test labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train_resampled)
y_train_encoded = label_encoder.transform(y_train_resampled)
y_test_encoded = label_encoder.transform(y_test)

In [None]:
# Give every sample the 4-D layout (samples, 1, n_features, 1) used as the
# CNN input.
train_cnn_shape = (X_train_scaled.shape[0], 1, X_train_scaled.shape[1], 1)
test_cnn_shape = (X_test_scaled.shape[0], 1, X_test_scaled.shape[1], 1)
X_train_reshaped = np.reshape(X_train_scaled, train_cnn_shape)
X_test_reshaped = np.reshape(X_test_scaled, test_cnn_shape)

In [None]:
# Convert labels to categorical one-hot encoding.
# The width is fixed to the number of classes known to the label encoder.
# Without num_classes, to_categorical derives the width from the largest label
# present, so train and test matrices could end up with different widths.
n_label_classes = len(label_encoder.classes_)
y_train_onehot = to_categorical(y_train_encoded, num_classes=n_label_classes)
y_test_onehot = to_categorical(y_test_encoded, num_classes=n_label_classes)

In [None]:
# Define the CNN model with different activation functions for hidden layers
activation_functions = ['relu', 'sigmoid']

# One network per (activation1, activation2) pair is kept here instead of
# being overwritten on the next loop iteration.
cnn_models = {}

for activation1 in activation_functions:
    for activation2 in activation_functions:
        # Define the CNN model.
        # Each sample has shape (1, n_features, 1), i.e. a height of 1. A 3x3
        # kernel or 2x2 pooling window cannot fit into that height, so the
        # kernels and pooling windows are 1 pixel tall and slide along the
        # feature axis only. padding='same' keeps the size positive for any
        # number of features.
        model = Sequential()
        model.add(Conv2D(32, (1, 3), padding='same', activation='relu',
                         input_shape=(1, X_train_scaled.shape[1], 1)))
        model.add(MaxPooling2D((1, 2), padding='same'))
        model.add(Conv2D(64, (1, 3), padding='same', activation=activation1))
        model.add(MaxPooling2D((1, 2), padding='same'))
        model.add(Conv2D(64, (1, 3), padding='same', activation=activation2))
        model.add(Flatten())
        model.add(Dense(64, activation=activation1))
        # Softmax output: the targets are one-hot vectors of mutually
        # exclusive classes, which is what categorical cross-entropy expects.
        model.add(Dense(y_train_onehot.shape[1], activation='softmax'))
        cnn_models[(activation1, activation2)] = model

# NOTE(review): after the loop `model` refers to the last configuration
# ('sigmoid', 'sigmoid'); that is the network the following cells compile,
# train and evaluate. Pick another entry of `cnn_models` to try a different
# activation pair.

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model.
# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The training arrays come out of SMOTE, where the synthetic
# rows are placed after the original ones, so an unshuffled split would
# validate mostly on synthetic samples. Shuffle once with a fixed seed first.
shuffle_rng = np.random.default_rng(42)
train_order = shuffle_rng.permutation(X_train_reshaped.shape[0])
history = model.fit(
    X_train_reshaped[train_order],
    y_train_onehot[train_order],
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Measure loss and accuracy on the held-out test set and report the accuracy.
evaluation = model.evaluate(X_test_reshaped, y_test_onehot)
test_loss, test_acc = evaluation
print(f'Test accuracy: {test_acc}')