# Imports

In [None]:
import pandas as pd
import numpy as np
import re
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the DataSet

In [None]:
# Read the ten per-fold feature tables (files numbered 1 through 10) into a list,
# one DataFrame per file, in file-number order.
feature_paths = [f'datasets/urbansounds_features_{i}.csv' for i in range(1, 11)]
datasets = [pd.read_csv(path) for path in feature_paths]

# Clean the DataSet

In [None]:
# For every fold, show which columns were loaded with the generic ``object``
# dtype (i.e. were not parsed as plain numbers).
for df in datasets:
    print(df.select_dtypes(include=['object']).columns)

In [None]:
# Matches one decimal number: optional sign, digits with an optional fraction
# ("3", "3.", "3.25", ".25"), and an optional exponent ("e-05", "E+2").
_NUMBER_PATTERN = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def calculate_mean_from_string(string):
    """Return the arithmetic mean of all numbers embedded in ``string``.

    The text is scanned for decimal numbers; everything else (brackets,
    whitespace, newlines) acts as a separator. Signs and scientific notation
    are honoured, so ``"-1"`` is read as ``-1.0`` and ``"1.5e+01"`` as ``15.0``.

    Args:
        string: Text containing numbers, e.g. ``"[1.0 2.5\\n 3.]"``. A value
            that is not a ``str`` is assumed to be numeric already and is
            returned as ``float(string)``.

    Returns:
        The mean of the parsed numbers as a float, or ``nan`` when the text
        contains no number at all.
    """
    if not isinstance(string, str):
        # Cells that were already parsed as numbers need no text processing.
        return float(string)

    tokens = _NUMBER_PATTERN.findall(string)
    if not tokens:
        # Avoid numpy's "mean of empty slice" warning; nan marks "no data".
        return float("nan")

    return np.mean(np.array(tokens, dtype=float))

In [None]:
# For every fold, print the minimum and maximum of each column summarised
# by ``DataFrame.describe``.
for df in datasets:
    print(df.describe().loc[['min', 'max']])

In [None]:
# Clean every fold in place:
#   * 'Label' is reduced to the integer found in its second '-'-separated field
#     (presumably the class id inside the clip's file name -- verify against
#     the CSV contents).
#   * every other column is converted to numbers if needed and then min-max
#     scaled to [0, 1] using that fold's own minimum and maximum.
for df in datasets:
    for column in df.columns:
        if column == 'Label':
            df[column] = df[column].str.split('-').str[1].astype(int)
            continue

        # is_numeric_dtype recognises every float/int width, unlike comparing
        # the dtype against the builtin ``float`` / ``int`` types.
        if not pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].apply(calculate_mean_from_string)

        column_min = df[column].min()
        value_range = df[column].max() - column_min
        if value_range == 0:
            # A constant column would otherwise become 0/0 = NaN everywhere.
            df[column] = 0.0
        else:
            df[column] = (df[column] - column_min) / value_range

In [None]:
# Draw one bar chart per fold showing how many samples each class has.
# ``enumerate`` supplies the fold index instead of a hand-maintained counter.
for fold, df in enumerate(datasets):
    print(f"Fold {fold + 1}")
    class_counts = df['Label'].value_counts()
    class_labels = class_counts.index
    class_values = class_counts.values

    plt.figure(figsize=(8, 6))
    plt.bar(class_labels, class_values, color='skyblue')
    plt.xlabel('Class')
    plt.ylabel('Count')
    plt.title('Class Distribution')
    plt.show()

# Classification

In [None]:
def plot_learning_curve(history):
    """Show the accuracy and loss curves recorded during training.

    Two figures are displayed one after the other: first training vs.
    validation accuracy, then training vs. validation loss, each plotted
    against the epoch number.

    Args:
        history: Object whose ``history`` attribute maps the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``
            to per-epoch value sequences.
    """
    # (training key, validation key, figure title, y-axis label)
    panels = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )
    for train_key, validation_key, title, y_label in panels:
        plt.plot(history.history[train_key])
        plt.plot(history.history[validation_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

In [None]:
def plot_confusion_matrix(y_true, y_pred, class_labels):
    """Display the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        class_labels: Tick labels used on both heatmap axes.
    """
    counts = confusion_matrix(y_true, y_pred)

    heatmap_options = {
        'annot': True,   # write the count inside every cell
        'fmt': 'd',      # ... formatted as an integer
        'cmap': 'Blues',
        'xticklabels': class_labels,
        'yticklabels': class_labels,
    }

    plt.figure(figsize=(8, 6))
    sns.heatmap(counts, **heatmap_options)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

In [None]:
def prepare_datasets(fold):
    """Split the module-level ``datasets`` list into train and test arrays.

    The DataFrame at position ``fold`` becomes the test set; all remaining
    DataFrames are concatenated into the training set. The ``'Label'`` column
    is separated from the other columns in both.

    Args:
        fold: Index into ``datasets`` of the fold to hold out. Negative
            indices count from the end, as with list indexing.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)`` of
        numpy arrays.

    Raises:
        IndexError: If ``fold`` is outside the range of ``datasets``.
    """
    # Indexing a range normalises negative indices and rejects out-of-range
    # ones. Slicing with a raw negative index (datasets[:fold] +
    # datasets[fold + 1:]) would put the held-out fold into the training set.
    held_out = range(len(datasets))[fold]

    test_frame = datasets[held_out]
    test_labels = test_frame['Label'].values
    test_data = test_frame.drop(columns=['Label']).values

    train_frame = pd.concat(
        [frame for position, frame in enumerate(datasets) if position != held_out]
    )
    train_labels = train_frame['Label'].values
    train_data = train_frame.drop(columns=['Label']).values

    return train_data, train_labels, test_data, test_labels

In [None]:
def build_MLP(input_neurons, hidden_neurons, output_neurons, learning_rate, regulizer, dropout):
    """Create and compile a fully connected softmax classifier.

    Layer stack: one regularised ReLU layer, two further ReLU layers, and a
    softmax output layer, with a dropout layer after each of the three ReLU
    layers.

    Args:
        input_neurons: Units in the first dense layer.
        hidden_neurons: Units in each of the two following dense layers.
        output_neurons: Units in the softmax output layer.
        learning_rate: Learning rate given to the Adam optimizer.
        regulizer: Factor used for both the L1 and the L2 kernel penalty of
            the first dense layer.
        dropout: Rate used by every dropout layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model (sparse categorical
        cross-entropy loss, accuracy metric).
    """
    layers = tf.keras.layers

    # Only the first layer carries the L1+L2 kernel penalty.
    penalty = tf.keras.regularizers.L1L2(l1=regulizer, l2=regulizer)
    stack = [
        layers.Dense(units=input_neurons, activation='relu', kernel_regularizer=penalty),
        layers.Dropout(dropout),
    ]
    for _ in range(2):
        stack.append(layers.Dense(units=hidden_neurons, activation='relu'))
        stack.append(layers.Dropout(dropout))
    stack.append(layers.Dense(units=output_neurons, activation='softmax'))

    model = tf.keras.Sequential(stack)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )
    return model

In [None]:
def train_MLP(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs, model=None):
    """Fit an MLP with early stopping and return the training history.

    Args:
        train_data: Training features.
        train_labels: Training labels.
        test_data: Features used as validation data during fitting.
        test_labels: Labels used as validation data during fitting.
        patience: Epochs without ``val_loss`` improvement tolerated before
            training stops.
        batch_size: Mini-batch size.
        num_epochs: Maximum number of epochs.
        model: Compiled Keras model to train. When omitted, the module-level
            variable ``model`` is used, which is what this function always
            did before the parameter existed.

    Returns:
        The history object returned by ``model.fit``.

    Raises:
        NameError: If ``model`` is omitted and no module-level ``model`` exists.
    """
    if model is None:
        # Backward compatibility: older call sites rely on a global ``model``.
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    # NOTE(review): the data passed as test_data also drives early stopping;
    # if the same data is used for the final score, that score is optimistic.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    history = model.fit(train_data, train_labels,
                        epochs=num_epochs,
                        batch_size=batch_size,
                        callbacks=[early_stopping],
                        validation_data=(test_data, test_labels),
                        verbose=0)

    return history

In [None]:
def build_CNN(input_shape, num_classes, learning_rate, dropout_rate):
    """Create and compile a 1-D convolutional softmax classifier.

    The input is reshaped to ``(input_shape[0], 1)`` and passed through two
    Conv1D + MaxPooling1D stages, flattened, and fed to a dense ReLU layer
    with dropout followed by the softmax output layer.

    Args:
        input_shape: Shape of one input sample; only ``input_shape[0]`` is
            used for the reshape target.
        num_classes: Units in the softmax output layer.
        learning_rate: Learning rate given to the Adam optimizer.
        dropout_rate: Rate of the dropout layer before the output layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model (sparse categorical
        cross-entropy loss, accuracy metric).
    """
    layers = tf.keras.layers

    # Append a trailing axis of size 1 so that Conv1D can consume the sample.
    to_sequence = layers.Reshape((input_shape[0], 1), input_shape=input_shape)

    feature_extractor = [
        layers.Conv1D(64, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, 3, activation='relu'),
        layers.MaxPooling1D(2),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax'),
    ]

    model = tf.keras.Sequential([to_sequence, *feature_extractor, *classifier_head])
    model.compile(
        optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [None]:
def train_CNN(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs, model=None):
    """Fit a CNN with early stopping and return the training history.

    Args:
        train_data: Training features.
        train_labels: Training labels.
        test_data: Features used as validation data during fitting.
        test_labels: Labels used as validation data during fitting.
        patience: Epochs without ``val_loss`` improvement tolerated before
            training stops.
        batch_size: Mini-batch size.
        num_epochs: Maximum number of epochs.
        model: Compiled Keras model to train. When omitted, the module-level
            variable ``cnn_model`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        The history object returned by ``model.fit``.

    Raises:
        NameError: If ``model`` is omitted and no module-level ``cnn_model``
            exists.
    """
    if model is None:
        # Backward compatibility: older call sites rely on a global ``cnn_model``.
        try:
            model = globals()['cnn_model']
        except KeyError:
            raise NameError("name 'cnn_model' is not defined") from None

    # NOTE(review): the data passed as test_data also drives early stopping;
    # if the same data is used for the final score, that score is optimistic.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    history = model.fit(train_data, train_labels,
                        epochs=num_epochs,
                        batch_size=batch_size,
                        callbacks=[early_stopping],
                        validation_data=(test_data, test_labels),
                        verbose=0)

    return history

# MLP

In [None]:
# Cross-validate the MLP: each fold is held out once while a fresh model is
# trained on the remaining folds; the held-out accuracy is recorded per fold.
cv_scores_mlp = []

# Derive the fold count from the loaded data instead of hard-coding it.
for fold in range(len(datasets)):
    print(f"Fold {fold + 1}")
    train_data, train_labels, test_data, test_labels = prepare_datasets(fold)

    # train_MLP reads this module-level ``model`` variable.
    model = build_MLP(input_neurons=512,
                      hidden_neurons=512,
                      output_neurons=10,
                      learning_rate=0.0005,
                      regulizer=0.001,
                      dropout=0.5)

    history = train_MLP(train_data, train_labels, test_data, test_labels,
                        patience=20,
                        batch_size=128,
                        num_epochs=50)
    plot_learning_curve(history)  # Comment out to skip the per-fold learning curves

    predictions = model.predict(test_data)
    # The predicted class is the index of the largest softmax output.
    predicted_labels = np.argmax(predictions, axis=1)
    plot_confusion_matrix(test_labels, predicted_labels, class_labels=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    accuracy = accuracy_score(test_labels, predicted_labels)
    cv_scores_mlp.append(accuracy)

overall_average_accuracy_mlp = np.mean(cv_scores_mlp)
print(f"\nAverage Accuracy for MLP: {overall_average_accuracy_mlp:.4f}")


# CNN

In [None]:
# Cross-validate the CNN: each fold is held out once while a fresh model is
# trained on the remaining folds; the held-out accuracy is recorded per fold.
cv_scores_cnn = []

# Derive the fold count from the loaded data instead of hard-coding it.
for fold in range(len(datasets)):
    print(f"Fold {fold + 1}")
    train_data, train_labels, test_data, test_labels = prepare_datasets(fold)

    # train_CNN reads this module-level ``cnn_model`` variable.
    cnn_model = build_CNN(input_shape=train_data.shape[1:],
                          num_classes=10,
                          learning_rate=0.0001,
                          dropout_rate=0.5)

    history = train_CNN(train_data, train_labels, test_data, test_labels,
                        patience=5,
                        batch_size=32,
                        num_epochs=50)
    #plot_learning_curve(history)  # Uncomment if you want to visualize the learning curve

    predictions_cnn = cnn_model.predict(test_data)
    # The predicted class is the index of the largest softmax output.
    predicted_labels_cnn = np.argmax(predictions_cnn, axis=1)
    # Optional plot; it must use the CNN predictions, not the MLP's ``predicted_labels``.
    #plot_confusion_matrix(test_labels, predicted_labels_cnn, class_labels=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    accuracy = accuracy_score(test_labels, predicted_labels_cnn)
    cv_scores_cnn.append(accuracy)

overall_average_accuracy_cnn = np.mean(cv_scores_cnn)
# The printed value is the mean over folds, so it is labelled like the MLP summary.
print(f"\nAverage Accuracy for CNN: {overall_average_accuracy_cnn:.4f}")
