# Imports

In [1]:
import pandas as pd
import numpy as np
import re
from imblearn.over_sampling import SMOTE
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from itertools import product

# Input de DataSet

In [2]:
datasets = [pd.read_csv(f'datasets/urbansounds_features_{i}.csv') for i in range(1, 11)]

Clean the DataSet

In [3]:
def calculate_mean_from_string(string):
    cleaned_string = string.replace('\n', '')
    numbers = re.findall(r"[-+]?\d*\.\d+|\d+", cleaned_string)
    array = np.array(numbers, dtype=float)
    mean_value = np.mean(array)
    return mean_value

In [4]:
for df in datasets:
    for column in df.columns:
        if column != 'Label':
            if df[column].dtype != float and df[column].dtype != int:
                df[column] = df[column].apply(calculate_mean_from_string)
        else:
            df[column] = df[column].str.split('-').str[1].astype(int)

# Classification

In [5]:
def heatmap(test,pred):
    cm = confusion_matrix(test, pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

In [6]:
def oversample_features(X, y):
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled

def standardize_features(X):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled

In [7]:
def preprocess_data(dataset):
    X = dataset.drop('Label', axis=1)
    y = dataset['Label']

    # Oversample and standardize the features
    X_resampled, y_resampled = oversample_features(X, y)
    X_scaled = standardize_features(X_resampled)
    
    return X_scaled, y_resampled

In [8]:
def prepare_datasets(fold):
    # Prepare training dataset
    test_data = datasets[fold]
    test_labels = test_data['Label'].values  # Assuming 'label' is the column containing labels
    test_data = test_data.drop(columns=['Label']).values  # Assuming you drop the 'label' column for input features

    train_datasets = datasets[:fold] + datasets[fold + 1:]

    # Concatenate training sets
    train_data = pd.concat(train_datasets)
    train_labels = train_data['Label'].values
    train_data = train_data.drop(columns=['Label']).values

    return train_data, train_labels, test_data, test_labels

In [50]:
def build_MLP(mean_neurons, output_neurons, learning_rate, regulizer, dropout):

    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)
    
    model = tf.keras.Sequential([
        tf.keras.layers.GlobalAveragePooling1D(input_shape=(None, 1)),
        tf.keras.layers.Dense(units=mean_neurons, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=regulizer, l2=regulizer)),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=mean_neurons*2, activation='relu'),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=mean_neurons*3, activation='relu'),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=mean_neurons*2, activation='relu'),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=mean_neurons, activation='relu'),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=output_neurons, activation='softmax')
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model


In [10]:
def train_MLP(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs):
    
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    model.fit(train_data, train_labels, batch_size=batch_size, epochs=num_epochs,
              callbacks=[early_stopping], validation_data=(test_data, test_labels))
    
    return model

In [11]:
def build_RNN(mean_neurons, output_neurons, learning_rate, regulizer, dropout):

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    
    model = tf.keras.Sequential([
        tf.keras.layers.SimpleRNN(units=mean_neurons, activation='relu', input_shape=(None, 1)),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(units=output_neurons, activation='softmax')
    ])

    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [12]:
def train_RNN(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs):
    
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience)
    model.fit(train_data, train_labels, epochs=num_epochs, batch_size=batch_size,
              callbacks=[early_stopping], validation_data=(test_data, test_labels))
    
    return model

In [13]:
def evaluate_model(model, test_data, test_labels):
    test_labels_pred_probs = model.predict(test_data)
    test_labels_pred = np.argmax(test_labels_pred_probs, axis=1)
    fold_accuracy = accuracy_score(test_labels, test_labels_pred)
    return fold_accuracy

In [14]:
num_folds = 10

# MLP

In [44]:
# Hyperparameter
num_epochs = 100
learning_rate = 0.01
batch_size = 64

# Regularization Techniques
dropout = 0.1
patience = 3
regulizer = 0.1


In [16]:
best_accuracy = 0
for num_epochs in (40,51):
    for learning_rate in [0.1,0.01,0.001]:
        for batch_size in [16,32,64,128,256]:
            for dropout in (0, 0.5, 0.1):
                for patience in [3,6,9]:
                    for regulizer in (0, 0.1, 0.01):
                        model = build_MLP(
                            datasets[0].shape[1], len(np.unique(datasets[0]['Label'])),
                            learning_rate, regulizer, dropout)
                        
                        cv_scores = []
                        for fold in range(num_folds):
                            train_data, train_labels, test_data, test_labels = prepare_datasets(fold)
                        
                            model = train_MLP(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs)
                        
                            fold_accuracy = evaluate_model(model, test_data, test_labels)
                            cv_scores.append(fold_accuracy)
                            
                        overall_average_accuracy = np.mean(cv_scores)
                        if overall_average_accuracy>best_accuracy:
                            best_accuracy = overall_average_accuracy
                            best_num_epochs = num_epochs
                            best_learning_rate = learning_rate
                            best_batch_size = batch_size
                            best_dropout = dropout
                            best_patience = patience
                            best_regulizer = regulizer
        

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/4


KeyboardInterrupt



In [None]:
print(f"\nBest Accuracy: {best_accuracy:.4f}\n"
      f"Best num_epochs: {best_num_epochs}"
      f"Best learning_rate: {best_learning_rate}"
      f"Best batch_size: {best_batch_size}"
      f"Best dropout: {best_dropout}"
      f"Best patience: {best_patience}"
      f"Best regulizer: {best_regulizer}")

In [51]:
model = build_MLP(
    datasets[0].shape[1], len(np.unique(datasets[0]['Label'])),
    learning_rate, regulizer, dropout)

cv_scores = []
for fold in range(num_folds):
    train_data, train_labels, test_data, test_labels = prepare_datasets(fold)

    model = train_MLP(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs)

    fold_accuracy = evaluate_model(model, test_data, test_labels)
    cv_scores.append(fold_accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


In [52]:
# Calculate and store the average accuracy for these hyperparameters
overall_average_accuracy = np.mean(cv_scores)
print(f"\nOverall Average Accuracy: {overall_average_accuracy:.4f}")


Overall Average Accuracy: 0.1876


# RNN

In [None]:
# Hyperparameter
num_epochs = 15
learning_rate = '0.1'
batch_size = 64

# Regularization Techniques
dropout = 0.06
patience = 15
regulizer = 0.1


In [None]:
model = build_RNN(
    datasets[0].shape[1] + len(np.unique(datasets[0]['Label'])) // 2, len(np.unique(datasets[0]['Label'])),
    learning_rate, regulizer, dropout)

cv_scores = []
for fold in range(num_folds):
    # Prepare training and validation datasets
    train_data, train_labels, test_data, test_labels = prepare_datasets(fold)

    # Reshape data for RNN
    train_data = np.reshape(train_data, (train_data.shape[0], train_data.shape[1], 1))
    test_data = np.reshape(test_data, (test_data.shape[0], test_data.shape[1], 1))

    
    mode = train_RNN(train_data, train_labels, test_data, test_labels, patience, batch_size, num_epochs)

    # Evaluate and store accuracy for this fold
    fold_accuracy = evaluate_model(model, test_data, test_labels)
    cv_scores.append(fold_accuracy)

In [None]:
# Calculate and store the average accuracy for these hyperparameters
overall_average_accuracy = np.mean(cv_scores)
print(f"\nOverall Average Accuracy: {overall_average_accuracy:.4f}")