In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

"Machine learning tools"
import pickle

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold, train_test_split


from classification.datasets import Dataset
from classification.utils.audio_student import AudioUtil, Feature_vector_DS

from classification.utils.plots import (
    plot_decision_boundaries,
    plot_specgram,
    show_confusion_matrix,
)
from classification.utils.utils import accuracy

In [2]:
np.random.seed(0)

In [3]:
### TO RUN
dataset = Dataset()
classnames = dataset.list_classes()

print("\n".join(classnames))

chainsaw
fire
fireworks
gunshot


In [4]:
### TO RUN
fm_dir = "data/feature_matrices/"  # where to save the features matrices
new_dataset_dir = "src/classification/datasets/new_dataset/melvecs/"
model_dir = "data/models/cnn"  # where to save the models
os.makedirs(fm_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [5]:
### TO RUN

"Creation of the dataset"
myds = Feature_vector_DS(dataset, Nft=512, nmel=20, duration=950, shift_pct=0.0)

"Some attributes..."
myds.nmel
myds.duration
myds.shift_pct
myds.sr
myds.data_aug
myds.ncol

idx = 0


In [6]:
import numpy as np

train_pct = 0.7
data_aug_factor = 1
featveclen = len(myds["fire", 0, "", ""])  # Same for all classes
classnames = ["chainsaw", "fire", "fireworks", "gunshot"]  # Or wherever you store class names
nclass = len(classnames)

# Determine number of samples per class
naudio_per_class = {"chainsaw" : 70, "fire" : 76, "fireworks" : 75, "gunshot" : 40}


# Allocate feature matrix
total_samples_basic = sum(naudio_per_class[c] for c in classnames)
X_basic = np.zeros((total_samples_basic, featveclen))
y_basic = np.zeros((total_samples_basic), dtype=object)
total_samples_basic
# Fill feature matrix
idx = 0
for class_idx, classname in enumerate(classnames):
    for i in range(naudio_per_class[classname]):
        featvec = myds[classname, i, "", ""]
        X_basic[idx, :] = featvec
        y_basic[idx] = classname
        idx += 1

# Save features and labels
np.save(fm_dir + "X_basic.npy", X_basic)
np.save(fm_dir + "y_basic.npy", y_basic)

print(f"Shape of the basic feature matrix : {X_basic.shape}")
print(f"Number of labels : {y_basic.shape}")


Shape of the basic feature matrix : (261, 400)
Number of labels : (261,)


We can now create a new augmented dataset and observe if the classification results improve. 

In [7]:

### AUGMENTED DATASET
list_augmentation = ["original", "noise", "shifting"]
myds.mod_data_aug(list_augmentation)
print("Number of transformations : ", myds.data_aug_factor)


# Calcul total des échantillons
total_aug_samples = sum(naudio_per_class[c] for c in classnames) * len(list_augmentation)
X_basic_aug = np.zeros((total_aug_samples, featveclen))
y_basic_aug = np.zeros((total_aug_samples), dtype=object)

# Remplissage des features
idx = 0
for aug in list_augmentation:
    for classname in classnames:
        for i in range(naudio_per_class[classname]):
            featvec = myds[classname, i, aug, ""]
            X_basic_aug[idx, :] = featvec
            y_basic_aug[idx] = classname
            idx += 1

# Sauvegarde
np.save(fm_dir + "X_basic_aug.npy", X_basic_aug)
np.save(fm_dir + "y_basic_aug.npy", y_basic_aug)

print(f"Shape of the feature matrix : {X_basic_aug.shape}")
print(f"Number of labels : {y_basic_aug.shape}")
print(f"------------------------------------------------------------")
print(f"Transformations: {list_augmentation}. Labels aligned dynamically with class sizes.")


Number of transformations :  3
Shape of the feature matrix : (783, 400)
Number of labels : (783,)
------------------------------------------------------------
Transformations: ['original', 'noise', 'shifting']. Labels aligned dynamically with class sizes.


In [8]:
RUN = False

if RUN:
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from classification.utils.plots import plot_specgram_textlabel

    # Charger les données
    X = np.load(os.path.join(fm_dir, "X_basic_aug.npy"), allow_pickle=True)
    y = np.load(os.path.join(fm_dir, "y_basic_aug.npy"), allow_pickle=True)

    # Dossier où sauvegarder les images
    save_dir = os.path.join("src/classification/soundfiles_melspec_augmentation")
    os.makedirs(save_dir, exist_ok=True)

    # Nombre d'exemples de base (avant augmentation)
    length_X_basic = int(len(X) / len(list_augmentation))

    # Boucle de sauvegarde
    for i in range(length_X_basic):
        for j, aug_name in enumerate(list_augmentation):
            idx = i + j * length_X_basic
            melspec = X[idx]
            class_of_spec = y[idx]

            fig, ax = plt.subplots()
            plot_specgram_textlabel(
                melspec.reshape((20, 20)),
                ax=ax,
                is_mel=True,
                title=f"MEL Spectrogram #{i} - {aug_name}",
                xlabel="Mel vector",
                textlabel=f"{class_of_spec} (aug: {aug_name})",
            )
            plt.tight_layout()
            save_path = os.path.join(save_dir, f"melspec_{i}_{aug_name}.png")
            fig.savefig(save_path)
            plt.close(fig)


FINAL MODEL SAVE

In [9]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# ========== PARAMETERS ==========

# Hyperparamètres issus de l'optimisation
conv_filters = 62
dense_units = 86
dropout_rate = 0.40
kernel_size = 4
n_conv_layers = 2
batch_norm = False
activation = 'tanh'
epochs = 28
batch_size = 19
patience = 9

TEST_SET = True
A = True  # PCA NOAUG NONORM (Ignored for CNN)
B = True  # NOPCA NOAUG NONORM
C = True  # PCA AUG NONORM (Ignored for CNN)
D = True  # NOPCA AUG NONORM
E = True  # NOPCA NOAUG NONORM
F = True  # PCA NOAUG NORM (Ignored for CNN)
G = True  # PCA AUG NORM (Ignored for CNN)
H = True  # NOPCA AUG NORM

X_basic_aug = np.load(os.path.join(fm_dir, "X_basic_aug.npy"))
y_basic_aug = np.load(os.path.join(fm_dir, "y_basic_aug.npy"), allow_pickle=True)
X_basic = np.load(os.path.join(fm_dir, "X_basic.npy"))
y_basic = np.load(os.path.join(fm_dir, "y_basic.npy"), allow_pickle=True)

label_encoder = LabelEncoder()
y_basic = label_encoder.fit_transform(y_basic)
y_basic_aug = label_encoder.transform(y_basic_aug)
n_classes = len(np.unique(y_basic))

if TEST_SET:
    X_train, X_test, y_train, y_test = train_test_split(X_basic, y_basic, test_size=0.3, random_state=42)
    X_train_aug, X_test_aug, y_train_aug, y_test_aug = train_test_split(X_basic_aug, y_basic_aug, test_size=0.3, random_state=42)
else:
    X_train, y_train = X_basic, y_basic
    X_train_aug, y_train_aug = X_basic_aug, y_basic_aug

# ========== HELPERS ==========

def reshape_for_cnn(X):
    if len(X.shape) == 2:
        side = int(np.sqrt(X.shape[1]))
        return X.reshape((-1, side, side, 1))
    return X

def build_cnn(input_shape, n_classes):
    model = Sequential()
    model.add(Input(shape=input_shape))

    for i in range(n_conv_layers):
        model.add(Conv2D(conv_filters * (2**i), (kernel_size, kernel_size), activation=activation))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(dense_units, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_and_save_model(X_train, y_train, filename, X_val=None, y_val=None):
    input_shape = X_train.shape[1:]
    y_train_cat = to_categorical(y_train, num_classes=n_classes)
    y_val_cat = to_categorical(y_val, num_classes=n_classes) if y_val is not None else None

    model = build_cnn(input_shape, n_classes)
    es = EarlyStopping(patience=patience, restore_best_weights=True)

    model.fit(X_train, y_train_cat, epochs=epochs, batch_size=batch_size,
              validation_data=(X_val, y_val_cat) if y_val is not None else None,
              callbacks=[es], verbose=0)

    model.save(os.path.join(model_dir, filename + ".h5"))
    return model

def evaluate_model(model, X_test, y_test, description):
    y_pred = np.argmax(model.predict(X_test), axis=1)
    print(f"\n=== {description} ===")
    print(classification_report(y_test, y_pred))

# ========== SCENARIOS ==========

if B:
    X_train_cnn = reshape_for_cnn(X_train)
    X_test_cnn = reshape_for_cnn(X_test)
    model_B = train_and_save_model(X_train_cnn, y_train, "cnn_nopca_noaug_nonorm", X_test_cnn, y_test)

if D:
    X_train_aug_cnn = reshape_for_cnn(X_train_aug)
    X_test_aug_cnn = reshape_for_cnn(X_test_aug)
    model_D = train_and_save_model(X_train_aug_cnn, y_train_aug, "cnn_nopca_aug_nonorm", X_test_aug_cnn, y_test_aug)

if H:
    scaler = StandardScaler()
    X_train_aug_scaled = scaler.fit_transform(X_train_aug)
    X_test_aug_scaled = scaler.transform(X_test_aug)
    with open(os.path.join(model_dir, "scaler_cnn_norm.pkl"), "wb") as f:
        pickle.dump(scaler, f)

    X_train_aug_norm_cnn = reshape_for_cnn(X_train_aug_scaled)
    X_test_aug_norm_cnn = reshape_for_cnn(X_test_aug_scaled)
    model_H = train_and_save_model(X_train_aug_norm_cnn, y_train_aug, "cnn_nopca_aug_norm", X_test_aug_norm_cnn, y_test_aug)

# ========== EVALUATION ==========

if TEST_SET:
    if B: evaluate_model(model_B, X_test_cnn, y_test, "Scenario B: NOAUG NONORM")
    if D: evaluate_model(model_D, X_test_aug_cnn, y_test_aug, "Scenario D: AUG NONORM")
    if H: evaluate_model(model_H, X_test_aug_norm_cnn, y_test_aug, "Scenario H: AUG NORM")


2025-04-18 13:14:46.165515: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-18 13:14:46.175727: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-18 13:14:46.224643: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-18 13:14:46.318869: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744974886.383162   27442 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744974886.41

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step

=== Scenario B: NOAUG NONORM ===
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.89      0.86      0.87        28
           2       0.88      0.74      0.80        19
           3       0.76      1.00      0.87        13

    accuracy                           0.87        79
   macro avg       0.87      0.89      0.87        79
weighted avg       0.88      0.87      0.87        79

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step

=== Scenario D: AUG NONORM ===
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        64
           1       0.96      0.96      0.96        73
           2       0.95      0.92      0.94        63
           3       0.94      0.94      0.94        35

    accuracy                           0.96       235
   macro avg       0.96      



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

=== Scenario H: AUG NORM ===
              precision    recall  f1-score   support

           0       0.92      0.94      0.93        64
           1       0.97      0.97      0.97        73
           2       0.95      0.90      0.93        63
           3       0.92      0.97      0.94        35

    accuracy                           0.94       235
   macro avg       0.94      0.95      0.94       235
weighted avg       0.95      0.94      0.94       235



HYPERPARAMETER TUNING

In [10]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from bayes_opt import BayesianOptimization

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# --- CONFIG FLAGS ---
NORMALIZATION = True
TRANSFORMATION = True

if TRANSFORMATION:
    X = np.load(os.path.join(fm_dir, "X_basic_aug.npy"))
    y = np.load(os.path.join(fm_dir, "y_basic_aug.npy"), allow_pickle=True)
else:
    X = np.load(os.path.join(fm_dir, "X_basic.npy"))
    y = np.load(os.path.join(fm_dir, "y_basic.npy"), allow_pickle=True)

le = LabelEncoder()
y = le.fit_transform(y)
n_classes = len(np.unique(y))

if NORMALIZATION:
    X = scaler.fit_transform(X)

# --- STEP 2: Reshape for CNN ---
def reshape_for_cnn(X):
    if len(X.shape) == 2:
        side = int(np.sqrt(X.shape[1]))
        return X.reshape((-1, side, side, 1))
    return X

X = reshape_for_cnn(X)

# --- STEP 3: Model builder with hyperparams ---
def build_cnn_model(conv_filters, dense_units, dropout_rate, kernel_size,
                    n_conv_layers, batch_norm, activation):

    conv_filters = int(conv_filters)
    dense_units = int(dense_units)
    kernel_size = int(kernel_size)
    n_conv_layers = int(n_conv_layers)
    batch_norm = bool(round(batch_norm))
    activation = 'relu' if round(activation) == 0 else 'tanh'

    model = Sequential()
    model.add(Input(shape=X.shape[1:]))

    for i in range(n_conv_layers):
        model.add(Conv2D(conv_filters * (2 ** i), (kernel_size, kernel_size), activation=activation))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(dense_units, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# --- STEP 4: Cross-validation function for BO ---
def cnn_cv(conv_filters, dense_units, dropout_rate, epochs, batch_size, patience,
           kernel_size, n_conv_layers, batch_norm, activation):

    acc_scores = []
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    for train_idx, val_idx in skf.split(X, y):
        X_train_cv, X_val_cv = X[train_idx], X[val_idx]
        y_train_cv, y_val_cv = y[train_idx], y[val_idx]

        y_train_cat = to_categorical(y_train_cv, num_classes=n_classes)
        y_val_cat = to_categorical(y_val_cv, num_classes=n_classes)

        model = build_cnn_model(conv_filters, dense_units, dropout_rate,
                                kernel_size, n_conv_layers, batch_norm, activation)
        es = EarlyStopping(patience=int(patience), restore_best_weights=True)

        model.fit(X_train_cv, y_train_cat, validation_data=(X_val_cv, y_val_cat),
                  epochs=int(epochs), batch_size=int(batch_size), verbose=0,
                  callbacks=[es])

        val_pred = np.argmax(model.predict(X_val_cv, verbose=0), axis=1)
        acc_scores.append(np.mean(val_pred == y_val_cv))

    return np.mean(acc_scores)

# --- STEP 5: Bayesian Optimization space ---
pbounds = {
    'conv_filters': (16, 64),
    'dense_units': (32, 256),
    'dropout_rate': (0.1, 0.5),
    'epochs': (5, 30),
    'batch_size': (16, 64),
    'patience': (2, 10),
    'kernel_size': (3, 5),
    'n_conv_layers': (1, 3),
    'batch_norm': (0, 1),      # 0 = False, 1 = True
    'activation': (0, 1)       # 0 = relu, 1 = tanh
}

optimizer = BayesianOptimization(
    f=cnn_cv,
    pbounds=pbounds,
    random_state=42
)

# --- STEP 6: Run Bayesian Optimization ---
print("🔍 Starting Bayesian Optimization for CNN...")
optimizer.maximize(init_points=5, n_iter=45)

# --- STEP 7: Display Results ---
for i, res in enumerate(optimizer.res):
    print(f"Iteration {i+1}: Accuracy={res['target']:.4f}, Params={res['params']}")

best_params = optimizer.max['params']
activation_str = 'relu' if round(best_params['activation']) == 0 else 'tanh'
batch_norm_str = 'Yes' if round(best_params['batch_norm']) == 1 else 'No'

print("\n✅ === BEST HYPERPARAMETERS FOUND ===")
print(f"conv_filters = {int(best_params['conv_filters'])}")
print(f"dense_units = {int(best_params['dense_units'])}")
print(f"dropout_rate = {float(best_params['dropout_rate']):.2f}")
print(f"kernel_size = {int(best_params['kernel_size'])}")
print(f"n_conv_layers = {int(best_params['n_conv_layers'])}")
print(f"batch_norm = {batch_norm_str}")
print(f"activation = {activation_str}")
print(f"epochs = {int(best_params['epochs'])}")
print(f"batch_size = {int(best_params['batch_size'])}")
print(f"patience = {int(best_params['patience'])}")
print(f"CV Accuracy = {optimizer.max['target']:.4f}")

# --- STEP 8: Final Evaluation on Holdout Set ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=999)
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

final_model = build_cnn_model(
    conv_filters=int(best_params['conv_filters']),
    dense_units=int(best_params['dense_units']),
    dropout_rate=float(best_params['dropout_rate']),
    kernel_size=int(best_params['kernel_size']),
    n_conv_layers=int(best_params['n_conv_layers']),
    batch_norm=bool(round(best_params['batch_norm'])),
    activation=activation_str
)

es = EarlyStopping(patience=int(best_params['patience']), restore_best_weights=True)
final_model.fit(X_train, y_train_cat, validation_split=0.2,
                epochs=int(best_params['epochs']), batch_size=int(best_params['batch_size']),
                callbacks=[es], verbose=1)

# --- Evaluation ---
y_pred = np.argmax(final_model.predict(X_test), axis=1)
print("\n📊 === FINAL EVALUATION ON HOLDOUT TEST SET ===")
print(classification_report(y_test, y_pred))


🔍 Starting Bayesian Optimization for CNN...
|   iter    |  target   | activa... | batch_... | batch_... | conv_f... | dense_... | dropou... |  epochs   | kernel... | n_conv... | patience  |
-------------------------------------------------------------------------------------------------------------------------------------------------




| [39m1        [39m | [39m0.6411   [39m | [39m0.3745   [39m | [39m0.9507   [39m | [39m51.14    [39m | [39m44.74    [39m | [39m66.95    [39m | [39m0.1624   [39m | [39m6.452    [39m | [39m4.732    [39m | [39m2.202    [39m | [39m7.665    [39m |


KeyboardInterrupt: 