In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

"Machine learning tools"
import pickle

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold, train_test_split


from classification.datasets import Dataset
from classification.utils.audio_student import AudioUtil, Feature_vector_DS

from classification.utils.plots import (
    plot_decision_boundaries,
    plot_specgram,
    show_confusion_matrix,
)
from classification.utils.utils import accuracy

In [2]:
np.random.seed(0)

In [3]:
### TO RUN
dataset = Dataset()
classnames = dataset.list_classes()

print("\n".join(classnames))

chainsaw
fire
fireworks
gunshot


In [4]:
### TO RUN
fm_dir = "data/feature_matrices/"  # where to save the features matrices
new_dataset_dir = "src/classification/datasets/new_dataset/melvecs/"
model_dir = "data/models/cnn"  # where to save the models
os.makedirs(fm_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [5]:
### TO RUN

"Creation of the dataset"
myds = Feature_vector_DS(dataset, Nft=512, nmel=20, duration=950, shift_pct=0.0)

"Some attributes..."
myds.nmel
myds.duration
myds.shift_pct
myds.sr
myds.data_aug
myds.ncol

idx = 0


In [6]:
import numpy as np

train_pct = 0.7
data_aug_factor = 1
featveclen = len(myds["fire", 0, "", ""])  # Same for all classes
classnames = ["chainsaw", "fire", "fireworks", "gunshot"]  # Or wherever you store class names
nclass = len(classnames)

# Determine number of samples per class
naudio_per_class = {"chainsaw" : 76, "fire" : 76, "fireworks" : 76, "gunshot" : 40}


# Allocate feature matrix
total_samples_basic = sum(naudio_per_class[c] for c in classnames)
X_basic = np.zeros((total_samples_basic, featveclen))
y_basic = np.zeros((total_samples_basic), dtype=object)
total_samples_basic
# Fill feature matrix
idx = 0
for class_idx, classname in enumerate(classnames):
    for i in range(naudio_per_class[classname]):
        featvec = myds[classname, i, "", ""]
        X_basic[idx, :] = featvec
        y_basic[idx] = classname
        idx += 1

# Save features and labels
np.save(fm_dir + "X_basic.npy", X_basic)
np.save(fm_dir + "y_basic.npy", y_basic)

print(f"Shape of the basic feature matrix : {X_basic.shape}")
print(f"Number of labels : {y_basic.shape}")


Shape of the basic feature matrix : (268, 400)
Number of labels : (268,)


We can now create a new augmented dataset and observe if the classification results improve. 

In [7]:

### AUGMENTED DATASET
list_augmentation = ["original", "noise", "shifting"]
myds.mod_data_aug(list_augmentation)
print("Number of transformations : ", myds.data_aug_factor)


# Calcul total des échantillons
total_aug_samples = sum(naudio_per_class[c] for c in classnames) * len(list_augmentation)
X_basic_aug = np.zeros((total_aug_samples, featveclen))
y_basic_aug = np.zeros((total_aug_samples), dtype=object)

# Remplissage des features
idx = 0
for aug in list_augmentation:
    for classname in classnames:
        for i in range(naudio_per_class[classname]):
            featvec = myds[classname, i, aug, ""]
            X_basic_aug[idx, :] = featvec
            y_basic_aug[idx] = classname
            idx += 1

# Sauvegarde
np.save(fm_dir + "X_basic_aug.npy", X_basic_aug)
np.save(fm_dir + "y_basic_aug.npy", y_basic_aug)

print(f"Shape of the feature matrix : {X_basic_aug.shape}")
print(f"Number of labels : {y_basic_aug.shape}")
print(f"------------------------------------------------------------")
print(f"Transformations: {list_augmentation}. Labels aligned dynamically with class sizes.")


Number of transformations :  3
Shape of the feature matrix : (804, 400)
Number of labels : (804,)
------------------------------------------------------------
Transformations: ['original', 'noise', 'shifting']. Labels aligned dynamically with class sizes.


In [8]:
RUN = False

if RUN:
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from classification.utils.plots import plot_specgram_textlabel

    # Charger les données
    X = np.load(os.path.join(fm_dir, "X_basic_aug.npy"), allow_pickle=True)
    y = np.load(os.path.join(fm_dir, "y_basic_aug.npy"), allow_pickle=True)

    # Dossier où sauvegarder les images
    save_dir = os.path.join("src/classification/soundfiles_melspec_augmentation")
    os.makedirs(save_dir, exist_ok=True)

    # Nombre d'exemples de base (avant augmentation)
    length_X_basic = int(len(X) / len(list_augmentation))

    # Boucle de sauvegarde
    for i in range(length_X_basic):
        for j, aug_name in enumerate(list_augmentation):
            idx = i + j * length_X_basic
            melspec = X[idx]
            class_of_spec = y[idx]

            fig, ax = plt.subplots()
            plot_specgram_textlabel(
                melspec.reshape((20, 20)),
                ax=ax,
                is_mel=True,
                title=f"MEL Spectrogram #{i} - {aug_name}",
                xlabel="Mel vector",
                textlabel=f"{class_of_spec} (aug: {aug_name})",
            )
            plt.tight_layout()
            save_path = os.path.join(save_dir, f"melspec_{i}_{aug_name}.png")
            fig.savefig(save_path)
            plt.close(fig)


FINAL MODEL SAVE

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# ========== PARAMETERS ==========
conv_filters = 22 
dense_units = 228
dropout_rate = 0.3401

TEST_SET = True
A = True  # PCA NOAUG NONORM (Ignored for CNN)
B = True  # NOPCA NOAUG NONORM
C = True  # PCA AUG NONORM (Ignored for CNN)
D = True  # NOPCA AUG NONORM
E = True  # NOPCA NOAUG NONORM
F = True  # PCA NOAUG NORM (Ignored for CNN)
G = True  # PCA AUG NORM (Ignored for CNN)
H = True  # NOPCA AUG NORM

# ========== LOAD DATA ==========
X_basic_aug = np.load(os.path.join(fm_dir, "X_basic_aug.npy"))
y_basic_aug = np.load(os.path.join(fm_dir, "y_basic_aug.npy"), allow_pickle=True)
X_basic = np.load(os.path.join(fm_dir, "X_basic.npy"))
y_basic = np.load(os.path.join(fm_dir, "y_basic.npy"), allow_pickle=True)

label_encoder = LabelEncoder()
y_basic = label_encoder.fit_transform(y_basic)
y_basic_aug = label_encoder.transform(y_basic_aug)
n_classes = len(np.unique(y_basic))

if TEST_SET:
    X_train, X_test, y_train, y_test = train_test_split(X_basic, y_basic, test_size=0.3, random_state=42)
    X_train_aug, X_test_aug, y_train_aug, y_test_aug = train_test_split(X_basic_aug, y_basic_aug, test_size=0.3, random_state=42)
else:
    X_train, y_train = X_basic, y_basic
    X_train_aug, y_train_aug = X_basic_aug, y_basic_aug

# ========== HELPERS ==========
def reshape_for_cnn(X):
    if len(X.shape) == 2:
        side = int(np.sqrt(X.shape[1]))
        return X.reshape((-1, side, side, 1))
    return X

def build_cnn(input_shape, n_classes):
    model = Sequential([
        Input(shape=input_shape),
        Conv2D(conv_filters, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2),
        Conv2D(conv_filters * 2, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2),
        Flatten(),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(n_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_and_save_model(X_train, y_train, filename, X_val=None, y_val=None):
    input_shape = X_train.shape[1:]
    y_train_cat = to_categorical(y_train, num_classes=n_classes)
    y_val_cat = to_categorical(y_val, num_classes=n_classes) if y_val is not None else None

    model = build_cnn(input_shape, n_classes)
    es = EarlyStopping(patience=5, restore_best_weights=True)

    model.fit(X_train, y_train_cat, epochs=20, batch_size=32,
              validation_data=(X_val, y_val_cat) if y_val is not None else None,
              callbacks=[es], verbose=0)

    model.save(os.path.join(model_dir, filename + ".h5"))
    return model

def evaluate_model(model, X_test, y_test, description):
    y_pred = np.argmax(model.predict(X_test), axis=1)
    print(f"\n=== {description} ===")
    print(classification_report(y_test, y_pred))

# ========== SCENARIOS ==========
if B:
    X_train_cnn = reshape_for_cnn(X_train)
    X_test_cnn = reshape_for_cnn(X_test)
    model_B = train_and_save_model(X_train_cnn, y_train, "cnn_nopca_noaug_nonorm", X_test_cnn, y_test)

if D:
    X_train_aug_cnn = reshape_for_cnn(X_train_aug)
    X_test_aug_cnn = reshape_for_cnn(X_test_aug)
    model_D = train_and_save_model(X_train_aug_cnn, y_train_aug, "cnn_nopca_aug_nonorm", X_test_aug_cnn, y_test_aug)

if E:
    X_train_cnn = reshape_for_cnn(X_train)
    X_test_cnn = reshape_for_cnn(X_test)
    model_E = train_and_save_model(X_train_cnn, y_train, "cnn_nopca_noaug_nonorm", X_test_cnn, y_test)

if H:
    X_train_aug_norm = np.array([x / np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_train_aug])
    X_test_aug_norm = np.array([x / np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_test_aug])
    X_train_aug_norm_cnn = reshape_for_cnn(X_train_aug_norm)
    X_test_aug_norm_cnn = reshape_for_cnn(X_test_aug_norm)
    model_H = train_and_save_model(X_train_aug_norm_cnn, y_train_aug, "cnn_nopca_aug_norm", X_test_aug_norm_cnn, y_test_aug)

# ========== EVALUATION ==========
if TEST_SET:
    if B: evaluate_model(model_B, X_test_cnn, y_test, "Scenario B: CNN NOPCA NOAUG NONORM")
    if D: evaluate_model(model_D, X_test_aug_cnn, y_test_aug, "Scenario D: CNN NOPCA AUG NONORM")
    if E: evaluate_model(model_E, X_test_cnn, y_test, "Scenario E: CNN NOPCA NOAUG NONORM")
    if H: evaluate_model(model_H, X_test_aug_norm_cnn, y_test_aug, "Scenario H: CNN NOPCA AUG NORM")


2025-04-14 14:55:16.135761: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-14 14:55:16.142262: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-14 14:55:16.183204: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-14 14:55:16.221662: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744635316.268964   31564 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744635316.28

ValueError: The number of filters must be evenly divisible by the number of groups. Received: groups=1, filters=21.56.

HYPERPARAMETER TUNING

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from bayes_opt import BayesianOptimization

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# --- CONFIG FLAGS ---
NORMALIZATION = False
TRANSFORMATION = True

# --- STEP 1: Load/Select Data ---
if TRANSFORMATION:
    try:
        X = X_basic_aug
        y = y_basic_aug
    except NameError:
        raise ValueError("X_basic_aug and y_basic_aug must be defined before running this script.")
else:
    try:
        X = X_basic
        y = y_basic
    except NameError:
        raise ValueError("X_basic and y_basic must be defined before running this script.")

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
n_classes = len(np.unique(y))

if NORMALIZATION:
    X = np.array([x / np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X])

# Reshape to 2D square for CNN
def reshape_for_cnn(X):
    if len(X.shape) == 2:
        side = int(np.sqrt(X.shape[1]))
        return X.reshape((-1, side, side, 1))
    return X

X = reshape_for_cnn(X)

# --- STEP 2: Define Objective Function ---
def build_cnn_model(conv_filters=32, dense_units=64, dropout_rate=0.3, learning_rate=0.001):
    model = Sequential([
        Input(shape=X.shape[1:]),
        Conv2D(conv_filters, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2),
        Flatten(),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(n_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def cnn_cv(conv_filters, dense_units, dropout_rate):
    conv_filters = int(conv_filters)
    dense_units = int(dense_units)
    dropout_rate = float(dropout_rate)

    acc_scores = []
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    for train_idx, val_idx in skf.split(X, y):
        X_train_cv, X_val_cv = X[train_idx], X[val_idx]
        y_train_cv, y_val_cv = y[train_idx], y[val_idx]

        y_train_cv_cat = to_categorical(y_train_cv, num_classes=n_classes)
        y_val_cv_cat = to_categorical(y_val_cv, num_classes=n_classes)

        model = build_cnn_model(conv_filters, dense_units, dropout_rate, learning_rate=0.001)
        model.fit(X_train_cv, y_train_cv_cat, epochs=10, batch_size=32, verbose=0, validation_data=(X_val_cv, y_val_cv_cat))

        val_pred = np.argmax(model.predict(X_val_cv, verbose=0), axis=1)
        acc = np.mean(val_pred == y_val_cv)
        acc_scores.append(acc)

    return np.mean(acc_scores)

# --- STEP 3: Set Up Bayesian Optimizer ---
pbounds = {
    'conv_filters': (16, 128),
    'dense_units': (32, 256),
    'dropout_rate': (0.1, 0.5)
}

optimizer = BayesianOptimization(
    f=cnn_cv,
    pbounds=pbounds,
    random_state=42
)

# --- STEP 4: Run Optimization ---
init_points = 3
n_iter = 10

print("Starting Bayesian Optimization for CNN...")
best_score_so_far = -1.0
optimizer.maximize(init_points=init_points, n_iter=n_iter)

for i, res in enumerate(optimizer.res):
    score = res['target']
    print(f"Iteration {i+1}, CV Accuracy: {score:.4f}, Parameters: {res['params']}")

# --- STEP 5: Best Hyperparameters ---
best_params = optimizer.max['params']
conv_filters = int(best_params['conv_filters'])
dense_units = int(best_params['dense_units'])
dropout_rate = float(best_params['dropout_rate'])

print("\n=== BEST HYPERPARAMETERS FOUND ===")
print(f"conv_filters = {conv_filters}")
print(f"dense_units = {dense_units}")
print(f"dropout_rate = {dropout_rate:.2f}")
print(f"CV Accuracy = {optimizer.max['target']:.4f}")

# --- STEP 6: Final Evaluation ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=999)
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

final_model = build_cnn_model(conv_filters, dense_units, dropout_rate, learning_rate=0.001)
es = EarlyStopping(patience=5, restore_best_weights=True)
final_model.fit(X_train, y_train_cat, validation_split=0.2, epochs=30, batch_size=32, callbacks=[es], verbose=1)

# Evaluate
pred = np.argmax(final_model.predict(X_test), axis=1)
print("\n=== FINAL EVALUATION ON HOLDOUT TEST SET ===")
print(classification_report(y_test, pred))

Starting Bayesian Optimization for CNN...
|   iter    |  target   | conv_f... | dense_... | dropou... |
-------------------------------------------------------------
| [39m1        [39m | [39m0.8818   [39m | [39m57.95    [39m | [39m245.0    [39m | [39m0.3928   [39m |
| [39m2        [39m | [39m0.8607   [39m | [39m83.05    [39m | [39m66.95    [39m | [39m0.1624   [39m |
| [35m3        [39m | [35m0.9042   [39m | [35m22.51    [39m | [35m226.0    [39m | [35m0.3404   [39m |
| [39m4        [39m | [39m0.8943   [39m | [39m23.17    [39m | [39m225.2    [39m | [39m0.4846   [39m |
| [35m5        [39m | [35m0.9129   [39m | [35m21.56    [39m | [35m228.2    [39m | [35m0.3401   [39m |
| [39m6        [39m | [39m0.8918   [39m | [39m17.51    [39m | [39m226.9    [39m | [39m0.4111   [39m |
| [39m7        [39m | [39m0.8781   [39m | [39m25.17    [39m | [39m231.6    [39m | [39m0.198    [39m |
