# Training

In [None]:
import gc
import os
import pickle
import time
from itertools import product

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.api.callbacks import EarlyStopping
from keras.api.layers import LSTM, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras.api.models import Sequential, load_model
from keras.api.regularizers import l2
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler

# Input locations: the data root and the audio folders nested under it
DATA_DIR = "../../data"
AUDIOS_DIR = f"{DATA_DIR}/audios"
POS_AUDIOS_DIR = f"{AUDIOS_DIR}/positive"
NEG_AUDIOS_DIR = f"{AUDIOS_DIR}/negative"

# Locations used for model files and saved figures
MODELS_DIR = "../../models"
PLOTS_DIR = "../../plots"


## Major constants

In [20]:
# Key of the model builder to train; it is looked up in the TRAINING dict and
# also used in the names of the saved plots, parameter logs and model files.
MODEL = "ff"  # 'cnn', 'lstm', 'ff'
# Feature set to work with; it selects the pickle file to load and the encoder
# model applied during preprocessing.
FEATURE = "lfcc"  # 'mfcc', 'chroma', 'lfcc'

### Load data

In [21]:
# Read the stored features for the selected FEATURE.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(f"{DATA_DIR}/{FEATURE}.pkl", "rb") as f:
    data = pickle.load(f)

# Positive class: samples, all-ones labels and the matching names
positive_data = np.array(data["positive"])
positive_labels = np.ones(len(positive_data))
pnames = data["pnames"]

# Negative class: samples, all-zeros labels and the matching names
negative_data = np.array(data["negative"])
negative_labels = np.zeros(len(negative_data))
nnames = data["nnames"]

# Stack both classes along the sample axis (positives first)
X = np.concatenate([positive_data, negative_data])
y = np.concatenate([positive_labels, negative_labels])
names = np.concatenate([pnames, nnames])

# Fixed seed (any integer works) so the shuffle is reproducible
seed = 1
rng = np.random.default_rng(seed)

# Apply one shared permutation so samples, labels and names stay aligned
idx = rng.permutation(len(X))
X, y, names = X[idx], y[idx], names[idx]

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")


X shape: (10000, 12, 313)
y shape: (10000,)


### Preprocess pipeline

In [22]:
# Helper functions
def normalize(X):
    """Standardize ``X`` with the mean and standard deviation of the whole array.

    The statistics are computed over every element (no axis is given), so the
    result has zero global mean and, when the data is not constant, unit
    global standard deviation.

    Parameters
    ----------
    X : numpy.ndarray
        Array to standardize.

    Returns
    -------
    numpy.ndarray
        ``(X - mean) / std``. When the standard deviation is exactly zero
        (constant input) the centred array is returned instead, avoiding a
        0/0 division that would fill the result with NaN.
    """
    centered = X - np.mean(X)
    spread = np.std(X)
    if spread == 0:
        # Constant input: every centred value is already 0, nothing to rescale.
        return centered
    return centered / spread


def sub_extra_column(X):
    """Return ``X`` without the last entry along its third axis (axis 2)."""
    keep_all = slice(None)
    all_but_last = slice(None, -1)
    return X[keep_all, keep_all, all_but_last]


def transpose(X):
    """Swap the last two axes of a 3-D array, leaving the first axis in place."""
    axis_order = (0, 2, 1)
    return np.transpose(X, axis_order)


# Pipeline definition: each preprocessing step is registered under a name so it
# can be fetched individually through `named_steps`.
preprocessing_pipeline = Pipeline(
    steps=[
        # Global standardization
        ("normalize", FunctionTransformer(func=normalize, validate=False)),
        # Scaling to the [0, 1] range
        ("scale", MinMaxScaler(feature_range=(0, 1))),
        # Drop the last column
        ("sub_column", FunctionTransformer(func=sub_extra_column, validate=False)),
        # Swap the last two axes
        ("transpose", FunctionTransformer(func=transpose, validate=False)),
    ]
)


# Apply the pipeline
def process_and_encode(X):
    """Run the preprocessing steps on ``X`` and return the encoder's output.

    The steps of ``preprocessing_pipeline`` are applied one by one: normalize,
    min-max scale, drop the last column, transpose. The result gets a trailing
    axis of size 1 and is passed to the encoder model stored for the current
    ``FEATURE``, which is loaded from disk on every call.

    NOTE(review): the scaler is fitted on whatever ``X`` is passed in. When
    this is called on the full dataset before the train/test split, the test
    samples contribute to the scaling statistics.

    Parameters
    ----------
    X : numpy.ndarray
        Raw feature array; the steps index three axes.

    Returns
    -------
    The value returned by ``encoder.predict``.
    """
    steps = preprocessing_pipeline.named_steps

    X = steps["normalize"].transform(X)

    # Flatten each sample for MinMaxScaler, then restore the original shape
    flattened = X.reshape(X.shape[0], -1)
    X = steps["scale"].fit_transform(flattened).reshape(X.shape)

    X = steps["sub_column"].transform(X)
    X = steps["transpose"].transform(X)

    # Prediction with the encoder; it receives a trailing axis of size 1
    encoder = load_model(f"{MODELS_DIR}/encoders/encoder_{FEATURE}.keras")
    n_samples, n_rows, n_cols = X.shape[0], X.shape[1], X.shape[2]
    return encoder.predict(X.reshape(n_samples, n_rows, n_cols, 1))


# Replace the raw features with their encoded representation.
# NOTE(review): X is rebound to the result, so this cell is not idempotent;
# running it again feeds the already-encoded array back into the pipeline.
X = process_and_encode(X)
print(X.shape)
# NOTE(review): this prints the array itself, which produces a long output.
print(X)


  saveable.load_own_variables(weights_store.get(inner_path))


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
(10000, 78, 12, 1)
[[[[0.83060974]
   [0.28658724]
   [0.59740955]
   ...
   [0.40920192]
   [0.56993747]
   [0.4996098 ]]

  [[0.84445775]
   [0.17025794]
   [0.6580201 ]
   ...
   [0.39519733]
   [0.612451  ]
   [0.4178421 ]]

  [[0.84018874]
   [0.19158062]
   [0.60329574]
   ...
   [0.40150952]
   [0.63888186]
   [0.3920681 ]]

  ...

  [[0.800269  ]
   [0.16760196]
   [0.76788884]
   ...
   [0.37595394]
   [0.6050345 ]
   [0.4806747 ]]

  [[0.78699505]
   [0.21286005]
   [0.72147405]
   ...
   [0.3155495 ]
   [0.63188136]
   [0.42771748]]

  [[0.81202483]
   [0.3372992 ]
   [0.6768875 ]
   ...
   [0.4934861 ]
   [0.65454   ]
   [0.51404107]]]


 [[[0.672771  ]
   [0.42719722]
   [0.4659889 ]
   ...
   [0.4588887 ]
   [0.5727831 ]
   [0.4673935 ]]

  [[0.6328412 ]
   [0.46765032]
   [0.4658005 ]
   ...
   [0.46982434]
   [0.5966268 ]
   [0.40538374]]

  [[0.6242092 ]
   [0.46751374]
   [0.4712818 ]
   ...
  

### Training

In [23]:
# Hold-out split settings; `percent` scales both fractions passed to the split
percent = 1
trn_size = 0.8
tst_size = 1 - trn_size

print(X.shape, y.shape, sep="\n")

# The index array is split together with X and y so that every sample can be
# traced back to its position in the shuffled dataset.
X_train, X_tst, y_train, y_tst, idxtrn, idxtst = train_test_split(
    X,
    y,
    np.arange(len(X)),
    stratify=y,
    random_state=seed,
    train_size=percent * trn_size,
    test_size=percent * tst_size,
)

# Maintain the names of the audios for each partition
names = np.array(names)
namestrn, namestst = names[idxtrn], names[idxtst]

print(X_train.shape, y_train.shape, X_tst.shape, y_tst.shape, sep="\n")


(10000, 78, 12, 1)
(10000,)
(8000, 78, 12, 1)
(8000,)
(2000, 78, 12, 1)
(2000,)


In [24]:
def FF(optimizer="adam", units=128, activation="relu", dropout_rate=0.2):
    """Build and compile the feed-forward binary classifier.

    The input shape is read from the module-level ``X`` (axes 1 and 2, plus a
    single channel). The input is flattened and passed through three
    Dense + Dropout blocks of width ``units``, ``units // 2`` and
    ``units // 4``, ending in one sigmoid unit.

    Parameters
    ----------
    optimizer : optimizer passed to ``compile``.
    units : width of the first hidden layer.
    activation : activation of the hidden Dense layers.
    dropout_rate : rate of every Dropout layer.

    Returns
    -------
    The compiled model (binary cross-entropy loss, accuracy metric).
    """
    model = Sequential(name="FF")
    model.add(Input(shape=(X.shape[1], X.shape[2], 1)))
    model.add(Flatten())

    # Hidden blocks: the layer width is halved at every step
    for width in (units, units // 2, units // 4):
        model.add(Dense(width, activation=activation))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation="sigmoid"))

    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return model


FF().summary()

In [25]:
def CNN(optimizer="adam", units=24, activation="relu", dropout_rate=0.2):
    """Build and compile the convolutional binary classifier.

    The input shape is read from the module-level ``X`` (axes 1 to 3). Four
    Conv2D + MaxPooling2D + Dropout stages are followed by two Dense + Dropout
    blocks of width ``units`` and ``units // 2`` and a single sigmoid unit.

    Parameters
    ----------
    optimizer : optimizer passed to ``compile``.
    units : width of the first Dense layer after flattening.
    activation : activation of the Conv2D and hidden Dense layers.
    dropout_rate : rate of every Dropout layer.

    Returns
    -------
    The compiled model (binary cross-entropy loss, accuracy metric).
    """
    # (filters, kernel size, pooling window) of each convolutional stage
    conv_stages = (
        (4, (7, 7), (2, 1)),
        (8, (5, 5), (3, 1)),
        (16, (5, 5), (2, 2)),
        (32, (3, 3), (2, 2)),
    )

    model = Sequential(name="CNN")
    model.add(Input(shape=(X.shape[1], X.shape[2], X.shape[3])))

    for filters, kernel, pool in conv_stages:
        model.add(Conv2D(filters, kernel, activation=activation, padding="same"))
        model.add(MaxPooling2D(pool))
        model.add(Dropout(dropout_rate))

    model.add(Flatten())
    for width in (units, units // 2):
        model.add(Dense(width, activation=activation))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation="sigmoid"))

    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return model


CNN().summary()

In [26]:
def LSTM_MODEL(optimizer="adam", units=16, activation="relu", dropout_rate=0.2):
    """Build and compile the recurrent binary classifier.

    The input shape is read from the module-level ``X`` (axes 1 and 2). The
    network has a single LSTM layer (tanh activation, L2 kernel regularization,
    last output only), then one Dense + Dropout block of width ``units // 2``
    and a single sigmoid unit.

    Parameters
    ----------
    optimizer : optimizer passed to ``compile``.
    units : number of LSTM units.
    activation : activation of the hidden Dense layer (the LSTM always uses tanh).
    dropout_rate : rate of every Dropout layer.

    Returns
    -------
    The compiled model (binary cross-entropy loss, accuracy metric).
    """
    model = Sequential(
        [
            Input(shape=(X.shape[1], X.shape[2])),
            # The only recurrent layer of the network
            LSTM(
                units,
                activation="tanh",
                return_sequences=False,
                kernel_regularizer=l2(0.03),
            ),
            Dropout(dropout_rate),
            Dense(units // 2, activation=activation),
            Dropout(dropout_rate),
            # Output layer for binary classification
            Dense(1, activation="sigmoid"),
        ],
        name="LSTM",
    )

    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return model


LSTM_MODEL().summary()

In [27]:
# Maps each accepted MODEL key to the function that builds that model
TRAINING = dict(ff=FF, cnn=CNN, lstm=LSTM_MODEL)


def save_fig(MODEL, FEATURE, timestamp):
    """Save the current matplotlib figure as a PNG named after ``timestamp``.

    The file is written to ``<PLOTS_DIR>/<MODEL>_<FEATURE>/<timestamp>.png``;
    the folder is created when it does not exist yet.
    """
    target_dir = f"{PLOTS_DIR}/{MODEL}_{FEATURE}"
    # exist_ok avoids an error when the folder is already there
    os.makedirs(target_dir, exist_ok=True)
    plt.savefig(f"{target_dir}/{timestamp}.png")


# Collected metrics, one entry per evaluated parameter combination
results = []

# Every (units, dropout_rate, activation) combination to evaluate, with the
# first value varying slowest
param_combinations = [
    (units_option, dropout_option, activation_option)
    for units_option in (64, 128, 256)
    for dropout_option in (0.1, 0.2, 0.3)
    for activation_option in ("relu",)
]


In [None]:
# K-fold cross-validation of every entry in `param_combinations`.
# For each combination: train one model per fold, record the best validation
# epoch, append a summary to `results` and to the parameter log, and save a
# figure with the learning curves of all folds.
percent = 0.5  # fraction of each fold's training split used for fitting
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=seed)

# Folder of the per-model parameter logs; created up front so that opening the
# log in append mode cannot fail because the folder is missing.
PARAMS_DIR = "../../params"
os.makedirs(PARAMS_DIR, exist_ok=True)

for params in param_combinations:
    units, dropout_rate, activation = params
    print(f"\nProbando combinación de parámetros: {params}")

    accuracies = []
    losses = []
    histories = []
    times = []
    for fold, (train_index, val_index) in enumerate(kf.split(X_train), start=1):
        print(f"Fold {fold}: ")

        xtrn, xval = X_train[train_index], X_train[val_index]
        ytrn, yval = y_train[train_index], y_train[val_index]

        # Keep only the leading `percent` of the training split; the
        # validation split is used in full.
        n_used = int(len(xtrn) * percent)
        xtrn = xtrn[:n_used]
        ytrn = ytrn[:n_used]

        # Stop when the validation loss no longer improves
        early_stopping = EarlyStopping(
            monitor="val_loss", patience=10, min_delta=0.01, restore_best_weights=True
        )

        # Build the model with the current parameters
        model = TRAINING[MODEL](
            optimizer="adam",
            units=units,
            activation=activation,
            dropout_rate=dropout_rate,
        )

        # Train the model and time the fit
        start_time = time.time()
        history = model.fit(
            xtrn,
            ytrn,
            validation_data=(xval, yval),
            epochs=200,
            batch_size=64,
            verbose=1,
            callbacks=[early_stopping],
        )
        end_time = time.time()

        # Metrics of the epoch with the lowest validation loss. A dedicated
        # name is used so the shuffle permutation `idx` is not overwritten.
        best_epoch = np.argmin(history.history["val_loss"])
        losses.append(history.history["val_loss"][best_epoch])
        accuracies.append(history.history["val_accuracy"][best_epoch])
        times.append(end_time - start_time)
        print(f"Fold {fold} - Pérdida de validación: {losses[-1]:.5f}")
        print(f"Fold {fold} - Accuracy de validación: {accuracies[-1]:.5f}")

        histories.append(history)

        # Release the model and free memory before the next fold
        del model
        tf.keras.backend.clear_session()
        gc.collect()

    # Store the metrics of the current combination
    mean_time = np.mean(times)
    time_str = time.strftime("%H:%M:%S", time.gmtime(mean_time))
    summary = {
        "params": params,
        "percent": percent,
        "time": time_str,
        "mean_accuracy": np.mean(accuracies),
        "mean_loss": np.mean(losses),
    }
    results.append(summary)

    # Print every result collected so far. Adjacent f-strings are used so no
    # indentation whitespace ends up inside the printed line.
    for result in results:
        print(
            f"Parámetros: {result['params']}, "
            f"Porcentaje: {result['percent']}, "
            f"Tiempo: {result['time']}, "
            f"Mean Accuracy: {result['mean_accuracy']:.4f}, "
            f"Mean Loss: {result['mean_loss']:.4f}"
        )

    # Identifier with year, month, day, hour, minute and second
    timestamp = time.strftime("%Y%m%d%H%M%S")

    # Append the summary of this combination to the model's parameter log
    with open(f"{PARAMS_DIR}/{MODEL}_{FEATURE}.txt", "a") as f:
        f.write(f"ID: {timestamp}\n")
        f.write(f"Porcentaje: {summary['percent']}\n")
        f.write(f"Parámetros: {summary['params']}\n")
        f.write(f"Tiempo medio: {summary['time']}\n")
        f.write(f"Accuracy medio: {summary['mean_accuracy']:.4f}\n")
        f.write(f"Loss medio: {summary['mean_loss']:.4f}\n\n")

    # Learning curves on a 2 x k grid: loss on the top row, accuracy on the
    # bottom row, one column per fold.
    fig = plt.figure(figsize=(15, 6))

    for i in range(k):
        plt.subplot(2, k, i + 1)
        plt.plot(histories[i].history["loss"], label=f"Fold {i + 1} Training")
        plt.plot(histories[i].history["val_loss"], label=f"Fold {i + 1} Validation")
        plt.title("Loss")
        plt.ylim(0, 0.8)
        plt.grid()
        plt.legend()

        plt.subplot(2, k, i + 1 + k)
        plt.plot(histories[i].history["accuracy"], label=f"Fold {i + 1} Training")
        plt.plot(histories[i].history["val_accuracy"], label=f"Fold {i + 1} Validation")
        plt.title("Accuracy")
        plt.ylim(0.5, 1)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    save_fig(MODEL, FEATURE, timestamp)
    # Show first, then close: a figure that is already closed is not displayed
    plt.show()
    plt.close(fig)



Probando combinación de parámetros: (64, 0.1, 'relu')
Fold 1: 
Epoch 1/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.4957 - loss: 0.7289 - val_accuracy: 0.4837 - val_loss: 0.6938
Epoch 2/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5107 - loss: 0.6942 - val_accuracy: 0.4869 - val_loss: 0.6928
Epoch 3/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5101 - loss: 0.6933 - val_accuracy: 0.5131 - val_loss: 0.6923
Epoch 4/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4911 - loss: 0.6948 - val_accuracy: 0.4944 - val_loss: 0.6918
Epoch 5/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5327 - loss: 0.6904 - val_accuracy: 0.5375 - val_loss: 0.6890
Epoch 6/200
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5218 - loss: 0.6918 - val_accur

In [28]:
# Final fit with the selected parameters, then evaluation on the hold-out set
units = 128
dropout_rate = 0.1
activation = "relu"

# Early stopping on the training loss (this fit has no validation data)
early_stopping = EarlyStopping(
    monitor="loss", patience=25, min_delta=0.005, restore_best_weights=True
)

# Build the model with the selected parameters
model = TRAINING[MODEL](
    optimizer="adam", units=units, activation=activation, dropout_rate=dropout_rate
)

# Train the model. The callback is passed to fit() so it takes part in training;
# without it the EarlyStopping object above has no effect.
# NOTE(review): patience (25) is larger than epochs (20), so the callback cannot
# end this run early. With restore_best_weights=True it may still roll the
# weights back to the best-loss epoch when training ends (confirm for the
# installed Keras version). Raise `epochs` if early stopping is wanted.
history = model.fit(
    X_train, y_train, epochs=20, batch_size=64, callbacks=[early_stopping]
)

# Evaluate the model on the test partition
loss, acc = model.evaluate(X_tst, y_tst)
print(f"Test Accuracy: {acc:.4f}, Test Loss: {loss:.4f}")


Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.5072 - loss: 0.7028
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5021 - loss: 0.6959
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5361 - loss: 0.6893
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5660 - loss: 0.6825
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5697 - loss: 0.6810
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5819 - loss: 0.6745
Epoch 7/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5889 - loss: 0.6741
Epoch 8/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5784 - loss: 0.6789
Epoch 9/20
[1m125/125[0m [32m━━━━━━━━

In [29]:
# Save the trained model; the test loss (4 decimals) is part of the file name
model_path = f"{MODELS_DIR}/{MODEL}_{FEATURE}_{loss:.4f}.keras"
model.save(model_path)
