# AJUSTE DE HIPERPARÁMETROS PARA LA RED NEURONAL PREDICTORA

En este apartado se busca obtener la mejor configuración de hiperparámetros dentro de un campo de búsqueda.

¡¡¡ MUY COSTOSO COMPUTACIONALMENTE !!!

Librerías y semilla estática

In [None]:
import random
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import regularizers, initializers
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


# Use one fixed seed for every random number source used in this notebook
# (NumPy, Python's `random` module and TensorFlow).
SEED = 42
for set_seed in (np.random.seed, random.seed, tf.random.set_seed):
    set_seed(SEED)

Tasa de aprendizaje personalizada

In [None]:
def custom_lr_schedule(epoch, *, initial_lr=1e-5, peak_lr=1e-2, warmup_epochs=10,
                       final_epoch=200, final_lr=1e-3):
    """Return the learning rate for ``epoch``: linear warm-up, then exponential decay.

    For ``epoch < warmup_epochs`` the rate grows linearly from ``initial_lr``
    towards ``peak_lr``. From ``warmup_epochs`` onwards it decays exponentially,
    starting at ``peak_lr`` and reaching ``final_lr`` at ``final_epoch``. The
    decay is not clamped, so epochs beyond ``final_epoch`` keep decaying below
    ``final_lr``.

    The tuning parameters are keyword-only on purpose: Keras'
    ``LearningRateScheduler`` first tries ``schedule(epoch, lr)`` and falls back
    to ``schedule(epoch)`` on ``TypeError``. A second positional parameter would
    silently receive the optimizer's current learning rate.

    Args:
        epoch: Zero-based epoch index.
        initial_lr: Learning rate at epoch 0.
        peak_lr: Learning rate reached at the end of the warm-up.
        warmup_epochs: Number of warm-up epochs.
        final_epoch: Epoch at which the decay reaches ``final_lr``.
        final_lr: Learning rate reached at ``final_epoch``.

    Returns:
        The learning rate as a plain Python ``float``.

    Raises:
        ValueError: If ``final_epoch`` is not greater than ``warmup_epochs``
            (the decay rate would divide by zero or change sign).
    """
    if final_epoch <= warmup_epochs:
        raise ValueError("final_epoch must be greater than warmup_epochs")

    if epoch < warmup_epochs:
        lr = initial_lr + (peak_lr - initial_lr) * (epoch / warmup_epochs)
    else:
        # Rate chosen so that peak_lr * exp(rate * (final_epoch - warmup_epochs)) == final_lr.
        decay_rate = np.log(final_lr / peak_lr) / (final_epoch - warmup_epochs)
        lr = peak_lr * np.exp(decay_rate * (epoch - warmup_epochs))
    return float(lr)

Obtención de datos

In [None]:
# Read the labelled dataset (semicolon-separated, '.' as decimal mark).
file_path = 'supervisado_final.csv'
df = pd.read_csv(file_path, sep=';', decimal='.')

# Target column first, then every remaining column except the user id as features.
y = df['Conclusion'].values
X = df.drop(columns=['ID Usuario', 'Conclusion']).values

Codificación de etiquetas, escalado de datos y cálculo de pesos de las clases

In [None]:
# Map the raw class labels to consecutive integer ids.
encoder = LabelEncoder()
encoder.fit(y)
y_encoded = encoder.transform(y)

# Standardize every feature column.
# NOTE(review): the scaler is fitted on all rows of X, i.e. including rows
# that may later be held out for validation/testing.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# 'balanced' class weights, keyed by encoded class id, for use as the
# `class_weight` argument of Keras `fit`.
class_ids = np.unique(y_encoded)
balanced_weights = compute_class_weight('balanced', classes=class_ids, y=y_encoded)
class_weights = {index: weight for index, weight in enumerate(balanced_weights)}

Dividir el conjunto de datos en train (70%), test (20%) y validation (10%)

In [None]:
# Split data into train (70%), validation (10%) and test (20%) sets.
#
# The raw feature matrix X is split (not the pre-scaled X_scaled) so that the
# scaler can be fitted on the training rows only. Fitting it on the whole
# dataset leaks validation/test statistics into training. Which rows land in
# which subset is unchanged, because the shuffle depends only on the number of
# samples and on random_state.
X_temp, X_test, y_temp, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=SEED)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.125, random_state=SEED)  # 0.125 * 80% = 10% of the original data

# Fit on the training rows, then apply the same transform to the held-out sets.
# `scaler` is rebound to this train-only scaler; X_temp stays unscaled.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

Campo de búsqueda de configuraciones

In [None]:
# Search space for the grid search. Each entry of `layer_sizes` lists the
# number of units of every hidden layer, from first to last.
layer_sizes = [
    # four hidden layers
    [128, 64, 64, 32],
    [128, 64, 32, 16],
    [128, 32, 32, 32],
    [64, 32, 32, 16],
    [64, 32, 16, 8],
    [16, 8, 8, 4],
    # three hidden layers
    [128, 64, 32],
    [128, 32, 32],
    [64, 32, 16],
]

# Dropout rate applied after every hidden layer.
dropout_rates = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# L1 and L2 regularization strengths share the same candidate values.
l1_values = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
l2_values = list(l1_values)

Entrenamiento, obtención y visualización de la matriz de confusión para la mejor combinación de hiperparámetros explorada

In [None]:
def build_model(layers, dropout_rate, l1, l2):
    """Build and compile one candidate classifier.

    Every entry of ``layers`` becomes a ReLU Dense layer (Glorot-uniform
    initializer, L1/L2 kernel regularization) followed by BatchNormalization
    and Dropout. The output layer is a softmax over ``encoder.classes_``.

    Args:
        layers: Units of each hidden layer, first to last.
        dropout_rate: Dropout rate applied after every hidden layer.
        l1: L1 regularization factor for the hidden-layer kernels.
        l2: L2 regularization factor for the hidden-layer kernels.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential()
    for position, units in enumerate(layers):
        # Only the first Dense layer declares the input shape.
        first_layer_kwargs = {'input_shape': (X_train.shape[1],)} if position == 0 else {}
        network.add(Dense(units, activation='relu',
                          kernel_initializer=initializers.glorot_uniform(),
                          kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2),
                          **first_layer_kwargs))
        network.add(BatchNormalization())
        network.add(Dropout(dropout_rate))

    network.add(Dense(len(encoder.classes_), activation='softmax',
                      kernel_initializer=initializers.glorot_uniform()))

    # The initial learning rate is only a placeholder: the LearningRateScheduler
    # callback sets the rate at the start of every epoch.
    network.compile(optimizer=Adam(learning_rate=1e-3),
                    loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network


def make_callbacks():
    """Return fresh ``(lr_scheduler, early_stopping)`` callbacks for one training run."""
    scheduler = LearningRateScheduler(custom_lr_schedule)
    stopper = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
    return scheduler, stopper


def fit_model(network, callbacks):
    """Train ``network`` on the training split, validating on the validation split."""
    return network.fit(X_train, y_train, epochs=200, batch_size=16,
                       validation_data=(X_val, y_val), class_weight=class_weights,
                       callbacks=callbacks, verbose=0)


# Store results
results = []

# Iterate over all combinations of hyperparameters
for layers, dropout_rate, l1, l2 in product(layer_sizes, dropout_rates, l1_values, l2_values):
    # Drop the Keras global state left by the previous candidate. Without this,
    # building many models in a loop keeps consuming more memory.
    tf.keras.backend.clear_session()

    model = build_model(layers, dropout_rate, l1, l2)
    lr_scheduler, early_stopping = make_callbacks()
    history = fit_model(model, [lr_scheduler, early_stopping])

    # Best value of each metric over all epochs. The two values are taken
    # independently, so they may come from different epochs.
    val_loss = min(history.history['val_loss'])
    val_accuracy = max(history.history['val_accuracy'])

    # Store the results
    results.append({
        'layers': layers,
        'dropout_rate': dropout_rate,
        'l1': l1,
        'l2': l2,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy
    })

    print(f"Layers: {layers}, Dropout: {dropout_rate}, L1: {l1}, L2: {l2}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")

# Find the best configuration (highest validation accuracy)
best_result = max(results, key=lambda x: x['val_accuracy'])
print(f"\nBest configuration: {best_result}")

# Rebuild and retrain the winning configuration from scratch. Fresh callbacks
# are created instead of reusing the instances left over from the last search
# iteration.
best_model = build_model(best_result['layers'], best_result['dropout_rate'],
                         best_result['l1'], best_result['l2'])
optimizer = best_model.optimizer  # keep the name bound for any later cell
lr_scheduler, early_stopping = make_callbacks()
fit_model(best_model, [lr_scheduler, early_stopping])

# Evaluate the best model on the test set: the predicted class is the one
# with the highest softmax output.
y_pred = best_model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Class names as strings, in encoded-id order, plus the matching ids.
# Passing the ids explicitly keeps the report and the matrix aligned with
# `target_names` even if some class does not appear in the test split.
target_names = [str(class_name) for class_name in encoder.classes_]
class_ids = np.arange(len(target_names))

# Per-class precision / recall / F1
print(classification_report(y_test, y_pred_classes, labels=class_ids, target_names=target_names))

# Plot confusion matrix. scikit-learn's ConfusionMatrixDisplay replaces the
# seaborn heatmap, because seaborn is not imported anywhere in this file.
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
fig, ax = plt.subplots(figsize=(8, 6))
ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=target_names).plot(
    cmap='Blues', values_format='d', ax=ax)
ax.set_xlabel('Predicción')
ax.set_ylabel('Verdadero')
ax.set_title('Matriz de Confusión')
plt.show()