In [58]:
import numpy as np
import os
import optuna
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from sklearn.cluster import KMeans
from keras.optimizers import Adam
from PIL import Image

In [59]:
# Directorio que contiene las imágenes
image_directory = "images/resized_dataset/"

# Inicializar una lista vacía para almacenar las imágenes
images = []
images_test = []

# Tamaño del lote
batch_size = 100

# Recorrer las subcarpetas dentro del directorio
for root, dirs, files in os.walk(image_directory):
    for directory in dirs:
        subdir = os.path.join(root, directory)
        # Obtener la lista de nombres de archivo de las imágenes en la subcarpeta
        image_files = os.listdir(subdir)
        # Procesar las imágenes en lotes
        total = len(image_files)
        porcentaje = 80
        entrenamiento = int((total/100)*porcentaje)
        prueba = int(total - entrenamiento)
        for i in range(0, len(image_files), batch_size):
            # Cargar y convertir las imágenes en matrices numpy para entrenamiento
            if (i <= entrenamiento):
                batch_images = []
                for file in image_files[i:i+batch_size]:
                    image_path = os.path.join(subdir, file)
                    image = Image.open(image_path)
                    image_array = np.array(image)
                    batch_images.append(image_array)
                
                # Concatenar las matrices del lote en un solo array
                batch_X = np.concatenate(batch_images)
                images.append(batch_X)
            else:
                # Cargar y convertir las imágenes en matrices numpy para pruebas
                batch_images_test = []
                for file in image_files[i:i+batch_size]:
                    image_path = os.path.join(subdir, file)
                    image = Image.open(image_path)
                    image_array = np.array(image)
                    batch_images_test.append(image_array)
                
                # Concatenar las matrices del lote en un solo array
                batch_X_test = np.concatenate(batch_images_test)
                images_test.append(batch_X_test)

In [60]:
# Concatenar los lotes en un solo array
X_train = np.concatenate(images)
X_test = np.concatenate(images_test)

# Aplanar las matrices de imágenes a un formato bidimensional
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Convertir las imágenes a tensores
X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)

# Normalizar los datos de entrenamiento y prueba utilizando normalización min-max
#X_train = tf.keras.utils.normalize(X_train, axis=1)
#X_test = tf.keras.utils.normalize(X_test, axis=1)

print (X_train[0])

tf.Tensor(
[104. 151.  49. 104. 151.  49. 106. 151.  50. 109. 154.  53. 111. 156.
  55. 107. 152.  51. 101. 143.  41.  94. 136.  34. 104. 147.  42. 112.
 155.  50. 114. 155.  51. 103. 144.  40.  96. 137.  31. 100. 141.  35.
 110. 151.  45. 115. 158.  50.  96. 143.  27.  97. 146.  28.  98. 147.
  32.  94. 142.  30.  88. 136.  26.  87. 134.  28.  91. 136.  35.  95.
 139.  42.  93. 134.  42. 103. 143.  55. 111. 151.  65. 109. 148.  65.
 104. 141.  61. 102. 139.  59. 106. 143.  65. 109. 146.  66. 100. 137.
  57.  97. 135.  52.  95. 133.  48.  98. 136.  51. 104. 142.  57. 110.
 148.  63. 113. 151.  64. 114. 152.  65. 123. 161.  74. 114. 152.  65.
 103. 142.  53.  97. 136.  47.  98. 137.  48. 101. 140.  51. 101. 140.
  51. 100. 139.  50. 103. 138.  48. 106. 141.  51. 106. 141.  51. 104.
 139.  49. 102. 136.  49. 100. 134.  47.  93. 127.  41.  84. 118.  32.
  83. 117.  33.  77. 111.  27.  71. 104.  23.  68. 101.  20.  71. 104.
  23.  76. 109.  28.  78. 111.  32.  78. 111.  32.], shape=(192,),

In [61]:
# Entrenar el modelo K-Means para generar etiquetas de clúster
kmeans = KMeans(n_clusters=38)
kmeans.fit(X_train)

# Obtener las etiquetas de clúster asignadas a los puntos de datos
labels_train = kmeans.labels_
print(labels_train)
labels_test = kmeans.predict(X_test)
print(labels_test)



[20 31 20 ... 11 16 11]
[19 19 19 ... 36 36 36]


In [63]:
# Definir la función de pérdida para el clustering K-Means
def kmeans_loss(y_true, y_pred):
    if y_true.dtype != tf.float32:
        y_true = tf.cast(y_true, tf.float32)
    if y_pred.dtype != tf.float32:
        y_pred = tf.cast(y_pred, tf.float32)
    return tf.norm(y_pred - y_true, axis=1)

def categorical_crossentropy(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)  # Asegurar valores en el rango (epsilon, 1-epsilon) para evitar log(0)
    return -tf.reduce_sum(y_true * tf.math.log(y_pred), axis=-1)

# Definir el modelo K-Means con una capa oculta
input_shape = X_train.shape[1:]
print(input_shape)
model_input = layers.Input(shape=input_shape)
hidden_layer = layers.Dense(units=64, activation='sigmoid')(model_input) 
kmeans_output = layers.Dense(units=38, activation='softmax')(hidden_layer)

# Compilar el modelo
optimizer = Adam(learning_rate = 0.01)
model = Model(inputs=model_input, outputs=kmeans_output)
model.compile(optimizer=optimizer, loss=kmeans_loss, metrics=['accuracy'])

# Entrenar el modelo utilizando las etiquetas de clúster generadas
model.fit(X_train, labels_train, epochs=5, batch_size=2048)

# Obtener las etiquetas de clúster asignadas a los puntos de datos
labels_pred_train = model.predict(X_train)
labels_pred_test = model.predict(X_test)

# Imprimir las etiquetas de clúster
print(labels_pred_train)
print(labels_pred_test)


(192,)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[[0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 ...
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.02612459 0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.02612459 0.02635355 ... 0.02630159 0.02631099 0.02629538]]
[[0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 ...
 [0.02630265 0.0261246  0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.02612459 0.02635355 ... 0.02630159 0.02631099 0.02629538]
 [0.02630265 0.02612459 0.02635355 ... 0.02630159 0.02631099 0.02629538]]


In [64]:
def objective(trial):
    # Definir los rangos de búsqueda para los hiperparámetros
    learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.9)
    epochs = trial.suggest_int('epochs', 5, 10)
    
    # Definir el modelo K-Means con una capa oculta
    input_shape = X_train.shape[1:]
    model_input = layers.Input(shape=input_shape)
    hidden_layer = layers.Dense(units=64, activation='sigmoid')(model_input) 
    kmeans_output = layers.Dense(units=38, activation='softmax')(hidden_layer)
    
    # Compilar el modelo con los hiperparámetros sugeridos
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model = Model(inputs=model_input, outputs=kmeans_output)
    model.compile(optimizer=optimizer, loss=kmeans_loss, metrics=['accuracy'])
    
    # Entrenar el modelo utilizando las etiquetas de clúster generadas
    model.fit(X_train, labels_train, epochs=epochs, batch_size=2048, verbose=0)
    
    # Evaluar el modelo en los datos de prueba
    accuracy = model.evaluate(X_test, labels_test)[1]
    
    # Devolver el valor de métrica a optimizar (precisión en este caso)
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10, show_progress_bar=False)

# Imprimir los mejores valores encontrados
print('Mejor valor de accuracy:', study.best_value)
print('Mejores hiperparámetros:', study.best_params)

[I 2023-06-14 08:12:34,234] A new study created in memory with name: no-name-f4274951-3002-4d7d-b57a-8ace25ed0545
  learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.9)




[I 2023-06-14 08:13:25,144] Trial 0 finished with value: 0.019291074946522713 and parameters: {'learning_rate': 0.1711939257602313, 'epochs': 8}. Best is trial 0 with value: 0.019291074946522713.




[I 2023-06-14 08:14:14,137] Trial 1 finished with value: 0.018440615385770798 and parameters: {'learning_rate': 0.012619017638839531, 'epochs': 9}. Best is trial 0 with value: 0.019291074946522713.




[I 2023-06-14 08:14:50,975] Trial 2 finished with value: 0.04489409551024437 and parameters: {'learning_rate': 0.21385921042323788, 'epochs': 6}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:15:23,440] Trial 3 finished with value: 0.02467031590640545 and parameters: {'learning_rate': 0.001035603556659793, 'epochs': 5}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:16:08,056] Trial 4 finished with value: 0.012119918130338192 and parameters: {'learning_rate': 0.004813226074917951, 'epochs': 8}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:16:40,516] Trial 5 finished with value: 0.014484054408967495 and parameters: {'learning_rate': 0.1750915567814973, 'epochs': 5}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:17:32,833] Trial 6 finished with value: 0.02074175514280796 and parameters: {'learning_rate': 0.48927072802758714, 'epochs': 10}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:18:05,386] Trial 7 finished with value: 0.005877967923879623 and parameters: {'learning_rate': 0.02295807318934476, 'epochs': 5}. Best is trial 2 with value: 0.04489409551024437.




[I 2023-06-14 08:18:53,371] Trial 8 finished with value: 0.060683585703372955 and parameters: {'learning_rate': 0.001884264720655518, 'epochs': 9}. Best is trial 8 with value: 0.060683585703372955.




[I 2023-06-14 08:19:29,444] Trial 9 finished with value: 0.018101131543517113 and parameters: {'learning_rate': 0.06594596931567717, 'epochs': 6}. Best is trial 8 with value: 0.060683585703372955.


Mejor valor de accuracy: 0.060683585703372955
Mejores hiperparámetros: {'learning_rate': 0.001884264720655518, 'epochs': 9}


In [33]:
# Imprimir los mejores valores encontrados
print('Mejor valor de accuracy:', study.best_value)
print('Mejores hiperparámetros:', study.best_params)

Mejor valor de accuracy: 0.11121363937854767
Mejores hiperparámetros: {'learning_rate': 0.005726461916546583, 'epochs': 9}
