# Sin autoencoders

In [1]:
"""
Used libraries for the entire project
"""
import os
import numpy as np
import pandas as pd
from PIL import Image
import random 
import shutil
import matplotlib.pyplot as plt
import time
import optuna

from keras.models import Model
from keras.layers import Input, Dense, Conv2D, Conv2DTranspose, Flatten, MaxPooling2D, UpSampling2D, Reshape
from keras.utils import to_categorical
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
-- Load data from analysis dataset --
"""

parent_folder_path = "images/resized_dataset/"

# get the categories
categories = {}
for index, folder_name in enumerate(sorted(os.listdir(parent_folder_path))):
    folder_path = os.path.join(parent_folder_path, folder_name)
    if os.path.isdir(folder_path):
        categories[folder_name] = index

# get the amount of images per category
category_amount = []
for category in categories.keys():
    folder_path = os.path.join(parent_folder_path, category)
    image_files = os.listdir(folder_path)
    category_amount.append(len(image_files))

# get images 
arrays = []
for cat_folder, value in categories.items():

    folder_path = os.path.join(parent_folder_path, cat_folder)
    image_files = os.listdir(folder_path)

    for i, file_name in enumerate(image_files):

        file_path = os.path.join(folder_path, file_name)
        image = Image.open(file_path)
        image_array = np.array(image)

        # verify all images are of the desired size
        if image.size != (64, 64):
            print(file_path, " IS NOT 64x64, it is: ", image.size)
            continue

        arrays.append(image_array)

# generate the labels array
arrays_labels = []
for i in range(len(categories)):
    arrays_labels += [i] * category_amount[i]
arrays_labels = np.array(arrays_labels)

In [3]:
"""
-- Load autoencoder, train and testing data --

Using the distribution:
80% -> autoencoder
10% -> training data
10% -> testing data
"""

X_auto, X, y_auto, y = train_test_split(arrays, arrays_labels, test_size=0.8, random_state=42, stratify=arrays_labels)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# normalize 
X_auto = np.array(X_auto).astype('float32') / 255
X_train = np.array(X_train).astype('float32') / 255
X_test = np.array(X_test).astype('float32') / 255
X = np.array(X).astype('float32') / 255

# resize 
X_auto = X_auto.reshape((len(X_auto), 64, 64, 3))
X_train = X_train.reshape((len(X_train), 64, 64, 3))
X_test = X_test.reshape((len(X_test), 64, 64, 3))
X = X.reshape((len(X), 64, 64, 3))

# make labels as lists with possible values
y_train = to_categorical(y_train, num_classes=38)
y_test = to_categorical(y_test, num_classes=38)
y = to_categorical(y, num_classes=38)

In [30]:
def objective(trial):

    # define hyperparameters to be optimized
    lr = trial.suggest_float('lr', 0.0001, 0.5, log=True)
    epochs = trial.suggest_int('epochs', 5, 10)    

    print(f"Beggining trial with lr={lr} and epochs={epochs}")

    input_img = Input(shape=X_auto[0].shape)
    layer = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    layer = MaxPooling2D((2, 2), padding='same')(layer)
    layer = Conv2D(16, (3, 3), activation='relu', padding='same')(layer)
    layer = MaxPooling2D((2, 2), padding='same')(layer)
    layer = Flatten()(layer)
    layer = Dense(128, activation='relu')(layer)
    output = Dense(3, activation='softmax')(layer)

    model = Model(input_img, output)

    #optimizer = Adam(learning_rate=best_params['lr'])
    optimizer = Adam(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='MSE', metrics=['accuracy']) 

    model.fit(X_auto, y_auto,
            #epochs=best_params['epochs'],
            epochs=epochs,
            batch_size=256,
            shuffle=True,
            verbose=1)
    
    # Evaluate the model
    _, accuracy = model.evaluate(X_auto, X_auto, verbose=0)

    return accuracy

# Create optuna study and optimize the objective function
begin_time = time.time()
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1)
finish_time = time.time()

optuna_time = finish_time - begin_time
print(f"\nTime taken to find best hyperparams: {optuna_time} s")
# Print the best hyperparameters and the best objective value
best_params = study.best_params
best_value = study.best_value
print("Best Hyperparameters: ", best_params)
print("Best Accuracy: ", best_value)


[I 2023-06-14 08:07:40,928] A new study created in memory with name: no-name-5cacba87-187c-4478-9db5-3841c25f9b39


Beggining trial with lr=0.09832234678883542 and epochs=9
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


[W 2023-06-14 08:10:21,505] Trial 0 failed with parameters: {'lr': 0.09832234678883542, 'epochs': 9} because of the following error: InvalidArgumentError().
Traceback (most recent call last):
  File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Jose David\AppData\Local\Temp\ipykernel_7484\2358641602.py", line 32, in objective
    _, accuracy = model.evaluate(X_auto, X_auto, verbose=0)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\eager\execute.py", line 58, in quick_execute
    except TypeError as e:
tensorflow.

InvalidArgumentError: Graph execution error:

Detected at node 'mean_squared_error/SquaredDifference' defined at (most recent call last):
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1043, in launch_instance
      app.start()
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 725, in start
      self.io_loop.start()
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 607, in run_forever
      self._run_once()
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 1922, in _run_once
      handle._run()
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 409, in dispatch_shell
      await result
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\ipykernel\zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Jose David\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Jose David\AppData\Local\Temp\ipykernel_7484\2358641602.py", line 39, in <module>
      study.optimize(objective, n_trials=1)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\study.py", line 443, in optimize
      _optimize(
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 66, in _optimize
      _optimize_sequential(
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 163, in _optimize_sequential
      frozen_trial = _run_trial(study, func, catch)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
      value_or_values = func(trial)
    File "C:\Users\Jose David\AppData\Local\Temp\ipykernel_7484\2358641602.py", line 32, in objective
      _, accuracy = model.evaluate(X_auto, X_auto, verbose=0)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 2072, in evaluate
      tmp_logs = self.test_function(iterator)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1852, in test_function
      return step_function(self, iterator)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1836, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1824, in run_step
      outputs = model.test_step(data)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1790, in test_step
      self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1109, in compute_loss
      return self.compiled_loss(
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 142, in __call__
      losses = call_fn(y_true, y_pred)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 268, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\Jose David\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 1470, in mean_squared_error
      return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
Node: 'mean_squared_error/SquaredDifference'
Incompatible shapes: [32,64,64,3] vs. [32,3]
	 [[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_test_function_47774]

In [31]:
input_img = Input(shape=X_auto[0].shape)
layer = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
layer = MaxPooling2D((2, 2), padding='same')(layer)
layer = Conv2D(32, (3, 3), activation='relu', padding='same')(layer)
layer = MaxPooling2D((2, 2), padding='same')(layer)
layer = Flatten()(layer)
layer = Dense(128, activation='relu')(layer)
output = Dense(3, activation='softmax')(layer)

model = Model(input_img, output)

#optimizer = Adam(learning_rate=best_params['lr'])
optimizer = Adam(learning_rate=0.1)
model.compile(optimizer=optimizer, loss='MSE', metrics=['accuracy']) 

model.fit(X_auto, y_auto,
          #epochs=best_params['epochs'],
          epochs=2,
          batch_size=256,
          shuffle=True,
          verbose=1)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x25fbe613d10>

Etiquetas de cluster: [37 37 37 ... 30 30 30]
Coordenadas de los centroides: [[ 82.68056214 112.97501236  27.4433179 ]
 [128.18898249 150.27382048 100.63475171]
 [103.53261854 123.44542286  58.47832046]
 [ 66.48928981  82.81376274  43.12449198]
 [ 95.22388435 107.49300506  67.28602857]
 [ 80.38411256  97.76597809  49.55725217]
 [ 59.43721382  83.11634336  21.84985084]
 [107.06670534 132.47379359  70.09978885]
 [150.60753796 172.23537531 129.42821956]
 [116.74322026 140.66698064  96.87753581]
 [ 47.00049923  58.79401721  27.65031851]
 [ 92.39953282 118.22320574  38.74964359]
 [ 72.33551615  94.66010227  39.51681577]
 [114.77589581 130.24010561  91.50768439]
 [102.82285973 127.43517401  84.46605984]
 [109.05788029 137.71618305  51.57309867]
 [ 85.83495534 107.63602295  57.96489617]
 [136.64662713 157.72442769 116.73064686]
 [ 95.79131819 115.82798523  53.14240847]
 [116.97867712 139.1352763   80.43830581]
 [ 95.14108104 121.35788948  68.06617271]
 [ 91.10251645 106.41941339  47.00311455]

In [None]:
kmeans = KMeans(n_clusters=38, max_iter=2)
kmeans.fit(X)

# Obtener las etiquetas de cluster asignadas a cada punto de datos
labels = kmeans.labels_

# Obtener las coordenadas de los centroides
centroids = kmeans.cluster_centers_

# Imprimir las etiquetas de cluster y los centroides
print("Etiquetas de cluster:", labels)