# Modelo con datagenerator y MLP

## Funciones procesado

In [1]:
import os
import numpy as np
import cv2
from sklearn.cluster import MiniBatchKMeans
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder

2024-12-29 21:16:42.333359: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1735503402.345185   73706 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735503402.348704   73706 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-29 21:16:42.362913: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def leer_imagenes_rgb(dataset_path):
    jpgFiles = [fJPG for fJPG in os.listdir(dataset_path) if fJPG.endswith('.jpg')]
    bufferImages = []
    y = []

    # Extraer 50 patches aleatorios de cada imagen
    for filename in jpgFiles:
        img_path = os.path.join(dataset_path, filename)
        label = filename.split('.')[0][-2:]
        y.append(label)
        img = cv2.imread(img_path)
        # Convertir imagen de BGR a RGB
        img_color = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        resized_img = cv2.resize(img_color, (150, 150), interpolation=cv2.INTER_AREA)
        bufferImages.append(resized_img)
    
    bufferImages = np.array(bufferImages)

    return bufferImages, y

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin


class CustomBoVWPreprocessorSamePatches(BaseEstimator, TransformerMixin):
    def __init__(self, patchSize=10, nClusters = 81, max_patches=50, norm = False):
        self.patchSize = patchSize
        self.nClusters = nClusters
        self.max_patches = max_patches
        self.norm = norm   
    
    def extract_centric_patches(self, img, patch_size, num_patches, seed=42):
        assert img.shape[0] == 150 and img.shape[1] == 150, "La imagen debe tener resolución 150x150."
        assert patch_size > 0, "El tamaño del patch debe ser mayor que 0."
        assert patch_size * num_patches <= 150 * 150, "Demasiados patches para esta resolución."
        
        # Calcular la cuadrícula de celdas no superpuestas
        grid_rows = 150 // patch_size
        grid_cols = 150 // patch_size
        total_cells = grid_rows * grid_cols

        assert num_patches <= total_cells, "Demasiados patches para el tamaño del patch y la resolución de la imagen."
        
        # Generar las posiciones de la cuadrícula
        all_positions = [(i * patch_size, j * patch_size) for i in range(grid_rows) for j in range(grid_cols)]

        # Calcular las distancias de las celdas al centro de la imagen
        center = (150 // 2, 150 // 2)
        distances = [
            np.sqrt((row + patch_size // 2 - center[0])**2 + (col + patch_size // 2 - center[1])**2)
            for row, col in all_positions
        ]

        # Ordenar las posiciones por cercanía al centro
        sorted_positions = [pos for _, pos in sorted(zip(distances, all_positions))]
        
        # Seleccionar los patches más céntricos
        np.random.seed(seed)
        selected_positions = sorted_positions[:num_patches]
        
        # Extraer los patches en las posiciones seleccionadas
        patches = [img[row:row + patch_size, col:col + patch_size] for row, col in selected_positions]

        #self.visualize_disjoint_patches(img, patches, selected_positions, patch_size)
        
        return np.array(patches)


    def bovw(self, images, patchSize, nClusters, maxPatches):
        bufferData = []
        for i in images:
            #resized_img = cv2.resize(i, (150, 150), interpolation=cv2.INTER_AREA)
            patches = self.extract_centric_patches(i, patchSize, num_patches=maxPatches, seed=27) # shape = (maxPatches,5,5,3)
            patches = np.reshape(patches, (len(patches), -1)) # shape = (maxPatches,75)
            bufferData.append(patches)
        dataIm = np.concatenate(bufferData, axis=0) # shape = (200*maxPatches,75)  n patches x 200 imágenes
        dataIm = dataIm.astype(float)
        dataIm -= np.mean(dataIm, axis=0)
        dataIm /= np.std(dataIm, axis=0)
        kmeans = MiniBatchKMeans(n_clusters=nClusters, random_state=27, verbose=False)
        kmeans.partial_fit(dataIm)

        X = np.reshape(kmeans.labels_ , (len(images),maxPatches)) # Indice de cluster de cada patch. shape = (200,maxPatches)
        resul = [np.bincount(row, minlength=nClusters) for row in X]
        resul = np.array(resul)

        if self.norm:
            contador_palabras = np.zeros((nClusters,))
            for p in range(nClusters):
                for i in resul[:,p]:
                    if i > 0:
                        contador_palabras[p] += 1
            for p in range(nClusters):
                for imagen in resul:
                    imagen[p] = imagen[p] * np.log(len(images)/(contador_palabras[p]+0.001)) # Actualizamos el valor por el logaritmo del cociente entre N y el número de imágenes en las que aparece esa palabra

        return resul

    def transform(self, images):
        return self.bovw(images, self.patchSize, self.nClusters, self.max_patches)  # Devuelve los datos preprocesados

## Datos

In [4]:
path = '/home/pablo/Desktop/tercero/mdp/trabajo/HANDS'

imagenes, labels = leer_imagenes_rgb(path)

In [5]:
bovw = CustomBoVWPreprocessorSamePatches()
X = bovw.transform(imagenes)

In [7]:
from keras.utils import to_categorical

encoder = LabelEncoder()
labels_cat = encoder.fit_transform(labels)
y = to_categorical(labels_cat, 4) #to_categorical funciona solo con las clases como números

## Creación modelo

In [8]:
from sklearn.model_selection import train_test_split
from keras.optimizers import SGD
from keras import Sequential
from keras.layers import Dense, Input, Dropout, Flatten


In [12]:
model = Sequential()

model.add(Input(shape=(81,)))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(4, activation='softmax'))


model.compile(optimizer = SGD(), loss='categorical_crossentropy',
                metrics=['accuracy'])

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [15]:
history = model.fit(X_train, y_train, epochs=75, batch_size=128)

Epoch 1/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6300 - loss: 0.8648
Epoch 2/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6274 - loss: 0.8608
Epoch 3/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6297 - loss: 0.8625
Epoch 4/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6277 - loss: 0.8501
Epoch 5/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6416 - loss: 0.8410
Epoch 6/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6425 - loss: 0.8326
Epoch 7/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6476 - loss: 0.8246
Epoch 8/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6450 - loss: 0.8368
Epoch 9/75
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [16]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.4f}")






Test Accuracy: 0.7930
