# Modelo de clasificación de imágenes de lengua de señas en Español

## Creación del modelo

### Librerías

In [7]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
import json

data_path = "./classes_poses_cut/"

def load_images(path):
    images = []
    labels = []
    classes = os.listdir(path)
    class_count = len(classes)

    for class_name in classes:
        class_path = os.path.join(path, class_name)
        for file_name in os.listdir(class_path):
            img_path = os.path.join(class_path, file_name)

            img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
            img = tf.keras.preprocessing.image.img_to_array(img)
            img = tf.keras.applications.mobilenet_v2.preprocess_input(img)

            images.append(img)
            labels.append(class_name)

    return np.array(images), np.array(labels), class_count

X, y, class_count = load_images(data_path)

# Imprimir el número de muestras por clase antes del balanceo
print("Número de muestras por clase antes del balanceo:")
for class_name in set(y):
    print(f"{class_name}: {np.sum(y == class_name)} muestras")

# Aplicar oversampling a las clases menos representadas
oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = oversampler.fit_resample(X.reshape(-1, 224 * 224 * 3), y)

X_resampled = X_resampled.reshape(-1, 224, 224, 3)

# Imprimir el número de muestras por clase después del balanceo
print("\nNúmero de muestras por clase después del balanceo:")
for class_name in set(y_resampled):
    print(f"{class_name}: {np.sum(y_resampled == class_name)} muestras")

# Codificar las etiquetas de clase
label_encoder = LabelEncoder()
y_resampled_encoded = label_encoder.fit_transform(y_resampled)

# Dividir el conjunto de datos en entrenamiento, validación y prueba
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(X_resampled, y_resampled_encoded, test_size=0.2, random_state=42)
X_train, X_val, y_train_encoded, y_val_encoded = train_test_split(X_train, y_train_encoded, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Crear un generador de imágenes con data augmentation para el entrenamiento
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Ajustar el generador de datos de entrenamiento
train_datagen.fit(X_train)

# Definir el modelo
def create_model(kernel_size, num_layers, learning_rate):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(64, (kernel_size, kernel_size), activation='relu', input_shape=(224, 224, 3)))
    for _ in range(num_layers - 1):
        model.add(tf.keras.layers.Conv2D(64, (kernel_size, kernel_size), activation='relu'))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(class_count, activation='softmax'))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Entrenar el modelo con data augmentation
best_params = {'kernel_size': 5, 'num_layers': 3, 'learning_rate': 0.001}
final_model = create_model(**best_params)

final_model.fit(train_datagen.flow(X_train, y_train_encoded, batch_size=32),
                steps_per_epoch=len(X_train) // 32,
                validation_data=(X_val, y_val_encoded),
                epochs=5)

# Evaluar el modelo en el conjunto de prueba
y_pred = final_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test_encoded, y_pred_classes))
print(confusion_matrix(y_test_encoded, y_pred_classes))

# Guardar el modelo
final_model.save('model.h5')

# Guardar los índices de clase en un archivo JSON
class_indices = {class_name: i for i, class_name in enumerate(label_encoder.classes_)}
with open('class_indices.json', 'w') as f:
    json.dump(class_indices, f)


Número de muestras por clase antes del balanceo:
R: 131 muestras
A: 147 muestras
O: 134 muestras
Q: 130 muestras
U: 135 muestras
T: 126 muestras
F: 129 muestras
M: 130 muestras
I: 135 muestras
G: 129 muestras
D: 132 muestras
S: 135 muestras
C: 140 muestras
N: 131 muestras
K: 134 muestras
E: 134 muestras
P: 126 muestras
L: 132 muestras
B: 102 muestras

Número de muestras por clase después del balanceo:
R: 147 muestras
A: 147 muestras
O: 147 muestras
Q: 147 muestras
U: 147 muestras
T: 147 muestras
F: 147 muestras
M: 147 muestras
I: 147 muestras
G: 147 muestras
D: 147 muestras
S: 147 muestras
C: 147 muestras
N: 147 muestras
K: 147 muestras
E: 147 muestras
P: 147 muestras
L: 147 muestras
B: 147 muestras
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        24
           1       0.00      0.00      0.00        36
           2       0.00      0.00      0.00        29
           3       0.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  saving_api.save_model(


In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
import cv2
from tensorflow.keras.models import load_model
import json
from tensorflow.keras.preprocessing import image

### Data Augmentation

In [None]:


data_path = "./classes_poses/"

def load_images(path):
    images = []
    labels = []
    classes = os.listdir(path)
    class_count = len(classes)

    for class_name in classes:
        class_path = os.path.join(path, class_name)
        for file_name in os.listdir(class_path):
            img_path = os.path.join(class_path, file_name)

            img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
            img = tf.keras.preprocessing.image.img_to_array(img)
            img = tf.keras.applications.mobilenet_v2.preprocess_input(img)

            images.append(img)
            labels.append(class_name)

    return np.array(images), np.array(labels), class_count

X, y, class_count = load_images(data_path)

# Imprimir el número de muestras por clase antes del balanceo
print("Número de muestras por clase antes del balanceo:")
for class_name in set(y):
    print(f"{class_name}: {np.sum(y == class_name)} muestras")

# Aplicar oversampling a las clases menos representadas
oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = oversampler.fit_resample(X.reshape(-1, 224 * 224 * 3), y)

X_resampled = X_resampled.reshape(-1, 224, 224, 3)

# Crear un generador de imágenes con data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Aplicar data augmentation al conjunto de datos resampleado
augmented_images = []
augmented_labels = []

for img, label in zip(X_resampled, y_resampled):
    img = np.expand_dims(img, axis=0)
    for _ in range(3):  # Aumentar el conjunto de datos tres veces
        augmented_img = datagen.random_transform(img[0])
        augmented_images.append(augmented_img)
        augmented_labels.append(label)

X_ = np.array(augmented_images)
y_ = np.array(augmented_labels)


# Imprimir el número de muestras por clase después del balanceo
print("\nNúmero de muestras por clase después del balanceo:")
for class_name in set(y_):
    print(f"{class_name}: {np.sum(y_ == class_name)} muestras")


### Partición de datos 

eliminar ruido mejorando la forma del augmentation,
borrar imagenes aug.

In [3]:
X_train, X_temp, y_train, y_temp = train_test_split(X_, y_, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
num_classes = len(np.unique(y_train))

### Transformación de variables categóricas a numéricas

In [4]:
import json
class_indices = {}
for i, class_name in enumerate(np.unique(y_train)):
    class_indices[class_name] = i
with open('class_indices.json', 'w') as f:
    json.dump(class_indices, f)

y_train = np.vectorize(class_indices.get)(y_train)
y_val = np.vectorize(class_indices.get)(y_val)
y_test = np.vectorize(class_indices.get)(y_test)


### Configurar el modelo de clasificación con CNN

como se hace con un dataloader la aug en linea 



In [5]:
def create_model(kernel_size, num_layers, batch_size, learning_rate):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(64, (kernel_size, kernel_size), activation='relu', input_shape=(224, 224, 3)))
    for _ in range(num_layers - 1):
        model.add(tf.keras.layers.Conv2D(64, (kernel_size, kernel_size), activation='relu'))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

### Entrenar modelo

In [6]:
# param_grid = {
#     'kernel_size': [3, 5], #!!
#     'num_layers': [2, 3],
#     'batch_size': [32, 64],
#     'learning_rate': [0.001, 0.0001]
# }
# param_grid = {
#     'kernel_size': [5],
#     'num_layers': [3],
#     'batch_size': [32],
#     'learning_rate': [0.001]
# }

# model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)
# grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)

# grid_result = grid.fit(X_train, y_train)

# best_params = grid_result.best_params_

 
best_params = {'batch_size': 32, 'kernel_size': 5, 'learning_rate': 0.001, 'num_layers': 3}
final_model = create_model(**best_params)
final_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

y_pred = final_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes))
print(confusion_matrix(y_test, y_pred_classes))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.05      0.04      0.04        53
           1       0.00      0.00      0.00        49
           2       0.06      0.08      0.07        50
           3       0.11      0.19      0.13        43
           4       0.07      0.03      0.04        60
           5       0.08      0.09      0.09        54
           6       0.13      0.11      0.12        46
           7       0.10      0.14      0.12        44
           8       0.17      0.16      0.16        51
           9       0.09      0.08      0.08        49
          10       0.07      0.04      0.05        45
          11       0.05      0.04      0.04        51
          12       0.08      0.15      0.10        41
          13       0.06      0.09      0.07        47
          14       0.08      0.07      0.08        43
          15       0.14 

### Guardar el modelo

In [None]:
final_model.save('model.h5')


## Uso del modelo

### Detección de clase con una sola imágen

In [2]:

model = load_model('model.h5')

class_indices = json.load(open('class_indices.json'))

def predict_image(model, img_path, class_indices):
    img = image.load_img(img_path, target_size=(224, 224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    prediction = model.predict(img)
    predicted_class = list(class_indices.keys())[np.argmax(prediction)]
    return predicted_class

img_path = "./classes_original/M/DSC01254.JPG"
predicted_class = predict_image(model, img_path, class_indices)
print(predicted_class)


M


### Detección de clases en tiempo real

In [None]:
# quiero que me haga predicciones en tiempo real con la camara


model = load_model('model.h5')

class_indices = json.load(open('class_indices.json'))

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    img = cv2.resize(frame, (224, 224))
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    prediction = model.predict(img)
    predicted_class = list(class_indices.keys())[np.argmax(prediction)]
    cv2.putText(frame, predicted_class, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()