## CNN para el reconocimiento de lenguaje de señas

Datos: https://www.kaggle.com/datamunge/sign-language-mnist

<img src="https://github.com/luise-phd/VisionComputacional/blob/main/imgs/american_sign_language.PNG?raw=true" width=70% />

<img src="https://github.com/luise-phd/VisionComputacional/blob/main/imgs/amer_sign2.png?raw=true" width=70% />

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from google.colab import drive
# Mount Google Drive inside the Colab runtime so the CSV files below are
# reachable under /content/drive.
drive.mount('/content/drive')

# Edit these paths to match where the dataset CSVs live in your Google Drive.
train = pd.read_csv('/content/drive/MyDrive/Corhuila/Visión Computacional/Notebooks/data/sign_mnist_train.csv')
test = pd.read_csv('/content/drive/MyDrive/Corhuila/Visión Computacional/Notebooks/data/sign_mnist_test.csv')

In [None]:
# Report the training table's (rows, columns) and preview its first rows.
print("El conjunto de datos tiene {} filas y {} columnas".format(*train.shape))
train.head()

In [None]:
# Report the test table's (rows, columns) and preview its first rows.
print("El conjunto de datos tiene {} filas y {} columnas".format(*test.shape))
test.head()

In [None]:
# Targets: the "label" column of each table.
y_train, y_test = train.label, test.label

# Features: every remaining column divided by 255.0
# (presumably 8-bit pixel intensities being scaled into [0, 1] - confirm).
x_train = train.drop(columns="label") / 255.0
x_test = test.drop(columns="label") / 255.0

In [None]:
# Turn each distinct numeric label found in the test set into its uppercase
# letter: label 0 -> "A", 1 -> "B", and so on.
abc = [chr(ord("A") + label) for label in np.unique(y_test)]
print(abc)

In [None]:
# Display the sorted distinct label values that occur in the test set.
np.unique(y_test)

In [None]:
# Re-index the labels: because there is no J in the data, the original label
# values are not contiguous. Map each distinct value to its position in the
# sorted list of distinct values so the classes become 0, 1, 2, ...
letters_map = {label: index for index, label in enumerate(np.unique(y_test))}
letters_map

### Reemplazar los índices en entrenamiento y pruebas

In [None]:
# Replace the original label values with their contiguous indices in both
# splits and keep the results as plain NumPy arrays.
y_train, y_test = (
    labels.replace(letters_map).values for labels in (y_train, y_test)
)

### Transformar los conjuntos de datos en matrices que representen a cada imagen
<p style='text-align: justify;'>El primer parámetro (-1) indica un número indefinido de filas; el segundo y el tercero representan una matriz de 28x28, y el último parámetro indica el canal de información en escala de grises.</p>

In [None]:
# Turn each flat row into a 28x28 single-channel image; -1 lets NumPy infer
# the number of images from the data.
x_train = x_train.to_numpy().reshape(-1, 28, 28, 1)
x_test = x_test.to_numpy().reshape(-1, 28, 28, 1)

In [None]:
# Preview the first 20 training images on a 5x5 grid, each captioned with
# its letter.
plt.figure(figsize=(10,10))
for i in range(20):
    ax = plt.subplot(5, 5, i + 1)
    # Hide ticks and grid: only the picture and its caption matter here.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    # Drop the channel axis so the image can be drawn as a 2-D grayscale map.
    ax.imshow(x_train[i].reshape(28, 28), cmap=plt.cm.gray)
    # abc translates the re-indexed label back into its letter.
    ax.set_xlabel(abc[y_train[i]])
plt.show()

In [None]:
# Preview the first 20 test images on a 5x5 grid, each captioned with its
# letter.
plt.figure(figsize=(10,10))
for i in range(20):
    ax = plt.subplot(5, 5, i + 1)
    # Hide ticks and grid: only the picture and its caption matter here.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    # Drop the channel axis so the image can be drawn as a 2-D grayscale map.
    ax.imshow(x_test[i].reshape(28, 28), cmap=plt.cm.gray)
    # abc translates the re-indexed label back into its letter.
    ax.set_xlabel(abc[y_test[i]])
plt.show()

# Construyendo y entrenando la NN

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models

# CNN classifier: three identical Conv -> Dropout -> MaxPool stages followed
# by a small dense head. The layers are listed in execution order.
model = models.Sequential([
    # Stage 1 (also declares the 28x28 single-channel input shape)
    layers.Conv2D(filters=64, kernel_size=(4, 4), activation='relu',
                  padding="same", input_shape=(28, 28, 1)),
    # Dropout rate: fraction of units randomly dropped during training to
    # reduce overfitting.
    layers.Dropout(rate=0.4),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    layers.Conv2D(filters=64, kernel_size=(4, 4), activation='relu',
                  padding="same"),
    layers.Dropout(rate=0.4),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    layers.Conv2D(filters=64, kernel_size=(4, 4), activation='relu',
                  padding="same"),
    layers.Dropout(rate=0.4),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Classification head
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(rate=0.4),
    # 24 output units, one per letter class
    layers.Dense(24, activation='softmax'),
])
model.summary()

In [None]:
# Integer class labels are used directly, hence the *sparse* categorical
# cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# NOTE(review): the test split is also used as validation data here, so the
# validation curve is not independent of the final test score.
history = model.fit(
    x_train,
    y_train,
    batch_size=100,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the trained model on the test split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print("Test acc:", test_acc)

In [None]:
# Draw the per-epoch accuracy curves: training first, then validation, so
# the plotting order matches the legend entries below.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_key])

plt.title("Accuracy")
plt.xlabel('epoch')
plt.ylabel('accuracy')
# Only the 0.80-1.00 band is shown; lower values fall outside the view.
plt.ylim(0.80, 1)
plt.legend(['train','test'])
plt.show()

# Probando la NN

In [None]:
# One row of class scores per test image; the predicted class is the column
# holding the largest score.
predictions = model.predict(x_test)
y_hat = predictions.argmax(axis=1)

In [None]:
# Summarise the misclassifications on the test split.
# The boolean mask is computed once and reused for both the image selection
# and the error percentage.
mask_errores = y_test != y_hat
errores = x_test[mask_errores]
errores_count = errores.shape[0]
porcentaje_error = (mask_errores.sum() * 100) / y_test.shape[0]

print("Elementos de prueba: {}".format(y_test.shape[0]))
print("Errores identificados: {}".format(errores_count))
print("Porcentaje de error: {} %".format(porcentaje_error))

In [None]:
# Gallery of misclassified test images: at most 5 rows of up to 5 images,
# each captioned with the true and the predicted letter.
mask_errores = y_test != y_hat
errores = x_test[mask_errores]
real_labels = y_test[mask_errores]
predicted_labels = y_hat[mask_errores]

per_row = 5
max_rows = 5
total_errores = errores.shape[0]
# Ceiling division: a last, partially filled row must still be drawn.
# (Rounding to nearest would drop it whenever 1 or 2 images are left over,
# and would show nothing at all when there are only 1 or 2 errors.)
n_rows = min(max_rows, (total_errores + per_row - 1) // per_row)

for j in range(n_rows):
    start = j * per_row
    # The final row may hold fewer than per_row images.
    n_cols = min(per_row, total_errores - start)
    plt.figure(figsize=(10,10))
    for i in range(n_cols):
        idx = start + i
        plt.subplot(1, n_cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        number = errores[idx].reshape(28,28)
        plt.imshow(number, cmap=plt.cm.gray)
        plt.xlabel("Real: {} Prediccion: {}".format
                   (abc[real_labels[idx]], abc[predicted_labels[idx]]))
    plt.show()

# Matriz de confusión

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of true vs. predicted classes with the diagonal (the
# correct predictions) zeroed, leaving only the misclassification counts.
mod_confusion_matrix = confusion_matrix(y_test, y_hat)
# In-place and size-agnostic: no hard-coded number of classes.
np.fill_diagonal(mod_confusion_matrix, 0)

In [None]:
# Heatmap of the confusion matrix with letters as tick labels.
n_classes = mod_confusion_matrix.shape[0]
# Heatmap cell i spans [i, i + 1], so its centre is at i + 0.5; placing the
# ticks there keeps each letter aligned with its row/column instead of
# sitting on a cell boundary.
tick_positions = np.arange(n_classes) + 0.5

fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(1,1,1)
sns.heatmap(mod_confusion_matrix, linewidth=0.5, annot=True, cmap="YlGnBu")
plt.xticks(tick_positions, abc)
plt.yticks(tick_positions, abc)
plt.ylabel("Etiquetas Reales")
plt.xlabel("Etiquetas Predecidas")
# Pin the y-limits to the exact cell extent (first row on top) so that no
# row is clipped and no blank margin is added.
ax.set_ylim(n_classes, 0);