# Importar datos

`keras.datasets` es un módulo de Keras que permite descargar conjuntos de datos de manera rápida.

In [None]:
from keras.datasets import fashion_mnist
# Load Fashion-MNIST and unpack it into image/label arrays for the training
# and test splits. (The original call was missing its closing parenthesis.)
(train_X, train_Y), (test_X, test_Y) = fashion_mnist.load_data()

# Analizar datos


In [None]:
import numpy as np
from keras.utils import to_categorical
import matplotlib.pyplot as plt
%matplotlib inline
# Report the image-array and label-array shapes of each split.
shape_report = (
    ('Training data shape : ', train_X.shape, train_Y.shape),
    ('Testing data shape : ', test_X.shape, test_Y.shape),
)
for caption, images_shape, labels_shape in shape_report:
    print(caption, images_shape, labels_shape)

60,000 datos de 28x28 para entrenamiento y 10,000 para pruebas


In [None]:
# Distinct label values found in the training labels, and how many there are.
classes = np.unique(train_Y)
nClasses = classes.size  # np.unique returns a 1-D array, so size == len
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

Las etiquetas (labels) son las salidas esperadas de cada patrón; indican la clase a la que pertenece.


In [None]:
# Side-by-side preview (1 row x 2 columns) of the first sample of each split,
# titled with its ground-truth label.
plt.figure(figsize=(5, 5))

# Left panel: first training image.
plt.subplot(1, 2, 1)
plt.imshow(train_X[0], cmap='gray')
plt.title("Train-Ground Truth : {}".format(train_Y[0]))

# Right panel: first test image.
plt.subplot(1, 2, 2)
plt.imshow(test_X[0], cmap='gray')
plt.title("Test-Ground Truth : {}".format(test_Y[0]))

# Preprocesamiento de los datos

Cada imagen se debe de ingresar de manera individual a la CNN

Dividir los datasets en matrices de 28x28x1.

In [None]:
# Give every image an explicit trailing channel axis: (N, 28, 28, 1).
train_X, test_X = (
    images.reshape(-1, 28, 28, 1) for images in (train_X, test_X)
)
train_X.shape, test_X.shape
((60000, 28, 28, 1), (10000, 28, 28, 1))

Convertir los datos a float32 para la CNN y normalizarlos entre 0 y 1.


In [None]:
# Cast to float32 and divide by 255 in one step per split.
# NOTE: the arrays are rebound to their own names, so running this cell a
# second time divides by 255 again.
train_X = train_X.astype('float32') / 255.
test_X = test_X.astype('float32') / 255.

Convertir las etiquetas de salida (Y) en vectores de activación ("one-hot encoding") en los que solamente la clase a la que pertenece contiene
un 1. Por ejemplo, para las imágenes anteriores cuya clase es 9, el vector es [0 0 0 0 0 0 0 0 0 1].


In [None]:
# One-hot encode the integer class labels of both splits.
train_Y_one_hot, test_Y_one_hot = (
    to_categorical(labels) for labels in (train_Y, test_Y)
)

# Show one sample before and after the conversion.
sample_index = 31
print('Original label:', train_Y[sample_index])
print('After conversion to one-hot:', train_Y_one_hot[sample_index])

Dividir el dataset de entrenamiento en 80% entrenamiento y 20% validación


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the training data for validation (fixed seed for a
# repeatable split).
# NOTE: `train_X` is both the input and an output of this cell, so running it
# again splits the already-reduced training set.
split_arrays = train_test_split(
    train_X,
    train_Y_one_hot,
    test_size=0.2,
    random_state=13,
)
train_X, valid_X, train_label, valid_label = split_arrays
tuple(part.shape for part in split_arrays)

# Implementar la CNN

Importar los modelos para implementar la red

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras import Input,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras.layers import LeakyReLU


Tamaño del Batch = 64 (depende de tu RAM)


In [None]:
# Training hyperparameters used by the model cells below.
batch_size, epochs, num_classes = 64, 20, 10
# Capitalised duplicate kept from the original cell; nothing visible in this
# notebook reads it.
Epochs = epochs


En Keras se crea la arquitectura de la red al ir agregando capa sobre capa a un modelo.

Iniciamos diciéndole que el modelo es secuencial y vamos añadiendo poco a poco las capas de convolución, ReLU, Pooling, etc. hasta generar el
modelo seleccionado.


In [None]:
# Baseline CNN: three Conv -> LeakyReLU -> MaxPool stages (32, 64, 128
# filters), then a 128-unit dense layer and a softmax classifier.
# Layers are passed as a list, in the same order the original added them.
fashion_model = Sequential([
    # Stage 1
    Conv2D(32, kernel_size=(3, 3), activation='linear',
           input_shape=(28, 28, 1), padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D((2, 2), padding='same'),
    # Stage 2
    Conv2D(64, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    # Stage 3
    Conv2D(128, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    # Classifier head
    Flatten(),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    Dense(num_classes, activation='softmax'),
])

En lugar de usar descenso del gradiente "normal" para actualizar los pesos de la red, se utilizará un algoritmo de optimización llamado
Adam.
Este algoritmo es una extensión de "stochastic gradient descent" y ha dado muy buenos resultados en Deep Learning.

In [None]:
# Configure training: Adam with its default settings, categorical
# cross-entropy loss (labels are one-hot), accuracy as the reported metric.
fashion_model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

Visualizar la red

In [None]:
# Print Keras' textual summary of the model assembled above.
fashion_model.summary()

# Entrenar la CNN

In [None]:
# Train the baseline model. The returned History object is kept so the
# per-epoch metrics can be plotted later.
fashion_train = fashion_model.fit(
    train_X,
    train_label,
    validation_data=(valid_X, valid_label),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)


Grabar el modelo para uso futuro


In [None]:
# Persist the trained baseline model in HDF5 format.
# The original used a ".h5py" suffix; h5py is the name of the Python HDF5
# library, not a file extension Keras recognises. Keras chooses the save
# format from the suffix (".h5" for HDF5, ".keras" for the native format), and
# recent releases reject unrecognised suffixes.
fashion_model.save("fashion_model_overfitting.h5")

Acc = 0.99, loss = 0.0276: al parecer está aprendiendo perfectamente y todo lo clasifica bien.
val_acc = 0.9187, val_loss = 0.3980: está clasificando mal muchos datos del set de validación. Esto se debe al overfitting.


# Evaluación del modelo y pruebas con el dataset de pruebas

In [None]:
# Score the baseline model on the held-out test split; the first two entries
# of the result are printed as loss and accuracy.
test_eval = fashion_model.evaluate(test_X, test_Y_one_hot, verbose=0)
for caption, metric_value in zip(('Test loss:', 'Test accuracy:'), test_eval):
    print(caption, metric_value)

Muy bien, 91% de exactitud con el dataset de pruebas, pero la pérdida es muy alta: 0.43 (la pérdida de entropía cruzada no es un porcentaje).


Generar gráficas para comparar los datasets de entrenamiento y validación en cuanto a su pérdida y exactitud.

In [None]:
# Per-epoch metrics recorded while training the baseline model.
accuracy = fashion_train.history['accuracy']
val_accuracy = fashion_train.history['val_accuracy']
loss = fashion_train.history['loss']
val_loss = fashion_train.history['val_loss']

# X axis for the curves. This uses its own name on purpose: the original
# rebound `epochs` to a range object, clobbering the integer hyperparameter
# that the training cell passes to `fit(epochs=...)`.
epoch_range = range(len(accuracy))

# Figure 1: accuracy curves.
plt.plot(epoch_range, accuracy, 'bo', label='Training accuracy')
plt.plot(epoch_range, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Figure 2: loss curves.
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Muestra clara de Overfitting


# Uso de Dropout para evitar overfitting

Dropout es una técnica para ir apagando neuronas durante el entrenamiento y de esta manera evitar el overfitting.
Volver a generar el modelo con dropout usando apagado del 25%, 25%, 40% y 30% de las neuronas (una capa de dropout después de cada bloque).

In [None]:
# Hyperparameters for the regularised run (same values as the baseline).
batch_size, epochs, num_classes = 64, 20, 10

# Same architecture as the baseline, with a Dropout layer after each of the
# three pooling stages (0.25, 0.25, 0.4) and after the dense layer (0.3).
fashion_model = Sequential([
    # Stage 1
    Conv2D(32, kernel_size=(3, 3), activation='linear',
           padding='same', input_shape=(28, 28, 1)),
    LeakyReLU(alpha=0.1),
    MaxPooling2D((2, 2), padding='same'),
    Dropout(0.25),
    # Stage 2
    Conv2D(64, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    # Stage 3
    Conv2D(128, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.4),
    # Classifier head
    Flatten(),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

fashion_model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train and keep the History object for the comparison plots.
fashion_train_dropout = fashion_model.fit(
    train_X,
    train_label,
    validation_data=(valid_X, valid_label),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)


In [None]:
# Persist the dropout-regularised model in HDF5 format.
# The original used a ".h5py" suffix; h5py is the name of the Python HDF5
# library, not a file extension Keras recognises. Keras chooses the save
# format from the suffix (".h5" for HDF5, ".keras" for the native format), and
# recent releases reject unrecognised suffixes.
fashion_model.save("fashion_model_dropout.h5")


In [None]:

# Score the dropout model on the held-out test split and report the first two
# entries of the result as loss and accuracy.
test_eval = fashion_model.evaluate(test_X, test_Y_one_hot, verbose=0)
test_loss_value, test_accuracy_value = test_eval[0], test_eval[1]
print('Test loss:', test_loss_value)
print('Test accuracy:', test_accuracy_value)

In [None]:

# Per-epoch metrics recorded while training the dropout model.
accuracy = fashion_train_dropout.history['accuracy']
val_accuracy = fashion_train_dropout.history['val_accuracy']
loss = fashion_train_dropout.history['loss']
val_loss = fashion_train_dropout.history['val_loss']

# X axis for the curves. This uses its own name on purpose: the original
# rebound `epochs` to a range object, clobbering the integer hyperparameter
# that the training cell passes to `fit(epochs=...)`.
epoch_range = range(len(accuracy))

# Figure 1: accuracy curves.
plt.plot(epoch_range, accuracy, 'bo', label='Training accuracy')
plt.plot(epoch_range, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Figure 2: loss curves.
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Se aprecia una reducción significativa del error en la pérdida y en la exactitud.


# Predicción usando las etiquetas

Necesario convertir de regreso los vectores one-hot a valores entendibles por el humano.

In [None]:
# Per-class scores from the softmax output, one row per test image.
predicted_probs = fashion_model.predict(test_X)

# The predicted label is the index of the highest score in each row.
# The original rounded the scores before taking argmax: any row whose top
# score was <= 0.5 became all zeros, and argmax then returned class 0 no
# matter which class the model actually favoured.
predicted_classes = np.argmax(predicted_probs, axis=1)
predicted_classes.shape, test_Y.shape

Generar vector con clases correctas y desplegar las primeras 9


In [None]:
# Indices of the test samples whose predicted label matches the ground truth.
correct = np.where(predicted_classes == test_Y)[0]
print("Found %d correct labels" % len(correct))

# Show the first nine hits in a 3x3 grid. The loop variable has its own name:
# the original reused `correct`, which replaced the index array with a single
# integer once the loop had run.
for i, sample_idx in enumerate(correct[:9]):
    plt.subplot(3, 3, i + 1)
    plt.imshow(test_X[sample_idx].reshape(28, 28), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(predicted_classes[sample_idx], test_Y[sample_idx]))
# Adjust the layout once, after all panels exist, instead of on every iteration.
plt.tight_layout()

Generar vector con clases mal clasificadas y desplegar las primeras 9

In [None]:
# Indices of the test samples whose predicted label differs from the ground truth.
incorrect = np.where(predicted_classes != test_Y)[0]
print("Found %d incorrect labels" % len(incorrect))

# Show the first nine misses in a 3x3 grid. The loop variable has its own
# name: the original reused `incorrect`, which replaced the index array with a
# single integer once the loop had run.
for i, sample_idx in enumerate(incorrect[:9]):
    plt.subplot(3, 3, i + 1)
    plt.imshow(test_X[sample_idx].reshape(28, 28), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(predicted_classes[sample_idx], test_Y[sample_idx]))
# Adjust the layout once, after all panels exist, instead of on every iteration.
plt.tight_layout()

# Generar reporte

In [None]:
from sklearn.metrics import classification_report
# Human-readable row labels: "Class 0" ... "Class <num_classes - 1>".
target_names = list(map("Class {}".format, range(num_classes)))

# Per-class report comparing the true test labels with the predicted ones.
report_text = classification_report(
    test_Y,
    predicted_classes,
    target_names=target_names,
)
print(report_text)