Este cuaderno de Jupyter sirve para entrenar el modelo clasificador MobileNet v2.

Basado en este [cuaderno](https://github.com/aryan109/medium/blob/master/Custom_Image_Classification/Custom_image_clasification.ipynb).

# Montar el Drive

In [None]:
# Mount Google Drive at /content/gdrive (the google.colab module is provided by the Colab runtime).
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import os
os.chdir('/content/gdrive/My Drive/TFM-MaskDetection/classification-models/') # Change to the directory that contains training.ipynb

# Crear el dataset de entrenamiento



In [None]:
import os

import cv2
import matplotlib.pyplot as plt
from pandas import read_csv

# Path declaration
IMAGES_PATH = r"./images/"
DATASET_PATH = r"./dataset/"
TRAINING_DATASET_PATH = DATASET_PATH + r"train/"
EVAL_DATASET_PATH = DATASET_PATH + r"eval/"
# Each entry is [labels CSV file, source image directory, destination dataset directory].
paths = [["images/train_labels.csv",IMAGES_PATH+"train/",DATASET_PATH],
         ["images/test_labels.csv", IMAGES_PATH+"test/", DATASET_PATH]]

# Running counter shared by both label files so every saved crop gets a unique name.
k = 0
for path in paths:
    print(path, path[0], path[1], path[2])
    labels = read_csv(path[0])  # labels.csv file
    src_img_path = path[1]      # source image path
    dst_img_path = path[2]      # destination dataset path

    # One row per annotated object: filename, Class and bounding box (xmin, ymin, xmax, ymax).
    for row in labels.itertuples(index=False):
        img_name = row.filename       # Image filename with extension
        img_class = str(row.Class)    # Object class, used as the output sub-directory

        # cv2.imread returns None (it does not raise) when the file is missing or unreadable.
        img_src = cv2.imread(src_img_path + img_name)
        if img_src is None:
            print("No se pudo leer la imagen " + src_img_path + img_name)
            continue

        # Crop the annotated object; a degenerate box yields an empty array that cv2.resize rejects.
        img_segm = img_src[row.ymin:row.ymax, row.xmin:row.xmax]
        if img_segm.size == 0:
            print("Recorte vacio en " + src_img_path + img_name)
            continue
        img_segm = cv2.resize(img_segm, (224, 224))

        # cv2.imwrite does not create missing directories, so make the class folder first.
        class_dir = os.path.join(dst_img_path, img_class)
        os.makedirs(class_dir, exist_ok=True)
        out_path = os.path.join(class_dir, str(k) + '.png')
        if not cv2.imwrite(out_path, img_segm):
            print("No se pudo guardar la imagen " + out_path)
            continue

        k += 1

    print("Imagenes guardadas en " + dst_img_path)
print("Terminado!")

# Entrenamiento

In [None]:
# Request TensorFlow 2.x on Colab; any exception raised by the magic is swallowed.
try:
  # The %tensorflow_version magic only works in colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
import tensorflow_hub as hub

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Display pandas floating-point values with 8 digits of precision.
pd.set_option("display.precision", 8)

In [None]:
# Training configuration: model identifier, epoch count and dataset location.
MODEL_NAME, EPOCHS = 'mobilenet_v2', 100
TRAINING_DATA_DIR = r"./dataset"
# The trained model is stored under saved_models/<model name>/<epochs>.
SAVED_PATH = os.path.join(*("saved_models", MODEL_NAME, str(EPOCHS)))

Carga del dataset y del modelo

In [None]:
# Shared preprocessing: rescale pixel values by 1/255 and hold out 20% of the images for validation.
datagen_kwargs = {"rescale": 1./255, "validation_split": .20}
# Options common to both directory iterators.
flow_kwargs = {"shuffle": True, "target_size": (224,224)}

# Validation split of the images found under TRAINING_DATA_DIR.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    TRAINING_DATA_DIR, subset="validation", **flow_kwargs)

# Training split of the same directory.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
train_generator = train_datagen.flow_from_directory(
    TRAINING_DATA_DIR, subset="training", **flow_kwargs)

# Pull a single batch to report the image and label tensor shapes.
image_batch, label_batch = next(iter(train_generator))
print(image_batch.shape, label_batch.shape)

# Write the class names, sorted alphabetically and one per line, to labels.txt.
class_indices = train_generator.class_indices
print(class_indices)
labels = '\n'.join(sorted(class_indices))
with open('labels.txt', 'w') as labels_file:
  labels_file.write(labels)

In [None]:
# Non-trainable MobileNet v2 feature-vector layer loaded from TF Hub.
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4",
    output_shape=[1280],
    trainable=False)
dropout = tf.keras.layers.Dropout(0.4)
# Softmax classification head with one unit per class found by the training generator.
classifier = tf.keras.layers.Dense(train_generator.num_classes, activation='softmax')

model = tf.keras.Sequential([feature_extractor, dropout, classifier])

# Build for batches of 224x224 three-channel images, then print the layer summary.
model.build([None, 224, 224, 3])
model.summary()

In [None]:
# Adam optimizer with a 1e-3 learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# The 'acc' metric name is what the history dictionary keys ("acc", "val_acc") are derived from.
compile_kwargs = {
  "optimizer": optimizer,
  "loss": 'categorical_crossentropy',
  "metrics": ['acc'],
}
model.compile(**compile_kwargs)

Entrenamiento

In [None]:
# Number of batches needed to cover each split once. np.ceil returns a float,
# while Keras documents steps_per_epoch / validation_steps as integers, so cast explicitly.
steps_per_epoch = int(np.ceil(train_generator.samples/train_generator.batch_size))
val_steps_per_epoch = int(np.ceil(valid_generator.samples/valid_generator.batch_size))

# Train for EPOCHS epochs; keep only the per-epoch metrics dictionary from the History object.
hist = model.fit(
    train_generator, 
    epochs=EPOCHS,
    verbose=1,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=val_steps_per_epoch).history

Evaluación del entrenamiento

In [None]:
# Evaluate on the validation generator; the result holds the loss followed by the accuracy metric.
eval_results = model.evaluate(valid_generator, steps = val_steps_per_epoch)
final_loss, final_accuracy = eval_results

loss_report = "Final loss: {:.2f}".format(final_loss)
accuracy_report = "Final accuracy: {:.2f}%".format(final_accuracy * 100)
print(loss_report)
print(accuracy_report)

In [None]:
def plot_history(history, ylabel, ylim, train_key, val_key):
  """Plot one training curve and its validation counterpart in a new figure.

  Returns the value of the last plt.plot call so the cell output matches
  a bare plt.plot expression.
  """
  plt.figure()
  plt.ylabel(ylabel)
  plt.xlabel("Training Steps")
  plt.ylim(ylim)
  plt.plot(history[train_key])
  return plt.plot(history[val_key])

plot_history(hist, "Loss (training and validation)", [0,50], "loss", "val_loss")
plot_history(hist, "Accuracy (training and validation)", [0,1], "acc", "val_acc")

Guardado del modelo

In [None]:
# Persist the trained model at SAVED_PATH.
model.save(SAVED_PATH)

Test de predicción

In [None]:
# Reload the model from SAVED_PATH, replacing the in-memory `model`.
model = tf.keras.models.load_model(SAVED_PATH)

In [None]:
# Take one validation batch and recover the integer class id from each one-hot label.
val_image_batch, val_label_batch = next(iter(valid_generator))
true_label_ids = np.argmax(val_label_batch, axis=-1)
print("Validation batch shape:", val_image_batch.shape)
print(val_image_batch.dtype)

# Class names ordered by their index so that dataset_labels[class_id] is the class name.
dataset_labels = sorted(train_generator.class_indices.items(), key=lambda pair:pair[1])
dataset_labels = np.array([key.title() for key, value in dataset_labels])
print(dataset_labels)

tf_model_predictions = model.predict(val_image_batch)
print("Prediction results shape:", tf_model_predictions.shape)

# The highest-scoring class per image is the prediction.
predicted_ids = np.argmax(tf_model_predictions, axis=-1)
predicted_labels = dataset_labels[predicted_ids]
print(predicted_labels)

# Show at most one image per grid cell. Capping at the grid capacity (instead of
# subtracting a fixed 2 from the batch length) keeps small batches complete and
# prevents larger batches from overflowing the subplot grid.
GRID_ROWS, GRID_COLS = 6, 5
n_shown = min(len(predicted_labels), GRID_ROWS * GRID_COLS)

plt.figure(figsize=(10,9))
plt.subplots_adjust(hspace=0.5)
for n in range(n_shown):
  plt.subplot(GRID_ROWS, GRID_COLS, n+1)
  plt.imshow(val_image_batch[n])
  color = "green" if predicted_ids[n] == true_label_ids[n] else "red"
  plt.title(predicted_labels[n].title(), color=color)
  plt.axis('off')
_ = plt.suptitle("Model predictions (green: correct, red: incorrect)")
