**Función:** entrenamiento de arquitectura ResNet152V2, con el dataset 10k desbalanceado.

**Requiere:**
- ../../datasets/flat_10k/ (con una carpeta por label y las fotos sin recortar)

**Crea:** Fichero con el modelo entrenado (epoch con menor val_loss).
- modelos/tf_model_exp1_\<run_id\>

In [None]:
ejecutadoEnGoogleDrive = True # True when the notebook runs in Google Colab with Google Drive mounted

In [None]:
# Choose the project root (PATH) and the dataset folder for the current environment.
if ejecutadoEnGoogleDrive:
    # Colab: mount Google Drive so the project files are reachable under /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')
    # NOTE(review): Colab usually exposes personal files under /content/drive/MyDrive/ — confirm these paths exist.
    PATH = '/content/drive/clasificacion_imagenes/'
    ruta_dataset = '/content/drive/clasificacion_imagenes/datasets/flat_10k/'
else:
    # Local run: both paths are relative.
    PATH = '../../1_Setup_MLOps'
    ruta_dataset = '../../datasets/flat_10k/'

# Make PATH the working directory for the rest of the notebook.
# NOTE(review): in the local branch ruta_dataset is relative, so after this cd it is
# resolved from PATH rather than from the notebook's folder — confirm that is intended.
%cd {PATH}

In [None]:
# Common
import keras
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm

# Data
from tensorflow.image import resize
from sklearn.model_selection import StratifiedShuffleSplit
from keras.preprocessing.image import load_img, img_to_array

# Data Viz
import seaborn as sns
import matplotlib.pyplot as plt

# TL Model
from tensorflow.keras.applications import ResNet50, ResNet50V2, InceptionV3, Xception, ResNet152, ResNet152V2, EfficientNetB0

# Model
from keras import Sequential
from keras.layers import Dense, GlobalAvgPool2D, Dropout
from keras.models import load_model

# Callbacks 
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Model Performance
from sklearn.metrics import classification_report

# Model Viz
from tensorflow.keras.utils import plot_model

# OTHER MODEL
# !pip install mlflow
import mlflow
from tensorflow.random import set_seed
from numpy.random import seed
from tensorflow import keras
from tensorflow.keras.utils import image_dataset_from_directory
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, Conv2D, MaxPooling2D
from keras.models import Sequential
#from keras.optimizers import Nadam
from keras.callbacks import EarlyStopping
from keras.layers import Rescaling
from tensorflow.data import AUTOTUNE
from tensorflow import one_hot
from keras import backend as K
import pickle

In [None]:
# Identify who is running the notebook (so teammates' MLflow data is not overwritten).
# Uncomment the line that applies: `yo` must be defined before the print below,
# otherwise this cell raises NameError.
# yo = 'Pedro_T'
# yo = 'Toni_V'
# yo = 'Carlos_H'
# yo = 'Carlos_C'
print('Notebook ejecutado por {}'.format(yo))

In [None]:
# This run belongs to experiment 6, whose MLflow id is "5" (a string).
experiment_id = "5"
mlflow.set_experiment(experiment_id = experiment_id) 
# NOTE(review): the "Computer" tag is always "Colab", regardless of ejecutadoEnGoogleDrive — confirm.
run = mlflow.start_run( # create the run in MLflow
    experiment_id=experiment_id,
    tags={"Autor": yo, "Computer": "Colab", "params":'ResNet152V2'}
)
# Enable MLflow autologging for TensorFlow/Keras.
mlflow.tensorflow.autolog()
# Report the run id, the experiment id and the run status.
print('Vamos empezar el run {} en el experimento {}. Este run está {}.'.format(run.info.run_id, experiment_id, run.info.status))

Vamos empezar el run 903e3a35c0d74dcb9b7abba094306cb4 en el experimento 3. Este run está RUNNING.


In [None]:
# Target classes of the classifier; the length of this list sets the size of the output layer.
# NOTE(review): assumed to match the dataset sub-folder names in alphanumeric order
# (the order image_dataset_from_directory uses for inferred labels) — confirm.
class_names = ['drink', 'food', 'inside', 'menu', 'outside']

In [None]:
# Side length, in pixels, that images are resized to when the datasets are loaded.
IMG_SIZE = 224
epochs = 100 # maximum number of epochs (unless early stopping triggers first)
# Fraction of the files held out for the validation subset.
validation_split = 0.2
# Fix the random state to get reproducible results (NumPy first, then TensorFlow).
seed(42) # So long and thanks for all the fish
set_seed(42)

In [None]:
# Training subset: images resized to IMG_SIZE x IMG_SIZE, in batches of 50.
# Pipeline order is map -> cache -> prefetch:
#   * map rescales pixels by 1/255 and one-hot encodes the labels, in parallel;
#   * cache stores the already-transformed batches, so the work is done only once;
#   * prefetch comes last so that batch preparation overlaps with training.
# The number of classes comes from class_names instead of a hard-coded 5, keeping
# the labels in sync with the model's output layer.
# NOTE(review): cache() stores the batches produced during the first pass, so the
# shuffled order may be replayed identically on every epoch — confirm this is acceptable.
train_ds = (
    image_dataset_from_directory(
        ruta_dataset,
        validation_split=validation_split,
        subset="training",
        seed=42,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=50,
    )
    .map(
        lambda x, y: (Rescaling(1./255)(x), one_hot(y, len(class_names))),
        num_parallel_calls=AUTOTUNE,
    )
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

Found 8312 files belonging to 5 classes.
Using 6650 files for training.


In [None]:
# Held-out subset (subset="validation" of the same split and seed as the training
# data); despite its name, this is the dataset passed as validation_data to fit().
# Pipeline order is map -> cache -> prefetch:
#   * map rescales pixels by 1/255 and one-hot encodes the labels, in parallel;
#   * cache stores the already-transformed batches, so the work is done only once;
#   * prefetch comes last so that batch preparation overlaps with evaluation.
# The number of classes comes from class_names instead of a hard-coded 5, keeping
# the labels in sync with the model's output layer.
test_ds = (
    image_dataset_from_directory(
        ruta_dataset,
        validation_split=validation_split,
        subset="validation",
        seed=42,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=50,
    )
    .map(
        lambda x, y: (Rescaling(1./255)(x), one_hot(y, len(class_names))),
        num_parallel_calls=AUTOTUNE,
    )
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

Found 8312 files belonging to 5 classes.
Using 1662 files for validation.


In [None]:
# Transfer-learning model: frozen ResNet152V2 backbone plus a small dense head.
name = 'ResNet152V2'

# Backbone with ImageNet weights and without its classification top. The input
# shape is derived from IMG_SIZE so it cannot drift from the size the input
# pipeline resizes images to.
# NOTE(review): Keras documents resnet_v2.preprocess_input as scaling pixels to
# [-1, 1], while the input pipeline in this notebook rescales to [0, 1] — confirm
# this difference is intended.
base_model = ResNet152V2(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False  # freeze the backbone; only the head below is trained

# Classification head: global average pooling -> dense ReLU layer -> dropout ->
# softmax with one unit per class.
model = Sequential([
    base_model,
    GlobalAvgPool2D(),
    Dense(224, activation='relu'),
    Dropout(0.5),
    Dense(len(class_names), activation='softmax')
], name=name)

In [None]:
def recall_m(y_true, y_pred):
    """Recall of ``y_pred`` against ``y_true``, computed with Keras backend ops.

    True positives are the entries where the product ``y_true * y_pred``,
    clipped to [0, 1], rounds to 1. Possible positives are the entries where
    ``y_true``, clipped to [0, 1], rounds to 1.

    Args:
        y_true: ground-truth tensor (presumably one-hot labels — confirm against caller).
        y_pred: prediction tensor (presumably class probabilities — confirm against caller).

    Returns:
        true_positives / (possible_positives + epsilon); the epsilon term avoids
        a division by zero when there are no positives.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of ``y_pred`` against ``y_true``, computed with Keras backend ops.

    True positives are the entries where the product ``y_true * y_pred``,
    clipped to [0, 1], rounds to 1. Predicted positives are the entries where
    ``y_pred``, clipped to [0, 1], rounds to 1.

    Args:
        y_true: ground-truth tensor (presumably one-hot labels — confirm against caller).
        y_pred: prediction tensor (presumably class probabilities — confirm against caller).

    Returns:
        true_positives / (predicted_positives + epsilon); the epsilon term avoids
        a division by zero when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m`` on the same tensors.

    Args:
        y_true: ground-truth tensor, forwarded unchanged to both helper metrics.
        y_pred: prediction tensor, forwarded unchanged to both helper metrics.

    Returns:
        2 * (p * r) / (p + r + epsilon); the epsilon term avoids a division by
        zero when precision and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    denominator = p + r + K.epsilon()
    return 2 * ((p * r) / denominator)

In [None]:
# Print the layer-by-layer summary of the model, including parameter counts.
model.summary()

Model: "ResNet152V2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet152v2 (Functional)    (None, 7, 7, 2048)        58331648  
                                                                 
 global_average_pooling2d_2   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_4 (Dense)             (None, 250)               512250    
                                                                 
 dropout_2 (Dropout)         (None, 250)               0         
                                                                 
 dense_5 (Dense)             (None, 5)                 1255      
                                                                 
Total params: 58,845,153
Trainable params: 513,505
Non-trainable params: 58,331,648
_____________________________________

In [None]:
# Compile with categorical cross-entropy (the input pipeline one-hot encodes the
# labels) and the Adam optimizer. Besides accuracy, track the custom F1,
# precision and recall metrics defined in this notebook.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc', f1_m, precision_m, recall_m])

In [None]:
# Early stopping on val_loss with a patience of 30 epochs; restore_best_weights
# asks Keras to put back the weights from the epoch with the best val_loss.
# NOTE(review): depending on the Keras version, the best weights may only be
# restored when early stopping actually triggers — confirm for the version in use.
callback = EarlyStopping(monitor='val_loss',
                         patience=30,
                         restore_best_weights=True)

In [None]:
# Train the model, validating on test_ds after every epoch.
# * The epoch limit comes from the `epochs` constant set in the configuration
#   cell instead of a second hard-coded 100.
# * batch_size is deliberately not passed: train_ds/test_ds are tf.data datasets
#   that are already batched (50 per batch), and Keras documents that batch_size
#   must not be specified for dataset inputs.
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=epochs,
    callbacks=[callback],
    verbose=1,
)

In [None]:
def guardar(artefacto, fichero):
    """Pickle ``artefacto`` into the file ``fichero`` and print a confirmation.

    Args:
        artefacto: any picklable Python object.
        fichero: destination path; the file is created or overwritten.

    Returns:
        None.

    Raises:
        OSError: if the file cannot be opened for writing.
        pickle.PicklingError: if ``artefacto`` cannot be pickled.
    """
    # The context manager guarantees the handle is flushed and closed even when
    # pickling fails, instead of leaving that to garbage collection.
    with open(fichero, 'wb') as salida:
        pickle.dump(artefacto, salida)
    print('Guardado: {}'.format(fichero))

In [None]:
# Save the trained model under a name tagged with the MLflow run id; the relative
# path is resolved from the current working directory.
# NOTE(review): the model was compiled with custom metrics (f1_m, precision_m,
# recall_m); loading it later may require custom_objects or compile=False — confirm.
model.save('../4_Modelacion/Experimento_6/modelos/tf_model_exp1_{}'.format(run.info.run_id))

In [None]:
# Close the active MLflow run.
mlflow.end_run()