# **EVA** IMAGE CLASSIFIER

![img_ppal](https://www.innovacion-tecnologia.com/wp-content/uploads/2020/09/Historia-de-los-humanoides.jpg)

## *ÍNDICE:* 
---
0. **INTRODUCCIÓN**
1. **DEEP LEARNING**
    - 2.1. Preparación y limpieza de datos
    - 2.2. Feature Engineering
    - 2.3. Modelado
2. RESULTADOS
    - 3.1. Visualización y reporting de los resultados
    - 3.2. Creación de un pipeline para el flujo automatizado
---

# ***DEEP LEARNING***

---

### Librerías

In [1]:
import os
import pandas as pd
import shutil
from skimage.io import imread
import cv2
import random 
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive

from PIL import Image

# batch ingestion of pics without pickle
from tensorflow.keras.preprocessing import image_dataset_from_directory

from tensorflow.keras.applications import VGG16, VGG19,Xception, InceptionResNetV2, DenseNet121, ResNet50 

from tensorflow.keras import Model, layers, optimizers, metrics, losses
from tensorflow.keras.models import Sequential, load_model

#from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, MaxPooling2D, Flatten # Input, Conv2D, 
#from tensorflow.keras.layers import Layer

# optimization
from tensorflow.keras.optimizers import Adam #, SGD
from tensorflow.keras.losses import SparseCategoricalCrossentropy, categorical_crossentropy
from tensorflow.keras.callbacks import EarlyStopping

# metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
#from sklearn.metrics import plot_precision_recall_curve
from itertools import chain # to flatten the real labels array from validation set
import json # to save in a file metrics
#from datetime import datetime # to name results

In [2]:
import pickle

In [3]:
#import yolo5

#### Funciones propias 
(reubicar posteriormente)

In [4]:
def define_subfolders(df: pd.DataFrame, class_names: dict, input_path: str, output_path: str,img_train_path: str, ds_img_path: str = 'path_img'):
    '''
    Objective: Create subfolder for each class within the training set folder

    args.
    ---
    df: pd.DataFrame; the training dataset

    class_names: dict; key define the class name and value refers to the integer label

    input_path: str; path where is the image set/ set
    
    output_path: str; path to send the image

    ret.
    ---
    None
    '''
    for k,v in class_names.items():
        image_list = list(df[df["label"]==v][ds_img_path])
        new_path_folder = img_train_path+output_path+"/"+k+"/"
        os.makedirs(new_path_folder, exist_ok=True)
        for i in image_list:
            image = i.split("/")[-1]
            old_path = img_train_path+input_path+"/"+image
            new_path = new_path_folder + image
            #print(k, old_path, "\n", new_path, "\n\n")
            shutil.copy(old_path, new_path)

In [5]:

def resize(s, input_path, input_path_res, class_names,csv_str='failed_resized.csv'): 
  
  img_not_resized = pd.DataFrame(columns=['folder_path','img_path'])
  for c in class_names:
    folder = input_path +"/"+ c
    pic_list = os.listdir(folder)
    folder_output = input_path_res +"/"+ c

    for p in pic_list:
      im_path = folder + "/" + p
      #print(im_path)
      try:
        im = Image.open(im_path)
        size = (s, int(im.size[1] * s / im.size[0]))
        im=im.convert('RGB')
        im_resized = im.resize(size, Image.ANTIALIAS)
        pname, ext = os.path.splitext(p)
        
        im_resized.save(folder_output + "/" + pname+'.jpeg',"JPEG")
      except:       
        img_not_resized=img_not_resized.append({'folder_path': folder,
                                                'img_path': pname+'.jpeg'},ignore_index=True)
        print('Fallo en: '+folder_output + "/" + pname +'.jpeg',"JPEG")
      
  img_not_resized.to_csv(IMG_TRAIN_PATH+csv_str)
  return img_not_resized

In [6]:
def copy_folder_structure(src_folder, dst_folder):
    """
    Copia la estructura de la carpeta src_folder en la carpeta dst_folder
    sin copiar los archivos que contienen
    """
    for root, dirs, _ in os.walk(src_folder):
        for directory in dirs:
            new_dir_path = os.path.join(root.replace(src_folder, dst_folder), directory)
            os.makedirs(new_dir_path, exist_ok=True)

### Definición de constantes

In [7]:
# relative path
# REL PATH=os.getcwd()
REL_PATH = '/content/drive/MyDrive/ColabNotebooks/HACKATON/hackaton_nuwe_oracle/reto_1/NuweHackatonOracle_DL_Clf_reto1'

# paths
IMG_TRAIN_PATH = REL_PATH + '/data/' # + example_path (from csv train)
IMG_TEST_PATH = REL_PATH + '/data/' # + example_path (from csv test)
TRAIN_PATH = REL_PATH + '/data/train.csv'
TEST_PATH = REL_PATH + '/data/test.csv'

OUTPUT_PATH = REL_PATH + 'output/' 
INPUT_PATH = IMG_TRAIN_PATH + "/train_test_data/train_modified"

IMG_TRAIN_PATH_RES = REL_PATH + '/data/train_resized'

# processing
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
IMAGE_CHANNELS = 3
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
BATCH_SIZE = 32
EPOCHS = 10

In [8]:
%pwd

'/content'

In [9]:
os.chdir(REL_PATH)

In [10]:
ls

0_Introduccion.ipynb  1_DL_baseline.ipynb  [0m[01;34mdata[0m/  [01;34msetup[0m/  [01;34mutility[0m/


### Preparación y limpieza de los datos

1. En la columna path_img se incluye la carpeta 

In [8]:
ds_train=pd.read_csv(TRAIN_PATH)

In [9]:
categories = ['burger','chicken','donut','fries','hotdog','pizza','sandwich','potatoes']

In [10]:
class_names={str(i)+'-'+categories[i]: i for i in range(len(categories))}

In [11]:
class_names

{'0-burger': 0,
 '1-chicken': 1,
 '2-donut': 2,
 '3-fries': 3,
 '4-hotdog': 4,
 '5-pizza': 5,
 '6-sandwich': 6,
 '7-potatoes': 7}

In [12]:
df_train=ds_train.copy()

In [13]:
df_train[['path_img_folder', 'path_img']] = df_train['path_img'].str.split('/', expand=True)

In [14]:
df_train.head()

Unnamed: 0,idx_train,path_img,label,path_img_folder
0,0,bc7696f4-1460-4d0b-a63d-f84b3be4da0f.jpeg,0,all_imgs
1,1,f8d50663-60d8-4da5-a8b8-79f954aec503.jpeg,2,all_imgs
2,2,51df0f29-758b-4741-ab74-a0ff8e21c044.jpeg,4,all_imgs
3,3,f61b81d3-3b79-4162-b4d6-4f1b39518c4c.jpeg,0,all_imgs
4,4,b21e0668-bd09-4794-9e90-da8ecffc4c1c.jpeg,0,all_imgs


In [None]:
#define_subfolders(df_train, class_names, 'all_imgs', 'train',IMG_TRAIN_PATH, 'path_img')

In [None]:
#len(os.listdir(IMG_TRAIN_PATH+'train'+'/0-burger'))

In [None]:
1396*8

11168

In [15]:
train_img_path = IMG_TRAIN_PATH + 'train'


In [None]:
#copy_folder_structure(train_img_path,IMG_TRAIN_PATH_RES)

In [16]:
class_names.keys()

dict_keys(['0-burger', '1-chicken', '2-donut', '3-fries', '4-hotdog', '5-pizza', '6-sandwich', '7-potatoes'])

In [None]:
#fails=resize(IMAGE_WIDTH, train_img_path, IMG_TRAIN_PATH_RES, class_names)

In [None]:
#fails.head()

In [17]:
# train set
# https://www.tensorflow.org/tutorials/images/classification
train_ds = image_dataset_from_directory(
    IMG_TRAIN_PATH_RES,
    class_names = class_names.keys(),
    labels='inferred',
    # label_mode='categorical',
    validation_split=0.3,
    subset="training",
    seed=42,
    image_size= IMAGE_SIZE,
    batch_size= BATCH_SIZE,
    color_mode='rgb'
)

Found 11189 files belonging to 8 classes.
Using 7833 files for training.


In [18]:
# validation set (use the same seed and split)
val_ds = image_dataset_from_directory( # utilizar el de validación como test
    IMG_TRAIN_PATH_RES,
    labels='inferred',
    # label_mode='categorical',
    class_names=class_names.keys(),
    validation_split=0.3,
    subset="validation",
    seed=42,
    image_size= IMAGE_SIZE,
    batch_size= BATCH_SIZE,
    color_mode='rgb'
)

Found 11189 files belonging to 8 classes.
Using 3356 files for validation.


In [19]:
len(class_names)

8

In [20]:
num_classes = len(class_names)

In [None]:
model = Sequential([
  layers.Rescaling(1./255, input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='softmax'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='softmax'),
  layers.Dense(num_classes)
])

In [None]:
model.compile(optimizer='adam',
              loss=SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_6 (Conv2D)           (None, 64, 64, 16)        448       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 32, 32, 16)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 32, 32, 32)        4640      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 16, 16, 64)       

In [None]:
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=EPOCHS
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
history.history

{'loss': [2.0617799758911133,
  1.9982173442840576,
  1.9612239599227905,
  1.9254908561706543,
  1.8924587965011597,
  1.8569415807724,
  1.8238829374313354,
  1.7953547239303589,
  1.7642848491668701,
  1.7336548566818237],
 'accuracy': [0.16162389516830444,
  0.23783990740776062,
  0.2576279938220978,
  0.27741605043411255,
  0.297587126493454,
  0.3204391598701477,
  0.33269500732421875,
  0.34405720233917236,
  0.35465338826179504,
  0.37124985456466675],
 'val_loss': [2.024747610092163,
  1.9819353818893433,
  1.951957106590271,
  1.9179913997650146,
  1.9005470275878906,
  1.8650431632995605,
  1.8472830057144165,
  1.8432155847549438,
  1.805385708808899,
  1.8008601665496826],
 'val_accuracy': [0.21543504297733307,
  0.24404051899909973,
  0.2508939206600189,
  0.2759237289428711,
  0.27294397354125977,
  0.3003575801849365,
  0.3131704330444336,
  0.3140643537044525,
  0.32836711406707764,
  0.3253873586654663]}

In [None]:
#model.save(IMG_TRAIN_PATH+'models/model1.h5')

In [22]:
# https://www.tensorflow.org/guide/keras/preprocessing_layers
"""data_augmentation = Sequential([  
    #layers.RandomBrightness(factor=[0,0.3], seed=42), # this 2 were introducing erros in the build of the model
    #layers.RandomContrast(factor=[0,0.3], seed=77),
    #layers.RandomFlip("horizontal",input_shape=(IMAGE_SIZE[0],IMAGE_SIZE[1],3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.005),
    #random_invert(0.2),  # color inversion
    layers.RandomContrast(0.1),
    #Contrast(),
    #layers.RandomCrop(IMAGE_SIZE[0],IMAGE_SIZE[1]), 
    layers.RandomWidth(0.05, interpolation="gaussian"),
    layers.RandomHeight(0.05),
    layers.RandomTranslation((-0.2,0.3),(-0.2,0.3), fill_mode='constant',interpolation='bilinear', seed=None, fill_value=0.0),
  ]
)"""
data_augmentation = Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
])

In [None]:
model_baseline2 = Sequential([
  data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='softmax'),
  layers.Dense(num_classes)
])

In [None]:
model_baseline2.compile(optimizer='adam',
              loss=SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
history2 = model_baseline2.fit(
  train_ds,
  validation_data=val_ds,
  epochs=EPOCHS
)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
#model_baseline2.save(IMG_TRAIN_PATH+'models/model2.h5')

In [None]:
!pip install tensorflow_addons
import tensorflow_addons as tfa

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [21]:
from keras import backend as K


In [22]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
    
def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [23]:
model_baseline3 = Sequential([
  data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes, activation='softmax')
])

In [24]:
model_baseline3.compile(optimizer='adam', loss=losses.SparseCategoricalCrossentropy(), metrics=['accuracy',precision_m,recall_m,f1_m])


In [None]:
df_train.head(
    
)

Unnamed: 0,idx_train,path_img,label,path_img_folder
0,0,bc7696f4-1460-4d0b-a63d-f84b3be4da0f.jpeg,0,all_imgs
1,1,f8d50663-60d8-4da5-a8b8-79f954aec503.jpeg,2,all_imgs
2,2,51df0f29-758b-4741-ab74-a0ff8e21c044.jpeg,4,all_imgs
3,3,f61b81d3-3b79-4162-b4d6-4f1b39518c4c.jpeg,0,all_imgs
4,4,b21e0668-bd09-4794-9e90-da8ecffc4c1c.jpeg,0,all_imgs


In [None]:
type(train_ds)

tensorflow.python.data.ops.dataset_ops.BatchDataset

In [25]:
history3 = model_baseline3.fit(
  train_ds,
  validation_data=val_ds,
  epochs=EPOCHS
)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [26]:
model_baseline3.save(IMG_TRAIN_PATH+'models/model3.h5')

In [28]:
# Define the model
model_baseline4 = Sequential([
    # Tenga en cuenta que la forma de entrada es el tamaño deseado de la imagen 300x300 con 3 bytes de color
    # Esta es la primera convolución
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    # La segunda convolución
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.Dropout(0.5),
    layers.MaxPooling2D(2,2),
    # La tercera convolución
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.Dropout(0.5),
    layers.MaxPooling2D(2,2),
    # La cuarta convolución
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(2,2),
    # La quinta convolución
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    # Aplanar los resultados para alimentar a un DNN
    
    layers.Flatten(),
    layers.Dropout(0.5),
    # Capa oculta de 512 neuronas
    layers.Dense(512, activation='relu'),
    layers.Dense(8, activation='softmax')
])
# Define the optimizer
optimizer = optimizers.Adam()

# Define the loss function
loss_fn = losses.SparseCategoricalCrossentropy()


# Compile the model
model_baseline4.compile(optimizer=optimizer, loss=losses.SparseCategoricalCrossentropy(), metrics=['accuracy',precision_m,recall_m,f1_m])

# Train the model
history4 = model_baseline4.fit(train_ds,
  validation_data=val_ds,
  epochs=EPOCHS, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
model_baseline4.save(IMG_TRAIN_PATH+'models/model4.h5')

In [23]:
data_augmentation = Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
])

In [None]:
# Define the model
model_baseline5 = Sequential([
    data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes, activation='softmax')
  ])
# Define the optimizer
optimizer = optimizers.Adam()

# Define the loss function
loss_fn = losses.SparseCategoricalCrossentropy()


# Compile the model
model_baseline5.compile(optimizer=optimizer, loss=losses.SparseCategoricalCrossentropy(), metrics=['accuracy',precision_m,recall_m,f1_m])

# Train the model
history5 = model_baseline5.fit(train_ds,
  validation_data=val_ds,
  epochs=EPOCHS, verbose=1)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
model_baseline5.save(IMG_TRAIN_PATH+'models/model5.h5')