# <n><u>PARTE 3:</u></n> Deteccion de Genero
## Florencia Migues & Santiago Juani

## Imports

In [1]:
%load_ext tensorboard

In [2]:
import cv2
import os
import sys
from glob import glob
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

In [3]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.optimizers import SGD

## Data Loader

In [4]:
def prep_image(img):
    if len(img.shape)==3 and img.shape[2]==3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return img/255.0

In [5]:
def load_training_data(training_data_path, training_labels_path):
    
    training_data = []
    training_labels = []
    
    images = sorted(glob(training_data_path + '/*'))
    labels = pd.read_pickle(os.path.join(training_labels_path, 'training_labels.pkl'))
    
    print(f'##Loading {len(images)} face images:', flush=True)
    for img_file in tqdm(images, total=len(images)):
        image_name = os.path.basename(img_file)
        
        image = cv2.imread(img_file)[...,::-1]
        image = cv2.resize(image, [128, 128], cv2.INTER_AREA)
        training_data.append(prep_image(image))
        
        label = labels.loc[labels["image_name"] == image_name]
        #training_labels.append(label['Male'].values[0])
        
        if label['Male'].values[0] == 'male':
            training_labels.append([1,0])
        else:
            training_labels.append([0,1])
        
    training_data = np.asarray(training_data)
    training_labels = np.asarray(training_labels)
    return training_data, training_labels


def load_validation_data(validation_data_path, validation_labels_path):
    
    val_imgs = []
    val_labels = []
    
    images = sorted(glob(validation_data_path + '/*.jpg')) 
    
    bbox_pkl = pd.read_pickle(os.path.join(validation_data_path,'validation_bbox.pickle'))
    labels = pd.read_pickle(os.path.join(validation_labels_path, 'testing_labels.pkl'))
    
    labels.describe()
    
    print(f'## Loading {len(images)} face images:', flush=True)
    for img_file in tqdm(images, total=len(images)):
        image_name = os.path.basename(img_file)
        
        big_image = prep_image(cv2.imread(img_file)[...,::-1])
        
        bbox = bbox_pkl.loc[bbox_pkl["image_id"]==image_name]
        cut_image = big_image[bbox['y_top'].values[0] : bbox['y_top'].values[0]+bbox['height'].values[0],
                              bbox['x_left'].values[0] : bbox['x_left'].values[0]+bbox['width'].values[0]]
        try:
            cut_image = cv2.resize(cut_image, [128, 128], cv2.INTER_AREA)
            val_imgs.append(cut_image)
            label = labels.loc[labels["image_name"] == image_name]
            #val_labels.append(label['Male'].values[0])

            if label['Male'].values[0] == 'male':
                val_labels.append([1,0])
            else:
                val_labels.append([0,1])
            
        except:
            pass

    val_imgs = np.asarray(val_imgs)
    val_labels = np.asarray(val_labels)
    return val_imgs, val_labels

## Ubicacion de los datos

In [6]:
data_dir = './'
training_data_dir = os.path.join(data_dir, 'face_detection/cropped_faces')
training_labels_dir = os.path.join(data_dir, 'gender_labels')

validation_data_dir = os.path.join(data_dir, 'face_detection/testing')
validation_labels_dir = os.path.join(data_dir, 'gender_labels')

## Load Data

In [7]:
training_data, training_labels = load_training_data(training_data_dir,
                                                    training_labels_dir)

##Loading 9914 face images:


100%|██████████| 9914/9914 [00:21<00:00, 467.85it/s]


In [8]:
validation_data, validation_labels = load_validation_data(validation_data_dir,
                                                          validation_labels_dir)

## Loading 175 face images:


100%|██████████| 175/175 [00:00<00:00, 283.37it/s]


## Modelo: Keras CNN

### Definition

In [None]:
model = Sequential()

model.add(Conv2D(32, (3,3), input_shape=(128, 128, 1)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(32, (3,3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(32, (3,3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))

model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy',
             optimizer=opt,
             metrics=['accuracy'])

### Training

In [None]:
%tensorboard --logdir ./logs --host localhost

In [None]:
if not os.path.exists('./checkpoints'):
    os.makedirs('./checkpoints')
check_point_model = os.path.join('./checkpoints','checkpoint_gender_model.hdf5')

In [None]:
if os.path.exists('./logs'):
    shutil.rmtree('./logs')
    
checkpoint = ModelCheckpoint(check_point_model,
                            monitor='val_accuracy',
                            verbose=1,
                            save_best_only=True,
                            mode='max')

history = model.fit(training_data, training_labels,
                    epochs=20,
                    batch_size=16,
                    validation_data=(validation_data, validation_labels),
                    callbacks=[checkpoint, TensorBoard(f'./logs/gender_model')])

## Save/load model

In [None]:
model.save('gender_classifier_test.h5')

In [9]:
classifier = keras.models.load_model('gender_classifier.h5')

## Evaluate Classifier

In [10]:
test_loss, test_acc = classifier.evaluate(validation_data,  validation_labels, verbose=2)

6/6 - 1s - loss: 0.1970 - accuracy: 0.9249 - 556ms/epoch - 93ms/step


In [11]:
print(f'Accuracy: {test_acc*100}%')

Accuracy: 92.48554706573486%


## <n>Accuracy</n> 92.5%