# <u>PARTE 3:</u> Detección de Género
## Florencia Migues & Santiago Juani

## Imports

In [1]:
%load_ext tensorboard

In [2]:
import cv2
import os
import sys
from glob import glob
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

In [3]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.optimizers import SGD

## Data Loader

In [7]:
# Read the three pickled tables, then dump each one to a CSV file in the
# working directory so it can be inspected outside Python.
# NOTE(review): training_labels_dir, validation_data_dir and
# validation_labels_dir must already be defined when this cell runs.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
tl, vbbs, vl = (
    pd.read_pickle(os.path.join(folder, filename))
    for folder, filename in (
        (training_labels_dir, 'training_labels.pkl'),
        (validation_data_dir, 'validation_bbox.pickle'),
        (validation_labels_dir, 'validation_labels.pkl'),
    )
)

for frame, csv_name in (
    (tl, 'training_labels.csv'),
    (vbbs, 'validation_bbox.csv'),
    (vl, 'validation_labels.csv'),
):
    frame.to_csv(csv_name)

In [4]:
def prep_image(img):
    """Convert a 3-channel image to grayscale and divide the result by 255.

    A 3-D array whose last axis has length 3 is converted with
    ``cv2.COLOR_RGB2GRAY`` (so its channels are expected in RGB order).
    Any other input skips the conversion and is only divided by 255.0.
    """
    has_three_channels = len(img.shape) == 3 and img.shape[2] == 3
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if has_three_channels else img
    return gray / 255.0

In [5]:
def load_training_data(training_data_path, training_labels_path, target_size=(128, 128)):
    """Load the cropped training faces and their one-hot gender labels.

    Every file directly under ``training_data_path`` is read with OpenCV,
    reversed along the channel axis (BGR -> RGB), resized to ``target_size``
    and passed through ``prep_image``. Labels are read from
    ``training_labels.pkl`` inside ``training_labels_path`` and matched to
    each file through the ``image_name`` column.

    Args:
        training_data_path: Folder containing the face image files.
        training_labels_path: Folder containing ``training_labels.pkl``.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(training_data, training_labels)`` as NumPy arrays. Each label is
        ``[1, 0]`` when the ``Male`` column equals ``'male'`` and ``[0, 1]``
        for any other value.

    Raises:
        IndexError: If an image has no row in the label table.
    """
    images = sorted(glob(training_data_path + '/*'))
    # NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
    labels = pd.read_pickle(os.path.join(training_labels_path, 'training_labels.pkl'))

    # Build the name -> label lookup once instead of scanning the frame for
    # every image. keep='first' mirrors the previous `.values[0]` behaviour
    # when a name appears more than once.
    first_rows = labels.drop_duplicates(subset='image_name', keep='first')
    gender_by_name = dict(zip(first_rows['image_name'], first_rows['Male']))

    training_data = []
    training_labels = []
    unreadable = []

    print(f'##Loading {len(images)} face images:', flush=True)
    for img_file in tqdm(images, total=len(images)):
        image_name = os.path.basename(img_file)

        bgr = cv2.imread(img_file)
        if bgr is None:
            # The '/*' glob also matches non-image files; imread returns None
            # for anything it cannot decode.
            unreadable.append(image_name)
            continue

        if image_name not in gender_by_name:
            # Same exception type the old `.values[0]` lookup produced.
            raise IndexError(f'no label row found for image {image_name!r}')

        # `interpolation` must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, not the interpolation flag.
        image = cv2.resize(bgr[..., ::-1], tuple(target_size),
                           interpolation=cv2.INTER_AREA)
        training_data.append(prep_image(image))

        if gender_by_name[image_name] == 'male':
            training_labels.append([1, 0])
        else:
            training_labels.append([0, 1])

    if unreadable:
        print(f'## Skipped {len(unreadable)} unreadable files: {unreadable}', flush=True)

    training_data = np.asarray(training_data)
    training_labels = np.asarray(training_labels)
    return training_data, training_labels


def load_validation_data(validation_data_path, validation_labels_path, target_size=(128, 128)):
    """Load the validation faces (cropped by bounding box) and their labels.

    Each ``*.jpg`` under ``validation_data_path`` is read with OpenCV,
    reversed along the channel axis (BGR -> RGB) and passed through
    ``prep_image``. It is then cropped to the box stored for it in
    ``validation_bbox.pickle`` (columns ``image_id``, ``x_left``, ``y_top``,
    ``width``, ``height``) and resized to ``target_size``. Labels come from
    ``validation_labels.pkl`` inside ``validation_labels_path``.

    An image that cannot be processed (undecodable file, missing box or label
    row, empty crop, resize failure) is skipped and listed in a printed
    summary. Image and label are only appended together, so the two returned
    arrays always have the same length.

    Args:
        validation_data_path: Folder with the ``*.jpg`` files and
            ``validation_bbox.pickle``.
        validation_labels_path: Folder containing ``validation_labels.pkl``.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(val_imgs, val_labels)`` as NumPy arrays. Each label is ``[1, 0]``
        when the ``Male`` column equals ``'male'`` and ``[0, 1]`` otherwise.
    """
    images = sorted(glob(validation_data_path + '/*.jpg'))

    # NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
    bbox_pkl = pd.read_pickle(os.path.join(validation_data_path, 'validation_bbox.pickle'))
    labels = pd.read_pickle(os.path.join(validation_labels_path, 'validation_labels.pkl'))

    val_imgs = []
    val_labels = []
    skipped = []  # (image_name, reason) pairs reported after the loop

    print(f'## Loading {len(images)} face images:', flush=True)
    for img_file in tqdm(images, total=len(images)):
        image_name = os.path.basename(img_file)

        raw = cv2.imread(img_file)
        if raw is None:
            skipped.append((image_name, 'file could not be decoded'))
            continue
        big_image = prep_image(raw[..., ::-1])

        bbox = bbox_pkl.loc[bbox_pkl["image_id"] == image_name]
        if bbox.empty:
            skipped.append((image_name, 'no bounding box row'))
            continue

        # Resolve the label before touching the output lists, so a missing
        # label can never leave an image without its matching label.
        label = labels.loc[labels["image_name"] == image_name]
        if label.empty:
            skipped.append((image_name, 'no label row'))
            continue

        y_top = bbox['y_top'].values[0]
        x_left = bbox['x_left'].values[0]
        height = bbox['height'].values[0]
        width = bbox['width'].values[0]
        cut_image = big_image[y_top:y_top + height, x_left:x_left + width]
        if cut_image.size == 0:
            skipped.append((image_name, 'bounding box selects an empty crop'))
            continue

        try:
            # `interpolation` must be passed by keyword: the third positional
            # parameter of cv2.resize is `dst`, not the interpolation flag.
            cut_image = cv2.resize(cut_image, tuple(target_size),
                                   interpolation=cv2.INTER_AREA)
        except cv2.error as exc:
            skipped.append((image_name, f'resize failed: {exc}'))
            continue

        val_imgs.append(cut_image)
        if label['Male'].values[0] == 'male':
            val_labels.append([1, 0])
        else:
            val_labels.append([0, 1])

    if skipped:
        print(f'## Skipped {len(skipped)} validation images:', flush=True)
        for name, reason in skipped:
            print(f'   {name}: {reason}', flush=True)

    val_imgs = np.asarray(val_imgs)
    val_labels = np.asarray(val_labels)
    return val_imgs, val_labels

## Ubicación de los datos

In [4]:
# Root folder that every dataset path below is resolved against.
data_dir = './'
# Training inputs: the face image files, and the folder holding training_labels.pkl.
training_data_dir = os.path.join(data_dir, 'face_detection/cropped_faces')
training_labels_dir = os.path.join(data_dir, 'gender_labels')

# Validation inputs: the *.jpg files plus validation_bbox.pickle, and the
# folder holding validation_labels.pkl (same folder as the training labels).
validation_data_dir = os.path.join(data_dir, 'face_detection/validation')
validation_labels_dir = os.path.join(data_dir, 'gender_labels')

## Load Data

In [7]:
# Materialise both datasets in memory as (images, one-hot labels) pairs.
training_data, training_labels = load_training_data(
    training_data_path=training_data_dir,
    training_labels_path=training_labels_dir,
)

validation_data, validation_labels = load_validation_data(
    validation_data_path=validation_data_dir,
    validation_labels_path=validation_labels_dir,
)

##Loading 9914 face images:


100%|██████████| 9914/9914 [00:21<00:00, 461.19it/s]


## Loading 175 face images:


100%|██████████| 175/175 [00:00<00:00, 267.75it/s]


## Modelo: Keras CNN

### Definition

In [41]:
# Small CNN for 128x128 single-channel inputs: three identical
# Conv -> BatchNorm -> ReLU -> MaxPool stages, then a dropout-regularised
# dense head ending in a 2-way softmax.
model = Sequential([
    # Stage 1 (declares the input shape)
    Conv2D(32, (3, 3), input_shape=(128, 128, 1)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(32, (3, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    Conv2D(32, (3, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(2),
    Activation('softmax'),
])

# Plain SGD with categorical cross-entropy, matching the one-hot labels.
opt = SGD(learning_rate=0.01)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Training

In [31]:
%tensorboard --logdir ./logs/ --host localhost

Reusing TensorBoard on port 6006 (pid 268), started 2 days, 22:07:56 ago. (Use '!kill 268' to kill it.)

In [10]:
# Make sure the checkpoint folder exists. exist_ok=True replaces the separate
# exists() check and is a no-op when the folder is already there.
os.makedirs('./checkpoints', exist_ok=True)
# File that the ModelCheckpoint callback writes the best model to.
check_point_model = os.path.join('./checkpoints','checkpoint_gender_model.hdf5')

In [12]:
# Start every training run from an empty TensorBoard log directory.
if os.path.exists('./logs'):
    shutil.rmtree('./logs')

# Write the model to `check_point_model` only when val_loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath=check_point_model,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
training_callbacks = [checkpoint, TensorBoard(log_dir='./logs/gender_model')]

history = model.fit(
    x=training_data,
    y=training_labels,
    batch_size=16,
    epochs=100,
    validation_data=(validation_data, validation_labels),
    callbacks=training_callbacks,
)

Epoch 1/20
Epoch 1: val_loss improved from inf to 0.72526, saving model to ./checkpoints\checkpoint_gender_model.hdf5
Epoch 2/20
Epoch 2: val_loss improved from 0.72526 to 0.44104, saving model to ./checkpoints\checkpoint_gender_model.hdf5
Epoch 3/20
Epoch 3: val_loss improved from 0.44104 to 0.36890, saving model to ./checkpoints\checkpoint_gender_model.hdf5
Epoch 4/20
Epoch 4: val_loss did not improve from 0.36890
Epoch 5/20
Epoch 5: val_loss improved from 0.36890 to 0.33061, saving model to ./checkpoints\checkpoint_gender_model.hdf5
Epoch 6/20
Epoch 6: val_loss improved from 0.33061 to 0.29133, saving model to ./checkpoints\checkpoint_gender_model.hdf5
Epoch 7/20
Epoch 7: val_loss did not improve from 0.29133
Epoch 8/20
Epoch 8: val_loss did not improve from 0.29133
Epoch 9/20
Epoch 9: val_loss did not improve from 0.29133
Epoch 10/20
Epoch 10: val_loss did not improve from 0.29133
Epoch 11/20
Epoch 11: val_loss improved from 0.29133 to 0.25483, saving model to ./checkpoints\checkpo

KeyboardInterrupt: 

## Save/load model

In [28]:
# Save the in-memory `model` to 'gender_classifier.h5' in the working directory.
model.save('gender_classifier.h5')

In [42]:
# Load the weights stored in the checkpoint file into `model`.
# NOTE(review): in notebook order this runs after the model.save(...) cell, so
# on a top-to-bottom run 'gender_classifier.h5' does not contain these
# weights -- confirm the intended order.
model.load_weights(os.path.join('./checkpoints','checkpoint_gender_model.hdf5'))

In [48]:
# Reload the saved model from disk as `classifier`; the evaluation cells use
# this object rather than `model`.
classifier = keras.models.load_model('gender_classifier.h5')

## Evaluate Classifier

In [51]:
# Score the reloaded classifier on the validation set (loss and accuracy).
test_loss, test_acc = classifier.evaluate(
    x=validation_data,
    y=validation_labels,
    verbose=2,
)

6/6 - 0s - loss: 0.2285 - accuracy: 0.9368 - 486ms/epoch - 81ms/step


In [66]:
# Manual cross-check of classifier.evaluate: count the validation samples
# whose predicted class matches the one-hot label.
aciertos = 0

if len(validation_data) > 0:
    # One batched predict call instead of one call (and one progress line)
    # per image.
    scores = classifier.predict(validation_data)
    label_matrix = np.asarray(validation_labels)

    # Same comparison rules as the per-sample version: a score tie counts as
    # class 0, and a label row with equal entries never counts as a hit.
    predicts_first = scores[:, 0] >= scores[:, 1]
    predicts_second = scores[:, 0] < scores[:, 1]
    labelled_first = label_matrix[:, 0] > label_matrix[:, 1]
    labelled_second = label_matrix[:, 0] < label_matrix[:, 1]

    hits = (predicts_first & labelled_first) | (predicts_second & labelled_second)
    aciertos = int(np.sum(hits))

174 163


In [69]:
# Report the sample count next to the hand-counted hits, then the hit rate
# as a percentage.
n_validation = len(validation_data)
print(n_validation, aciertos)
print(f"Accuracy: {(aciertos*100)/n_validation}%")

174 163
Accuracy: 93.67816091954023%


In [52]:
# Accuracy returned by classifier.evaluate, shown as a percentage.
accuracy_pct = test_acc*100
print(f'Accuracy: {accuracy_pct}%')

Accuracy: 93.67815852165222%
