### This code uses a trained model, EfficientNetB1-gender-98.00.h5, which achieved an F1 score of 98% on the test set, to make predictions on either a single image or on multiple images. When multiple images are used they should all be of the SAME person. The kernel has a function, crop, which crops an image down to just the face. See the directory "images to predict" for the types of images to use. If the images you have are already cropped to the face, set crop_image=False in the call to classify.

In [None]:
!pip install mtcnn

In [None]:
from mtcnn import MTCNN
import os
import numpy as np
import pandas as pd
import cv2 as cv2
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from tensorflow.keras.models import Model, load_model
import logging
# Only let TensorFlow's Python logger emit messages at ERROR level or above.
logging.getLogger("tensorflow").setLevel(logging.ERROR)
# Allow pandas to display up to 80 columns before truncating its output.
pd.set_option('display.max_columns', 80)

### Define a function that takes in an image and detects the faces in it.
### If there are multiple faces in the image, it selects the face with the most
### pixels and returns that as the cropped facial image.

In [None]:
def crop(img, detector=None):
    """Detect faces in an image and return the largest one, cropped with a margin.

    Args:
        img: image array indexed as ``img[row, column, ...]``. It is passed
            unchanged to the detector's ``detect_faces`` method.
        detector: optional object exposing ``detect_faces(img)`` that returns a
            list of dicts, each holding a ``'box'`` entry laid out as
            ``[x, y, width, height]``. When omitted, a new ``MTCNN()`` is
            created for this call; pass one in to reuse the same detector
            across many images instead of rebuilding it every time.

    Returns:
        ``(True, face)`` where ``face`` is the slice of ``img`` around the
        detected box with the largest area, or ``(False, None)`` when no face
        is detected or the resulting crop would be empty.
    """
    margin = 1.2  # the crop is 1.2x the detected box, centred on the box
    img_height, img_width = img.shape[0], img.shape[1]

    if detector is None:
        detector = MTCNN()
    faces = detector.detect_faces(img)
    if not faces:
        return False, None

    # Keep the detection whose box covers the most pixels (width * height).
    largest = max(faces, key=lambda face: face['box'][2] * face['box'][3])
    x, y, w, h = largest['box']

    center_x = int(x + w / 2)
    center_y = int(y + h / 2)
    half_w = int(w * margin / 2)
    half_h = int(h * margin / 2)

    # Clamp the enlarged box to the image so the slice never wraps around
    # through a negative index or runs past the image edge.
    left = max(center_x - half_w, 0)
    top = max(center_y - half_h, 0)
    right = min(center_x + half_w, img_width)
    bottom = min(center_y + half_h, img_height)

    face_img = img[top:bottom, left:right]
    if face_img.size == 0:
        # A degenerate (zero-area) box yields nothing usable downstream.
        return False, None
    return True, face_img

In [None]:
def classify(sdir, csv_path,  model_path, name, crop_image = False):
    """Predict the class of the image(s) in a directory and show one of them.

    Every file in ``sdir`` is read, optionally cropped to a face with
    ``crop``, resized, scaled and passed to the model. The class predicted
    most often wins; among tied classes the one encountered first wins. The
    first image predicted as the winning class is displayed with matplotlib,
    titled with the class name.

    Args:
        sdir: directory containing the image file(s) to classify.
        csv_path: path to a CSV file. The first row's ``'height'``,
            ``'width'`` and ``'scale by'`` columns define the preprocessing,
            and the ``'class'`` column is indexed by row position with the
            predicted class index.
        model_path: path of the saved model handed to ``load_model``.
        name: accepted for interface compatibility; not used here.
        crop_image: when true, each image is first passed through ``crop``
            and images for which it reports no face are skipped.

    Returns:
        ``(class_name, probability)`` where ``probability`` is the model's
        probability for the winning class averaged over the images that were
        actually classified, or ``(None, None)`` if no image was usable.
    """
    from collections import Counter

    # read in the csv file
    class_df = pd.read_csv(csv_path)
    img_height = int(class_df['height'].iloc[0])
    img_width = int(class_df['width'].iloc[0])
    scale = class_df['scale by'].iloc[0]
    try:
        # A plain integer means "multiply the pixels by it, no offset".
        multiplier = int(scale)
        offset = 0
    except ValueError:
        # Otherwise the text is parsed as '<something>*<multiplier>-<offset>'.
        split = scale.split('-')
        offset = float(split[1])
        multiplier = float(split[0].split('*')[1])

    # Sorted so the processing order (and therefore tie-breaking and the
    # image chosen for display) does not depend on directory listing order.
    path_list = [os.path.join(sdir, f) for f in sorted(os.listdir(sdir))]
    print (' Model is being loaded- this will take about 10 seconds')
    model = load_model(model_path)

    index_list = []          # predicted class index per usable image
    prob_vectors = []        # full model output per usable image
    cropped_image_list = []  # resized, unscaled images kept for display
    for path in path_list:
        img = plt.imread(path)
        if crop_image:
            status, img = crop(img)
            if not status:
                continue  # no face found; leave this image out of the vote
        # cv2.resize takes its target size as (width, height).
        img = cv2.resize(img, (img_width, img_height))
        cropped_image_list.append(img)
        batch = np.expand_dims(img * multiplier - offset, axis=0)
        p = np.squeeze(model.predict(batch))
        prob_vectors.append(p)
        index_list.append(int(np.argmax(p)))

    if not index_list:
        return None, None

    # Majority vote; most_common keeps first-encountered order among ties.
    best_index = Counter(index_list).most_common(1)[0][0]
    shown = index_list.index(best_index)
    # Average the winning class's probability over the images actually
    # classified, not over every file found in the directory.
    probability = float(sum(p[best_index] for p in prob_vectors) / len(prob_vectors))

    class_name = class_df['class'].iloc[best_index]
    plt.title(class_name, color='blue', fontsize=16)
    plt.axis('off')
    plt.imshow(cropped_image_list[shown] / 255)
    return class_name, probability

### The code below processes 5 images of Elizabeth Hurley from the "images to predict" directory.
### The images should be different images of the same person. The kernel independently
### predicts each image along with its probability and provides an averaged probability
### result.

In [None]:
# Classify every image in the multi-image directory and report the combined result.
predict_dir=r'../input/gender-classification-from-an-image/gender_rev2/images to predict'  
name='Elizabeth Hurley '
mloc=r'../input/gender-classification-from-an-image/gender_rev2/EfficientNetB1-gender-98.00.h5'
csvloc=r'../input/gender-classification-from-an-image/gender_rev2/class_dict.csv'
result, probability=classify(predict_dir, csvloc,mloc, name, crop_image=True)
if result is None:
    # classify returns (None, None) when no image could be used, so there
    # is no probability to format.
    print (f' no usable face image was found for {name}')
else:
    print (f' {name} averaged predicted is {result} with a probability of {probability * 100:5.2f} %')

### code below demonstrates use with a single image

In [None]:
# Classify the image in the single-image directory; csvloc and mloc are the
# paths assigned in the multi-image cell.
predict_dir=r'../input/gender-classification-from-an-image/gender_rev2/single image to predict'
name='Elizabeth Hurley'
result, probability=classify(predict_dir, csvloc,mloc, name, crop_image=True)
if result is None:
    # classify returns (None, None) when no image could be used, so there
    # is no probability to format.
    print (f' no usable face image was found for {name}')
else:
    print (f' {name} is predicted as being {result} with a probability of {probability * 100:5.2f} %')