https://www.thepythoncode.com/article/gender-detection-using-opencv-in-python

In [1]:
import sys
import json
import cv2
import time
import numpy as np
from urllib.request import Request, urlopen

In [11]:
# Path to the gender network definition (Caffe .prototxt, i.e. the architecture)
# https://drive.google.com/open?id=1W_moLzMlGiELyPxWiYQJ9KFaXroQ_NFQ
# NOTE: the naming is inverted relative to FACE_PROTO / FACE_MODEL below:
# GENDER_MODEL holds the .prototxt path and GENDER_PROTO the .caffemodel path.
GENDER_MODEL = 'weights/deploy_gender.prototxt'
# Path to the gender network pre-trained weights (.caffemodel)
# https://drive.google.com/open?id=1AW3WduLk1haTVAxHOkVS_BEzel1WXQHP
GENDER_PROTO = 'weights/gender_net.caffemodel'
# Per-channel mean values subtracted from the face crop before gender inference;
# mean subtraction reduces the effect of illumination changes.
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
# Gender class labels, indexed by the argmax of the gender network output
GENDER_LIST = ['male', 'female']
# Face detector network definition (.prototxt)
# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
FACE_PROTO = "weights/deploy.prototxt.txt"
# Face detector pre-trained weights (.caffemodel)
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
FACE_MODEL = "weights/res10_300x300_ssd_iter_140000_fp16.caffemodel"

In [12]:
# Load the face detection network (Caffe): definition file first, then weights
face_net = cv2.dnn.readNetFromCaffe(FACE_PROTO, FACE_MODEL)
# Load the gender prediction network. GENDER_MODEL is the .prototxt path and
# GENDER_PROTO the .caffemodel path, so the argument order matches the call above
# even though the constant names suggest otherwise.
gender_net = cv2.dnn.readNetFromCaffe(GENDER_MODEL, GENDER_PROTO)

In [13]:
# https://stackoverflow.com/questions/44650888/resize-an-image-without-distortion-opencv
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize `image` to a target width or height while keeping its aspect ratio.

    Args:
        image: array whose first two shape entries are (rows, columns).
        width: target width in pixels. When given, it takes precedence and
            `height` is ignored.
        height: target height in pixels, used only when `width` is None.
        inter: interpolation flag forwarded to cv2.resize.

    Returns:
        The resized image, or `image` itself (not a copy) when neither
        `width` nor `height` is given.
    """
    src_h, src_w = image.shape[:2]

    # nothing requested -> hand the input straight back
    if width is None and height is None:
        return image

    if width is not None:
        # scale driven by the requested width
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # scale driven by the requested height
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv2.resize takes the target size as (width, height)
    return cv2.resize(image, target, interpolation=inter)

In [14]:
def get_face(frame, confidence_threshold=0.5):
    """Return the bounding box of the first confident face detected in `frame`.

    Detections are scanned in the order the face network emits them; the first
    one whose confidence exceeds `confidence_threshold` and whose box is
    non-empty after clipping is returned.

    Args:
        frame: image array whose first two shape entries are (rows, columns).
        confidence_threshold: minimum detection confidence (exclusive).

    Returns:
        (start_x, start_y, end_x, end_y) as Python ints, widened by 10 pixels on
        every side and clipped to the frame, suitable for
        `frame[start_y:end_y, start_x:end_x]`.

    Raises:
        Exception: with message "Face not found" when no detection qualifies.
    """
    frame_h, frame_w = frame.shape[:2]
    # convert the frame into a blob to be ready for NN input
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104, 177.0, 123.0))
    # set the image as input to the NN
    face_net.setInput(blob)
    # Each detection is a row of 7 values (confidence at index 2, relative box
    # corners at 3..6). reshape keeps the result 2-D even for a single
    # detection, where np.squeeze would collapse it to 1-D and break indexing.
    detections = np.asarray(face_net.forward()).reshape(-1, 7)
    # box coordinates are relative to the frame size
    scale = np.array([frame_w, frame_h, frame_w, frame_h])
    for detection in detections:
        if detection[2] > confidence_threshold:
            # Built-in int replaces the np.int alias, which NumPy removed in 1.24.
            start_x, start_y, end_x, end_y = (detection[3:7] * scale).astype(int)
            # widen the box a little, then keep it inside the frame
            start_x = max(0, int(start_x) - 10)
            start_y = max(0, int(start_y) - 10)
            end_x = min(frame_w, int(end_x) + 10)
            end_y = min(frame_h, int(end_y) + 10)
            # a box that is empty after clipping cannot be cropped; try the next one
            if end_x <= start_x or end_y <= start_y:
                continue
            return start_x, start_y, end_x, end_y
    raise Exception("Face not found")

In [15]:
def predict_gender(url, timeout=30):
    """Predict the gender of the first detected face in the image at `url`.

    Args:
        url: address of the image to download.
        timeout: seconds to wait on the download before urlopen gives up.

    Returns:
        (gender, confidence): the label from GENDER_LIST with the highest
        network score, and that score.

    Raises:
        ValueError: if the download is empty or cannot be decoded as an image.
        Exception: "Face not found" when get_face finds no face or the face
            crop is empty. Download errors raised by urlopen propagate unchanged.
    """
    # Read Input Image
    # https://stackoverflow.com/questions/21061814/how-can-i-read-an-image-from-an-internet-url-in-python-cv2-scikit-image-and-mah
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # the context manager closes the connection; the timeout keeps one slow
    # host from stalling a whole batch run
    with urlopen(req, timeout=timeout) as response:
        payload = response.read()
    if not payload:
        raise ValueError(f"Empty response from {url!r}")

    arr = np.frombuffer(payload, dtype=np.uint8)
    # IMREAD_COLOR always yields a 3-channel 8-bit image, so greyscale, alpha
    # and 16-bit sources all arrive in the layout the networks are fed with.
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError(f"Could not decode an image from {url!r}")

    # Resize to a fixed working width (aspect ratio preserved)
    frame_width = 640
    frame = image_resize(img, width=frame_width)

    # locate the face and crop it out
    start_x, start_y, end_x, end_y = get_face(frame)
    face_img = frame[start_y: end_y, start_x: end_x]
    if face_img.size == 0:
        # an empty crop cannot be turned into a blob
        raise Exception("Face not found")

    # image       = the face crop to preprocess before classification.
    # scalefactor = optional scaling after mean subtraction (1.0 -> no scaling).
    # size        = spatial size fed to the gender network (227x227 here).
    # mean        = per-channel values subtracted from the image.
    # swapRB      = False: the crop is passed in the channel order OpenCV decoded
    #               it in. NOTE(review): this presumes MODEL_MEAN_VALUES is listed
    #               in that same order -- confirm against the model's docs.
    blob = cv2.dnn.blobFromImage(image=face_img, scalefactor=1.0, size=(
            227, 227), mean=MODEL_MEAN_VALUES, swapRB=False, crop=False)

    gender_net.setInput(blob)
    gender_preds = gender_net.forward()
    # highest-scoring class and its score
    i = gender_preds[0].argmax()
    gender = GENDER_LIST[i]
    gender_confidence_score = gender_preds[0][i]
    return gender, gender_confidence_score

In [16]:
# Load the department/profile records that the annotation loop iterates over.
# NOTE(review): the path is built from sys.path[0]; this assumes that entry points
# at the notebook's directory -- confirm for the environment this runs in.
with open(sys.path[0] + '/../data/columbia_test_sans_processing_w_name_gender_race.json') as f:
    columbia = json.load(f)

In [17]:
# Annotate every profile with an image-based gender prediction, one department
# at a time, and report how long each department took.
for department in columbia:
    print(department.get('department'))
    start = time.time()
    profiles = department.get('profiles')
    if profiles is not None:
        for p in profiles:
            # values recorded when there is no image or the prediction fails
            gender = None
            gender_confidence_score = None
            img_url = p.get('img')
            if img_url:
                try:
                    gender, gender_confidence_score = predict_gender(img_url)
                except Exception:
                    # Best effort: a broken link, an undecodable image or a
                    # missing face leaves this profile unannotated instead of
                    # aborting the run. Catching Exception (not a bare except)
                    # still lets Ctrl-C interrupt the loop.
                    gender = None
                    gender_confidence_score = None
            # the score is stored as a string so the record stays JSON-serialisable
            p.update({
                'gender_img': gender,
                'gender_confidence_score_img': (
                    None if gender_confidence_score is None
                    else str(gender_confidence_score)),
            })
        end = time.time()
        print(f'Time taken = {end-start:.1f} seconds for {len(profiles)} profiles')
    else:
        print("No profiles")
    print("____________________________________")

Accounting Division
Time taken = 21.8 seconds for 25 profiles
____________________________________
African American and African Diaspora Studies Department
Time taken = 8.3 seconds for 14 profiles
____________________________________
Africana Studies (Barnard College)
Time taken = 1.9 seconds for 5 profiles
____________________________________
Anthropology (Barnard College)
Time taken = 4.4 seconds for 9 profiles
____________________________________
Anthropology Department
No profiles
____________________________________
Applied Physics and Applied Mathematics Department
Time taken = 4.0 seconds for 11 profiles
____________________________________
Architecture (Barnard College)
Time taken = 3.4 seconds for 15 profiles
____________________________________
Art History (Barnard College)
No profiles
____________________________________
Art History and Archaeology Department
No profiles
____________________________________
Asian and Middle Eastern Cultures (Barnard College)
No profiles
____

In [None]:
# Write the records held in `columbia` to a separate "_img" JSON file in the same
# data directory the input was read from.
# NOTE(review): like the load step, the path depends on sys.path[0] -- confirm it
# resolves to the intended directory.
with open(sys.path[0] + '/../data/columbia_test_sans_processing_w_name_gender_race_img.json', 'w') as f:
    json.dump(columbia, f)