In [1]:
import sys
import json
from urllib.request import Request, urlopen
import pandas as pd
from census_name_guesser import CensusNameGuesser
from ethnicolr import census_ln, pred_census_ln
import numpy as np
import cv2

In [2]:
import deepface.DeepFace as DeepFace

In [3]:
# Name-based guesser; its gender_guesser() and race_guesser() methods are
# called further down on the profile's first and last name.
guesser = CensusNameGuesser()

In [4]:
# Load the labeled Columbia profile data from JSON.
# The encoding is pinned to UTF-8 so that any non-ASCII text in the file is
# decoded the same way on every platform instead of via the locale default.
# NOTE(review): the path is built from sys.path[0], which is not guaranteed to
# be the notebook's directory in every kernel setup — confirm or switch to an
# explicit data directory.
with open(
    sys.path[0] + '/../data/columbia/columbia_w_profiles_labeled.json',
    encoding='utf-8',
) as f:
    columbia = json.load(f)

In [5]:
# Pick one sample record to prototype on: the third entry of the 'profiles'
# list held by the first element of `columbia`.
tmp_prof = columbia[0].get('profiles')[2]

In [6]:
# Inspect the sample record's fields (name parts, title, rank, image URL).
tmp_prof

{'name': 'Matthias Breuer',
 'title': 'Assistant Professor of Business',
 'img': 'https://www8.gsb.columbia.edu/cbs-directory/photo/mb4468/110x90',
 'rank': 'assistant professor',
 'first_name': 'Matthias',
 'middle_name': None,
 'last_name': 'Breuer'}

# Gender and race from the profile image (DeepFace)

In [7]:
# Download the profile photo and decode it into an OpenCV image array.
url = tmp_prof['img']
# A browser-like User-Agent is sent with the request — presumably the server
# rejects urllib's default one; TODO confirm.
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
# Use the response as a context manager so the connection is always closed.
with urlopen(req) as response:
    webpage = response.read()
arr = np.asarray(bytearray(webpage), dtype=np.uint8)
# IMREAD_COLOR always yields a 3-channel BGR array; the previous flag (-1,
# "unchanged") could hand grayscale or 4-channel (alpha) images downstream.
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
# imdecode signals failure by returning None rather than raising, so fail here
# with a clear message instead of deep inside the face analysis.
if img is None:
    raise ValueError(f'Could not decode an image from {url}')

In [8]:
# Run DeepFace's gender and race analysis on the decoded image array (passed
# through `img_path` instead of a file path). In the run recorded below the
# result is a single dict with 'gender', 'dominant_gender', 'region', 'race'
# and 'dominant_race' entries.
# NOTE(review): some DeepFace releases appear to return a list of per-face
# dicts instead — confirm against the installed version.
obj = DeepFace.analyze(img_path=img, actions=['gender', 'race'])

Action: race: 100%|██████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.26it/s]


In [9]:
# Inspect the raw DeepFace result.
obj

{'gender': {'Woman': 0.031784133170731366, 'Man': 99.96821880340576},
 'dominant_gender': 'Man',
 'region': {'x': 21, 'y': 14, 'w': 55, 'h': 55},
 'race': {'asian': 0.011072922575908755,
  'indian': 0.0009576589607251095,
  'black': 0.00011164505150806698,
  'white': 99.25931687708598,
  'middle eastern': 0.36109252656802693,
  'latino hispanic': 0.3674512948066768},
 'dominant_race': 'white'}

In [14]:
# Raw per-class race scores from the DeepFace result (keys 'asian', 'indian',
# 'black', 'white', 'middle eastern', 'latino hispanic' in the recorded run).
race_img = obj.get('race')

In [15]:
def format_race_img(dictionary):
    """Convert DeepFace race scores to the ``pct*`` keys used for name-based results.

    Args:
        dictionary: Mapping with the keys 'white', 'middle eastern', 'black',
            'asian', 'indian' and 'latino hispanic'.

    Returns:
        A new dict with 'pctwhite', 'pctblack', 'pctapi' and 'pcthispanic'.
        'pctwhite' is the larger of the 'white' and 'middle eastern' scores and
        'pctapi' the larger of 'asian' and 'indian'; because the larger value
        is taken rather than the sum, the four outputs need not add up to the
        total of the six inputs.
    """
    score = dictionary.get
    return {
        'pctwhite': max(score('white'), score('middle eastern')),
        'pctblack': score('black'),
        'pctapi': max(score('asian'), score('indian')),
        'pcthispanic': score('latino hispanic'),
    }

In [16]:
# Build the pct*-keyed dict straight from the DeepFace result instead of
# feeding `race_img` back into itself. With the self-referential form, running
# this cell a second time passed the already-converted dict (which has no
# 'white'/'asian' keys) and raised a TypeError inside max().
race_img = format_race_img(obj.get('race'))

In [17]:
# Show the image-based race scores after conversion to pct* keys.
race_img

{'pctwhite': 99.25931687708598,
 'pctblack': 0.00011164505150806698,
 'pctapi': 0.011072922575908755,
 'pcthispanic': 0.3674512948066768}

In [27]:
# Raw gender scores from the DeepFace result (keys 'Woman' and 'Man' in the
# recorded run).
gender_img = obj.get('gender')

In [28]:
# Inspect the raw gender scores before renaming the keys.
gender_img

{'Woman': 0.031784133170731366, 'Man': 99.96821880340576}

In [29]:
def format_gender_img(dictionary):
    """Rename DeepFace gender scores to the ``pctmale`` / ``pctfemale`` keys.

    Args:
        dictionary: Mapping with the keys 'Man' and 'Woman'.

    Returns:
        A new dict whose 'pctmale' holds the 'Man' score and whose 'pctfemale'
        holds the 'Woman' score. A key absent from the input comes through as
        None.
    """
    key_map = (('pctmale', 'Man'), ('pctfemale', 'Woman'))
    return {target: dictionary.get(source) for target, source in key_map}

In [30]:
# Build the pct*-keyed dict straight from the DeepFace result instead of
# feeding `gender_img` back into itself. With the self-referential form,
# running this cell a second time passed the already-renamed dict (no
# 'Man'/'Woman' keys) and silently replaced both scores with None.
gender_img = format_gender_img(obj.get('gender'))

In [31]:
# Show the image-based gender scores after renaming to pctmale / pctfemale.
gender_img

{'pctmale': 99.96821880340576, 'pctfemale': 0.031784133170731366}

# Gender from first name (CensusNameGuesser)

In [32]:
# Name-based gender estimate from the profile's first name. In the recorded
# run the result uses the same 'pctmale' / 'pctfemale' keys as the
# image-based dict.
gender_name = guesser.gender_guesser(tmp_prof.get('first_name'))

In [33]:
# Show the name-based gender estimate.
gender_name

{'pctmale': 100.0, 'pctfemale': 0.0}

# Race from last name (CensusNameGuesser)

In [34]:
# Name-based race estimate from the profile's last name. In the recorded run
# the result carries a 'pctaian' key in addition to the four pct* keys that
# format_race_img produces, so the two dicts are not key-for-key identical.
race_name = guesser.race_guesser(tmp_prof.get('last_name'))

In [35]:
# Show the name-based race estimate.
race_name

{'pctwhite': 95.58927668184117,
 'pctapi': 0.5665149215983815,
 'pctblack': 1.274658573596358,
 'pcthispanic': 2.0333839150227613,
 'pctaian': 0.5361659079413252}

In [36]:
# Print the image-based and name-based estimates one per line for a
# side-by-side comparison: race (image, name), then gender (image, name).
print(race_img, race_name, gender_img, gender_name, sep='\n')

{'pctwhite': 99.25931687708598, 'pctblack': 0.00011164505150806698, 'pctapi': 0.011072922575908755, 'pcthispanic': 0.3674512948066768}
{'pctwhite': 95.58927668184117, 'pctapi': 0.5665149215983815, 'pctblack': 1.274658573596358, 'pcthispanic': 2.0333839150227613, 'pctaian': 0.5361659079413252}
{'pctmale': 99.96821880340576, 'pctfemale': 0.031784133170731366}
{'pctmale': 100.0, 'pctfemale': 0.0}
