## Photo Labels
For each photo in the `images` directory, obtain its labels from Betafaceapi.com.

In [1]:
import requests
import os
import base64
import pandas as pd
import pickle
from tqdm import tqdm


In [2]:
# Resume from an earlier run: reuse the cached labels when the pickle exists,
# otherwise start from an empty dictionary (file name -> dict of tag values).
if os.path.exists('data/image_labels.pkl'):
    print('Importing pickle...')
    # NOTE(review): pickle.load can execute arbitrary code, so this file should
    # only ever be one that this notebook wrote itself.
    # The context manager closes the handle even if unpickling raises.
    with open("data/image_labels.pkl", "rb") as f:
        image_labels = pickle.load(f)

else:
    print('Pickle not found, creating new dictionary')
    image_labels = dict()
    

Importing pickle...


In [3]:

# Send every not-yet-labelled photo in `images/` to the Betaface API and store
# the tags of the first detected face in `image_labels[file]`.
# Files whose entry is missing or empty (earlier failures) are (re)tried.

# Request headers sent with every API call.
headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.40 Safari/537.36'
}

# NOTE(review): the key used to be hard-coded in the request payload. It can
# now be overridden through the BETAFACE_API_KEY environment variable; the
# literal stays as the fallback so existing runs behave the same.
api_key = os.environ.get('BETAFACE_API_KEY', "d45fd466-51e2-4701-8da8-04351c872236")

# Upper bound (seconds) for one HTTP request, so a stalled connection cannot
# block the loop forever.
request_timeout = 60

# Number of successfully labelled images in this run; drives the checkpoints.
counter = 0

for file in tqdm(sorted(os.listdir('images/'))):

    # Already labelled in a previous run: nothing to do.
    if file in image_labels and len(image_labels[file]) > 0:
        continue

    # Sub-directories cannot be opened as images; skip them instead of crashing.
    if not os.path.isfile('images/' + file):
        continue

    with open('images/' + file, 'rb') as file_stream:
        file_content = file_stream.read()

    data = {
        'api_key': api_key,
        'detection_flags': "cropface,recognition,content,classifiers,basicpoints,propoints",
        'original_filename': file,
        'file_base64': base64.b64encode(file_content).decode('ascii')
    }

    try:
        response = requests.post('https://www.betafaceapi.com/api/v2/media', json=data, headers=headers, timeout=request_timeout)
        response_json = response.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Network failure, timeout or a non-JSON body: record the file as
        # unlabelled (it is retried on the next run) and keep going.
        print('Error requesting', file, error)
        image_labels[file] = {}
        continue

    # Tolerate a missing or null `media` / `faces` entry as well as an empty list.
    media = response_json.get('media') if isinstance(response_json, dict) else None
    faces = media.get('faces') if isinstance(media, dict) else None

    if not faces:
        print('Error detecting', file, response_json)
        image_labels[file] = {}
        continue

    # Only the first detected face is used.
    face = faces[0]

    # Flatten the tag list into "<name>.label" / "<name>.confidence" columns.
    face_tags = {}

    for tag in face.get('tags') or []:
        face_tags[tag['name'] + '.label'] = tag['value']
        face_tags[tag['name'] + '.confidence'] = tag['confidence']

    image_labels[file] = face_tags

    # Checkpoint every 10 labelled images. The pickle is what a restarted
    # notebook reloads, so it is written together with the CSV; it goes to a
    # temporary file first so an interrupted write cannot corrupt the cache.
    if counter % 10 == 0:
        pd.DataFrame.from_dict(image_labels, orient='index').to_csv('data/image_labels.csv')
        with open('data/image_labels.pkl.tmp', 'wb') as checkpoint_file:
            pickle.dump(image_labels, checkpoint_file)
        os.replace('data/image_labels.pkl.tmp', 'data/image_labels.pkl')

    counter = counter + 1
    
    

  0%|          | 5/1600 [00:06<33:10,  1.25s/it]Error detecting A5.jpg {'media': {'media_uuid': '8222099c-641a-4fb8-8151-21f1ace42eb8', 'checksum': '7c523f8504d1e55c6833f73594dd636487753924be1740337e0e0dc59abeb5f8', 'faces': None, 'tags': [{'name': 'adult content', 'value': 'no', 'confidence': 0.06, 'x': 0.0, 'y': 0.0, 'width': 0.0, 'height': 0.0, 'angle': 0.0, 'instance_id': 0, 'start': '00:00:00', 'duration': '00:00:00'}], 'original_filename': 'A5.jpg', 'duration': '00:00:00'}, 'recognize': None}
  2%|▏         | 30/1600 [00:09<23:58,  1.09it/s]Error detecting AL1.jpg {'media': {'media_uuid': '45f5e755-3c82-4232-992a-a18fffc2fdf9', 'checksum': '2ddcdb0dc064002eb3a7bf22c634e6ed725c0e899fe80ae327e1d0366cc675a8', 'faces': None, 'tags': [{'name': 'adult content', 'value': 'no', 'confidence': 0.01, 'x': 0.0, 'y': 0.0, 'width': 0.0, 'height': 0.0, 'angle': 0.0, 'instance_id': 0, 'start': '00:00:00', 'duration': '00:00:00'}], 'original_filename': 'AL1.jpg', 'duration': '00:00:00'}, 'recogni

In [7]:
# One row per image: the dictionary keys (file names) become the index and the
# per-image tag dictionaries become the columns.
df = pd.DataFrame.from_dict(image_labels, orient='index')

# Show the race and gender labels with their confidences for the first 20 rows.
preview_columns = ['race.label', 'race.confidence', 'gender.label', 'gender.confidence']
df[preview_columns].head(20)

Unnamed: 0,race.label,race.confidence,gender.label,gender.confidence
A1.jpg,asian,1.0,male,1.0
A2.jpg,white,1.0,female,0.05
A3.jpg,white,1.0,male,0.21
A4.jpg,white,1.0,male,0.94
A6.jpg,white,0.73,male,0.95
AD1.jpg,white,1.0,male,0.89
AD10.jpg,asian,1.0,male,0.16
AD2.jpg,black,0.55,female,1.0
AD3.jpg,white,1.0,male,0.56
AD4.jpg,white,1.0,male,0.57


In [9]:
# Write the full label table to CSV (the index is written as the first column).
labels_csv_path = 'data/image_labels.csv'
df.to_csv(labels_csv_path)

In [10]:
# Persist the label dictionary to data/image_labels.pkl.
# The context manager closes the file even if pickling raises.
with open("data/image_labels.pkl", "wb") as f:
    pickle.dump(image_labels, f)
