## Photo Labels
Use each photo on the `images` directory and obtain the labels from Betafaceapi.com

In [108]:
import requests
import os
import base64
import pandas as pd
import pickle
from tqdm import tqdm


In [117]:
if os.path.exists('data/image_labels.pkl'):
    print('Importing pickle...')
    f = open("data/image_labels.pkl","rb")
    
    image_labels = pickle.load(f)
    f.close()

else:
    print('Pickle not found, creating new dictionary')
    image_labels = dict()
    

Importing pickle...


In [118]:

headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.40 Safari/537.36'
}


counter = 0

for file in tqdm(sorted(os.listdir('images/'))):
    
    if file in image_labels and len(image_labels[file]) > 0:
        continue
        
    with open('images/' + file, 'rb') as file_stream:
        file_content = file_stream.read()
        
    data = {
        'api_key': "d45fd466-51e2-4701-8da8-04351c872236",
        'detection_flags': "cropface,recognition,content,classifiers,basicpoints,propoints",
        'original_filename': file,
        'file_base64': base64.b64encode(file_content).decode('ascii')
        
    }
    #print(file, data)
    
    response = requests.post('https://www.betafaceapi.com/api/v2/media', json=data, headers=headers)
    response_json = response.json()
    
    if 'media' not in response_json or 'faces' not in response_json['media'] or response_json['media']['faces'] is None or len(response_json['media']['faces']) == 0:
        print('Error detecting', file, response_json)
        image_labels[file] = {}
        continue
        
    face = response_json['media']['faces'][0]
    
    face_tags = {}
    
    for tag in face['tags']:
        face_tags[tag['name'] + '.label'] = tag['value']
        face_tags[tag['name'] + '.confidence'] = tag['confidence']
    
    image_labels[file] = face_tags
    
    
    if counter % 10 == 0:
        pd.DataFrame.from_dict(image_labels, orient='index').to_csv('data/image_labels.csv')

    counter = counter + 1
    
    

  0%|          | 1/1600 [00:01<29:25,  1.10s/it]

Error detecting A1.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 2/1600 [00:01<20:43,  1.29it/s]

Error detecting A2.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 3/1600 [00:02<17:31,  1.52it/s]

Error detecting A3.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 4/1600 [00:02<16:11,  1.64it/s]

Error detecting A4.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 5/1600 [00:03<15:15,  1.74it/s]

Error detecting A5.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 6/1600 [00:03<14:52,  1.79it/s]

Error detecting A6.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  0%|          | 7/1600 [00:04<14:24,  1.84it/s]

Error detecting AD1.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  2%|▏         | 30/1600 [00:04<01:44, 15.03it/s]

Error detecting AL1.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  2%|▏         | 32/1600 [00:05<02:14, 11.62it/s]

Error detecting AL2.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  2%|▏         | 37/1600 [00:05<02:23, 10.93it/s]

Error detecting ALA4.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  2%|▎         | 40/1600 [00:06<02:45,  9.41it/s]

Error detecting ALA7.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  3%|▎         | 42/1600 [00:06<03:25,  7.57it/s]

Error detecting ALA9.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  5%|▌         | 85/1600 [00:07<00:50, 30.13it/s]

Error detecting BATB3.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}
Error detecting BATB5.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


  6%|▌         | 89/1600 [00:08<01:23, 17.99it/s]

Error detecting BATB6.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


 10%|▉         | 152/1600 [00:08<00:32, 45.18it/s]

Error detecting BOBB1.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}
Error detecting BOBB2.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}
Error detecting BOBB3.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}
Error detecting BOBB4.jpg {'error_code': -57, 'error_description': 'you have reached your daily processed images limit'}


 10%|▉         | 155/1600 [00:10<01:38, 14.65it/s]


KeyboardInterrupt: 

In [114]:
df = pd.DataFrame.from_dict(image_labels, orient='index')
df[['race.label', 'race.confidence', 'gender.label', 'gender.confidence']].head(20)

Unnamed: 0,race.label,race.confidence,gender.label,gender.confidence
AD10.jpg,asian,1.0,male,0.16
AD2.jpg,black,0.55,female,1.0
AD3.jpg,white,1.0,male,0.56
AD4.jpg,white,1.0,male,0.57
AD5.jpg,white,0.92,female,0.19
AD6.jpg,white,1.0,male,0.54
AD7.jpg,white,1.0,male,1.0
AD8.jpg,asian,0.62,male,0.54
AD9.jpg,white,0.99,male,0.42
AHX1.jpg,white,1.0,male,0.92


In [115]:
df.to_csv('data/image_labels.csv')

In [116]:
f = open("data/image_labels.pkl","wb")
pickle.dump(image_labels, f)
f.close()
