<a href="https://colab.research.google.com/github/justadudewhohacks/ipynbs/blob/master/age_gender_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [0]:
!pip install -U -q PyDrive
!pip install git+https://github.com/justadudewhohacks/image_augment.py
!pip install git+https://github.com/justadudewhohacks/colabsnippets

# Download Data

In [0]:
from colabsnippets.DataDownloader import DataDownloader
from colabsnippets.utils import load_json

# Downloader whose target directory for all datasets is ./data.
data_downloader = DataDownloader(data_dir = './data')

# Fetch the database manifest first, then hand it to download_data.
dbs_manifest_path = './age_gender_ethnicity_dbs.json'
data_downloader.drive.CreateFile({ 'id': '17BPRF73QXEP65NP3sWwBNPbKjT1MJzuM' }).GetContentFile(dbs_manifest_path)
dbs = load_json(dbs_manifest_path)

data_downloader.download_data(dbs)

print('downloading data split and labels ...')
# (Google Drive file id, local target path) pairs, downloaded in this order.
split_and_label_files = [
  ('1Vndxi_V0s3USsE9c6evRAPQHZB1rg-HC', './data/trainData.json'),
  ('1jd7lYHv0pR4nlNFLgBeHHInHeoGKKL7T', './data/testData.json'),
  ('1z2C1M0zyf9xF2faCQJUsaFYx9u9vSDep', './data/chalearn/labels.json'),
  ('1SC3PuQj-CQb4O87YHIJDu1J7APZvQRsa', './data/wiki/labels.json'),
  ('1KeqNCL35SR5MeJkkdEiqLxBhZihDn4Sq', './data/imdb/labels.json'),
  ('1nU6xuPV2R-TRg388eGftgGwLrj-n2Ra_', './data/megaage/labels.json'),
  ('1_SzULpNyws920UUZdcVDKBGDvDNpFSf0', './data/megaage-asian/labels.json'),
]
for drive_file_id, target_path in split_and_label_files:
  data_downloader.drive.CreateFile({ 'id': drive_file_id }).GetContentFile(target_path)

print('done!')

downloading data for db: fgnet
downloading data for shard 0
unzipping images done in 0.9271001815795898s
unzipping landmarks done in 0.26310300827026367s
downloading data for db: chalearn
downloading data for shard 0
unzipping images done in 14.007997274398804s
unzipping landmarks done in 9.231802463531494s
downloading data for db: utk
downloading data for shard 0
unzipping images done in 16.64055633544922s
unzipping landmarks done in 13.267534732818604s
downloading data for shard 1
unzipping images done in 16.76087999343872s
unzipping landmarks done in 12.440708637237549s
downloading data for shard 2
unzipping images done in 16.804337739944458s
unzipping landmarks done in 12.240557670593262s
downloading data for db: megaage
downloading data for shard 0
unzipping images done in 13.258168458938599s
unzipping landmarks done in 11.733147382736206s
downloading data for shard 1
unzipping images done in 12.739650964736938s
unzipping landmarks done in 10.997202634811401s
downloading data for 

# Training

## Common

In [0]:
import cv2
import math
import json
import random
import time
import types
import os
import numpy as np
import tensorflow as tf
from augment import ImageAugmentor
from colabsnippets.utils import load_json
from colabsnippets import BatchLoader

'''
--------------------------------------------------------------------------------

Data Loader

--------------------------------------------------------------------------------
'''


# Label tables keyed by image file name; they are read by extract_data_labels.
# chalearn entries carry 'realAge' and 'gender', wiki / imdb entries carry
# 'age' and 'gender', the megaage tables map a file name directly to its age.
chalearn_labels = load_json('./data/chalearn/labels.json')
wiki_labels = load_json('./data/wiki/labels.json')
imdb_labels = load_json('./data/imdb/labels.json')
megaage_labels = load_json('./data/megaage/labels.json')
megaage_asian_labels = load_json('./data/megaage-asian/labels.json')

# 0 male, 1 female
def gender_code_to_label(gender_code, safe_mode = True):
  """Translate a numeric gender code into its label.

  gender_code: 1 maps to 'female', 0 maps to 'male'.
  safe_mode: when True an unknown code raises, when False it yields None.

  Returns 'female', 'male' or None (only possible with safe_mode = False).
  Raises Exception for an unknown code while safe_mode is True.
  """
  if gender_code == 1:
    return 'female'
  if gender_code == 0:
    return 'male'
  if safe_mode:
    raise Exception ("gender_code_to_label - invalid gender code '{}'".format(gender_code))
  return None
  
# 0 male, 1 female
def get_gender_one_hot(label):
  """Encode a gender label as a one-hot list: 'male' -> [1, 0], 'female' -> [0, 1].

  Raises Exception for any other label.
  """
  is_male = label == 'male'
  is_female = label == 'female'
  if not (is_male or is_female):
    raise Exception('unknown gender label: ' + str(label))
  return [1, 0] if is_male else [0, 1]
  
  
def extract_data_labels(data):
  """Return the (age, gender) labels of one data entry.

  data: dict with the keys 'db' (dataset name) and 'file' (image file name).

  For utk, fgnet and cacd the age (and for utk the gender code) is parsed from
  the file name; for the other datasets it is looked up in the module-level
  label tables. Gender is None for datasets without gender labels and for utk
  files carrying an unknown gender code.

  Raises Exception for an unknown dataset name.
  """
  db, img_file = data['db'], data['file']

  if db == 'utk':
    # utk file names start with "<age>_<gender code>_"
    name_parts = img_file.split('_')
    age = int(float(name_parts[0]))
    gender_code = int(name_parts[1])
    gender = gender_code_to_label(gender_code, safe_mode = False)
    if gender is None:
      print ("utk invalid gender code '{}' for file: {}".format(gender_code, img_file))
    return age, gender

  if db == 'fgnet':
    # the (up to) two characters following the first 'A' hold the age
    return int(float(img_file.split('_')[0].split('A')[1][0:2])), None

  if db == 'cacd':
    # the first two characters of the file name hold the age
    return int(img_file[0:2]), None

  if db == 'chalearn':
    entry = chalearn_labels[img_file]
    return entry['realAge'], entry['gender']

  if db == 'megaage':
    return megaage_labels[img_file], None

  if db == 'megaage-asian':
    return megaage_asian_labels[img_file], None

  if db == 'wiki' or db == 'imdb':
    entry = (wiki_labels if db == 'wiki' else imdb_labels)[img_file]
    return entry['age'], entry['gender']

  raise Exception('unknown db: ' + db)
    
def resolve_image_path(data):
  """Build the path of the image file belonging to a data entry.

  data: dict with 'db' and 'file' and optionally 'shard'. Entries with a shard
  live in "images-shard<shard>", all others in "cropped-images".
  """
  if 'shard' in data:
    img_dir = "images-shard{}".format(data['shard'])
  else:
    img_dir = 'cropped-images'
  return "./data/{}/{}/{}".format(data['db'], img_dir, data['file'])

def min_bbox(landmarks):
  """Return [min_x, min_y, max_x, max_y] enclosing the given landmark points.

  landmarks: iterable of dicts with the keys 'x' and 'y'.

  The minima start at 1.0 and the maxima at 0, so the box never shrinks below
  those bounds and an empty input yields [1.0, 1.0, 0, 0].
  """
  left, top, right, bottom = 1.0, 1.0, 0, 0
  for point in landmarks:
    px, py = point['x'], point['y']
    # min() keeps its first argument on ties, max() as well - hence the argument order
    left = min(left, px)
    top = min(top, py)
    right = max(px, right)
    bottom = max(py, bottom)

  return [left, top, right, bottom]

def augment_image_factory(image_augmentor):
  """Create an augment_image(img, data) callback bound to `image_augmentor`.

  The callback loads the landmarks json that belongs to the data entry,
  reduces it to a bounding box with min_bbox and passes that box as
  `random_crop` to image_augmentor.augment.
  """
  def augment_image(img, data):
    db = data['db']

    # the landmarks file shares the image's name, with .jpg swapped for .json
    if db == 'utk':
      file_suffix = 'chip_0'
    elif db == 'appareal':
      file_suffix = 'face_0'
    else:
      file_suffix = ''
    landmarks_file = data['file'].replace(file_suffix + '.jpg', file_suffix + '.json')

    if 'shard' in data:
      landmarks_dir = "landmarks-shard{}".format(data['shard'])
    else:
      landmarks_dir = 'landmarks'

    landmarks_path = "./data/{}/{}/{}".format(db, landmarks_dir, landmarks_file)
    crop_bbox = min_bbox(load_json(landmarks_path))
    return image_augmentor.augment(img, random_crop = crop_bbox)

  return augment_image

class DataLoader(BatchLoader):
  """BatchLoader wired up with this notebook's path, label and augmentation helpers.

  data: either the data entries themselves or a zero-argument callable
    returning them.
  image_augmentor: optional augmentor; when given, images are augmented through
    the callback built by augment_image_factory, otherwise no augmentation
    callback is passed.
  start_epoch, is_test: forwarded unchanged to BatchLoader.
  """
  def __init__(self, data, image_augmentor = None, start_epoch = None, is_test = False):
    # BatchLoader receives a data provider callable. callable() also accepts
    # bound methods, functools.partial objects and callable instances, which an
    # exact FunctionType check would wrongly wrap as if they were plain data.
    data_provider = data if callable(data) else lambda: data
    augment_image = augment_image_factory(image_augmentor) if image_augmentor is not None else None
    BatchLoader.__init__(
      self,
      data_provider,
      resolve_image_path,
      extract_data_labels,
      augment_image = augment_image,
      start_epoch = start_epoch,
      is_test = is_test
    )


'''
--------------------------------------------------------------------------------

utility

--------------------------------------------------------------------------------
'''

def gpu_session(callback):
  """Run `callback(session)` inside a fresh tf.Session pinned to '/gpu:0'.

  The session allows GPU memory growth, soft device placement and logs the
  device placement. Returns whatever the callback returns.
  """
  session_config = tf.ConfigProto()
  session_config.allow_soft_placement = True
  session_config.log_device_placement = True
  session_config.gpu_options.allow_growth = True
  with tf.Session(config = session_config) as session, tf.device('/gpu:0'):
    return callback(session)

def get_checkpoint(epoch):
  """Return the checkpoint name "<model_name>.ckpt-<epoch>" for the module-level model_name."""
  return "{}.ckpt-{}".format(model_name, epoch)

def filter_data_in_age_range(data, min_age, max_age):
  """Keep the entries whose age label lies within [min_age, max_age] (both inclusive).

  The age is obtained from extract_data_labels; the input order is preserved.
  """
  def is_in_range(entry):
    age, _ = extract_data_labels(entry)
    return min_age <= age <= max_age

  return [entry for entry in data if is_in_range(entry)]

def forward_dbs_factory(all_data, dbs, transform_label = lambda x: x, transform_prediction = lambda pred_age, pred_gender: (pred_age, pred_gender)):
  """Create a function that runs a forward pass over the data of every listed db.

  all_data: data entries (dicts with a 'db' key) of all datasets.
  dbs: dataset names to evaluate, in the order the results are emitted.
  transform_label: applied to every label of a batch before it is stored.
  transform_prediction: called as transform_prediction(pred_age, pred_gender)
    for every sample. The default returns both predictions as a tuple (the
    previous one-argument default could not be called with two arguments).

  The returned forward_dbs(forward) expects forward(batch_x) to return
  (pred_age, pred_gender), both indexable per sample, and returns a list of
  (db, transformed label, transformed prediction) tuples. Batches are sized by
  the module-level batch_size and image_size.
  """
  def forward_dbs(forward):
    results = []

    for db in dbs:
      db_data = [data for data in all_data if data['db'] == db]
      data_loader = DataLoader(db_data, is_test = True)

      # next_batch signals the end of the data with None
      next_batch = data_loader.next_batch(batch_size, image_size = image_size)
      while next_batch is not None:
        batch_x, batch_y = next_batch
        pred_age, pred_gender = forward(batch_x)
        for idx, y in enumerate(batch_y):
          results.append((db, transform_label(y), transform_prediction(pred_age[idx], pred_gender[idx])))
        next_batch = data_loader.next_batch(batch_size, image_size = image_size)

    return results
  return forward_dbs

## Train Age Range Regressor

In [0]:
from colabsnippets.age_gender_recognition import AgeGenderXceptionTiny
from colabsnippets.utils import shuffle_array

# Start from an empty default graph so re-running the cell does not stack ops.
tf.reset_default_graph()

net = AgeGenderXceptionTiny(num_blocks = 2)
# model_name is the checkpoint name prefix used by get_checkpoint and saver.save.
model_name = net.name + '_augmented4_blocks2_5_5000_rgb'

# training parameters
learning_rate = 0.00001
# a non-zero start_epoch makes train() restore the checkpoint of start_epoch - 1
start_epoch = 164
end_epoch = 2000
batch_size = 32
image_size = 112

image_augmentor = ImageAugmentor.load('./augmentor_4.json')

train_data = load_json('./data/trainData.json')

def get_epoch_data():
  """Assemble the training entries of one epoch, capped per age category.

  The module-level train_data is grouped into 5-year age categories covering
  the ages 0 to 120; every category is shuffled and contributes at most 5000
  entries. The categories are concatenated in ascending age order.
  """
  age_category_range = 5
  max_per_category = 5000
  num_bins = int(120 / age_category_range)

  data_by_age_category = [[] for _ in range(num_bins)]
  for sample in train_data:
    sample_age, _ = extract_data_labels(sample)
    category_idx = int(math.floor(sample_age / age_category_range))
    data_by_age_category[category_idx].append(sample)

  epoch_data = []
  for category_samples in data_by_age_category:
    epoch_data.extend(shuffle_array(category_samples)[0:max_per_category])

  return epoch_data
  
# get_epoch_data is passed as a callable, not as a precomputed list.
data_loader = DataLoader(get_epoch_data, start_epoch = start_epoch, image_augmentor = image_augmentor)
net.init_trainable_weights()
#net.load_weights('./feature_extractor_dense_mobilenet_4_4_augmented4')

# Placeholders have a fixed batch dimension, so every fed batch must hold batch_size samples.
X = tf.placeholder(tf.float32, [batch_size, image_size, image_size, 3])
AGE = tf.placeholder(tf.float32, [batch_size])
GENDER_ONE_HOT = tf.placeholder(tf.float32, [batch_size, 2])
# Per-sample weight of the gender loss; train() feeds 0.0 for samples without a gender label.
GENDER_MASK = tf.placeholder(tf.float32, [batch_size])
age_op, gender_op = net.forward(X)
# Age is trained with an absolute-difference loss, gender with a masked softmax cross entropy.
age_loss_op = tf.losses.absolute_difference(AGE, age_op)
gender_loss_op = tf.losses.softmax_cross_entropy(GENDER_ONE_HOT, gender_op, weights=GENDER_MASK)
gender_loss_weight = 1.0
joint_loss_op = age_loss_op + (gender_loss_weight * gender_loss_op)
train_op = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(joint_loss_op)

# max_to_keep = None is passed so that older checkpoints are not deleted.
saver = tf.train.Saver(max_to_keep = None)
  
print(len(data_loader.buffered_data))
# Per-iteration log; written to and closed by train().
log_file = open('./log.txt', 'w')

def train(sess):
  """Train the joint age / gender model until epoch `end_epoch` has been completed.

  Works on the ops, `data_loader`, `saver` and `log_file` created at module
  level. When `start_epoch` is not 0 the checkpoint of epoch `start_epoch - 1`
  is restored first. Every iteration appends one line to `log_file`; whenever
  the data loader advances to the next epoch the averaged metrics are printed,
  a checkpoint is saved and an "epoch_<n>.txt" summary is written.

  sess: the tf.Session the ops are run in.

  `log_file` is closed when the function exits, also when it exits with an error.
  """
  total_loss = 0
  total_age_loss = 0
  total_gender_loss = 0
  total_gender_correct_pred = 0
  num_gender_labels = 0
  iteration_count = 0
  ts_epoch = time.time()

  try:
    sess.run(tf.global_variables_initializer())

    if (start_epoch != 0):
      checkpoint = get_checkpoint(start_epoch - 1)
      saver.restore(sess, checkpoint)
      print('done restoring session')

    while data_loader.epoch <= end_epoch:
      # remember the loader state before next_batch possibly advances the epoch
      epoch = data_loader.epoch
      current_idx = data_loader.current_idx
      end_idx = data_loader.get_end_idx()

      ts = time.time()

      batch_x, batch_y = data_loader.next_batch(batch_size, image_size)
      batch_age = []
      batch_gender_one_hot = []
      batch_gender_mask = []
      for age_label, gender_label in batch_y:
        has_gender_label = gender_label is not None
        batch_age.append(age_label)
        # samples without a gender label get a dummy target and a zero loss weight
        batch_gender_one_hot.append(get_gender_one_hot(gender_label) if has_gender_label else [0, 0])
        batch_gender_mask.append(1.0 if has_gender_label else 0.0)
        num_gender_labels += (1.0 if has_gender_label else 0.0)

      feed_dict = { X: batch_x, AGE: batch_age, GENDER_ONE_HOT: batch_gender_one_hot, GENDER_MASK: batch_gender_mask }
      age_loss, gender_loss, loss, gender, _ = sess.run([age_loss_op, gender_loss_op, joint_loss_op, gender_op, train_op], feed_dict = feed_dict)

      total_loss += loss
      total_age_loss += age_loss
      total_gender_loss += gender_loss
      iteration_count += 1

      # gender accuracy only counts the samples that carry a gender label
      for batch_idx, pred_gender in enumerate(gender):
        pred_gender = gender_code_to_label(np.argmax(pred_gender))
        true_gender = gender_code_to_label(np.argmax(batch_gender_one_hot[batch_idx]))
        if batch_gender_mask[batch_idx] == 1.0 and pred_gender == true_gender:
          total_gender_correct_pred += 1.0

      log_file.write("epoch " + str(epoch) + ", (" + str(current_idx) + " of " + str(end_idx) + "), loss= " + "{:.4f}".format(loss)
            + ", time= " + str((time.time() - ts) * 1000) + "ms \n")

      if epoch != data_loader.epoch:
        # the loader moved on: summarize and checkpoint the epoch that just ended
        avg_loss = total_loss / iteration_count
        avg_age_loss = total_age_loss / iteration_count
        avg_gender_loss = total_gender_loss / iteration_count
        # an epoch without any gender label would otherwise divide by zero
        avg_gender_accuracy = (total_gender_correct_pred / num_gender_labels) if num_gender_labels > 0 else 0.0
        print('next epoch: ' + str(data_loader.epoch))
        print("avg_loss= {:.4f}".format(avg_loss))
        print("avg_age_loss= {:.4f}".format(avg_age_loss))
        print("avg_gender_loss= {:.4f}".format(avg_gender_loss))
        print("avg_gender_accuracy= {:.4f}".format(avg_gender_accuracy))
        saver.save(sess, model_name + '.ckpt', global_step = epoch)
        epoch_txt_file_path = 'epoch_' + str(epoch) + '.txt'
        with open(epoch_txt_file_path, 'w') as epoch_txt:
          epoch_txt.write("total_loss= {:.4f}\n".format(total_loss))
          epoch_txt.write("avg_loss= {:.4f}\n".format(avg_loss))
          epoch_txt.write("avg_age_loss= {:.4f}\n".format(avg_age_loss))
          epoch_txt.write("avg_gender_loss= {:.4f}\n".format(avg_gender_loss))
          epoch_txt.write("avg_gender_accuracy= {:.4f}\n".format(avg_gender_accuracy))
          epoch_txt.write("learning_rate= {}\n".format(learning_rate))
          epoch_txt.write("batch_size= {}\n".format(batch_size))
          epoch_txt.write("epoch_time= {}\n".format(time.time() - ts_epoch))

        total_loss = 0
        total_age_loss = 0
        total_gender_loss = 0
        total_gender_correct_pred = 0

        num_gender_labels = 0
        iteration_count = 0
        ts_epoch = time.time()

    print('done!')
  finally:
    log_file.close()
    
gpu_session(train)

71292
INFO:tensorflow:Restoring parameters from age_gender_xception_tiny_augmented4_blocks2_5_5000_rgb.ckpt-163
done restoring session
next epoch: 165
avg_loss= 4.4457
avg_age_loss= 4.2168
avg_gender_loss= 0.2289
avg_gender_accuracy= 0.9166
INFO:tensorflow:age_gender_xception_tiny_augmented4_blocks2_5_5000_rgb.ckpt-164 is not in all_model_checkpoint_paths. Manually adding it.
next epoch: 166
avg_loss= 4.4475
avg_age_loss= 4.2263
avg_gender_loss= 0.2212
avg_gender_accuracy= 0.9197
INFO:tensorflow:age_gender_xception_tiny_augmented4_blocks2_5_5000_rgb.ckpt-165 is not in all_model_checkpoint_paths. Manually adding it.
next epoch: 167
avg_loss= 4.4639
avg_age_loss= 4.2387
avg_gender_loss= 0.2252
avg_gender_accuracy= 0.9200
INFO:tensorflow:age_gender_xception_tiny_augmented4_blocks2_5_5000_rgb.ckpt-166 is not in all_model_checkpoint_paths. Manually adding it.
next epoch: 168
avg_loss= 4.4406
avg_age_loss= 4.2141
avg_gender_loss= 0.2265
avg_gender_accuracy= 0.9177
INFO:tensorflow:age_gender_

# Testing

## Forward Age Range Regressor

In [0]:
from colabsnippets.nn import XceptionTiny
from colabsnippets.utils import forward_factory

# inputs
# ------------------------------------------------------------------

min_age = 0
max_age = 150
delta = 0
net = AgeGenderXceptionTiny(num_blocks = 2)
model_name = net.name + '_augmented4_blocks2_5_5000_rgb'
#net = Xception(num_blocks = 2)
#model_name = net.name + '_augmented4_tiny_blocks2'

predict_age = lambda X: net.forward(X)
transform_label = lambda labels: labels
transform_prediction = lambda pred_age, pred_gender: (pred_age, gender_code_to_label(np.argmax(pred_gender)))

start_epoch = 163
end_epoch = 163

batch_size = 32
image_size = 112

tf.reset_default_graph()

dbs = ['utk', 'fgnet', 'chalearn', 'wiki', 'cacd', 'imdb', 'megaage', 'megaage-asian']
test_data = filter_data_in_age_range(load_json('./data/testData.json'), min_age - delta, max_age + delta)

forward_dbs = forward_dbs_factory(test_data, dbs = dbs, transform_label = transform_label, transform_prediction = transform_prediction)

# Evaluate the checkpoints from start_epoch down to end_epoch (both inclusive).
for epoch in range(start_epoch, end_epoch - 1, -1):
  print(epoch)
  # rebuild the graph from scratch for every checkpoint
  tf.reset_default_graph()
  net.init_trainable_weights()

  forward = forward_factory(predict_age, batch_size, image_size)
  saver = tf.train.Saver(max_to_keep = None)

  def test(sess):
    # restore the weights of this epoch, then run all test dbs through the net
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, get_checkpoint(epoch))
    return forward_dbs(lambda batch_x: forward(sess, batch_x))

  results = gpu_session(test)
  # the evaluation cell reads this file back under the name '<...>.pkl.npy'
  np.save(model_name + '_test_epoch_' + str(epoch) + '.pkl', results)

  

## Evaluate

In [0]:
# inputs
# ------------------------------------------------------------------
net = AgeGenderXceptionTiny(num_blocks = 2)
model_name = net.name + '_augmented4_blocks2_5_5000_rgb'
#net = Xception(num_blocks = 2)
#model_name = net.name + '_augmented4_tiny_blocks2'
start_epoch = 163
end_epoch = 0
min_age = 0
max_age = 80

compute_loss = lambda y, x: abs(y - x)
#compute_loss = lambda y, x: 1 if abs(y - x) < 1 else 0

#get_test_result_file_name = lambda epoch: 'dense_mobilenet_4_4_augmented4_test_epoch_132.npy'
#get_test_result_file_name = lambda epoch: 'dense_mobilenet_4_4_augmented4_test_epoch_132.npy'
#get_test_result_file_name = lambda epoch: 'age_category_classifier_augmented4_test_epoch_51.npy'
#get_test_result_file_name = lambda epoch: 'age_category_classifier_lg_augmented4_test_epoch_94.npy'
get_test_result_file_name = lambda epoch: model_name + '_test_epoch_' + str(epoch) + '.pkl.npy'
# ------------------------------------------------------------------


def extract_hist_results(results, category_range, target_db = None):
  """Aggregate age error and gender accuracy per age category.

  results: iterable of (db, (true_age, true_gender), (pred_age, pred_gender)).
  category_range: width of one age category in years.
  target_db: when given, only results of that db are taken into account.

  Only samples with min_age <= true_age < max_age (module-level bounds) are
  used. Returns a tuple of
    mae: mean absolute age error over all used samples (0.0 without samples),
    category_maes: mean absolute age error per category (0 for empty ones),
    avg_category_maes: mean of category_maes over all categories,
    weighted_category_maes: like avg_category_maes, with category weights
      falling linearly from 1.4 towards 0.8,
    category_gender_accuracy: gender accuracy per category (0 for categories
      without gender labels),
    gender_accuracy: mean of category_gender_accuracy over all categories.
  """
  num_bins = int((max_age - min_age) / category_range)
  counts = np.zeros(num_bins)
  acc_hist = np.zeros(num_bins)

  num_gender_preds = np.zeros(num_bins)
  num_correct_gender_preds = np.zeros(num_bins)

  for db, labels, predictions in results:
    pred_age, pred_gender = predictions
    true_age, true_gender = labels
    true_age, pred_age = float(true_age), float(pred_age)

    if (true_age < min_age or true_age >= max_age):
      continue

    if target_db is not None and db != target_db:
      continue

    hist_bin = int(math.floor((true_age - min_age) / category_range))
    counts[hist_bin] += 1
    acc_hist[hist_bin] += abs(true_age - pred_age)

    if true_gender is not None:
      num_gender_preds[hist_bin] += 1
      num_correct_gender_preds[hist_bin] += (1 if true_gender == pred_gender else 0)

  # Guard only the divisors against empty categories. The real counts stay
  # untouched so that the overall mae is not skewed by the placeholder ones.
  category_maes = np.divide(acc_hist, np.maximum(counts, 1))
  avg_category_maes = category_maes.sum() / num_bins
  category_gender_accuracy = np.divide(num_correct_gender_preds, np.maximum(num_gender_preds, 1))
  gender_accuracy = category_gender_accuracy.sum() / num_bins

  upper = 1.4
  lower = 0.8

  # linspace always yields exactly num_bins weights, a float-step arange may not
  category_weights = np.linspace(upper, lower, num_bins, endpoint = False)
  weighted_category_maes = (category_maes * category_weights).sum() / num_bins

  num_samples = counts.sum()
  mae = acc_hist.sum() / num_samples if num_samples > 0 else 0.0
  return mae, category_maes, avg_category_maes, weighted_category_maes, category_gender_accuracy, gender_accuracy

def evaluate_model_results(file):
  """Print the evaluation metrics of one saved test result file.

  file: path of a .npy file holding (db, (true_age, true_gender),
    (pred_age, pred_gender)) entries.

  Prints mae, gender accuracy and sample count per db, followed by the metrics
  over all dbs. Samples are counted with the same age bounds that
  extract_hist_results applies (min_age <= true_age < max_age), so the printed
  counts match the samples the metrics are computed from.
  """
  # NOTE: allow_pickle = True unpickles arbitrary objects - only load result
  # files that were produced by this notebook.
  results = np.load(file, allow_pickle=True)
  dbs_data_count = {}

  for db, labels, _ in results:
    true_age = float(labels[0])
    if (true_age < min_age or true_age >= max_age):
      continue
    dbs_data_count[db] = dbs_data_count.get(db, 0) + 1

  for db, db_data_count in dbs_data_count.items():
    mae, category_maes, avg_category_maes, weighted_category_maes, category_gender_accuracy, gender_accuracy = extract_hist_results(results, 5, db)
    print(db + ' : ' + "mae: {:.4f}".format(mae) + ", gender accuracy:  {:.4f}".format(gender_accuracy) + ', ' +  str(db_data_count))

  mae, category_maes, avg_category_maes, weighted_category_maes, category_gender_accuracy, gender_accuracy = extract_hist_results(results, 5)
  print('gender accuracy: ' + "{:.4f}".format(gender_accuracy))
  print('weighted cat mae: ' + "{:.4f}".format(weighted_category_maes))
  print('avg cat mae: ' + "{:.4f}".format(avg_category_maes))
  print('mae: ' + "{:.4f}".format(mae))
  print(''.join("{:.2f} | ".format(v) for v in category_maes))
  print(''.join("{:.2f} | ".format(v) for v in category_gender_accuracy))

for epoch in range(start_epoch, end_epoch - 1, -1):
  print(epoch)
  evaluate_model_results(get_test_result_file_name(epoch))

## Plot Confusion

In [0]:
import numpy as np
import matplotlib.pyplot as plt

file = 'xception_tiny_augmented4_blocks2_5_5000_test_epoch_57.npy'
results = np.load(file)

min_age = 0
max_age = 150

def extract_results(target_db = None):
  """Collect rounded (predicted, true) age pairs from the module-level results.

  target_db: when given, only results of that db are returned.

  Only entries with min_age <= true_age <= max_age are used. Returns the tuple
  (x, y) with x holding the rounded predicted ages and y the rounded true ages.
  """
  rounded_pred_ages, rounded_true_ages = [], []
  for db, true_age, pred_age in results:
    true_age, pred_age = float(true_age), float(pred_age)

    outside_age_range = true_age < min_age or true_age > max_age
    other_db = target_db is not None and db != target_db
    if outside_age_range or other_db:
      continue

    rounded_pred_ages.append(round(pred_age))
    rounded_true_ages.append(round(true_age))

  return rounded_pred_ages, rounded_true_ages


# x holds the rounded predicted ages, y the rounded true ages (see extract_results).
x_utk, y_utk = extract_results('utk')
x_chalearn, y_chalearn = extract_results('chalearn')
x_fgnet, y_fgnet = extract_results('fgnet')
x_wiki, y_wiki = extract_results('wiki')
x_megaage, y_megaage = extract_results('megaage')
x_megaage_asian, y_megaage_asian = extract_results('megaage-asian')
x_all, y_all = extract_results()

# NOTE(review): figure size and ticks are set once, before the first scatter
# plot; presumably the plots after the first plt.show() fall back to the
# default figure settings - confirm if uniform plots are wanted.
plt.figure(figsize = (10, 10))
plt.xticks(np.arange(-10, 120, 10))
plt.yticks(np.arange(-10, 120, 10))
plt.scatter(x_all, y_all, s = 5,  c = 'b')
plt.show()
plt.scatter(x_utk, y_utk, s = 5,  c = 'r')
plt.show()
plt.scatter(x_chalearn, y_chalearn, s = 5,  c = 'g')
plt.show()
plt.scatter(x_fgnet, y_fgnet, s = 5,  c = 'y')
plt.show()
plt.scatter(x_wiki, y_wiki, s = 5,  c = 'b')
plt.show()
plt.scatter(x_megaage, y_megaage, s = 5,  c = 'r')
plt.show()
plt.scatter(x_megaage_asian, y_megaage_asian, s = 5,  c = 'y')
plt.show()

## Plot MAE

In [0]:
import numpy as np
import matplotlib.pyplot as plt

file = 'xception_tiny_augmented4_blocks2_5_5000_test_epoch_56.npy'
results = np.load(file)

min_age = 0
max_age = 80

cat_range = 2
num_bins = int((max_age - min_age) / cat_range)

def extract_results(target_db = None):
  """Compute the mean absolute age error per age category from the module-level results.

  target_db: when given, only results of that db are taken into account.

  Only entries with min_age <= true_age < max_age are used; categories are
  cat_range years wide and counted from min_age. Prints target_db, the mean of
  the category errors and the overall mean absolute error, and returns the
  array of per-category errors (0 for categories without samples).
  """
  # Start from real zero counts: pre-filling the counts with ones would divide
  # every category by (n + 1) and inflate the overall denominator.
  counts = np.zeros(num_bins)
  acc_hist = np.zeros(num_bins)
  for db, true_age, pred_age in results:
    true_age, pred_age = float(true_age), float(pred_age)

    if (true_age < min_age or true_age >= max_age):
      continue

    if target_db is not None and db != target_db:
      continue

    hist_bin = int(math.floor((true_age - min_age) / cat_range))
    counts[hist_bin] += 1
    acc_hist[hist_bin] += abs(true_age - pred_age)

  # guard only the divisor against empty categories
  hist = np.divide(acc_hist, np.maximum(counts, 1))
  num_samples = counts.sum()
  overall_mae = acc_hist.sum() / num_samples if num_samples > 0 else 0.0
  print(target_db, hist.sum() / num_bins, overall_mae)
  return hist
  
categories = np.arange(num_bins) * cat_range
  
#plt.show()
plt.scatter(categories, extract_results('utk'), c = 'r', s = 10)
#plt.show()
plt.scatter(categories, extract_results('chalearn'), c = 'g', s = 10)
#plt.show()
plt.scatter(categories, extract_results('fgnet'), c = 'y', s = 10)
plt.scatter(categories, extract_results(), c = 'b', s = 20)
plt.show()  
#plt.show()
plt.scatter(categories, extract_results('megaage'), c = 'r', s = 10)
plt.scatter(categories, extract_results('megaage-asian'), c = 'b', s = 10)
plt.show()
extract_results('wiki')
extract_results('imdb')
extract_results('cacd')
pass

## Show Results

In [0]:
!rm -rf ./show_results && mkdir ./show_results


from IPython.display import Image, display
from colabsnippets.age_gender_recognition import AgeGenderXceptionTiny
from colabsnippets.utils import forward_factory, shuffle_array

# inputs
# ------------------------------------------------------------------

tf.reset_default_graph()
  
min_age = 0
max_age = 150
db = 'utk'
num_inputs = 24
num_images_per_row = 8

epoch = 92
batch_size = 1
image_size = 112

net = AgeGenderXceptionTiny(num_blocks = 2)
model_name = net.name + '_augmented4_blocks2_5_5000'
net.init_trainable_weights()

predict_age_gender = lambda X: net.forward(X)
forward = forward_factory(predict_age_gender, batch_size, image_size)
saver = tf.train.Saver(max_to_keep = None)


all_data = filter_data_in_age_range(load_json('./data/testData.json'), min_age, max_age)
db_data = []
for data in all_data:
  if db is None or data['db'] == db:
    db_data.append(data)
    
data_loader = DataLoader(shuffle_array(db_data)[0: num_inputs], is_test = True)
    
def run(sess):
  """Restore the checkpoint of `epoch` and predict age and gender for all inputs of data_loader.

  sess: the tf.Session to run the forward pass in.

  Returns (all_x, all_true_ages, all_pred_ages, all_pred_genders): the input
  images resized to 130x130, the true ages as floats, the predicted ages and
  the predicted gender labels, all in loading order.
  """
  sess.run(tf.global_variables_initializer())
  saver.restore(sess, get_checkpoint(epoch))

  all_x, all_true_ages, all_pred_ages, all_pred_genders = [], [], [], []
  next_batch = data_loader.next_batch(batch_size, image_size)
  # next_batch signals the end of the data with None
  while next_batch is not None:
    batch_x, batch_y = next_batch
    pred_ages, pred_genders = forward(sess, batch_x)
    # iterate over the samples actually present in the batch
    for batch_idx in range(0, len(batch_y)):
      all_x.append(cv2.resize(batch_x[batch_idx], (130, 130)))
      all_true_ages.append(float(batch_y[batch_idx][0]))
      all_pred_ages.append(pred_ages[batch_idx])
      pred_gender = np.argmax(pred_genders[batch_idx])
      all_pred_genders.append(gender_code_to_label(pred_gender))
    # request follow-up batches with the same batch and image size as the first one
    next_batch = data_loader.next_batch(batch_size, image_size)

  return all_x, all_true_ages, all_pred_ages, all_pred_genders
  
all_x, all_true_ages, all_pred_ages, all_pred_genders = gpu_session(run)

# display
file_idx = 0
idx = 0
while idx < num_inputs:
  imgs = all_x[idx : idx + num_images_per_row]
  true_ages = all_true_ages[idx : idx + num_images_per_row]
  pred_ages = all_pred_ages[idx : idx + num_images_per_row]
  pred_genders = all_pred_genders[idx : idx + num_images_per_row]
  for i in range(0, len(imgs)):
    text = "{} | {}".format(round(true_ages[i]), round(pred_ages[i]))
    #cv2.putText(imgs[i], text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    #cv2.putText(imgs[i], pred_genders[i], (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    
    text = "{} | {}".format(int(round(pred_ages[i])), pred_genders[i])
    cv2.putText(imgs[i], text, (2, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2, cv2.LINE_AA)
    
  merged_img = np.concatenate(imgs, axis = 1)
  file = './show_results/' + str(file_idx) + '.jpg'
  cv2.imwrite(file, merged_img)
  display(Image(file))
  
  file_idx += 1
  idx += num_images_per_row

!rm -rf ./show_results

## Save Intermediate Activations

In [0]:
!rm -rf ./show_results && mkdir ./show_results


from IPython.display import Image, display
from colabsnippets.age_gender_recognition import AgeGenderXceptionTiny
from colabsnippets.utils import forward_factory, shuffle_array, save_weights
from colabsnippets.preprocess import resize_preserve_aspect_ratio, pad_to_square
from colabsnippets import WeightInitializer

# inputs
# ------------------------------------------------------------------

tf.reset_default_graph()

#inputs = ["70.96_male0.97.png", "3.36_male0.51.png", "17.42_male0.88.png", "21.05_male0.59.png", "37.63_female0.5.png", "59.39_male0.94.png"]
inputs = [ "37.63_female0.5_gt.png", "37.63_female0.5_p.png", "37.63_female0.5_p2.png"]

num_images_per_row = 8

epoch = 92
batch_size = 1
image_size = 112

def load_weights(net, checkpoint_file, weight_initializer = tf.keras.initializers.glorot_normal(), bias_initializer = tf.keras.initializers.Zeros()):
  """Create the variables of `net` with initial values read from a flat float32 file.

  net: model whose initialize_weights method requests variables by name and shape.
  checkpoint_file: path of a raw float32 file, read with np.fromfile.
  weight_initializer, bias_initializer: only handed to the factory below; the
    values themselves always come from the file.

  The file is consumed sequentially: each requested variable takes the next
  prod(shape) values, so the request order has to match the order the values
  were written in.
  """
  checkpoint_data = np.fromfile(checkpoint_file, dtype = 'float32')

  # idx counts the created variables, data_idx is the read offset into checkpoint_data
  idx = 0
  data_idx = 0

  def initialize_weights_factory(initializer):
    # NOTE(review): `initializer` is not used by initialize_weights
    def initialize_weights(name, shape):
      nonlocal idx, data_idx
      # number of values this variable occupies in the flat file
      size = 1
      for val in shape:
        size = size * val
      initial_value = np.reshape(checkpoint_data[data_idx:data_idx + size], shape)

      data_idx += size

      var = tf.get_variable(name, initializer = initial_value.astype(np.float32))

      idx += 1

      return var

    return initialize_weights

  net.initialize_weights(WeightInitializer(initialize_weights_factory(weight_initializer), initialize_weights_factory(bias_initializer)))

def save_weights(var_list, checkpoint_file):
  """Write the values of all variables into one flat binary file plus a json index.

  var_list: variables offering get_shape().as_list(), name and eval().
  checkpoint_file: target path of the raw value file; the shapes and names are
    written to checkpoint_file + '.json' in the same order as the values.

  The dtype of the written data is printed before the file is written.
  NOTE: this definition shadows the save_weights imported from
  colabsnippets.utils at the top of this cell.
  """
  meta_data = []
  # leading empty float32 chunk: keeps the float32 dtype for an empty var_list
  value_chunks = [np.array([], dtype = 'float32')]
  for var in var_list:
    meta_data.append({ 'shape': var.get_shape().as_list(), 'name': var.name })
    value_chunks.append(var.eval().flatten())

  # one concatenation instead of re-allocating the whole array per variable
  checkpoint_data = np.concatenate(value_chunks)

  with open(checkpoint_file + '.json', 'w') as meta_json:
    meta_json.write(json.dumps(meta_data))
  print(checkpoint_data.dtype)
  checkpoint_data.tofile(checkpoint_file)

net = AgeGenderXceptionTiny(num_blocks = 2)
#model_name = net.name + '_augmented4_blocks2_5_5000'
#weights = np.fromfile('tmp.weights.npy', dtype = 'float64')
#np.save('age_gender.weights2.npy', weights.astype('float32'))
#weights = np.fromfile('age_gender.weights', dtype = 'float32')
#np.save('age_gender.weights.npy', weights)
#net.load_weights('age_gender.weights')
load_weights(net, 'tmp.weights')
#net.init_trainable_weights()

predict_age_gender = lambda X: net.forward(X)

# auto recompile ops in case of new batch size
X = tf.placeholder(tf.float32, [batch_size, image_size, image_size, 3])
forward_op = predict_age_gender(X)

def forward(sess, batch_x):
  """Run the network on batch_x and return the outputs of its forward op.

  sess: the tf.Session to run in.
  batch_x: array of input images; its first dimension is the batch size.

  The module-level placeholder X and forward_op are used when the batch size
  matches. Otherwise a placeholder and a forward op for the actual batch size
  are built through predict_age_gender (the formerly referenced
  compile_forward_op is not defined anywhere in this notebook).
  """
  local_X, local_forward_op = X, forward_op
  if batch_x.shape[0] != X.shape[0]:
    local_X = tf.placeholder(tf.float32, [batch_x.shape[0], image_size, image_size, 3])
    local_forward_op = predict_age_gender(local_X)
  return sess.run(local_forward_op, feed_dict = { local_X: batch_x })

saver = tf.train.Saver(max_to_keep = None)

def pad_to_square2(img):
  """Center an image on a zero-filled square canvas.

  img: array of shape (height, width) or (height, width, channels); a 2d input
    is treated as a single-channel image.

  Returns a new (side, side, channels) array of the input's dtype with
  side = max(height, width). The image is centered, odd paddings put the
  smaller share on the top / left.
  """
  if img.ndim == 2:
    img = img[:, :, np.newaxis]

  height, width, channels = img.shape
  side = max(height, width)
  canvas = np.zeros((side, side, channels), dtype = img.dtype)

  top = (side - height) // 2
  left = (side - width) // 2
  canvas[top:top + height, left:left + width] = img

  return canvas
print(tf.get_default_graph().get_operations())
def run(sess):
  """Predict age and gender for every file listed in the module-level `inputs`.

  sess: the tf.Session to run the forward pass in.

  The true age and gender are taken from the first two '_' separated parts of
  each file name. Returns (all_x, all_true_ages, all_true_genders,
  all_pred_ages, all_pred_genders), all in input order; a predicted gender is
  the label followed by its softmax probability formatted with two decimals.

  Raises Exception when an input image cannot be read.
  """
  sess.run(tf.global_variables_initializer())
  #saver.restore(sess, get_checkpoint(epoch))
  #save_weights(tf.global_variables(), 'tmp.weights')

  all_x, all_true_ages, all_true_genders, all_pred_ages, all_pred_genders = [], [], [], [], []

  for file in inputs:
    img = cv2.imread(file)
    if img is None:
      # cv2.imread returns None instead of raising for unreadable files
      raise Exception('failed to read input image: ' + str(file))

    true_age, true_gender = file.split('_')[0:2]
    all_true_ages.append(float(true_age))
    all_true_genders.append(true_gender)

    # bring the image to the square input size the placeholder X was built with
    img = pad_to_square2(resize_preserve_aspect_ratio(img, image_size))
    all_x.append(img)

    ages, genders = forward(sess, np.array([img]))
    # NOTE: this adds a new softmax op to the graph for every input file
    gender_probs = tf.nn.softmax(genders[0]).eval()
    gender_code = np.argmax(genders[0])
    pred_gender = gender_code_to_label(gender_code) + "{:.2f}".format(gender_probs[gender_code])
    all_pred_ages.append(ages[0])
    all_pred_genders.append(pred_gender)

  return all_x, all_true_ages, all_true_genders, all_pred_ages, all_pred_genders

all_x, all_true_ages, all_true_genders, all_pred_ages, all_pred_genders = gpu_session(run)

# display
file_idx = 0
idx = 0
while idx < len(all_true_ages):
  imgs = all_x[idx : idx + num_images_per_row]
  true_ages = all_true_ages[idx : idx + num_images_per_row]
  pred_ages = all_pred_ages[idx : idx + num_images_per_row]
  pred_genders = all_pred_genders[idx : idx + num_images_per_row]
  for i in range(0, len(imgs)):
    text = "{} | {}".format(round(true_ages[i]), round(pred_ages[i]))
    #cv2.putText(imgs[i], text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    #cv2.putText(imgs[i], pred_genders[i], (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    
    text = "{} | {}".format(int(round(pred_ages[i])), pred_genders[i])
    cv2.putText(imgs[i], text, (2, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 1, cv2.LINE_AA)
    
  merged_img = np.concatenate(imgs, axis = 1)
  file = './show_results/' + str(file_idx) + '.jpg'
  cv2.imwrite(file, merged_img)
  display(Image(file))
  
  file_idx += 1
  idx += num_images_per_row

!rm -rf ./show_results

# Debug

## Check Inputs

In [0]:
!rm -rf ./check_inputs && mkdir ./check_inputs

from IPython.display import Image, display

num_inputs = 50
image_size = 112
num_images_per_row = 10
db = 'megaage-asian'

image_augmentor = ImageAugmentor.load('./augmentor_4.json')
train_data = load_json('./data/testData.json')

db_data = []
for data in train_data:
  if db is None or data['db'] == db:
    db_data.append(data)
    
data_loader = DataLoader(db_data, start_epoch = 0, image_augmentor = image_augmentor)
batch_x, _ = data_loader.next_batch(num_inputs, image_size)

file_idx = 0
idx = 0
while idx < num_inputs:
  imgs = batch_x[idx : idx + num_images_per_row]
  merged_img = np.concatenate(imgs, axis = 1)
  
  file = './check_inputs/' + str(file_idx) + '.jpg'
  cv2.imwrite(file, merged_img)
  display(Image(file))
  
  file_idx += 1
  idx += num_images_per_row

!rm -rf ./check_inputs

## Plot Age Distribution

In [0]:
import matplotlib.pyplot as plt

from colabsnippets.utils import shuffle_array

num_bins = 60

train_data = load_json('./data/trainData.json')
def get_epoch_data():
  """Assemble the entries of one training epoch, capped per age category.

  The module-level train_data is grouped into 5-year age categories covering
  the ages 0 to 120; every category is shuffled and contributes at most 5000
  entries. The categories are concatenated in ascending age order.
  """
  age_category_range = 5
  max_per_category = 5000
  num_bins = int(120 / age_category_range)
  data_by_age_category = [[] for _ in range(num_bins)]

  for data in train_data:
    # extract_data_labels returns (age, gender); only the age selects the category
    true_age, _ = extract_data_labels(data)
    hist_bin = int(math.floor(true_age / age_category_range))
    data_by_age_category[hist_bin].append(data)

  epoch_data = []
  for datas in data_by_age_category:
    epoch_data += shuffle_array(datas)[0:max_per_category]

  return epoch_data

# Histogram of the ages contained in one epoch of training data.
labels = []
for data in get_epoch_data():
  # extract_data_labels returns (age, gender); the histogram only takes the age
  age, _ = extract_data_labels(data)
  labels.append(age)
foo = plt.hist(labels, num_bins, facecolor='green', alpha=0.75)