In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy.ndimage import rotate
import Utils
import cv2
from facenet_pytorch import MTCNN
from Weights import *

In [2]:
data_root = '../../data/'
def filtered_labels(path, columns = ['skin_tone','gender','age']):
    """Load a label CSV and integer-encode its demographic columns.

    Parameters
    ----------
    path : str
        File name, appended directly to the module-level ``data_root``
        (plain string concatenation, so ``data_root`` must end with a
        path separator).
    columns : list of str or None
        Columns in which a missing value causes the whole row to be
        dropped. ``None`` or an empty list keeps every row. The default
        list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with three columns re-encoded:

        * ``skin_tone``: ``'monk_N'`` becomes the integer ``N - 1``.
        * ``gender``: ``1`` for ``'male'``, ``0`` for any other value.
        * ``age``: bucket index 0-3 for ``'0_17'``, ``'18_30'``,
          ``'31_60'``, ``'61_100'``.

        Missing values that survive the filter stay missing (NaN) in the
        encoded columns, which makes the affected column float-typed.

    Raises
    ------
    KeyError
        If a non-missing ``age`` value is not one of the four buckets.
    """
    df = pd.read_csv(data_root + path)
    if columns is not None and len(columns) > 0:
        # Copy so the column assignments below write to an independent frame.
        df = df.dropna(how='any',subset=columns).copy()

    age_map = {
        '0_17': 0,
        '18_30': 1,
        '31_60': 2,
        '61_100': 3,
    }

    # na_action='ignore' skips missing entries instead of handing NaN to the
    # encoders. Without it a missing gender is silently encoded as 0
    # (NaN == 'male' is False) and a missing skin_tone/age raises.
    df['skin_tone'] = df.skin_tone.map(
        lambda x: int(x.replace('monk_','')) - 1, na_action='ignore')
    df['gender'] = df.gender.map(
        lambda x: int(x == 'male'), na_action='ignore')
    # Explicit dict lookup (not .map(age_map)) so an unexpected bucket label
    # still fails loudly with KeyError.
    df['age'] = df.age.map(lambda x: age_map[x], na_action='ignore')

    return df

# train_labels = filtered_labels('train_labels.csv')
# test_labels = filtered_labels('labels.csv')

In [3]:
def detect_nonfaces(df,root,image_size=256):
    """Flag each image listed in ``df.name`` by whether MTCNN finds a face.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``name`` column of image file names. The frame is
        modified in place: an ``is_face`` column is added or overwritten.
    root : str
        Prefix concatenated directly with each file name to form the path.
    image_size : int
        Forwarded to the ``MTCNN`` constructor.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the boolean ``is_face`` column.

    Notes
    -----
    If the detector raises, the error and file name are printed and the
    image is treated as a face (``True``), so a detector failure never
    marks a sample as a non-face. Errors from opening the file are not
    caught and propagate to the caller.
    """
    mtcnn = MTCNN(image_size = image_size)

    def is_face(file):
        # The context manager closes the file handle even when the detector
        # raises before the pixel data has been loaded.
        with Image.open(root+file) as img:
            try:
                # The detector returns None when it finds no face.
                return mtcnn(img) is not None
            except Exception as e:
                print(e,file)
                return True

    df['is_face'] = df.name.apply(is_face)
    return df

# train_labels = detect_nonfaces(train_labels,data_root)
# test_labels = detect_nonfaces(test_labels,data_root)
# train_labels[train_labels.is_face]

In [4]:
# validation_labels = train_labels.sample(frac=.2,replace=False)
# train_labels = train_labels.drop(validation_labels.index)
# train_labels

In [5]:
# train_labels.to_csv('train_data_clean.csv',index=False)
# validation_labels.to_csv('validation_data_clean.csv',index=False)
# test_labels.to_csv('test_data_clean.csv',index=False)

In [6]:
# Load the augmented training table from the notebook's working directory.
# NOTE(review): the rendered output lists two leading columns named
# 'Unnamed: 0.1' and 'Unnamed: 0'. They look like index columns written out
# by earlier to_csv calls; confirm nothing downstream reads them before
# dropping them.
labels = pd.read_csv(
    'train_data_augmented_balanceddualhistogram.csv',
)
# Bare last expression so the notebook renders the frame.
labels

Unnamed: 0.1,Unnamed: 0,name,skin_tone,gender,age,is_face,0-0-0_anchor,0-0-0_bias,0-0-1_anchor,0-0-1_bias,...,9-1-1_anchor,9-1-1_bias,9-2-0_anchor,9-2-0_bias,9-2-1_anchor,9-2-1_bias,9-3-0_anchor,9-3-0_bias,9-3-1_anchor,9-3-1_bias
0,0,TRAIN0001.png,0,0,1,False,0.0,0.025769,0.0,0.975044,...,0.0,1.485796,0.0,0.513853,0.0,1.463127,0.0,0.025415,0.0,0.974689
1,1,TRAIN0002.png,5,1,0,True,0.0,1.654119,0.0,1.345881,...,0.0,0.345881,0.0,0.654119,0.0,0.345881,0.0,0.654119,0.0,0.345881
2,2,TRAIN0005.png,1,1,0,False,0.0,0.433162,0.0,0.622616,...,0.0,0.898250,0.0,1.075956,0.0,1.265410,0.0,0.422038,0.0,0.611491
3,3,TRAIN0007.png,1,0,1,True,0.0,2.917475,0.0,1.946971,...,0.0,0.031364,0.0,0.986136,0.0,0.015633,0.0,0.985252,0.0,0.014748
4,4,TRAIN0009.png,7,0,1,False,0.0,0.024288,0.0,0.975989,...,0.0,0.989455,0.0,0.037822,0.0,0.989523,0.0,1.037686,0.0,1.989387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6837,6837,TRAIN9992.png,4,0,2,True,0.0,2.322163,0.0,2.605432,...,0.0,0.642963,0.0,0.358371,0.0,0.641640,0.0,0.358366,0.0,0.641634
6838,6838,TRAIN9993.png,1,1,1,True,0.0,1.139229,0.0,0.291743,...,0.0,0.325924,0.0,1.674003,0.0,0.826517,0.0,0.923751,0.0,0.076265
6839,6839,TRAIN9995.png,8,0,1,True,0.0,0.999805,0.0,0.000379,...,0.0,0.067027,0.0,1.934254,0.0,0.934828,0.0,1.000400,0.0,0.000974
6840,6840,TRAIN9998.png,4,1,1,False,0.0,0.040941,0.0,0.963131,...,0.0,1.957443,0.0,0.042272,0.0,0.964462,0.0,0.038910,0.0,0.961100


In [9]:
from DataLoaders import *
# Smoke-test the batch generator: build it over the loaded labels and print
# the shape and mean of the first element of each of the first twelve batches.
# NOTE(review): this uses Constants.data_root, while an earlier cell defines
# its own module-level data_root; confirm both point at the same directory.
test = FaceGenerator(
    labels,
    Constants.data_root,
    random_upsample=True,
    softmax=True,
)
for batch_idx, batch in enumerate(test):
    images = batch[0]
    print(images.shape, images.mean())
    # Stop once batch indices 0 through 11 have been printed.
    if batch_idx >= 11:
        break

(4869, 166)
torch.Size([100, 3, 160, 160]) tensor(0.3281)
torch.Size([100, 3, 160, 160]) tensor(0.3099)
torch.Size([100, 3, 160, 160]) tensor(0.2990)
torch.Size([100, 3, 160, 160]) tensor(0.3126)
torch.Size([100, 3, 160, 160]) tensor(0.3080)
torch.Size([100, 3, 160, 160]) tensor(0.3096)
torch.Size([100, 3, 160, 160]) tensor(0.3233)
torch.Size([100, 3, 160, 160]) tensor(0.3130)
torch.Size([100, 3, 160, 160]) tensor(0.3192)
torch.Size([100, 3, 160, 160]) tensor(0.3143)
torch.Size([100, 3, 160, 160]) tensor(0.3344)
torch.Size([100, 3, 160, 160]) tensor(0.3176)


In [8]:
# len(test)