# Calculations OpenFace

## load & init 

In [1]:
# imports
%load_ext autoreload
%autoreload 2

# import
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import helpers as hlp
from scipy.linalg import orth
from sklearn.linear_model import LogisticRegression

# Path of the HDF5 file that every cell in this notebook reads from / writes to.
mystore = r'C:\Daten\_tmp\FRdata.h5'
# Project reader object; used further down via `rd.agegen_df`.
rd = hlp.H5Reader2()

# Collect the keys while the store is open, then show them once it is closed again.
with pd.HDFStore(mystore) as store:
    available_keys = store.keys()
print(available_keys)

['/accuracies', '/accuracies_gblinded', '/accuracies_logit_only_128', '/accuracies_rblinded', '/accuracies_rgblinded', '/awareness_predictions', '/clf_predict_indices', '/clf_predict_indices_facenet', '/cluster_eucl_distances', '/cluster_eucl_distances_3d', '/ethnicity_fr_rates', '/logit_race_f1', '/nn_eucl_distances', '/nn_indices', '/pair_predictions', '/pair_predictions_facenet', '/t_SNE_FaceNet', '/t_SNE_aware', '/t_SNE_blinded', '/t_SNE_openface', '/t_SNE_openface2', '/t_SNE_vgg128']


In [3]:
# Maintenance cell: as written it only opens and closes the HDF5 store (no-op).
# Un-commenting the line below would remove the entry stored under `nnkey`;
# NOTE(review): `nnkey` is not defined in any visible cell - set it before enabling.
with pd.HDFStore(mystore) as store:
    #store.remove(nnkey)
    pass

In [7]:
# Load the 'OpenFace' data set: X is the feature matrix, df the per-row metadata.
reader = hlp.H5Reader2()
X, df = reader.read('OpenFace')

# Integer category codes of the categorical columns, used as classifier targets.
yr = df['race'].cat.codes.values
ya = df['age'].cat.codes.values
# Gender codes are flipped (0 <-> 1) relative to the category order.
yg = 1 - df['gender'].cat.codes.values

# Plain-string race labels plus the subject / image identifier of every row.
race_labels = df['race'].astype(str).values
ids = df['subject'].values
imgs = df['img'].values

In [9]:
# Sanity check: Euclidean norm of every row of X (axis=1).
# The recorded output shows values of ~1, i.e. the rows are unit length.
np.linalg.norm(X,axis=1)

array([0.99999998, 1.00000005, 0.99999996, ..., 0.99999999, 0.99999997,
       0.99999998])

## t-SNE

In [47]:
# ======== takes a long time to run ==========
# Project X to 2-D with t-SNE and cache the coordinates, together with the
# race / gender / age labels (as strings), under 't_SNE_openface' in the store.
from sklearn.manifold import TSNE

# Fixed seed: t-SNE is stochastic, so without it every run gives a different map.
Xt = TSNE(n_components=2, random_state=0).fit_transform(X)
tmp = pd.concat(
    [
        df[['race','gender','age']].astype(str),
        # Reuse df's index so the coordinates line up row-by-row with the labels
        # even when df does not carry a default 0..n-1 index.
        pd.DataFrame(Xt, columns=['x','y'], index=df.index),
    ],
    axis=1,  # must be a keyword: positional `axis` was removed in pandas 2.0
)
# `key` passed by keyword: positional use is deprecated in recent pandas.
tmp.to_hdf(mystore, key='t_SNE_openface')

## Awareness with different classifiers

In [8]:
# this will take ages
# For every feature variant yielded by hlp.X_generator, every classifier in
# clf_dict and every target in y_dict, run mb.predict and append the result as
# one column '<clf>-<variant>-<target>' to 'clf_predict_indices_openface'.
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import NearestCentroid

mb = hlp.ModelBenchmarker(ids,n_splits=2)

# The MLPs are seeded like the logistic regression so that re-running the cell
# reproduces the stored columns (weight init and shuffling are random otherwise).
clf_dict = {'Centroid':NearestCentroid(),
            'Logit':LogisticRegression(random_state=0,max_iter=500,solver='sag'),
            # 'KNN5':KNeighborsClassifier(5),
            'NN1':MLPClassifier(alpha=1, max_iter=1000,hidden_layer_sizes=[100],tol=1e-5,random_state=0) ,
            'NN2':MLPClassifier(alpha=1, max_iter=1000,hidden_layer_sizes=[100,20],tol=1e-5,random_state=0)}

# Targets to predict: race codes ('r') and gender codes ('g').
y_dict = {'r':yr, 'g':yg}
X_dict = {'OpenFace':X}
# One variant per key; presumably 'n' = unmodified, 'r' / 'g' = race / gender
# information removed from X - TODO confirm against hlp.X_generator.
y_args_dict = {'n':[],'r':[yr],'g':[yg]}
for X_name,Xi in hlp.X_generator(X_dict,y_args_dict, cosine=False):
    for clf_name, clf in clf_dict.items():
        for y_name, y in y_dict.items():
            col_name = clf_name + '-' + X_name + '-' + y_name
            y_pred = mb.predict(clf, Xi, y)
            # Written after each fit so finished columns survive an interrupted run.
            hlp.hdf_add_col(mystore,'clf_predict_indices_openface',col_name,y_pred)
            print(col_name,'done ...')

Centroid-OpenFace-n-e-r done ...
Centroid-OpenFace-n-e-g done ...
Logit-OpenFace-n-e-r done ...
Logit-OpenFace-n-e-g done ...
NN1-OpenFace-n-e-r done ...
NN1-OpenFace-n-e-g done ...
NN2-OpenFace-n-e-r done ...
NN2-OpenFace-n-e-g done ...
Centroid-OpenFace-r-e-r done ...
Centroid-OpenFace-r-e-g done ...
Logit-OpenFace-r-e-r done ...
Logit-OpenFace-r-e-g done ...
NN1-OpenFace-r-e-r done ...
NN1-OpenFace-r-e-g done ...
NN2-OpenFace-r-e-r done ...
NN2-OpenFace-r-e-g done ...
Centroid-OpenFace-g-e-r done ...
Centroid-OpenFace-g-e-g done ...
Logit-OpenFace-g-e-r done ...
Logit-OpenFace-g-e-g done ...
NN1-OpenFace-g-e-r done ...
NN1-OpenFace-g-e-g done ...
NN2-OpenFace-g-e-r done ...
NN2-OpenFace-g-e-g done ...


## Face Recognition Rate (with pair metric)

In [6]:
# Build the RFW pair table and enrich it with subject ids, age and gender.
frm = hlp.RFWDistances()
pairs = frm._df  # alias of the same frame: the inserts below modify frm._df in place


def _subject_of(label):
    """Return the image label without its last five characters (e.g. '_0003')."""
    return label[:-5]


# Subject id of both images of each pair, placed at column position 3.
pairs.insert(3, 'subject2', pairs['label2'].map(_subject_of))
pairs.insert(3, 'subject1', pairs['label1'].map(_subject_of))

# Look up the age / gender rows once, keyed by the first subject of each pair.
agegen = rd.agegen_df.reindex(pairs['subject1'])
pairs.insert(1, 'age', agegen['age'].astype(str).values)
pairs.insert(1, 'gender', agegen['gender'].astype(str).values)
frm._df.head()

Unnamed: 0,race,gender,age,label1,label2,subject1,subject2,issame
0,Caucasian,Female,30-45,Caucasian_027n6t8_0003,Caucasian_027n6t8_0002,Caucasian_027n6t8,Caucasian_027n6t8,1
1,Caucasian,Male,30-45,Caucasian_016lb3_0004,Caucasian_016lb3_0002,Caucasian_016lb3,Caucasian_016lb3,1
2,Caucasian,Male,30-45,Caucasian_02_4qt_0003,Caucasian_02_4qt_0002,Caucasian_02_4qt,Caucasian_02_4qt,1
3,Caucasian,Male,30-45,Caucasian_09g8fj_0003,Caucasian_09g8fj_0001,Caucasian_09g8fj,Caucasian_09g8fj,1
4,Caucasian,Female,30-45,Caucasian_01q3xr_0001,Caucasian_01q3xr_0003,Caucasian_01q3xr,Caucasian_01q3xr,1


In [4]:
# Euclidean pair distances for every feature variant yielded by hlp.X_generator;
# dist_df ends up with one column per variant name.
X_dict = {'OpenFace': X}
y_args_dict = {'n': [], 'r': [yr], 'g': [yg]}
dist_df = pd.DataFrame()
variants = hlp.X_generator(X_dict, y_args_dict, verbose=True, cosine=False)
for name, Xi in variants:
    print(name)
    pair_distances = frm.get_distance_df(Xi, imgs)
    # Only the 'eucl' column of the returned frame is kept.
    dist_df[name] = pair_distances['eucl']

OpenFace-n-e
eigenvalues of B = [1. 1. 1.]
OpenFace-r-e
eigenvalues of B = [1.]
OpenFace-g-e


In [5]:
# For every distance column, fit a one-feature classifier that separates
# same-identity from different-identity pairs, record per pair whether the
# prediction was correct (1.0 / 0.0, NaN where the distance is not finite),
# and store the result next to the pair metadata under 'pair_predictions_openface'.
# NOTE(review): each classifier is scored on the same pairs it was fitted on.
#from sklearn.neighbors import NearestCentroid
#clf = NearestCentroid()
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
issame = frm._df.issame
pred_df = pd.DataFrame(np.nan, index=dist_df.index, columns=dist_df.columns)
for col in dist_df.columns:
    sel = np.isfinite(dist_df[col])
    if not sel.any():
        # No usable distance in this column: leave it all-NaN instead of
        # letting fit() fail on an empty sample.
        continue
    # Local names only: the original assigned to `X` and `y` here, which
    # replaced the notebook-wide feature matrix and broke re-running earlier cells.
    pair_dist = dist_df.loc[sel, [col]].values
    pair_same = issame[sel]
    clf.fit(pair_dist, pair_same)
    pred_df.loc[sel, col] = (clf.predict(pair_dist) == pair_same.values).astype(float)

# `axis` and `key` are passed by keyword: the positional forms are removed /
# deprecated in current pandas.
pred_df = pd.concat([frm._df, pred_df], axis=1)
pred_df.to_hdf(mystore, key='pair_predictions_openface')