In [153]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf

import os

from sklearn.neighbors import NearestNeighbors

from sklearn.manifold import TSNE
import plotly.express as px
from cuda import cuda



In [None]:
# !pip install -U deepface

In [154]:
from deepface import DeepFace

In [155]:
folder_path = '../raw_data/output_imdb_top100_trans' # provide your path here

file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, file))]
for n, i in enumerate(file_list): print(n, os.path.splitext(os.path.basename(i))[0])

0 faye_dunaway_face_aligned
1 blake_lively_face_aligned
2 vera_farmiga_face_aligned
3 gregory_peck_face_aligned
4 alyson_hannigan_face_aligned
5 elizabeth_banks_face_aligned
6 christopher_walken_face_aligned
7 rooney_mara_face_aligned
8 anne_hathaway_face_aligned
9 nicole_kidman_face_aligned
10 sidney_poitier_face_aligned
11 alan_arkin_face_aligned
12 marlon_brando_face_aligned
13 robert_redford_face_aligned
14 ben_kingsley_face_aligned
15 burt_lancaster_face_aligned
16 cate_blanchett_face_aligned
17 bill_murray_face_aligned
18 tom_cruise_face_aligned
19 gary_oldman_face_aligned
20 rosamund_pike_face_aligned
21 will_smith_face_aligned
22 jim_carrey_face_aligned
23 tom_hanks_face_aligned
24 jeff_bridges_face_aligned
25 meryl_streep_face_aligned
26 emma_stone_face_aligned
27 charles_chaplin_face_aligned
28 daniel_day-lewis_face_aligned
29 geoffrey_rush_face_aligned
30 richard_burton_face_aligned
31 natalie_portman_face_aligned
32 emily_blunt_face_aligned
33 sean_penn_face_aligned
34 pete

There are several models available in DeepFace:
- VGG-Face
- FaceNet (128D, 512D)
- OpenFace
- DeepID
- DeepFace
- ArcFace

They are different in performance

In [134]:
deepface_vectors = np.array([np.array(DeepFace.represent(i, model_name='DeepFace', enforce_detection=False)[0]['embedding']) for i in file_list])

In [135]:
print(tf.__version__)
print(tf.config.list_physical_devices())

2.12.0
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [136]:
knn_deepface = NearestNeighbors(n_neighbors=len(deepface_vectors), n_jobs=-1)

knn_deepface.fit(deepface_vectors)

deepface_distances, deepface_indices = knn_deepface.kneighbors(deepface_vectors)

In [137]:
deepface_df = pd.DataFrame([os.path.splitext(os.path.basename(i))[0] for i in file_list])\
        .rename(columns={0:'name'})
deepface_df = pd.concat([deepface_df, pd.DataFrame(deepface_indices[:,1:])], axis=1)#.rename(columns={0:'vector'})

In [138]:
deepface_df[deepface_df.columns[1:]] = deepface_df[deepface_df.columns[1:]].applymap(lambda i: deepface_df.name.iloc[i])
deepface_df.head()

Unnamed: 0,name,0,1,2,3,4,5,6,7,8,...,190,191,192,193,194,195,196,197,198,199
0,ellen_burstyn,colin_firth,michael_douglas,kirsten_dunst,michelle_pfeiffer,maggie_gyllenhaal,diane_kruger,matt_damon,saoirse_ronan,cate_blanchett,...,adele_exarchopoulos,charles_laughton,emma_stone,marlene_dietrich,joaquin_phoenix,milla_jovovich,virginia_cherrill,liv_ullmann,felicity_jones,kim_novak
1,julia_roberts,jennifer_jason_leigh,al_pacino,demi_moore,julia_louis-dreyfus,robert_de_niro,alan_arkin,ralph_fiennes,dustin_hoffman,robin_wright,...,shelley_duvall,laurence_olivier,joaquin_phoenix,marlene_dietrich,charles_laughton,liv_ullmann,virginia_cherrill,kim_novak,felicity_jones,milla_jovovich
2,meryl_streep,gene_hackman,martin_sheen,jeff_bridges,john_wayne,al_pacino,anthony_hopkins,hugh_jackman,tommy_lee_jones,julia_louis-dreyfus,...,orson_welles,bibi_andersson,virginia_cherrill,joaquin_phoenix,ingrid_bergman,liv_ullmann,laurence_olivier,kim_novak,felicity_jones,milla_jovovich
3,jack_lemmon,vera_farmiga,tommy_lee_jones,russell_crowe,amanda_plummer,gene_hackman,morgan_freeman,meryl_streep,jeff_bridges,peter_o'toole,...,buster_keaton,orson_welles,janet_leigh,felicity_jones,joaquin_phoenix,sean_connery,ingrid_bergman,kim_novak,bibi_andersson,laurence_olivier
4,jennifer_lawrence,scarlett_johansson,robin_wright,russell_crowe,benicio_del_toro,robert_duvall,blake_lively,morgan_freeman,sharon_stone,alicia_vikander,...,bette_davis,charles_laughton,adele_exarchopoulos,marlene_dietrich,joaquin_phoenix,virginia_cherrill,kim_novak,milla_jovovich,felicity_jones,liv_ullmann


In [139]:
overall_distances_df = pd.concat([pd.DataFrame([os.path.splitext(os.path.basename(i))[0] for i in file_list])\
        .rename(columns={0:'name'}), pd.DataFrame(deepface_distances[:,1:])], axis=1).set_index('name').T

overall_indices_df = pd.concat([pd.DataFrame([os.path.splitext(os.path.basename(i))[0] for i in file_list])\
        .rename(columns={0:'name'}), pd.DataFrame(deepface_indices[:,1:])], axis=1).set_index('name').T

overall_scoring_df = overall_distances_df.copy()

for column in overall_scoring_df.columns:
    overall_scoring_df[column] = overall_scoring_df[column].apply(lambda x: round((x- overall_scoring_df[column].max())/(overall_scoring_df[column].min()-overall_scoring_df[column].max())*100, 1) - 1 if x >1.01 else x).apply(abs)

overall_scoring_df.head(10)
overall_scoring_df.to_csv("../raw_data/scoring.csv")

In [140]:
names_df = deepface_df.set_index('name').T
names_df.head()
names_df.to_csv("../raw_data/names.csv")

In [141]:
hidden_celebrity = 'saoirse_ronan'

In [142]:
def celeb_and_score_query(celebrity, hidden_celebrity=hidden_celebrity, names_df=names_df, score_df=overall_scoring_df):
    celeb_rank = names_df[hidden_celebrity][names_df[hidden_celebrity] == celebrity].index
    celeb_score = overall_scoring_df[hidden_celebrity].loc[celeb_rank].iloc[0]
    return celebrity, celeb_score
    

In [143]:
celeb_and_score_query('hugh_jackman')

('hugh_jackman', 65.7)

In [144]:
names_df.head(2)

name,ellen_burstyn,julia_roberts,meryl_streep,jack_lemmon,jennifer_lawrence,kate_mara,adele_exarchopoulos,marilyn_monroe,michael_caine,spencer_tracy,...,eva_green,emily_blunt,hugh_jackman,robert_duvall,clark_gable,octavia_spencer,joan_allen,bill_murray,jeremy_irons,edward_g._robinson
0,colin_firth,jennifer_jason_leigh,gene_hackman,vera_farmiga,scarlett_johansson,jennifer_connelly,carey_mulligan,james_cagney,peter_o'toole,anthony_hopkins,...,brie_larson,jennifer_jason_leigh,al_pacino,peter_o'toole,hugh_jackman,russell_crowe,russell_crowe,gene_hackman,russell_crowe,hugh_jackman
1,michael_douglas,al_pacino,martin_sheen,tommy_lee_jones,robin_wright,emma_watson,jim_carrey,gloria_swanson,russell_crowe,angelina_jolie,...,julie_delpy,amy_adams,robert_duvall,russell_crowe,jennifer_aniston,julia_louis-dreyfus,morgan_freeman,michael_douglas,tom_hanks,russell_crowe


In [145]:
tsne = TSNE(n_components=3, random_state=0, perplexity=5)
projections = tsne.fit_transform(deepface_vectors, )

plotting_df = pd.concat([deepface_df['name'], pd.DataFrame(projections)], axis=1)

fig = px.scatter_3d(
    plotting_df, x=0, y=1, z=2, hover_data='name'
)
fig.update_traces(marker_size=4)
fig.show()

In [None]:
#Try to apply it to your embedding dataset and let me know
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Calculate t-SNE embedding
tsne = TSNE(n_components=2, random_state=42)
XY_tsne = tsne.fit_transform(deepface_vectors)
XY_tsne

In [168]:
# select the vector by your choice
desired_vector = XY_tsne[0]
desired_vector[0]


-0.16557075

In [172]:
# Center the t-SNE space on the desired vector
XY_tsne_centered = XY_tsne - np.mean(XY_tsne, axis=0) + desired_vector

new_desired_vector = XY_tsne_centered[0]

plotting_df = pd.concat([deepface_df['name'], pd.DataFrame(XY_tsne_centered)], axis=1)
plotting_df.columns = ["name", "X","Y"]

In [173]:
plotting_df

Unnamed: 0,name,X,Y
0,ellen_burstyn,-0.537749,-10.858600
1,julia_roberts,1.421389,-5.354323
2,meryl_streep,-0.838155,-1.681599
3,jack_lemmon,-3.752321,-1.004141
4,jennifer_lawrence,3.292961,-7.510081
...,...,...,...
196,octavia_spencer,1.647058,-4.285516
197,joan_allen,-2.581752,-0.733440
198,bill_murray,-1.398767,-3.903929
199,jeremy_irons,0.636901,-1.977958


In [174]:


fig = px.scatter(x=plotting_df["X"], y=plotting_df["Y"], color_discrete_sequence=['blue'], hover_name=plotting_df["name"])
# fig = px.scatter(XY_tsne["X"], XY_tsne["Y"],  color_discrete_sequence=['blue'],hover_name=plotting_df["name"])


fig.add_annotation(x=new_desired_vector[0], y=new_desired_vector[1],
            text="hidden celebrity",
            bgcolor="red",
            showarrow=True,
            arrowhead=1)
fig.show()