In [1]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
from os import listdir
from os.path import join
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import matplotlib._color_data as mcd

In [2]:
# Folder with the stored embeddings; the alphabetically sorted file listing
# determines the order in which the files are processed.
emb_dir = './embeddings/radius_2.0'
emb_list = sorted(listdir(emb_dir))

In [3]:
# Collect every embedding vector together with the label of the file it came from.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, which can execute
# code while loading -- only use this with embedding files from a trusted source.
vectors, labels = [], []
for label in emb_list:
    loaded = np.load(join(emb_dir, label), allow_pickle=True)

    # Label = file name without its extension (no assumption about the extension length).
    file_label = label.rsplit('.', 1)[0]

    for e in loaded:
        # presumably each entry is a torch tensor with a leading batch axis of size 1 -- TODO confirm
        vectors.append(e.detach().numpy()[0])
        labels.append(file_label)

# Stack all vectors into one 2-D array (one row per embedding).
x = np.asarray(vectors)

In [4]:
# Standardize every feature (column) to zero mean and unit variance
scaler = StandardScaler()
x_norm = scaler.fit_transform(x)

# Print global mean and standard deviation before and after scaling
for caption, data in (('Untouched:\t ', x), ('Normalized:\t ', x_norm)):
    print(caption, np.mean(data), '\t', np.std(data))
x_norm.shape

Untouched:	  0.50067055 	 0.073979214
Normalized:	  -1.0525927e-08 	 1.0


(3760, 1280)

In [5]:

# Project the standardized embeddings onto their first three principal components.
# random_state is fixed because the default 'auto' solver may use a randomized SVD
# for inputs of this size; without a seed the components can differ between runs.
pca = PCA(n_components=3, random_state=0)
components_ear = pca.fit_transform(x_norm)

# One row per embedding, indexed by its label, one column per principal component.
component_names = ['Component {}'.format(k) for k in (1, 2, 3)]
ear_df = pd.DataFrame(data=components_ear, index=labels, columns=component_names)

ear_df.head()

Unnamed: 0,Component 1,Component 2,Component 3
alexander_bec,-2.897444,-8.432858,12.079668
alexander_bec,-7.278646,-5.762889,12.076149
alexander_bec,-4.21593,-11.224479,10.930253
alexander_bec,-6.226907,-5.715588,12.380111
alexander_bec,1.519285,-8.938177,2.30037


In [6]:
# Fraction of the total variance captured by each component and by all of them together
variance_ratios = pca.explained_variance_ratio_
print('Explained variation per principal component: {}'.format(variance_ratios))
print('Total variance explained: {}'.format(sum(variance_ratios)))


Explained variation per principal component: [0.42371824 0.29322845 0.2630762 ]
Total variance explained: 0.9800228774547577


In [7]:
# Only for creating colors
ear_df['label'] = pd.Categorical(labels)
# my_color = ear_df['label'].cat.codes
# ear_df = ear_df.drop('persons', 1)

In [8]:
# Alphabetically sorted list of all distinct labels (persons)
unique = sorted(set(labels))

# Map every real label to an anonymous "Proband_<n>" name (1-based, alphabetical order)
pseudonyms = {proband: "Proband_" + str(i + 1) for i, proband in enumerate(unique)}

# Cluster center per person: mean of that person's rows along each principal component.
# The mean is taken over the rows actually present instead of dividing by a fixed
# sample count, so persons with more or fewer samples are handled correctly.
component_columns = ['Component 1', 'Component 2', 'Component 3']
cluster_center = dict()
for person in unique:
    # A boolean mask always yields a DataFrame, even if the person has a single row
    person_rows = ear_df.loc[ear_df.index == person, component_columns]
    cluster_center[person] = tuple(float(m) for m in person_rows.mean())

In [9]:
# List which pseudonym was assigned to which label
# NOTE(review): this output pairs real names with pseudonyms -- clear it before sharing
for real_name, alias in pseudonyms.items():
    print(real_name, '\t--\t', alias)

alexander_bec 	--	 Proband_1
alina_sch 	--	 Proband_2
alissa_buh 	--	 Proband_3
amanda_dab 	--	 Proband_4
anna_kab 	--	 Proband_5
anni_qua 	--	 Proband_6
beatrix_mah 	--	 Proband_7
clara_pau 	--	 Proband_8
clemens_blu 	--	 Proband_9
collin_sch 	--	 Proband_10
david_fau 	--	 Proband_11
falco_len 	--	 Proband_12
felix_mec 	--	 Proband_13
gregor_spi 	--	 Proband_14
hammam_als 	--	 Proband_15
janna_qua 	--	 Proband_16
janole_pen 	--	 Proband_17
jesse_kru 	--	 Proband_18
johannes_wie 	--	 Proband_19
jule_dre 	--	 Proband_20
julia_fis 	--	 Proband_21
konrad_von 	--	 Proband_22
lars_fin 	--	 Proband_23
linus_fal 	--	 Proband_24
lynn_man 	--	 Proband_25
maike_her 	--	 Proband_26
malte_gas 	--	 Proband_27
marcel_nim 	--	 Proband_28
marcus_jue 	--	 Proband_29
marejke_wen 	--	 Proband_30
marina_fri 	--	 Proband_31
marina_han 	--	 Proband_32
matilda_kni 	--	 Proband_33
meiko_pri 	--	 Proband_34
mila_wol 	--	 Proband_35
mohammed_muh 	--	 Proband_36
moritz_bor 	--	 Proband_37
moritz_mei 	--	 Proband

In [None]:
## DEFINE X- and Y- AXIS
x_component = 1
y_component = 3
assert 1 <= x_component <= 3 and 1 <= y_component <= 3

# SWITCH: True shows pseudonyms in annotations and legend, False shows the real labels
use_pseudonyms = True

x_column = 'Component ' + str(x_component)
y_column = 'Component ' + str(y_component)

# Plot preparations
fig = plt.figure(figsize=(30, 30))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component ' + str(x_component), fontsize=15)
ax.set_ylabel('Principal Component ' + str(y_component), fontsize=15)
ax.set_title('PCA of ear embeddings - 2 of three dimensions ', fontsize=20)

targets = unique
colors = list(mcd.XKCD_COLORS.values())
for target, color in zip(targets, colors):
    indicesToKeep = ear_df['label'] == target
    display_name = pseudonyms[target] if use_pseudonyms else target

    # The label is attached to the scatter itself so the legend entry can never
    # get out of step with the plotted group.
    ax.scatter(ear_df.loc[indicesToKeep, x_column],
               ear_df.loc[indicesToKeep, y_column],
               color=color,
               s=15,
               alpha=0.6,
               label=display_name)

    # The annotation text is passed positionally: its keyword was renamed from
    # `s` to `text` in Matplotlib 3.3, so the positional form works in all versions.
    center = cluster_center[target]
    ax.annotate(display_name,
                xy=(center[x_component - 1], center[y_component - 1]),
                textcoords='data')

ax.legend()
ax.grid()

In [10]:
def sort_dict_by_component(dictionary, component, reverse=False):
    '''Rank the entries of a cluster center dictionary along one component.

    Arguments
    ----------
    dictionary: mapping of label -> sequence of component values
    component: 1, 2 or 3, selecting which of the 3 dimensions to rank by
    reverse: False (default) gives ascending order, True descending

    Returns
    ----------
    A list of (label, value) tuples, where value is the entry's value
    for the selected component, ordered by that value.
    '''

    assert 1 <= component <= 3
    # translate the 1-based component number into a 0-based position
    position = component - 1

    def _selected_value(item):
        _, values = item
        return values[position]

    ranked = list(dictionary.items())
    ranked.sort(key=_selected_value, reverse=reverse)

    return [(label, values[position]) for label, values in ranked]

In [11]:
## Set component 1, 2 or 3
c = 3

sort_values = sort_dict_by_component(cluster_center, c)
print('These are the values of the component ', c, '\n')
for l,v in sort_values:
    # SWITCH respectively if pseudonyms are wanted
    print('{:>20} : {:<}'.format(pseudonyms[l],v))
    # print('{:>20} : {:<}'.format(l,v))


These are the values of the component  3 

          Proband_33 : -33.52497308254242
          Proband_14 : -32.77197790145874
           Proband_3 : -29.932307505607604
           Proband_8 : -25.84393093585968
          Proband_39 : -22.9295375585556
           Proband_4 : -21.152161049842835
          Proband_46 : -19.94115219116211
          Proband_43 : -17.71762643456459
          Proband_30 : -16.83368911743164
          Proband_18 : -16.832468223571777
          Proband_20 : -15.230828738212585
           Proband_2 : -13.690478479862213
          Proband_34 : -12.422663629055023
          Proband_37 : -11.864496368169785
          Proband_16 : -10.628432929515839
          Proband_12 : -10.58404398560524
           Proband_7 : -7.377546975761652
          Proband_21 : -6.1434707075357435
          Proband_47 : -5.9656054601073265
          Proband_15 : -5.744850511848926
           Proband_6 : -4.964224353432655
          Proband_25 : -4.415202879905701
          Proband_26 : -

In [12]:
# Center values along the chosen component, in ranked order
val = [center for _, center in sort_values]

# Absolute gaps between neighbouring centers, smallest first
steps = sorted(abs(left - right) for left, right in zip(val, val[1:]))

print('Average step size\t: ', np.mean(steps))
print('Median of step size\t: ', np.median(steps))
# Same gaps truncated to whole numbers
steps_int = list(map(int, steps))
print(steps)
print(steps_int)

Average step size	:  1.5984138887861499
Median of step size	:  1.2225425634533162
[0.0012208938598625707, 0.03706762194633484, 0.04438894391059911, 0.17786524742841703, 0.20579653388704156, 0.22075494825840014, 0.2502613722346725, 0.3162907479098067, 0.3459497332572923, 0.3943356394767754, 0.40778121501207387, 0.5490214735269543, 0.5581672608852379, 0.5748540547210723, 0.5910240173339858, 0.7529951810836764, 0.780626158416271, 0.7953636765480052, 0.8839373171329505, 0.8918156084895603, 1.0301668541040272, 1.1975933980895206, 1.2110088586807244, 1.234076268225908, 1.2360634386539466, 1.2403188943862915, 1.2678148508071896, 1.3474671229720112, 1.5403502583503723, 1.6016394853591915, 1.7596863351762293, 1.7762688875198371, 1.7773765087127664, 2.223525756597521, 2.68499091863632, 2.8396703958511367, 2.91439337730408, 2.998966264724732, 3.0981374621391318, 3.206497009843588, 3.343193006515502, 3.9687279224395766, 4.088376569747922, 4.1577679517213255, 4.5409814938902855, 6.462461948394772]


In [27]:
from PIL import Image
from PIL import ImageStat
import PIL
def brightness(im_file):
    '''Return the mean pixel level of the first band of an image file.

    Arguments
    ----------
    im_file: path of the image file to open

    Returns
    ----------
    The first entry of the per-band mean reported by PIL's ImageStat.
    '''
    # The context manager closes the underlying file as soon as the statistics are read.
    with Image.open(im_file) as im:
        stat = ImageStat.Stat(im)
        return stat.mean[0]

brightness('../samples/fusion2040_gray/konrad_von.png')
# def add(a, b):
#     return a+b

# hist_positive = list(np.zeros(255))

# d = Image.open('../samples/fusion2040_gray/konrad_von.png')
# hist_positive = list( map(add, hist_positive, d.histogram()) )
# hist_positive

115.08344720496895

In [23]:
# Extract histograms of certain images
from PIL import Image
import PIL
from PIL import ImageStat

def brightness_and_rms(im_file):
    '''Return the mean and the RMS pixel level of the first band of an image file.

    Arguments
    ----------
    im_file: path of the image file to open

    Returns
    ----------
    A tuple (mean, rms) holding the first entries of the per-band mean and
    root-mean-square reported by PIL's ImageStat.
    '''
    # The context manager closes the underlying file as soon as the statistics are read.
    with Image.open(im_file) as im:
        stat = ImageStat.Stat(im)
        return stat.mean[0], stat.rms[0]

path_to_grayscale = '../samples/fusion2040_gray'

# SET COMPONENT 1,2 or 3
c = 3
# Centers within [-val_thresh, val_thresh] belong to neither group
val_thresh = 10


def _ratio_or_nan(total, count):
    '''Return total / count, or NaN when count is zero (empty group).'''
    return total / count if count else float('nan')


# Read every image only once: person -> (mean brightness, RMS brightness)
image_stats = {
    person: brightness_and_rms(join(path_to_grayscale, person + '.png'))
    for person in cluster_center
}

av_bright_negative, rms_bright_neg = 0, 0
neg_count = 0
av_bright_positive, rms_bright_pos = 0, 0
pos_count = 0

# cluster_center maps each person to the center values of their components.
# Compare the brightness of the grayscale images of persons whose center lies
# below -val_thresh with those whose center lies above +val_thresh.
for person in cluster_center:
    val = cluster_center[person][c-1]
    val_bright, val_bright_rms = image_stats[person]
    if val < -val_thresh:
        av_bright_negative += val_bright
        rms_bright_neg += val_bright_rms
        neg_count += 1
    elif val > val_thresh:
        av_bright_positive += val_bright
        rms_bright_pos += val_bright_rms
        pos_count += 1

# Averages are reported as NaN instead of raising ZeroDivisionError for an empty group
print(neg_count, ' "Negative" Images')
print('Negative Value Images Mean brightness\t: ', _ratio_or_nan(av_bright_negative, neg_count))
print('Negative Value Images RMS brightness\t: ', _ratio_or_nan(rms_bright_neg, neg_count), '\n')
print(pos_count, ' "Positive" Images')
print('Positive Value Images Mean brightness\t: ', _ratio_or_nan(av_bright_positive, pos_count))
print('Positive Value Images RMS brightness\t: ', _ratio_or_nan(rms_bright_pos, pos_count))

# Brightness of each person's image, ordered by the center value of the component
# chosen in THIS cell (recomputed here so the ranking cannot be stale).
ranked_brightness = [image_stats[person][0]
                     for person, _ in sort_dict_by_component(cluster_center, c)]
for val_bright in ranked_brightness:
    print(val_bright)

# Count how often the brightness rises from one ranked image to the next;
# comparing neighbouring pairs needs no sentinel start value.
count = sum(1 for previous, current in zip(ranked_brightness, ranked_brightness[1:])
            if current > previous)
print('\n', 'Amount of direct increments\t: ', count)

16  "Negative" Images
Negative Value Images Mean brightness	:  102.13541052018635
Negative Value Images RMS brightness	:  108.73903037322536 

15  "Positive" Images
Positive Value Images Mean brightness	:  106.85719358178054
Positive Value Images RMS brightness	:  112.02845810404907
73.76187888198758
94.4145652173913
64.28358695652175
79.5810248447205
105.5138354037267
111.57551242236025
113.11277950310559
136.88402173913045
93.17798136645963
101.28855590062112
100.7179347826087
96.1641149068323
125.93472049689441
103.83360248447205
94.68857142857142
139.23388198757763
104.8139596273292
99.35299689440994
129.7648602484472
105.92031055900621
113.42054347826087
109.9758850931677
105.9346894409938
132.34391304347827
99.3858850931677
115.65122670807453
101.30777950310559
106.70130434782608
103.5724844720497
92.67478260869565
113.94495341614906
115.08344720496895
92.48563664596273
103.44155279503106
108.05318322981367
99.32218944099378
98.695
101.22108695652175
111.1398602484472
104.7986335