# Semantic space exploration
This notebook follows the approach of the paper [Interpreting the Latent Space of GANs for Semantic Face Editing](https://openaccess.thecvf.com/content_CVPR_2020/papers/Shen_Interpreting_the_Latent_Space_of_GANs_for_Semantic_Face_Editing_CVPR_2020_paper.pdf).

In [13]:
import numpy as np
import pandas as pd
import os
from sklearn.svm import LinearSVC
from glob import glob
from pprint import pprint

# Root folder of the NoKDB dataset; the two CSV tables are read from it.
nokdb_root = "./src/dataset/nokdb"

# Resolve both table paths first, then load them in the same order as before
# (persons table, then images table).
persons_csv = f"{nokdb_root}/nokdb-persons.csv"
images_csv = f"{nokdb_root}/nokdb-images.csv"
persons_df = pd.read_csv(persons_csv)
images_df = pd.read_csv(images_csv)

In [14]:
# Mean of the "age" column over all image rows; the loading loop further down
# uses it as the threshold between the two age classes.
age_column = images_df["age"]
mean_age = age_column.mean()
# Bare expression so the notebook displays the value.
mean_age

31.689972899728996

In [17]:
# Build the training data: one flattened latent vector per .npz file, plus a
# binary age label and a binary sex label looked up in the NoKDB CSV tables.
X = []
y_age = []
y_sex = []

for npz in glob(f"{nokdb_root}/**/*.npz", recursive=True):
    # Take the path apart with os.path instead of splitting on a literal "/",
    # so the parsing also works where glob returns another separator.
    folder_name = os.path.basename(os.path.dirname(npz))
    if folder_name == "norm":
        # Files that sit in a folder named "norm" are not samples; skip them.
        continue

    # The path is expected to end in <pid>/<iid>.npz; int() raises ValueError
    # if either part is not an integer.
    file_stem = os.path.splitext(os.path.basename(npz))[0]
    pid, iid = int(folder_name), int(file_stem)

    # Use the archive as a context manager so its file handle is closed
    # right away instead of whenever the object is garbage-collected.
    with np.load(npz) as archive:
        w = archive['w'].flatten()

    # Latents containing NaN/Inf are sanitised with nan_to_num rather than
    # rejected, so such samples are kept in the data set.
    if not np.isfinite(w).all():
        w = np.nan_to_num(w)

    # .item() raises ValueError unless exactly one row matches the key.
    age = images_df[(images_df.pid == pid) & (images_df.iid == iid)].age.item()
    sex = persons_df[(persons_df.pid == pid)].sex.item()

    # Age label: 0 when age <= mean_age, otherwise 1.
    # NOTE(review): a missing (NaN) age would compare False and be labelled 1
    # -- confirm the age column has no missing values.
    age = 0 if age <= mean_age else 1
    # Sex label: 0 for "M", 1 for any other value.
    sex = 0 if sex == "M" else 1

    X.append(w)
    y_age.append(age)
    y_sex.append(sex)

# One row per sample; np.stack requires all latent vectors to share one length.
X = np.stack(X)
y_age = np.array(y_age)
y_sex = np.array(y_sex)

print(X.shape)

732
732
732
732
732
732
732
732
732
732
732
1085
1085
1085
1085
1085
1085
628
628
628
628
628
1097
1097
1097
1097
1097
1097
382
382
382
382
382
382
382
382
382
382
335
335
335
335
335
335
335
335
335
425
880
880
203
203
203
203
203
834
834
834
834
834
754
754
754
754
754
754
754
754
754
754
80
80
80
80
80
498
498
498
498
498
498
498
330
330
330
330
330
330
65
65
65
76
76
76
76
76
76
76
76
76
911
911
911
911
911
911
78
78
78
78
78
78
78
78
78
78
78
78
715
715
715
715
715
715
715
529
529
529
529
529
529
529
529
529
529
529
529
529
529
529
529
388
388
388
388
388
388
590
590
590
590
590
590
590
590
590
666
448
448
448
448
448
520
520
520
520
520
520
520
520
520
520
520
520
896
896
896
896
513
513
970
970
970
970
970
970
970
970
970
316
316
316
316
316
316
744
744
744
848
848
848
848
848
429
429
429
429
429
429
429
429
429
429
429
429
429
429
429
1063
1063
838
751
751
751
417
670
670
670
670
670
670
670
670
670
670
220
89
89
89
89
89
89
89
89
89
89
89
947
947
947
947
947
947
947
947
947
94

In [18]:
# Random 80/20 train/test split over the rows of X.
# The sample count is taken from X instead of being hard-coded, so the split
# stays correct when the number of loaded latent vectors changes.
n_samples = len(X)

# NOTE(review): the shuffle is unseeded, so the split differs between runs;
# seed NumPy's RNG beforehand if reproducible results are needed.
idx = np.arange(n_samples)
np.random.shuffle(idx)

split_i = int(n_samples * 0.8)

train_idx = idx[:split_i]  # 80%
test_idx = idx[split_i:]   # 20%

In [19]:
# Linear SVM for the binary age label, fitted on the training rows only.
svm_age = LinearSVC(max_iter=20000)
age_train_features = X[train_idx, :]
age_train_labels = y_age[train_idx]
# fit() is kept as the cell's last expression so the notebook still displays
# the fitted estimator.
svm_age.fit(age_train_features, age_train_labels)



In [20]:
# Linear SVM for the binary sex label, fitted on the training rows only.
svm_sex = LinearSVC(max_iter=10000)
sex_train_features = X[train_idx, :]
sex_train_labels = y_sex[train_idx]
# fit() is kept as the cell's last expression so the notebook still displays
# the fitted estimator.
svm_sex.fit(sex_train_features, sex_train_labels)

In [23]:
# Mean accuracy of the age classifier, first on the training rows and then on
# the held-out test rows.
train_age_acc, test_age_acc = (
    svm_age.score(X[rows, :], y_age[rows]) for rows in (train_idx, test_idx)
)

# Bare tuple expression so the notebook displays both accuracies.
"age acc", train_age_acc, test_age_acc

('age acc', 0.7493224932249323, 0.6707317073170732)

In [24]:
# Mean accuracy of the sex classifier, first on the training rows and then on
# the held-out test rows.
train_sex_acc, test_sex_acc = (
    svm_sex.score(X[rows, :], y_sex[rows]) for rows in (train_idx, test_idx)
)

# Bare tuple expression so the notebook displays both accuracies.
"sex acc", train_sex_acc, test_sex_acc

('sex acc', 0.9915311653116531, 0.9065040650406504)