# Vorgehen:
- produziere 10,000 images und extrahiere das zielattribut
- generiere buckets in denen das zielattribut von 0-0.1, 0.1-0.2, ... 0.9-1.0 ist
- iteriere über alle bilder in einem bucket, verändere sie mit dem attributsvektor, und speichere die prediction-difference ab (für attribute <0.5 attributsvektor addieren sonst subtrahieren


# Conclusion
- der Ausgangspunkt hat einen signifikaten Einfluss auf die Wirksamkeit des Attributvektors
- wenn latent vektoren miteinander verglichen werden ist es daher wichtig gleiche Ausgangs- und geshiftete Bilder miteinander zu vergleichen
- Bilder mit einen Ausgangspunkt nahe der 0.0 bzw. 1.0 haben eine höhere Standardabweichung

In [3]:
from os import listdir
from os.path import isfile, join
from natsort import natsorted
import sys
stylegan2_path = '../stylegan2-ada-pytorch'
sys.path.append(stylegan2_path)

import dnnlib
import click
import legacy
import torch
import numpy as np
from PIL import Image
from scipy.spatial.distance import cosine
import torch.nn.functional as F
import pandas as pd
import pickle
import random

n_regressor_predictions = 40 # the regressor is pretrained on CelebA and predicts 40 face attributes
device = 'cuda:0'
batch_size = 1
truncation_psi = 0.5
noise_mode = 'const'
network_pkl =  '../pretrained_models/ffhq.pkl'

with dnnlib.util.open_url(network_pkl) as f:
    G = legacy.load_network_pkl(f)['G_ema'].to(device) # stylegan2-generator

label = torch.zeros([1, G.c_dim], device=device)

def initialize_model():
    from facenet_pytorch import MTCNN, InceptionResnetV1
    resnet = InceptionResnetV1(pretrained='vggface2').to(device).eval()
    return resnet

face_rec = initialize_model()
file_to_read = open("../pretrained_models/resnet_092_all_attr_5_epochs.pkl", "rb")
regressor = pickle.load(file_to_read)
file_to_read.close()
regressor.eval()
sigmoid = torch.nn.Sigmoid().to(device)
attr_vec_ours_bs8_smile = torch.load("../attribute_vectors/l_vec_our_old_approach_smile_w.pt", map_location=device)


In [4]:
'''
create 10000 images and save the seed and smiling prediction in a dataframe
'''
attr_index = 31
df = pd.DataFrame(columns=["seed", "attr_pred"])
sigmoid = torch.nn.Sigmoid().to(device)

for i in range(10000):
    z = torch.from_numpy(np.random.RandomState(i).randn(1, 512)).to(device)
    w = G.mapping(z,label, truncation_psi=truncation_psi)
    img = G.synthesis(w, noise_mode=noise_mode)
    img = F.interpolate(img, size=256)
    pred = sigmoid(regressor(img)).detach().cpu().numpy()[0,attr_index]
    new_row = {"seed": i, "attr_pred": pred}
    df = df.append(new_row, ignore_index=True)

In [7]:
'''
create dict with ranges 0.0-0.1, 0.1-0.2, ... 0.9-0.1 as keys
for each prediction lying in the corresponding range, add the seed to the dict
'''
hist_indices = {}
df1 = df.loc[df["attr_pred"] < 0.1]
hist_indices["0.0"] = df1["seed"].values
smallest_bucket = df1.shape[0]
for i in range(1,10):
    df1 = df.loc[df["attr_pred"] > 0.1*i]
    df1 = df1.loc[df["attr_pred"] < (0.1*i + 0.1)]
    print(df1.shape)
    if df1.shape[0] < smallest_bucket:
        smallest_bucket = df1.shape[0]
    hist_indices[str(0.1*i)] = df1["seed"].values

(394, 2)
(276, 2)
(225, 2)
(243, 2)
(245, 2)
(258, 2)
(356, 2)
(507, 2)
(6418, 2)


# resize the buckets randomly based on smallest bucket

In [11]:
hist_indices_same_bucketsize = {}
for key in hist_indices.keys():
    hist_indices_same_bucketsize[key] = random.sample(list(hist_indices[key]), smallest_bucket)

In [12]:
for key in hist_indices.keys():
    print(len(hist_indices_same_bucketsize[key]))

225
225
225
225
225
225
225
225
225
225


# Negative direction

In [13]:
for key in hist_indices.keys():
    pred_diff_list = []
    for seed in hist_indices_same_bucketsize[key]:
        z = torch.from_numpy(np.random.RandomState(int(seed)).randn(1, 512)).to(device)
        w = G.mapping(z,label, truncation_psi=truncation_psi)
        img_orig = G.synthesis(w, noise_mode=noise_mode)
        img_orig = F.interpolate(img_orig, size=256)
        pred_orig = sigmoid(regressor(img_orig)).detach().cpu().numpy()[0,attr_index]
        
        img_shifted = G.synthesis(w-attr_vec_ours_bs8_smile*0.5, noise_mode=noise_mode)
        img_shifted = F.interpolate(img_shifted, size=256)
        pred_shifted = sigmoid(regressor(img_shifted)).detach().cpu().numpy()[0,attr_index]
        pred_diff_list.append((pred_shifted - pred_orig))
    pred_array = np.array(pred_diff_list)
    print(key)
    print("min: ", pred_array.min())
    print("max: ", pred_array.max())
    print("mean: ", pred_array.mean())
    print("std: ", pred_array.std())
    print("-----------------------------")

0.0
min:  -0.094770476
max:  -0.0006498359
mean:  -0.030379059
std:  0.023912467
-----------------------------
0.1
min:  -0.19381678
max:  -0.07263972
mean:  -0.12602045
std:  0.028199092
-----------------------------
0.2
min:  -0.28738064
max:  -0.1216307
mean:  -0.22093503
std:  0.031071706
-----------------------------
0.30000000000000004
min:  -0.3887313
max:  -0.198825
mean:  -0.31653428
std:  0.035748452
-----------------------------
0.4
min:  -0.4832864
max:  -0.28887296
mean:  -0.40334848
std:  0.041945927
-----------------------------
0.5
min:  -0.5843334
max:  -0.33367547
mean:  -0.49347347
std:  0.04746863
-----------------------------
0.6000000000000001
min:  -0.68123406
max:  -0.3251984
mean:  -0.57301694
std:  0.063190326
-----------------------------
0.7000000000000001
min:  -0.7847696
max:  -0.33626977
mean:  -0.6521231
std:  0.08167345
-----------------------------
0.8
min:  -0.8579134
max:  -0.2658885
mean:  -0.70036405
std:  0.12245361
-----------------------------
0

# positive direction

In [14]:
for key in hist_indices.keys():
    pred_diff_list = []
    for seed in hist_indices_same_bucketsize[key]:
        z = torch.from_numpy(np.random.RandomState(int(seed)).randn(1, 512)).to(device)
        w = G.mapping(z,label, truncation_psi=truncation_psi)
        img_orig = G.synthesis(w, noise_mode=noise_mode)
        img_orig = F.interpolate(img_orig, size=256)
        pred_orig = sigmoid(regressor(img_orig)).detach().cpu().numpy()[0,attr_index]
        
        img_shifted = G.synthesis(w+attr_vec_ours_bs8_smile*0.5, noise_mode=noise_mode)
        img_shifted = F.interpolate(img_shifted, size=256)
        pred_shifted = sigmoid(regressor(img_shifted)).detach().cpu().numpy()[0,attr_index]
        pred_diff_list.append((pred_shifted - pred_orig))
    pred_array = np.array(pred_diff_list)
    print(key)
    print("min: ", pred_array.min())
    print("max: ", pred_array.max())
    print("mean: ", pred_array.mean())
    print("std: ", pred_array.std())
    print("-----------------------------")

0.0
min:  0.004406931
max:  0.9445577
mean:  0.46950766
std:  0.30198595
-----------------------------
0.1
min:  0.18348889
max:  0.89099896
mean:  0.74519557
std:  0.11797269
-----------------------------
0.2
min:  0.34053227
max:  0.793743
mean:  0.6875937
std:  0.08148353
-----------------------------
0.30000000000000004
min:  0.39869627
max:  0.6929902
mean:  0.61081654
std:  0.053080708
-----------------------------
0.4
min:  0.2478841
max:  0.5971608
mean:  0.52018034
std:  0.04760139
-----------------------------
0.5
min:  0.25236994
max:  0.49710792
mean:  0.43459255
std:  0.037518233
-----------------------------
0.6000000000000001
min:  0.24069077
max:  0.39814568
mean:  0.33639604
std:  0.03161344
-----------------------------
0.7000000000000001
min:  0.13123715
max:  0.29700238
mean:  0.2396993
std:  0.03014289
-----------------------------
0.8
min:  0.07303035
max:  0.1980195
mean:  0.14196308
std:  0.030491414
-----------------------------
0.9
min:  -0.0002591014
max:  0.