**Understanding the latent space in Generative Adversarial Networks - Controlled human image generation and editing**

# Loading repository and libraries

In [1]:
# if running on google colab
import os
!git clone https://github.com/samanthabiegel/stylegan.git
os.chdir("stylegan")

Cloning into 'stylegan'...
remote: Enumerating objects: 156, done.[K
remote: Total 156 (delta 0), reused 0 (delta 0), pack-reused 156[K
Receiving objects: 100% (156/156), 2.96 MiB | 945.00 KiB/s, done.
Resolving deltas: 100% (55/55), done.


In [2]:
import PIL
from PIL import Image
import matplotlib.pyplot as plt
import pickle
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import os
import config
import gzip
from encoder.generator_model import Generator
import dnnlib
import dnnlib.tflib as tflib
import collections

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Embedding, Flatten, Activation
from keras.optimizers import SGD
from sklearn.linear_model import LogisticRegression, SGDClassifier

import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from IPython.display import display
!jupyter nbextension enable --py widgetsnbextension

Using TensorFlow backend.


Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


# Loading data

We load a pre-trained StyleGAN generator. This version was trained on the Flickr-Faces-HQ dataset at a resolution of 1024×1024.

In [3]:
# Initialise TensorFlow through dnnlib before the pickled networks are loaded.
dnnlib.tflib.init_tf()

# Google Drive location of the pre-trained StyleGAN pickle.
url = 'https://drive.google.com/uc?id=1AenmykaCbvZRMpM2mZj2QScW8eH5mWsW'
with dnnlib.util.open_url(url) as f:
    # NOTE(security): pickle.load can execute arbitrary code while unpickling;
    # only point `url` at a file you trust.
    _G, _D, Gs = pickle.load(f)
    # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run.
    # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run.
    # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot.
    
# Wrap the averaged generator for single-image generation with fixed noise.
# NOTE(review): the recorded output of this cell shows it failing on a CPU-only
# machine because encoder/generator_model.py pins a variable to /device:GPU:0,
# so a GPU runtime appears to be required.
generator = Generator(Gs, batch_size=1, randomize_noise=False)

Downloading https://drive.google.com/uc?id=1AenmykaCbvZRMpM2mZj2QScW8eH5mWsW .... done


InvalidArgumentError: Cannot assign a device for operation learnable_dlatents/read: node learnable_dlatents/read (defined at /Users/samanthabiegel/Downloads/stylegan/encoder/generator_model.py:15) was explicitly assigned to /device:GPU:0 but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0 ]. Make sure the device specification refers to a valid device. The requested device appears to be a GPU, but CUDA is not enabled.
	 [[learnable_dlatents/read]]

Load the training data set. It was created in the following way:

* qlatents = np.random.normal(size=(1, 512))
* dlatents = Gs_network.components.mapping.run(qlatents, None, minibatch_size=1, randomize_noise=False, structure='fixed')
* images = Gs_network.components.synthesis.run(dlatents, minibatch_size=1, randomize_noise=False, output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), structure='fixed')


So qlatent_data represents the random input noise and dlatent_data represents the images in the intermediate latent space W. Labels_data represents the training labels, and includes the facial attribute values for each image, as well as facial landmark coordinates and the facial bounding box.

In [4]:
# Google Drive location of the gzipped pickle with the training latents and labels.
LATENT_TRAINING_DATA = 'https://drive.google.com/uc?id=1xMM3AFq0r014IIhBLiMCjKJJvbhLUQ9t'
    
# config.cache_dir is passed as the download cache directory; the stream is
# gunzipped on the fly before unpickling.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with dnnlib.util.open_url(LATENT_TRAINING_DATA, cache_dir=config.cache_dir) as f:
    qlatent_data, dlatent_data, labels_data = pickle.load(gzip.GzipFile(fileobj=f))

Downloading https://drive.google.com/uc?id=1xMM3AFq0r014IIhBLiMCjKJJvbhLUQ9t .... done


Load the dictionary of directions I've trained previously. Later on, code is provided to obtain these directions, so they can easily be modified.

In [0]:
# Read the pre-trained direction dictionary from the working directory.
# The commented-out training cell further down stores one (18, 512) array per
# attribute; presumably this file has the same layout - TODO confirm.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open('directions_pretrained.p', 'rb') as handle:
    directions = pickle.load(handle)

# Data processing

Process the training labels data to get a facial attributes dictionary for each image.

In [6]:
def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with `sep`, e.g.
    {'hair': {'bald': 0.1}} becomes {'hair_bald': 0.1}. Values that are not
    mutable mappings (including lists) are copied over unchanged.

    Args:
        d: mapping to flatten.
        parent_key: prefix prepended to every key of `d` (empty for no prefix).
        sep: separator placed between a parent key and a child key.

    Returns:
        A new flat dict; `d` itself is not modified.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block does not depend on the
    # submodule having been loaded elsewhere.
    from collections.abc import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse with the extended prefix and merge the flattened children.
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
  
# Build one flat attribute dictionary per image. labels_data itself is left
# untouched: only the entries of the shallow copy are replaced.
attribute_dict = labels_data.copy()

for idx, _ in enumerate(attribute_dict):
  flat_attributes = flatten(attribute_dict[idx]['faceAttributes'])
  # Expand the list of {color, confidence} records into one key per hair colour
  # and drop the original list-valued entry.
  for record in flat_attributes.pop('hair_hairColor'):
    flat_attributes['hair_hairColor_' + record['color']] = record['confidence']
  attribute_dict[idx] = flat_attributes

# Optionally, show the training targets for the first image
attribute_dict[0]

{'accessories': [],
 'age': 50.0,
 'blur_blurLevel': 'low',
 'blur_value': 0.06,
 'emotion_anger': 0.0,
 'emotion_contempt': 0.0,
 'emotion_disgust': 0.0,
 'emotion_fear': 0.0,
 'emotion_happiness': 0.999,
 'emotion_neutral': 0.001,
 'emotion_sadness': 0.0,
 'emotion_surprise': 0.0,
 'exposure_exposureLevel': 'goodExposure',
 'exposure_value': 0.71,
 'facialHair_beard': 0.1,
 'facialHair_moustache': 0.1,
 'facialHair_sideburns': 0.1,
 'gender': 'male',
 'glasses': 'NoGlasses',
 'hair_bald': 0.11,
 'hair_hairColor_black': 0.23,
 'hair_hairColor_blond': 0.36,
 'hair_hairColor_brown': 1.0,
 'hair_hairColor_gray': 0.65,
 'hair_hairColor_other': 0.04,
 'hair_hairColor_red': 0.2,
 'hair_invisible': False,
 'headPose_pitch': 0.0,
 'headPose_roll': -0.4,
 'headPose_yaw': 3.1,
 'makeup_eyeMakeup': False,
 'makeup_lipMakeup': False,
 'noise_noiseLevel': 'low',
 'noise_value': 0.09,
 'occlusion_eyeOccluded': False,
 'occlusion_foreheadOccluded': False,
 'occlusion_mouthOccluded': False,
 'smile':

Create a few dictionaries for analyzing the training data. 

* y_data is a dictionary with an array of values for each attribute key.
* count_classes counts the number of unique values for each attribute key. This can be used to distinguish between continuous and discrete variables.
* type_dict lists the variable type for each attribute key.



In [0]:
# Attribute names are taken from the first image, in its key order.
attribute_list = list(attribute_dict[0])

# One array of values per attribute; images lacking the attribute are skipped,
# so the arrays may differ in length.
y_data = {
    'y_' + feature: np.array([x[feature] for x in attribute_dict if feature in x])
    for feature in attribute_list
}

# 'accessories' holds lists rather than scalar values, so it is not analysed.
y_data.pop('y_accessories')

# Number of distinct values observed per attribute.
count_classes = {name: len(np.unique(values)) for name, values in y_data.items()}

# Python type of every attribute, judged by the first image only.
type_dict = {feature: type(value) for feature, value in attribute_dict[0].items()}

# Optionally, print the dictionaries created
# print(y_data)
# print(count_classes)
# print(type_dict)
# print(attribute_list)

In [8]:
# Distinct beard values present in the training labels (a small set of discrete levels).
np.unique(y_data['y_facialHair_beard'])

array([0. , 0.1, 0.4, 0.6, 0.9])

Create the input data set X_data and a different data set for the hair variables, since not all training images have information on the hair. Then create the target variables:
* if the target variable is continuous, calculate the mean for that variable across all training images that have information on that variable. Then set the target to 1 for each image that is above the mean, and to 0 for each image that is below the mean.
* if the target variable is boolean, it's already binary and we can just train on the value.
* for the glasses attribute, we set it to 1 if the value is not 'NoGlasses' and to 0 otherwise.
* for the gender attribute, we set it to 1 if the value is 'male' and to 0 otherwise.
* 'headPose_pitch' has only one value, so we ignore it.
* 'blur_value', 'exposure_value', and 'noise_value' are not necessarily features we want to change, so we ignore them for now as well.

In [0]:
# Flatten every (18, 512) dlatent into a single feature row.
X_data = dlatent_data.reshape((-1, 18*512))

# Hair-colour values are missing for some images; X_hair_data keeps only the
# rows that have them (used when training on hair attributes).
has_blondhair = ['hair_hairColor_blond' in x for x in attribute_dict]
X_hair_data = X_data[has_blondhair,:]

# Binary training target per attribute.
y_binary = {}

for attribute in attribute_list:

  if type_dict[attribute] == float:
    # y_data only holds the values of images that actually carry this attribute,
    # so its mean is the correct threshold whether or not every image has it.
    # (Previously the sum was divided by the number of images with a blond-hair
    # value, which is only right if every partial attribute covers the same images.)
    attribute_values = y_data['y_'+attribute]
    mean_attribute = attribute_values.mean()
    # 1 (True) above the mean, 0 (False) otherwise.
    y_binary[attribute] = attribute_values > mean_attribute

  elif type_dict[attribute] == bool:
    # Already binary: train on the value itself.
    y_binary[attribute] = np.array([x[attribute] for x in attribute_dict])

  elif attribute == 'glasses':
    # Any kind of glasses counts as the positive class.
    y_binary['glasses'] = np.array([x['glasses'] != 'NoGlasses' for x in attribute_dict])

  elif attribute == 'gender':
    # 'male' is the positive class.
    y_binary['gender'] = np.array([x['gender'] == 'male' for x in attribute_dict])

# Attributes we do not learn a direction for; tolerate keys that were never added.
for key in ['headPose_pitch', 'blur_value', 'exposure_value', 'noise_value']:
  y_binary.pop(key, None)

# Optionally, print the new dictionary of targets
# print(y_binary)

# Learning directions

Now that we have binary targets for all of the variables, we can loop through all of them, and train a logistic regression model per attribute. 

If you want to save the new directions for later use, use the bottom part to download it as a file. Then push to the repository as 'directions_pretrained'.

In [0]:
# %%time

# directions = {}

# for feature in y_binary.keys():
#   print("Training:", feature)
#   if feature not in directions:
#     clf = LogisticRegression(class_weight='balanced', solver='lbfgs')
#     if len(y_binary[feature]) != X_data.shape[0]:
#       clf.fit(X_hair_data.reshape((-1, 18*512)), y_binary[feature])
#     else:
#       clf.fit(X_data.reshape((-1, 18*512)), y_binary[feature])
#     directions[feature] = clf.coef_.reshape((18, 512))

# use below to save
# with open('directions.p', 'wb') as handle:
#     pickle.dump(directions, handle)

# from google.colab import files
# files.download('directions.p') 

# Optionally, print dictionary of learned directions
# print(directions)

# Finding latent representations

Here we process any images that are in the folder raw_images. If you want to experiment with your own images, just add them to this folder. We first extract and align faces from the images. Then we find the latent representations of these aligned images.

If you want to save the latent vectors for later use, download and save them to the latent_representations folder.

In [0]:
# os.mkdir('aligned_images')
# !python align_images.py raw_images/ aligned_images/
# !python encode_images.py aligned_images/ generated_images/ latent_representations/

# replace name for own images
# justinbieber = np.load('latent_representations/justinbieber_01.npy')
# os.chdir('latent_representations')
# from google.colab import files
# files.download('justinbieber_01.npy')
# %cd ..

# Load a previously saved latent representation from the ffhq_dataset folder.
# NOTE(review): presumably an (18, 512) dlatent, as expected by shift_latent - confirm.
justinbieber = np.load('ffhq_dataset/latent_representations/justinbieber_01.npy')

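We also create a latent vector of all zeros, which we use as an approximation of the 'average face' of the dataset. Since we sample z from a standard normal distribution, the expected vector z is all zeros, and we assume here that the corresponding point in the intermediate latent space W is all zeros as well. Note that this is only an approximation: the mapping network is a learned non-linear function, so it does not necessarily map the zero vector to zero, and the exact average in W is the mean of the mapped latents.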

In [0]:
# All-zero latent with the same (18, 512) layout that generate_image reshapes its input to.
zero_latent_vector = np.zeros((18,512))

# Preparing to visualize results

Create functions that visualize the results of the shifts. Shift_latent loops through the list of features to change, and for each feature shifts the latent vector into that direction with a magnitude specified by the user. The final image is then shown.

In [0]:
def generate_image(latent_vector):
    """Render a latent vector as a 256x256 RGB PIL image.

    The vector is reshaped to a batch of one (1, 18, 512), handed to the
    notebook-level `generator`, and the first generated image is returned
    after downscaling.
    """
    batched_dlatent = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(batched_dlatent)
    first_image = generator.generate_images()[0]
    return PIL.Image.fromarray(first_image, 'RGB').resize((256, 256))

def shift_latent(latent_vector, key_dict, coeff_dict, feature_list, directions, n_coeffs):
    """Shift a latent vector along attribute directions and plot the results.

    One image is drawn per coefficient index. For every feature in
    `feature_list` the latent is moved by `coeff_dict[feature][i]` times
    `directions[feature]`, but only in the rows that belong to the resolution
    level `key_dict[feature]` assigns to that feature.

    Args:
        latent_vector: array indexed by layer along its first axis (the
            notebook uses shape (18, 512)); it is copied, never modified.
        key_dict: maps feature name -> 'coarse', 'middle', 'coarse_middle' or
            'fine'. Features mapped to any other value are left unshifted.
        coeff_dict: maps feature name -> sequence of at least `n_coeffs`
            shift magnitudes.
        feature_list: names of the features to shift.
        directions: maps feature name -> direction array with the same shape
            as `latent_vector`.
        n_coeffs: number of images (columns) to draw.

    Raises:
        KeyError: if a feature is missing from `key_dict`, `coeff_dict` or
            `directions`.
    """
    # Rows of the latent that each resolution level controls.
    level_rows = {
        'coarse': slice(0, 4),
        'middle': slice(4, 8),
        'coarse_middle': slice(0, 8),
        'fine': slice(8, None),
    }

    # squeeze=False always yields a 2-D array of axes, so a single column
    # (n_coeffs == 1) can be indexed the same way as several.
    fig, axes_grid = plt.subplots(1, n_coeffs, figsize=(15, 10), dpi=80, squeeze=False)
    ax = axes_grid[0]

    for i in range(n_coeffs):

        new_latent_vector = latent_vector.copy()
        for key in feature_list:
            coeff = coeff_dict[key][i]
            rows = level_rows.get(key_dict[key])
            if rows is None:
                # Unknown resolution level: leave the latent untouched for this feature.
                continue
            new_latent_vector[rows] = (new_latent_vector + coeff*directions[key])[rows]

        ax[i].imshow(generate_image(new_latent_vector))

    for axis in ax:
        axis.axis('off')

    if n_coeffs == 2:
        ax[0].set_title('original')
        ax[1].set_title('shifted')

    plt.show()

Create reference dictionaries:
* key_dict maps the attributes to the resolutions their shifts should be applied to. The attributes were assigned to a resolution level based on information in the paper as well as experimenting. If the shifts for an attribute don't work well, try assigning the attribute to a different resolution level.
* change_dict maps the attributes to a list that specifies by how much we want to shift the latent vector into the direction of the attribute. We start with all zeros, because we don't want to change anything unless specified later.

In [0]:
# Resolution level at which each attribute's direction is applied.
key_dict = {}
key_dict.update(dict.fromkeys(
    ['glasses', 'headPose_roll', 'headPose_yaw', 'occlusion_foreheadOccluded',
     'occlusion_mouthOccluded', 'occlusion_eyeOccluded'],
    'coarse'))
key_dict.update(dict.fromkeys(
    ['makeup_eyeMakeup', 'makeup_lipMakeup'],
    'middle'))
key_dict.update(dict.fromkeys(
    ['age', 'smile', 'gender', 'facialHair_beard', 'facialHair_moustache',
     'facialHair_sideburns', 'emotion_anger', 'emotion_contempt', 'emotion_disgust',
     'emotion_fear', 'emotion_happiness', 'emotion_neutral', 'emotion_sadness',
     'emotion_surprise', 'hair_bald'],
    'coarse_middle'))
key_dict.update(dict.fromkeys(
    ['hair_hairColor_black', 'hair_hairColor_blond', 'hair_hairColor_brown',
     'hair_hairColor_red', 'hair_hairColor_gray', 'hair_hairColor_other'],
    'fine'))

# Shift magnitudes per attribute as [original, shifted]; every attribute gets
# its own fresh list and starts without any change.
change_dict = {key: [0,0] for key in y_binary.keys()}

# Optionally, show dictionaries
# print(key_dict)
# print(change_dict)

# Interactive visualizations

Create an interactive interface to experiment with shifting into different directions. It visualizes two images: the original on the left and the image with all changes applied on the right.

The range of possible magnitudes is set for each attribute after interact_manual, and can be altered by changing the min or max value.

In [15]:
@interact_manual(smile=widgets.FloatSlider(min=-1.0, max=1.0, readout=False), 
                 age=widgets.FloatSlider(min= -3, max=3, readout=False), 
                 emotion=['happiness', 'anger', 'contempt', 'disgust', 'fear', 'neutral', 'surprise', 'sadness'],
                 m_emotion=widgets.FloatSlider(min = 0, max=2, readout=False, description='emotion intensity'),
                 gender=widgets.FloatSlider(min=-2.0, max=2.0, description='female-male', readout=False),
                 beard=widgets.FloatSlider(min=-2.0, max=2.0, readout=False),
                 moustache=widgets.FloatSlider(min=-2.0, max=2.0, readout=False),
                 glasses=['remove', '', 'add'],
                 m_glasses=widgets.FloatSlider(min=0.0, max=1.0, description='glasses intensity', readout=False),
                 haircolor=['black', 'blond', 'brown', 'red', 'gray', 'other', ''],
                 bald=widgets.FloatSlider(min=-2.0, max=2.0, description='baldness', readout=False),
                 yaw=widgets.FloatSlider(min=-1.0, max=1.0, description='headpose yaw', readout=False),
                 roll=widgets.FloatSlider(min=-1.0, max=1.0, description='headpose roll', readout=False),
                 eyemakeup=widgets.FloatSlider(min=-2.0, max=2.0, description='eye makeup', readout=False),
                 lipmakeup=widgets.FloatSlider(min=-2.0, max=2.0, description='lip makeup', readout=False))
def play_with_shifts_light(smile=0, age=0, gender=0, beard=0, moustache=0, emotion='neutral', m_emotion=0, glasses='', m_glasses=0, haircolor='', bald=0, 
                      yaw=0, roll=0, eyemakeup=0, lipmakeup=0):
  """Show the original image next to one shifted by the selected attribute values.

  Each numeric argument is the magnitude of the shift along the direction of
  that attribute. `emotion` selects which emotion direction `m_emotion` applies
  to, `glasses` ('add' / 'remove' / '') sets the sign applied to `m_glasses`,
  and a non-empty `haircolor` shifts that hair colour direction by 1.
  """
  n_coeffs = 2

  # Copy the inner lists as well: dict.copy() is shallow, so writing into the
  # lists would modify the global change_dict and settings from an earlier
  # invocation (e.g. a previously selected hair colour or emotion) would stay
  # active in later ones.
  current_change_dict = {key: list(values) for key, values in change_dict.items()}

  # Index 1 is the 'shifted' image; index 0 stays 0 and shows the original.
  slider_values = {
      'age': age,
      'smile': smile,
      'facialHair_beard': beard,
      'facialHair_moustache': moustache,
      'emotion_'+emotion: m_emotion,
      'gender': gender,
      'hair_bald': bald,
      'headPose_yaw': yaw,
      'headPose_roll': roll,
      'makeup_eyeMakeup': eyemakeup,
      'makeup_lipMakeup': lipmakeup,
  }
  for key, value in slider_values.items():
    current_change_dict[key][1] = value

  if glasses == 'add':
    current_change_dict['glasses'][1] = m_glasses
  if glasses == 'remove':
    current_change_dict['glasses'][1] = -m_glasses
  if haircolor != '':
    current_change_dict['hair_hairColor_'+haircolor][1] = 1

  # Only attributes with a non-zero coefficient need to be applied.
  change_features = [key for key in current_change_dict
                     if np.count_nonzero(current_change_dict[key]) != 0]

  # The previously used `after_mapping` is only defined in a commented-out
  # experiment cell, so this raised NameError on a fresh kernel. Use the latent
  # loaded in the 'Finding latent representations' section instead; swap in
  # zero_latent_vector (or any other (18, 512) latent) to edit a different face.
  shift_latent(justinbieber, key_dict, current_change_dict, change_features, directions, n_coeffs)

interactive(children=(FloatSlider(value=0.0, description='smile', max=1.0, min=-1.0, readout=False), FloatSlid…

# Experiments

A different approach to creating the targets for hair color. Instead of setting 1 if the value is above the mean for that hair color, we compare the value to the other hair colors for that image. Only the hair color attribute with the highest value gets a 1, the others get a zero. Add this to the attribute_dict creation code to implement.

In [0]:
#   haircolor_confidence = [ v for k,v in attribute_dict[i].items() if 'hair_hairColor' in k]
#   haircolors = [ k for k,v in attribute_dict[i].items() if 'hair_hairColor' in k]
#   if haircolors == []:
#     continue
#   max_haircolor = haircolors[np.argmax(np.array(haircolor_confidence))]
#   for color in haircolors:
#     attribute_dict[i][color] = False
#   attribute_dict[i][max_haircolor] = True
  
# attribute_dict.pop('hair_hairColor')

Create different data sets for different types of glasses. The goal was to train separate models for normal glasses and sunglasses. However, the sunglass model did not work well at all, probably due to the small training set.

In [0]:
# has_readingglasses = np.array([x['glasses'] == 'ReadingGlasses' for x in attribute_dict])
# has_noglasses = np.array([x['glasses'] == 'NoGlasses' for x in attribute_dict])
# has_sunglasses = np.array([x['glasses'] == 'Sunglasses' for x in attribute_dict])
# glasses_data = has_readingglasses | has_noglasses
# X_glasses_data = X_data[glasses_data,:]
# sunglasses_data = has_sunglasses | has_noglasses
# X_sunglasses_data = X_data[sunglasses_data,:]

# add this to the attribute_list loop
# if attribute == 'glasses':
#   y_binary['glasses'] = np.array([x['glasses'] == 'ReadingGlasses' for x in attribute_dict if x['glasses'] == 'ReadingGlasses' or x['glasses'] == 'NoGlasses'])
#   y_binary['sunglasses'] = np.array([x['glasses'] == 'Sunglasses' for x in attribute_dict if x['glasses'] == 'NoGlasses' or x['glasses'] == 'Sunglasses'])

# use this for training the targets
# for feature in y_binary.keys():
#   clf = LogisticRegression(class_weight='balanced', solver='lbfgs')
#   if feature == 'glasses':
#     clf.fit(X_glasses_data.reshape((-1, 18*512)), y_binary[feature])
#   elif feature == 'sunglasses':
#     clf.fit(X_sunglasses_data.reshape((-1, 18*512)), y_binary[feature])

#   directions[feature] = clf.coef_.reshape((18, 512))    


Create different targets for the beard model. Since there are only 5 classes for the beard variable at different levels, I tried training a model for each of these classes. I could then use the coefficients of any of these models to obtain the beard direction. Nevertheless, I found that it worked better to just create targets based on the mean beard value.

In [0]:
# y_beard_data = np.array([x['facialHair_beard'] for x in attribute_dict])
# y_beard_data = y_beard_data*10
# y_beard_data.astype(int)
# y_blackhair_data = np.array([x['hair_hairColor_black'] for x in attribute_dict if 'hair_hairColor_black' in x])
# y_blondhair_data = np.array([x['hair_hairColor_blond'] for x in attribute_dict if 'hair_hairColor_blond' in x])

We could also use the pretrained directions for some of the attributes instead of using our own. I found it makes no difference.

In [0]:
# directions['smile'] = np.load('ffhq_dataset/latent_directions/smile.npy')
# directions['gender'] = np.load('ffhq_dataset/latent_directions/gender.npy')
# directions['age'] = np.load('ffhq_dataset/latent_directions/age.npy')

Experiments with other models for finding directions. They all took very long to train and did not give good results.

In [0]:
# clf = SGDClassifier('log', class_weight='balanced')
# clf = LogisticRegression(solver='lbfgs', multi_class='multinomial')
# clf.fit(X_data.reshape((-1, 18*512)), y_beard_data)
# directions['beard'] = clf.coef_.reshape((-1, 18, 512))

# clf = LinearRegression()
# clf.fit(X_data.reshape((-1, 18*512)), y_bald_data)
# directions['baldness'] = clf.coef_.reshape((18, 512))

# clf = SVC()
# clf.fit(X_data.reshape((-1, 18*512)), y_eyemakeup_data)
# directions['eyemakeup'] = clf.coef_.reshape((18, 512))

# rgr = RandomForestRegressor()
# rgr.fit(X_data[:1000,:].reshape((-1, 18*512)), y_bald_data[:1000])
# bald_direction = rgr.coef_.reshape((18,512))


A different method of visualization. Here only one attribute at a time can be changed. Change the row numbers for the new latent vector to change the resolutions the shift should be applied to (0:4 for coarse, 4:8 for middle).

In [0]:
# def move_and_show(latent_vector, direction, coeffs):
#     fig,ax = plt.subplots(1, len(coeffs), figsize=(15, 10), dpi=80)
#     for i, coeff in enumerate(coeffs):
#         new_latent_vector = latent_vector.copy()
#         new_latent_vector[:8] = (latent_vector + coeff*direction)[:8]
#         ax[i].imshow(generate_image(new_latent_vector))
#         ax[i].set_title('Coeff: %0.1f' % coeff)
#     [x.axis('off') for x in ax]
#     plt.show()

# for i in range(2):
#     move_and_show(X_data.reshape((-1, 18, 512))[i], directions['gender'], [-4, -2, 0, 2, 4])

The next code should be part of the shift_latent function. It explores the idea of shifting along all emotion directions when the emotion is changed: if we want to increase one emotion, we also decrease the others. This only led to weirder results though.


In [0]:
#     extended_feature_list = feature_list.copy()
#     for key in feature_list:
#       if 'emotion' in key:
#         emotion_list = ['emotion_anger', 'emotion_contempt', 'emotion_disgust', 
#             'emotion_fear', 'emotion_happiness', 'emotion_neutral', 'emotion_sadness', 'emotion_surprise']
#         for emotion in emotion_list:
#           if emotion != key:
#             coeff_dict[emotion] = [-1 * i for i in coeff_dict[key]]
#         emotion_list.remove(key)
#         extended_feature_list.extend(emotion_list)

#     feature_list = extended_feature_list.copy()

Handcrafted dictionary of optimal values for change coefficients for each attribute. 

In [0]:
# old_change_dict = {'makeup_eyeMakeup': [-6, 0, 6], 
#                'makeup_lipMakeup': [-2, 0, 2],
#                'age': [-2, 0, 2],
#                'gender': [-2, 0, 2],
#                'smile': [-2, 0, 2],
#                'headPose_roll': [0, 2, 4],
#                'headPose_yaw': [-1.5, 0, 1.5],
#                'facialHair_moustache': [-2, 0, 2],
#                'facialHair_beard': [-2, 0, 2],
#                'facialHair_sideburns': [-2, 0, 2],
#                'glasses': [-1, 0, 1],
#                'sunglasses': [-2, 0, 2],
#                'emotion_anger': [0, 2, 4],
#                'emotion_contempt': [0, 2, 4],
#                'emotion_disgust': [0, 2, 4],
#                'emotion_fear': [0, 0.5, 1],
#                'emotion_happiness': [0, 2, 4],
#                'emotion_neutral': [0, 2, 4],
#                'emotion_sadness': [0, 2, 4],
#                'emotion_surprise': [0, 2, 4],
#                'occlusion_foreheadOccluded': [-2, 0, 2],
#                'occlusion_eyeOccluded': [-2, 0, 2],
#                'occlusion_mouthOccluded': [-2, 0, 2],
#                'hair_bald': [-2, 0, 2],
#                'hair_invisible': [-2, 0, 2],
#                'hair_hairColor_brown': [-2, 0, 2],
#                'hair_hairColor_blond': [-2, 0, 2],
#                'hair_hairColor_black': [-2, 0, 2],
#                'hair_hairColor_red': [-2, 0, 2],
#                'hair_hairColor_gray': [-2, 0, 2],
#                'hair_hairColor_other': [-2, 0, 2]
#               }

# feature_list = ['gender']
# n_coeffs = 3

# for i in range(20):
#   shift_latent(X_data.reshape((-1, 18, 512))[i], key_dict, old_change_dict, feature_list, directions, n_coeffs)

This is a small start of implementing the text2image part of the model. It takes a text string as user input, and saves it as a variable. From here it is pretty straightforward to analyze the input text, and determine which features should be changed.

In [0]:
# input_text = widgets.Text()
# output_text = widgets.Text()

# def handle_submit(sender):
#   output_text.value = input_text.value
#   print(output_text.value)
  
# input_text.on_submit(handle_submit)

# input_text

In [0]:
# description = output_text.value
# description

Explore latent directions and representations.

In [0]:
# import seaborn
# import scipy

# plt.figure(figsize=(20,10))
# # heatmap = seaborn.heatmap(X_data[0,:].reshape(18,512)+directions['glasses'])
# heatmap = seaborn.heatmap(justinbieber)
# plt.show()

# plt.figure(figsize=(20,10))
# heatmap = seaborn.heatmap(X_data[0,:].reshape(18,512))
# # heatmap = seaborn.heatmap(taylor_swift)
# plt.show()

# plt.figure(figsize=(20,10))
# heatmap = seaborn.heatmap(X_data[0,:].reshape(18,512)-4*directions['gender'])
# # heatmap = seaborn.heatmap(taylor_swift)
# plt.show()

# plt.figure(figsize=(20,10))
# heatmap = seaborn.heatmap(directions['gender'])
# # heatmap = seaborn.heatmap(taylor_swift)
# plt.show()

# zero_latent_vector = scipy.sparse.random(m=18,n=512, density = .1)
# # zero_latent_vector = np.array(zero_latent_vector)

# new_latent_vector = np.random.normal(size=(1,512))
# after_mapping = Gs.components.mapping.run(new_latent_vector, None, minibatch_size=1, randomize_noise=False, structure='fixed')
# new_latent_vector = np.ones((18,512))-0.9
# print(new_latent_vector)

Interface with all attributes.

In [0]:
# @interact_manual(smile=widgets.FloatSlider(min=-1.0, max=1.0, readout=False), 
#                  age=widgets.FloatSlider(min= -3, max=3, readout=False), 
#                  emotion=['happy', 'anger', 'contempt', 'disgust', 'fear', 'neutral', 'surprise', 'sadness'],
#                  m_emotion=widgets.FloatSlider(min = 0, max=2, readout=False, description='emotion intensity'),
#                  gender=widgets.FloatSlider(min=-2.0, max=2.0, description='female-male', readout=False),
#                  beard=widgets.FloatSlider(min=-2.0, max=2.0, readout=False),
#                  moustache=widgets.FloatSlider(min=-2.0, max=2.0, readout=False),
#                  glasses=['remove', '', 'add'],
#                  m_glasses=widgets.FloatSlider(min=0.0, max=1.0, description='glasses intensity', readout=False),
#                  haircolor=['black', 'blond', 'brown', 'red', 'gray', 'other', ''],
#                  bald=widgets.FloatSlider(min=-1.0, max=1.0, description='baldness', readout=False),
#                  yaw=widgets.FloatSlider(min=-1.0, max=1.0, description='headpose yaw', readout=False),
#                  eyemakeup=widgets.FloatSlider(min=-1.0, max=1.0, description='eye makeup', readout=False),
#                  lipmakeup=widgets.FloatSlider(min=-1.0, max=1.0, description='lip makeup', readout=False))
          


# def play_with_shifts(smile=0, age=0, gender=0, beard=0, moustache=0, sideburns=0, emotion='neutral', m_emotion=0, glasses='', m_glasses=0, haircolor='', bald=0, 
#                      hairvisible=0, yaw=0, roll=0, eyemakeup=0, lipmakeup=0, foreheadocc=0, eyeocc=0, mouthocc=0):
#   n_coeffs = 2
  
#   current_change_dict = change_dict.copy()
   
#   current_change_dict['age'][1] = age
#   current_change_dict['smile'][1] = smile
#   current_change_dict['facialHair_beard'][1] = beard
#   current_change_dict['facialHair_moustache'][1] = moustache
#   current_change_dict['facialHair_sideburns'][1] = sideburns
#   current_change_dict['emotion_'+emotion][1] = m_emotion
#   current_change_dict['gender'][1] = gender
#   current_change_dict['hair_bald'][1] = bald
#   current_change_dict['hair_invisible'][1] = hairvisible
#   current_change_dict['headPose_yaw'][1] = yaw
#   current_change_dict['headPose_roll'][1] = roll
#   current_change_dict['makeup_eyeMakeup'][1] = eyemakeup
#   current_change_dict['makeup_lipMakeup'][1] = lipmakeup
#   current_change_dict['occlusion_foreheadOccluded'][1] = foreheadocc
#   current_change_dict['occlusion_eyeOccluded'][1] = eyeocc
#   current_change_dict['occlusion_mouthOccluded'][1] = mouthocc

#   if glasses == 'add':
#     current_change_dict['glasses'][1] = m_glasses
#   if glasses == 'remove':
#     current_change_dict['glasses'][1] = -m_glasses
#   if haircolor != '':
#     current_change_dict['hair_hairColor_'+haircolor][1] = 1
  
#   change_features = []
#   for key in current_change_dict:
#     if np.count_nonzero(current_change_dict[key]) != 0:
#       change_features.append(key)
        
#   shift_latent(zero_latent_vector, key_dict, current_change_dict, change_features, directions, n_coeffs)

Test a nonlinear model. It worked well too, but is much less practical to use than a linear model, where we just get coefficients to shift the latent vector by.

In [0]:
# model = Sequential()
# model.add(Dense(32, activation='relu'))
# model.add(Dropout(0.3))
# model.add(Dense(1))
# # model.add(Activation('sigmoid'))
# model.compile('adam', 'mean_squared_error', metrics=['accuracy'])

# model.fit(X_data.reshape((-1, 18*512)), y_beard_data, validation_split=0.2, epochs=5)
# model = Model(model.input, model.layers[-1].output)

In [0]:
# embedding_model = Sequential()
# embedding_model.add(Embedding(10, 18*512, input_length=1)) # it's actually just a variable
# embedding_model.add(Flatten())

# nonlinear_beard_model = Model(embedding_model.input, model(embedding_model.output))
# nonlinear_beard_model.layers[-1].trainable = False # fix non-linear model and train only embeddings
# nonlinear_beard_model.compile('sgd', 'mse')

# nonlinear_beard_model.layers[1].set_weights([X_data[:10].reshape((-1, 18*512))])
# y_data_real = nonlinear_beard_model.predict(np.arange(10))
# # y_data_real

In [0]:
# factors = [-40, -20, 0, 20, 40]

# for i, factor in enumerate(factors):
#   fig,ax = plt.subplots(1, 10, figsize=(15, 10), dpi=160)
#   nonlinear_beard_model.layers[1].set_weights([X_data[:10].reshape((-1, 18*512))])
#   nonlinear_beard_model.fit(np.arange(10), np.full((10, 1), factor), verbose=0, epochs=500)
#   for j, emb in enumerate(embedding_model.layers[0].get_weights()[0]):
#     ax[j].imshow(generate_image(emb))
# #     ax[j].set_title('Factor: %0.1f' % factor)
#   [x.axis('off') for x in ax]
#   plt.show()