<a href="https://colab.research.google.com/github/safwanp314/cari-net/blob/master/cyclegan_landmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
# !pip install tensorflow-addons

In [0]:
# !git clone https://github.com/lsaiml/CaVINet.git

In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow_addons.layers import InstanceNormalization, GroupNormalization
from tensorflow.keras.activations import relu
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import BinaryCrossentropy
import tensorflow.keras.backend as K

In [0]:
from sklearn.decomposition import PCA

In [0]:
import os
import sys
from PIL import Image
import cv2
import dlib
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be opened by path
# (the dlib landmark model used by this notebook is loaded from under this mount point).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Face detector; it is called on grayscale images to obtain face rectangles.
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from a model file on the mounted Google Drive;
# it is called with a grayscale image and one face rectangle.
predictor = dlib.shape_predictor('/content/drive/My Drive/Deep Learning/shape_predictor_68_face_landmarks.dat')

In [0]:
def find_files(directory, extension):
  """Recursively collect entries under ``directory`` whose name ends with ``.extension``.

  Args:
    directory: Path of the directory to search.
    extension: Extension to match, without the leading dot (e.g. ``'jpg'``).

  Returns:
    A list of absolute paths of the matching entries. The order follows
    ``os.listdir`` and is therefore unspecified; sort the result when a stable
    order is needed. An empty list is returned (after printing a message) when
    ``directory`` itself cannot be listed, so callers can always treat the
    result as a list.
  """
  suffix = '.' + extension
  try:
    entries = os.listdir(directory)
  except OSError:
    # Only filesystem failures (missing path, not a directory, no permission)
    # are handled here; any other exception is a real bug and must propagate.
    print('directory not found')
    return []

  file_list = []
  for entry in entries:
    path = os.path.join(directory, entry)
    if entry.endswith(suffix):
      file_list.append(os.path.abspath(path))
    elif os.path.isdir(path):
      # An unreadable sub-directory contributes nothing instead of discarding
      # everything that was already collected.
      file_list.extend(find_files(path, extension))
  return file_list

In [0]:
# Dataset root, relative to the working directory
# (presumably the checkout produced by the commented-out `git clone` cell - confirm).
input_path = './CaVINet/CaVI_Dataset'
# Only entries whose name ends with '.' + ext are collected.
ext = 'jpg'
input_image_files = find_files(input_path, ext)
# Sort in place so the images are processed in a deterministic order.
input_image_files.sort()

In [0]:
# Number of (x, y) landmark points read from each detected face.
n_dim_landmark = 68

# Number of PCA components used to encode a flattened landmark vector;
# also the input and output width of the generators.
n_dim_landmark_pca = 32

# Number of units in the discriminator's output layer.
n_dim_disc = 4

In [0]:
def rect_to_bb(rect):
  """Turn a rectangle object into an OpenCV-style ``(x, y, w, h)`` tuple.

  Args:
    rect: Object exposing ``left()``, ``top()``, ``right()`` and ``bottom()``
      accessors (such as the rectangles returned by the face detector).

  Returns:
    ``(x, y, w, h)`` where ``(x, y)`` is the top-left corner and ``w``/``h``
    are the differences ``right - left`` and ``bottom - top``.
  """
  left, top = rect.left(), rect.top()
  width = rect.right() - left
  height = rect.bottom() - top
  return (left, top, width, height)

def to_numpy(shape, dtype="int"):
  """Copy the landmark points of ``shape`` into an ``(n_dim_landmark, 2)`` array.

  Args:
    shape: Landmark result exposing ``part(i)`` objects with ``x`` and ``y``
      attributes for ``i`` in ``range(n_dim_landmark)``.
    dtype: NumPy dtype of the returned array.

  Returns:
    Array whose row ``i`` holds ``(x, y)`` of landmark ``i``.
  """
  points = np.zeros((n_dim_landmark, 2), dtype=dtype)
  for row in range(n_dim_landmark):
    part = shape.part(row)
    points[row, 0] = part.x
    points[row, 1] = part.y
  return points

In [13]:
# Flat list of every landmark array that was extracted; later used to fit the PCA.
landmarks = []
# Nested mapping: dataset[personality][image_type] -> list of landmark arrays.
dataset = {}
# Files that could not be decoded or in which no face was detected.
skipped_files = []

num_of_files = len(input_image_files)
print('number of files', num_of_files)
for idx, file in enumerate(input_image_files):

  # Paths are expected to end in <image_type>/<personality>/<file name>.
  name_split = file.split('/')
  image_type = name_split[-3]
  personality = name_split[-2]

  shape = None
  # cv2.imread returns None (it does not raise) when the file cannot be decoded.
  image = cv2.imread(file)
  if image is not None:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # detect faces in the grayscale image
    rects = detector(gray, 1)
    if len(rects) > 0:
      # When several faces are found the last detection is used, which is what
      # the previous per-detection loop ended up keeping as well.
      shape = to_numpy(predictor(gray, rects[len(rects) - 1]))

  if shape is None:
    # Without this guard the landmarks of the previously processed image would
    # be stored again under this image's personality and type.
    skipped_files.append(file)
  else:
    landmarks.append(shape)
    dataset.setdefault(personality, {}).setdefault(image_type, []).append(shape)

  sys.stdout.write('\r%.2f %%' % ((idx + 1) * 100.0 / num_of_files))

if skipped_files:
  print('\nskipped %d files without a usable face' % len(skipped_files))

number of files 12379
100.00 %

In [0]:
# Stack the per-image landmark arrays and flatten each one into a single row of
# n_dim_landmark * 2 values. Note that `landmarks` is rebound from a list to an array here.
landmarks = np.array(landmarks).reshape((len(landmarks), n_dim_landmark * 2))

In [15]:
# Fit a PCA on the flattened landmark rows, keeping n_dim_landmark_pca components.
pca = PCA(n_components=n_dim_landmark_pca)
pca.fit(landmarks)

PCA(copy=True, iterated_power='auto', n_components=32, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [0]:
def build_generator(n_dim_landmark_pca=n_dim_landmark_pca):
  """Build the landmark generator: an MLP mapping one landmark encoding to another.

  The network is a stack of Dense(ReLU) + Dropout(0.1) blocks with widths
  64, 128, 128, 64, followed by a linear Dense layer that maps back to the
  input width. Every Dense layer uses He-normal initialisation and an L2
  kernel regulariser of 1e-4.

  Args:
    n_dim_landmark_pca: Width of the input and output vectors.

  Returns:
    An uncompiled Keras ``Model`` named ``'landmark_generator'``.
  """
  # `shape` has to be a tuple; `(n)` without the trailing comma is just an int.
  inp = Input(shape=(n_dim_landmark_pca,))

  x = inp
  for units in (64, 128, 128, 64):
    x = Dense(units, kernel_initializer='he_normal', kernel_regularizer=l2(1e-4), activation='relu')(x)
    x = Dropout(0.1)(x)

  # No activation on the output: the encoded landmarks are unbounded real values.
  enc_landmarks = Dense(n_dim_landmark_pca, kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(x)

  return Model(inp, enc_landmarks, name='landmark_generator')

In [0]:
def build_discriminator(n_dim_landmark_pca=n_dim_landmark_pca, n_dim_disc=n_dim_disc):
  """Build the landmark discriminator: an MLP scoring a landmark encoding.

  The network is a stack of Dense + LeakyReLU(0.2) + Dropout(0.1) blocks with
  widths 64, 32, 16, followed by a sigmoid Dense layer of ``n_dim_disc`` units.
  Every Dense layer uses He-normal initialisation and an L2 kernel regulariser
  of 1e-4.

  Args:
    n_dim_landmark_pca: Width of the input vector.
    n_dim_disc: Number of output units.

  Returns:
    An uncompiled Keras ``Model`` named ``'landmark_discriminator'`` whose
    outputs are sigmoid-activated, i.e. values in (0, 1) rather than logits.
  """
  # `shape` has to be a tuple; `(n)` without the trailing comma is just an int.
  inp = Input(shape=(n_dim_landmark_pca,))

  x = inp
  for units in (64, 32, 16):
    # The Dense layer stays linear: a ReLU here would make the LeakyReLU that
    # follows a no-op, because its input could never be negative.
    x = Dense(units, kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dropout(0.1)(x)

  enc_landmarks = Dense(n_dim_disc, kernel_initializer='he_normal', kernel_regularizer=l2(1e-4), activation='sigmoid')(x)

  return Model(inp, enc_landmarks, name='landmark_discriminator')

In [0]:
# Two generators, one per translation direction:
# generator_g maps domain X to domain Y and generator_f maps Y back to X.
generator_g = build_generator()
generator_f = build_generator()

# One discriminator per domain: discriminator_x scores X samples, discriminator_y scores Y samples.
discriminator_x = build_discriminator()
discriminator_y = build_discriminator()

In [0]:
# The discriminators end in a sigmoid layer, so what reaches this loss are
# probabilities, not logits. from_logits=True would push them through a second
# sigmoid inside the loss and distort the adversarial objective.
loss_obj = BinaryCrossentropy(from_logits=False)

In [0]:
def generator_loss(generated):
  """Adversarial loss for a generator.

  Args:
    generated: Discriminator output for generated (fake) samples.

  Returns:
    ``loss_obj`` evaluated against an all-ones target, i.e. the loss is low
    when the discriminator scores the generated samples as real.
  """
  target_real = tf.ones_like(generated)
  return loss_obj(target_real, generated)

In [0]:
def discriminator_loss(real, generated):
  """Adversarial loss for a discriminator.

  Args:
    real: Discriminator output for real samples (target: all ones).
    generated: Discriminator output for generated samples (target: all zeros).

  Returns:
    Half of the sum of the two ``loss_obj`` terms.
  """
  loss_on_real = loss_obj(tf.ones_like(real), real)
  loss_on_generated = loss_obj(tf.zeros_like(generated), generated)
  return (loss_on_real + loss_on_generated) * 0.5

In [0]:
def cycle_loss(real_image, cycled_image):
  """Mean absolute difference between a sample and its round-trip reconstruction.

  Args:
    real_image: Original sample.
    cycled_image: The sample after being passed through both generators.

  Returns:
    Scalar mean of ``|real_image - cycled_image|`` over all elements.
  """
  absolute_error = tf.abs(real_image - cycled_image)
  return tf.reduce_mean(absolute_error)

In [0]:
def identity_loss(real_image, same_image):
  """Mean absolute difference between a sample and its identity mapping.

  Args:
    real_image: Original sample.
    same_image: Output of the generator whose target domain already contains
      ``real_image``.

  Returns:
    Scalar mean of ``|real_image - same_image|`` over all elements.
  """
  deviation = tf.abs(real_image - same_image)
  return tf.reduce_mean(deviation)

In [0]:
# One Adam optimizer per network so that each keeps its own moment estimates.
# All four share the same settings: learning rate 2e-4 and beta_1 = 0.5.
generator_g_optimizer = Adam(2e-4, beta_1=0.5)
generator_f_optimizer = Adam(2e-4, beta_1=0.5)

discriminator_x_optimizer = Adam(2e-4, beta_1=0.5)
discriminator_y_optimizer = Adam(2e-4, beta_1=0.5)

In [0]:
@tf.function
def train_step(real_x, real_y):
  """Run one optimisation step on both generators and both discriminators.

  Args:
    real_x: Batch of domain-X samples, in the input format of ``generator_g``
      and ``discriminator_x``.
    real_y: Batch of domain-Y samples, in the input format of ``generator_f``
      and ``discriminator_y``.

  Returns:
    None. The four networks' trainable variables are updated in place through
    their respective optimizers.
  """
  # persistent is set to True because the tape is used more than
  # once to calculate the gradients.
  with tf.GradientTape(persistent=True) as tape:
    # Generator G translates X -> Y
    # Generator F translates Y -> X.

    # Forward cycle: X -> fake Y -> reconstructed X.
    fake_y = generator_g(real_x, training=True)
    cycled_x = generator_f(fake_y, training=True)

    # Backward cycle: Y -> fake X -> reconstructed Y.
    fake_x = generator_f(real_y, training=True)
    cycled_y = generator_g(fake_x, training=True)

    # same_x and same_y are used for identity loss.
    same_x = generator_f(real_x, training=True)
    same_y = generator_g(real_y, training=True)

    # Discriminator scores for the real samples of each domain ...
    disc_real_x = discriminator_x(real_x, training=True)
    disc_real_y = discriminator_y(real_y, training=True)

    # ... and for the generated samples of each domain.
    disc_fake_x = discriminator_x(fake_x, training=True)
    disc_fake_y = discriminator_y(fake_y, training=True)

    # calculate the loss
    gen_g_loss = generator_loss(disc_fake_y)
    gen_f_loss = generator_loss(disc_fake_x)

    # The cycle loss sums both directions and is shared by the two generators.
    total_cycle_loss = cycle_loss(real_x, cycled_x) + cycle_loss(real_y, cycled_y)

    # Total generator loss = adversarial loss + cycle loss + identity loss
    # (all three terms are added with equal weight).
    total_gen_g_loss = gen_g_loss + total_cycle_loss + identity_loss(real_y, same_y)
    total_gen_f_loss = gen_f_loss + total_cycle_loss + identity_loss(real_x, same_x)

    disc_x_loss = discriminator_loss(disc_real_x, disc_fake_x)
    disc_y_loss = discriminator_loss(disc_real_y, disc_fake_y)

  # Calculate the gradients for generator and discriminator
  # (each loss is differentiated only with respect to its own network's variables).
  generator_g_gradients = tape.gradient(total_gen_g_loss, generator_g.trainable_variables)
  generator_f_gradients = tape.gradient(total_gen_f_loss, generator_f.trainable_variables)
  discriminator_x_gradients = tape.gradient(disc_x_loss, discriminator_x.trainable_variables)
  discriminator_y_gradients = tape.gradient(disc_y_loss, discriminator_y.trainable_variables)

  # Apply the gradients to the optimizer
  generator_g_optimizer.apply_gradients(zip(generator_g_gradients, generator_g.trainable_variables))
  generator_f_optimizer.apply_gradients(zip(generator_f_gradients, generator_f.trainable_variables))
  discriminator_x_optimizer.apply_gradients(zip(discriminator_x_gradients, discriminator_x.trainable_variables))
  discriminator_y_optimizer.apply_gradients(zip(discriminator_y_gradients, discriminator_y.trainable_variables))


In [0]:
# Train on the first 10 personalities. A dict view cannot be sliced in
# Python 3, so the keys are materialised into a list first.
train_dataset = list(dataset.keys())[0:10]

for personality in train_dataset:

  # A personality may lack one of the two domains; .get avoids a KeyError.
  real = dataset[personality].get('Real', [])
  caricature = dataset[personality].get('Caricature', [])

  n_real = len(real)
  n_caricature = len(caricature)
  if n_real == 0 or n_caricature == 0:
    # Unpaired training still needs at least one sample from each domain.
    continue
  # Number of random (real, caricature) pairs drawn for this personality:
  # a quarter of all possible pairs, but never fewer than two.
  n_combination = max(int(0.25 * n_real * n_caricature), 2)

  # Project every landmark set once per personality instead of once per step.
  real_pca = pca.transform(
      np.asarray(real).reshape((n_real, n_dim_landmark * 2))).astype('float32')
  caricature_pca = pca.transform(
      np.asarray(caricature).reshape((n_caricature, n_dim_landmark * 2))).astype('float32')

  for _ in range(n_combination):
    # Draw one random sample per domain and keep a leading batch axis of 1.
    image_x = real_pca[np.random.randint(n_real)].reshape((1, -1))
    image_y = caricature_pca[np.random.randint(n_caricature)].reshape((1, -1))

    train_step(image_x, image_y)