### **Generation and Manipulation of faces using Generative Adversarial Networks**
*Project for CSCI 630 - Foundations of Artificial Intelligence* 

---


*   [Aishwarya Rao](mailto:ar2711@rit.edu)
*   [Rishabh Manish Sahlot](mailto:rs3655@rit.edu)
*   [Anuj Kulkarni](mailto:ak8285@rit.edu)
*   [Shweta Vijay Wahane](mailto:sw9910@rit.edu)

The following notebook uses conditional generative adversarial networks to generate realistic images of faces conditioned on age.

In [0]:
#Import statements
import os
import pickle
import random

import cv2
import dlib
import numpy as np
import scipy.misc
from tqdm import tqdm
from google.colab import auth
#Aliased on import: a plain `from google.colab import drive` would rebind the
#name `drive` and replace the PyDrive client created below, which the later
#drive.CreateFile(...) calls depend on
from google.colab import drive as colab_drive
from oauth2client.client import GoogleCredentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

#Authenticate the Colab user and build the PyDrive client used to download files by id
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

#Mount Google Drive on the filesystem; the trained generator is saved under this path later
colab_drive.mount('/content/gdrive')

In [0]:
#Dataset is stored in Drive as a tar.gz archive: fetch it by file id into
#data.tar.gz in the working directory, then extract it in place
#`drive` must be the PyDrive GoogleDrive client here (CreateFile is its method)
downloaded = drive.CreateFile({'id':"19W6fziIc3pSywcgO6lL5tHZAynlV21RX"})   
downloaded.GetContentFile('data.tar.gz')
#Shell magic: -x extract, -v list each file, -f archive name
!tar -xvf data.tar.gz  

In [0]:
#Face recognition model to obtain embeddings from every face in dataset
#Downloaded by Drive file id as a bzip2-compressed file
downloaded = drive.CreateFile({'id':"1XLHHUD3qMEk2i8SEVoIOvgR3cDQCeR1q"})   
downloaded.GetContentFile('dlib_face_recognition_resnet_model_v1.dat.bz2')
# Landmark generator from dlib to identify facial landmarks
# Facial Landmarks are used by the dlib face recognition model
downloaded = drive.CreateFile({'id':"1Q7Bj4YRpIXU2WF5zrUbJe3AazUGi0lCt"})   
downloaded.GetContentFile('shape_predictor_68_face_landmarks.dat.bz2')
#Extract these for use 
#Decompressing yields the .dat files that get_dlib_embeddings loads by name
!bzip2 dlib_face_recognition_resnet_model_v1.dat.bz2 --decompress
!bzip2 shape_predictor_68_face_landmarks.dat.bz2 --decompress

### **DATA PREPARATION**

---
Extracting face landmarks, getting embeddings and storing arrays of faces, corresponding age and face embedding. These will be used by our GAN model at a later stage.

In [0]:
#Side length in pixels: load_data resizes every image to IMG_SIZE x IMG_SIZE
IMG_SIZE = 100

def get_age(filename):
  '''
  Extract the quantized age from a dataset filename.

  Filename format for the dataset is age_gender_race_timestamp, so the age is
  the text before the first underscore. Ages are grouped into 5-year bins
  (0-4 -> 0, 5-9 -> 1, ...); an age of 116 falls into bin 23.

  filename: name of the image file (no directory part)
  Returns the age bin as an integer.
  Raises ValueError if the text before the first underscore is not an integer.
  '''
  age_text = filename.split("_", 1)[0]
  return int(age_text) // 5

#Cache for the dlib models so the model files are read from disk only once,
#not once per image
_DLIB_MODELS = {}

def _get_dlib_models():
  '''
  Return (face detector, landmark predictor, face recognition model),
  loading them on the first call and reusing them afterwards.
  '''
  if not _DLIB_MODELS:
    _DLIB_MODELS['detector'] = dlib.get_frontal_face_detector()
    _DLIB_MODELS['predictor'] = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    _DLIB_MODELS['recognizer'] = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')
  return _DLIB_MODELS['detector'], _DLIB_MODELS['predictor'], _DLIB_MODELS['recognizer']

def get_dlib_embeddings(image):
  '''
  Compute dlib face embeddings for every face detected in an image.

  image: image as a numpy array. load_data passes the (resized) array read by
         cv2.imread, i.e. a colour image in OpenCV's channel order.
         NOTE(review): dlib documents RGB or grayscale input - confirm whether
         a BGR->RGB conversion is wanted here.
  Returns a list with one numpy array per detected face (the descriptor from
  the dlib face recognition model); the list is empty when no face is found.
  '''
  face_detector, shape_predictor, face_recognition_model = _get_dlib_models()

  #Detect face position (second argument: upsample the image once before detecting)
  detected_faces = face_detector(image, 1)
  #Extract landmarks for every face detected - in this case, only one face is expected
  shapes_faces = [shape_predictor(image, face) for face in detected_faces]
  #Return embedding of each face using the face recognition model
  return [np.array(face_recognition_model.compute_face_descriptor(image, face_pose, 1)) for face_pose in shapes_faces]

def load_data(path):
  '''
  Read every image in a directory and collect faces, ages and embeddings.

  path: directory containing the image files; each file name must follow the
        format expected by get_age.
  Returns three lists aligned by index:
    real_data       - images resized to IMG_SIZE x IMG_SIZE
    age_values      - quantized age of each image (see get_age)
    embeddings_face - result of get_dlib_embeddings for each image (a list,
                      empty when no face was detected)
  Files that cv2.imread cannot decode are skipped.
  '''
  real_data = []
  age_values = []
  embeddings_face = []
  for image in tqdm(os.listdir(path)):
    image_array = cv2.imread(os.path.join(path, image))
    if image_array is None:
      #cv2.imread returns None instead of raising for unreadable/non-image
      #files; skipping them avoids a crash in cv2.resize
      continue
    age = get_age(image)
    image_array = cv2.resize(image_array, (IMG_SIZE, IMG_SIZE))
    face_embeddings = get_dlib_embeddings(image_array)
    #Append to all three lists together so their indices always line up
    real_data.append(image_array)
    age_values.append(age)
    embeddings_face.append(face_embeddings)
  return real_data, age_values, embeddings_face

#Contains faces in real train data, corresponding age values and face embeddings
#The three lists are aligned by index; 'UTKFace' is read relative to the working directory
real_train_data, age_values, embeddings = load_data('UTKFace')

In [0]:
#Store to file for later use
for pickle_name, payload in (("embeddings.pickle", embeddings),
                             ("age.pickle", age_values),
                             ("faces.pickle", real_train_data)):
  #The with-block closes the file even if pickle.dump raises
  with open(pickle_name, "wb") as pickle_out:
    pickle.dump(payload, pickle_out)

### **CONDITIONAL GAN MODEL**


---
Construct generator, discriminator, optimizers and loss functions. \\
Includes training function


In [0]:
%matplotlib inline
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torchvision import transforms
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import autograd
from torch.autograd import Variable
from torchvision.utils import make_grid
import matplotlib.pyplot as plt


In [0]:
#Preparing data for model's use
#Getting data from pickle files (data includes age values, array of faces and face embeddings)
downloaded = drive.CreateFile({'id':"1ZX4v97jWA_5iZciY50HLCVyJzKpitr62"})   #Prestored on cloud
downloaded.GetContentFile('age.pickle')

downloaded = drive.CreateFile({'id':"1FPmDblDjK1W9fUTljV1MaEghV8nlpK8m"})   #Prestored on cloud
downloaded.GetContentFile('embeddings.pickle')

downloaded = drive.CreateFile({'id':"1G9cJXmKTPZPxzJfpJfM3bPh2NwEp5AdH"})   #Prestored on cloud
downloaded.GetContentFile('faces.pickle')

#NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source
#The with-blocks close each file as soon as it has been read
with open("age.pickle", "rb") as pickle_in:
  agevalues = pickle.load(pickle_in)
with open("embeddings.pickle", "rb") as pickle_in:
  embeddings = pickle.load(pickle_in)
with open("faces.pickle", "rb") as pickle_in:
  faces = pickle.load(pickle_in)

#Indices of all images whose faces were not recognized by dlib - no facial embeddings
remove = [index for index, arr in enumerate(embeddings) if len(arr) < 1]
#Set copy for constant-time membership tests in the three filters below
removed = set(remove)

#One hot encoding representation for ages (24 age bins)
agevalues = [age for j, age in enumerate(agevalues) if j not in removed]
tempage = [[0]*24 for _ in range(len(agevalues))]
for index, age in enumerate(agevalues):
  tempage[index][age] = 1
agevalues = tempage

embeddings = [embed for j, embed in enumerate(embeddings) if j not in removed]
#Faces are of shape 64x64 and in grayscale to run model faster
faces = [cv2.cvtColor(cv2.resize(face, (64,64)), cv2.COLOR_BGR2GRAY) for j, face in enumerate(faces) if j not in removed]
#Convert to tensors for PyTorch
age = torch.Tensor(np.asarray(agevalues))
#Each entry is a list of per-face embeddings; keep the first face only
embeddings = np.asarray([arr[0] for arr in embeddings])
embed = torch.from_numpy(embeddings)
#Pixel values scaled to the range 0..1
face = torch.Tensor(np.asarray(faces)/255.0)

#Serve the data in shuffled mini-batches of (face, embedding, one-hot age)
dataset = torch.utils.data.TensorDataset(face, embed, age)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)


**DISCRIMINATOR MODEL**

In [0]:
#Conditional GAN discriminator code - takes in image and age label - concatenates them for prediction
#Returns a prediction - 0 if real, 1 if fake
class Discriminator(nn.Module):
  '''
  Scores a 64x64 single-channel image together with its one-hot age label.

  The 24-dimensional label is projected to a 64x64 plane and stacked with the
  image as a second channel; three strided convolutions and one linear layer
  then produce one sigmoid score per sample.
  '''
  def __init__(self):
    super().__init__()
    self.model = nn.Sequential(
        nn.Conv2d(2,32,5,2), #2 channels - one for image, one for age label
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(32,64,5,2),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(64,128,5,2),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Flatten(1,-1),
        nn.Dropout(0.3),
        nn.Linear(3200,1,False), #3200 = 128 channels x 5 x 5 after the three convolutions
        nn.Sigmoid() #squash output between 0 and 1
        )
    #embed age vector into a 64x64 value
    self.lab = nn.Linear(24,64*64*1,False)

  def forward(self, x, labels):
    '''
    x: batch of images, any shape that can be viewed as (batch, 1, 64, 64)
    labels: batch of one-hot age vectors, shape (batch, 24)
    Returns a tensor of shape (batch,) with scores between 0 and 1.
    '''
    x = x.view(x.size(0),1,64,64).float() #shape - batchsize x 1 x imgsize x imgsize
    c = self.lab(labels.float())
    c = c.view(labels.size(0),1,64,64)
    x = torch.cat([x, c], 1)
    out = self.model(x) #shape - batchsize x 1
    #Drop only the trailing dimension: a bare squeeze() would also remove the
    #batch dimension for a batch of one and no longer match a (batch,) target
    return out.squeeze(1)

**GENERATOR MODEL**

In [0]:
#Conditional GAN generator - input is a face embedding plus an age condition,
#output is a 64x64 single-channel image
class Generator(nn.Module):
    '''
    Maps a 128-value face embedding and a 24-value age vector to an image.

    The concatenated input is encoded to 1024 values by two linear layers and
    then upsampled from 1x1 to 64x64 by five transposed convolutions; the final
    Tanh keeps the output between -1 and 1.
    '''
    def __init__(self):
        super().__init__()
        #Fully connected encoder for the (embedding, age) condition
        self.encoding = nn.Sequential(
            nn.Linear(128 + 24, 512),
            nn.ReLU(True),
            nn.Linear(512, 1024),
            nn.ReLU(True),
        )
        #Channel widths of the upsampling stages that are followed by BatchNorm + ReLU
        widths = [1024, 64 * 8, 64 * 4, 64 * 2, 64]
        layers = []
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:])):
            #First stage expands 1x1 to 4x4 (stride 1, no padding);
            #every later stage doubles the spatial size (stride 2, padding 1)
            stride, padding = (1, 0) if stage == 0 else (2, 1)
            layers.append(nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU(True))
        #Output stage: 32x32 -> 64x64, one channel
        layers.append(nn.ConvTranspose2d(64, 1, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())
        self.model = nn.Sequential(*layers)

    def forward(self, z, labels):
        '''
        z: face embeddings, viewable as (batch, 128)
        labels: age condition, viewable as (batch, 24)
        Returns images of shape (batch, 1, 64, 64).
        '''
        batch = z.size(0)
        embedding = z.view(batch, 128).float()
        condition = labels.view(labels.size(0), 24).float()
        joint = torch.cat([embedding, condition], 1) #batchsize x (128+24)
        encoded = self.encoding(joint)
        return self.model(encoded.view(batch, 1024, 1, 1))

In [0]:
#Create both networks and move them to the GPU
generator = Generator().cuda()
discriminator = Discriminator().cuda()

#Adversarial loss - binary cross entropy on the discriminator's prediction,
#used by both networks - pushes generated images towards looking realistic
criterion = nn.BCELoss()
#Similarity loss - L1 distance between generated and original image,
#used by the generator only
g_loss1 = nn.L1Loss()
#Adam optimizer with the same learning rate for each network
d_optimizer = torch.optim.Adam(
    discriminator.parameters(),
    lr=1e-4,
)
g_optimizer = torch.optim.Adam(
    generator.parameters(),
    lr=1e-4,
)

In [0]:
#Training of both generator and discriminator

#generator training
def generator_train_step(batch_size, discriminator, generator, g_optimizer, criterion, embeddings, labels, real_images, recon_criterion=None, recon_weight=100):
    '''
    Run one optimizer step on the generator.

    batch_size: kept for backward compatibility; the target tensor is shaped
                from the discriminator's prediction instead
    discriminator, generator: the two networks
    g_optimizer: optimizer over the generator's parameters
    criterion: loss applied to the discriminator's prediction
    embeddings, labels: face embeddings and age condition fed to the generator
    real_images: original images, viewable as (batch, 1, 64, 64)
    recon_criterion: image similarity loss; defaults to the module-level g_loss1
    recon_weight: multiplier of the similarity loss (lambda)
    Returns the total generator loss as a detached tensor.
    '''
    if recon_criterion is None:
        recon_criterion = g_loss1
    g_optimizer.zero_grad()
    #generate fake images from embeddings and age condition
    fake_images = generator(embeddings, labels)
    #get discriminator's prediction
    validity = discriminator(fake_images, labels)
    #Target 0 means "real" in this notebook: the generator wants its images
    #scored as real. zeros_like matches the prediction's shape and device.
    adversarial_loss = criterion(validity, torch.zeros_like(validity))
    target_images = real_images.view(real_images.shape[0], 1, 64, 64)
    similarity_loss = recon_criterion(fake_images, target_images)
    #overall loss = discriminator loss + lambda * image similarity loss
    g_loss = adversarial_loss + recon_weight * similarity_loss
    #Propagate loss
    g_loss.backward()
    g_optimizer.step()
    return g_loss.detach()

#discriminator training
def discriminator_train_step(batch_size, discriminator, generator, d_optimizer, criterion, real_images, embeddings, labels):
    '''
    Run one optimizer step on the discriminator.

    batch_size: kept for backward compatibility; the target tensors are shaped
                from the discriminator's predictions instead
    discriminator, generator: the two networks
    d_optimizer: optimizer over the discriminator's parameters
    criterion: loss applied to the discriminator's predictions
    real_images: batch of original images
    embeddings, labels: face embeddings and age condition fed to the generator
    Returns the discriminator loss as a detached tensor.
    '''
    d_optimizer.zero_grad()
    # train with real images - target 0 means "real"
    real_validity = discriminator(real_images, labels)
    real_loss = criterion(real_validity, torch.zeros_like(real_validity))
    # train with fake images - target 1 means "fake"
    # The generator only supplies inputs here; detach so that backward does
    # not run through the generator, which this step does not update
    fake_images = generator(embeddings, labels).detach()
    fake_validity = discriminator(fake_images, labels)
    fake_loss = criterion(fake_validity, torch.ones_like(fake_validity))

    #discriminator loss = how far away from 0 for real images + how far away from 1 for fake images
    d_loss = real_loss + fake_loss
    d_loss.backward()
    d_optimizer.step()
    return d_loss.detach()

In [0]:
#Overall GAN training
num_epochs = 1000
for epoch in range(num_epochs):
    print('Starting epoch {}...'.format(epoch))
    #Back to train mode: the sampling code at the end of every 10th epoch
    #switches the generator to eval mode
    generator.train()
    #get data one batch at a time
    for i, (images, faceembed, labels) in enumerate(data_loader):
        #shift to gpu
        real_images = images.cuda()
        labels = labels.cuda()
        faceembeddings = faceembed.cuda()

        batch_size = real_images.size(0)
        #Train discriminator
        d_loss = discriminator_train_step(batch_size, discriminator,
                                          generator, d_optimizer, criterion,
                                          real_images, faceembeddings, labels)
        #Train generator - since discriminator reaches 0 loss quickly, every step has more generator training
        for _ in range(5):
            #5 steps of generator to one step of discriminator
            g_loss = generator_train_step(batch_size, discriminator, generator, g_optimizer, criterion, faceembeddings, labels, real_images)

    #Losses of the last batch of the epoch only
    print('g_loss: {}, d_loss: {}'.format(g_loss, d_loss))
    if epoch%10==0:
        #switch to evaluate model mode to display results
        generator.eval()
        #Pick random image from dataset
        index = random.randrange(len(embed))
        z = embed[index].view(1, 128).cuda()
        real = faces[index]
        #Fixed age condition: one-hot vector for age bin 4 (ages 20-24 with 5-year bins)
        sample_label = torch.zeros(1, 24).cuda()
        sample_label[0][4] = 1
        #No gradients are needed for a preview image
        with torch.no_grad():
            sample_images = generator(z, sample_label).cpu()
        #Generator output is (1, 1, 64, 64); view it as a plain 64x64 image
        sample_images = sample_images.view(64, 64).numpy() * 255
        #Display generated image, then the original face the embedding came from
        plt.imshow(sample_images, cmap="gray")
        plt.show()
        plt.imshow(real, cmap="gray")
        plt.show()
        #Save generator model weights in drive to use later
        torch.save(generator.state_dict(), "/content/gdrive/My Drive/generator.pt")