# Setup

In [None]:
# Notebook-wide settings

# Root folder for everything this notebook persists
STORAGE_DIR = "/content/drive/MyDrive/ifgan_workdir"
# Sub-folder of STORAGE_DIR that receives the generated images
IMAGE_DIR = "{}/stylegan_generated".format(STORAGE_DIR)

# Folder the git repositories get cloned into
WORK_DIR = "/content"

# Number of images to request from StyleGAN
NUMBER_OF_IMAGES = 10000

In [None]:
# Mount Google Drive at /content/drive.
# Only required when STORAGE_DIR points into the Drive, which the default
# value ("/content/drive/MyDrive/...") does.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Use old version of tensorflow supported by StyleGAN1.
# The version magic is issued before the import so that the import picks up
# the selected 1.x build.
# NOTE(review): `%tensorflow_version` is a Colab-specific magic; confirm the
# runtime in use still provides it.
%tensorflow_version 1.x
import tensorflow as tf

In [None]:
# Clone repositories into WORK_DIR.
# Any existing checkout is deleted first, so re-running this cell always ends
# with a fresh clone (local changes inside the checkouts are lost).
%cd {WORK_DIR}

# img2pose: used by the pose-estimation cell
!rm -rf img2pose
!git clone https://github.com/vitoralbiero/img2pose

# interfacegan: used for image generation, boundary training and editing
!rm -rf interfacegan
!git clone https://github.com/genforce/interfacegan


In [None]:
# Get stylegan model for interfacegan
%cd {WORK_DIR}/interfacegan
!rm -f models/pretrain/stylegan_ffhq.pth

!wget https://www.dropbox.com/s/qyv37eaobnow7fu/stylegan_ffhq.pth?dl=1 -O models/pretrain/stylegan_ffhq.pth

In [None]:
# img2pose setup: run the Sim3DR build script from inside its own folder,
# then step back up to the img2pose repository root.
%cd {WORK_DIR}/img2pose/Sim3DR/

!sh build_sim3dr.sh
%cd ..

# Generate images using interfacegan

In [None]:
# Run interfacegan's generate_data.py with the stylegan_ffhq model, asking for
# NUMBER_OF_IMAGES images written to IMAGE_DIR. The pose-estimation cell later
# reads the `.jpg` files plus `z.npy` and `w.npy` from that folder.
%cd {WORK_DIR}/interfacegan
!python generate_data.py -m stylegan_ffhq -o {IMAGE_DIR} -n {NUMBER_OF_IMAGES}

# Run pose estimation

In [None]:
# This is a modified script based on: https://github.com/vitoralbiero/img2pose/blob/main/evaluation/jupyter_notebooks/test_own_images.ipynb
# Work from the img2pose checkout: the relative `pose_references/...` paths
# used further down are resolved against this directory, and presumably so are
# the repo-local imports (utils.*, img2pose, model_loader).
%cd {WORK_DIR}/img2pose

import sys
import numpy as np
import torch
from torchvision import transforms
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
from PIL import Image, ImageOps
import matplotlib.patches as patches
from scipy.spatial.transform import Rotation
import pandas as pd
from scipy.spatial import distance
import time
import os
import math
import scipy.io as sio
from utils.renderer import Renderer
from utils.image_operations import expand_bbox_rectangle
from utils.pose_operations import get_pose
from img2pose import img2poseModel
from model_loader import load_model

# Print small floats in plain notation instead of scientific notation
np.set_printoptions(suppress=True)

# Create the renderer for visualization.
# NOTE(review): `renderer` is not referenced again in the visible notebook, so
# this step can probably be skipped - confirm before removing.
renderer = Renderer(
    vertices_path="pose_references/vertices_trans.npy", 
    triangles_path="pose_references/triangles.npy"
)

# Load the 3D reference points that are handed to the model as `threed_68_points`
threed_points = np.load('pose_references/reference_3d_68_points_trans.npy')

# Converts a PIL image into a tensor before it is passed to `predict`
transform = transforms.Compose([transforms.ToTensor()])

# Positional constructor arguments of img2poseModel
# (presumably backbone depth and min/max input size - TODO confirm against img2pose)
DEPTH = 18
MAX_SIZE = 1400
MIN_SIZE = 600

# Pretrained img2pose files. This notebook does not download them; they have to
# be present under STORAGE_DIR/img2pose_models before this cell is run.
POSE_MEAN = STORAGE_DIR + "/img2pose_models/WIDER_train_pose_mean_v1.npy"
POSE_STDDEV = STORAGE_DIR + "/img2pose_models/WIDER_train_pose_stddev_v1.npy"
MODEL_PATH = STORAGE_DIR + "/img2pose_models/img2pose_v1.pth"

pose_mean = np.load(POSE_MEAN)
pose_stddev = np.load(POSE_STDDEV)

img2pose_model = img2poseModel(
    DEPTH, MIN_SIZE, MAX_SIZE, 
    pose_mean=pose_mean, pose_stddev=pose_stddev,
    threed_68_points=threed_points,
)
# Load the weights from MODEL_PATH; cpu_mode is switched on when the model reports "cpu" as its device
load_model(img2pose_model.fpn_model, MODEL_PATH, cpu_mode=str(img2pose_model.device) == "cpu", model_only=True)
img2pose_model.evaluate()

# Load the latent vectors of every generated image. Only the rows that belong
# to images with a usable pose estimate are carried over to the outputs.
latent_vectors_in_z = np.load(os.path.join(IMAGE_DIR, "z.npy"))
latent_vectors_in_w = np.load(os.path.join(IMAGE_DIR, "w.npy"))

# Run the estimation

# Minimum detection score a face needs before its pose is used
threshold = 0.9

# Sorted so the row order of the resulting arrays is reproducible between runs
# (os.listdir returns the entries in arbitrary order).
img_paths = sorted(os.path.join(IMAGE_DIR, img_path) for img_path in os.listdir(IMAGE_DIR))

# Uncomment to test quickly on a small subset (99 directory entries)
# img_paths = img_paths[1:100]

# Results are collected in lists and converted to arrays once after the loop;
# np.append copies the whole array on every call, which is quadratic overall.
kept_directions = []
kept_latents_z = []
kept_latents_w = []
kept_poses = []

for img_path in tqdm(img_paths):
  # IMAGE_DIR also contains non-image files (z.npy, w.npy, ...): skip them
  if not img_path.endswith(".jpg"):
    continue

  # Close the file handle as soon as the pixels have been decoded
  with Image.open(img_path) as img_file:
    img = img_file.convert("RGB")

  res = img2pose_model.predict([transform(img)])[0]

  # Convert the pose predictions once per image instead of once per detected box
  all_dofs = res["dofs"].cpu().numpy()
  num_faces = len(res["boxes"])

  # The file name (without extension) is the row index of this image in z.npy / w.npy
  image_name = os.path.split(img_path)[1]
  image_number = int(os.path.splitext(image_name)[0])

  # Pick the highest-scoring detection above the threshold (first one wins on ties)
  best_score = 0
  best_pose_pred = None
  for i in range(num_faces):
    current_score = res["scores"][i]
    if current_score > threshold and current_score > best_score:
      best_pose_pred = all_dofs[i].astype('float').squeeze()
      best_score = current_score

  # No sufficiently confident face: drop the image together with its latents
  if best_pose_pred is None:
    continue

  # Binary label derived from the sign of the first pose component (the pitch):
  # 0 for pitch >= 0, 1 for pitch < 0
  kept_directions.append(0 if best_pose_pred[0] >= 0 else 1)
  kept_latents_z.append(latent_vectors_in_z[image_number])
  kept_latents_w.append(latent_vectors_in_w[image_number])
  kept_poses.append(best_pose_pred)

# Same names, 2-D shapes and float64 dtype as before, including the
# "nothing was accepted" case where every array has zero rows.
ifgan_data = np.asarray(kept_directions, dtype=np.float64).reshape(-1, 1)
all_angles = np.asarray(kept_poses, dtype=np.float64).reshape(-1, 6)
latent_vectors_out_z = np.asarray(kept_latents_z, dtype=np.float64).reshape(-1, latent_vectors_in_z.shape[1])
latent_vectors_out_w = np.asarray(kept_latents_w, dtype=np.float64).reshape(-1, latent_vectors_in_w.shape[1])

# Write the results of the pose estimation to STORAGE_DIR.
# All four arrays hold one row per accepted image, in the same order.
pose_outputs = [
  # Binary scores used to create boundaries
  ("/face_pitch_scores.npy", ifgan_data),
  # z space and w space latent vectors (same number of rows as the scores)
  ("/face_pitch_latents_z.npy", latent_vectors_out_z),
  ("/face_pitch_latents_w.npy", latent_vectors_out_w),
  # Every angle and translation predicted by img2pose (for stats and later use)
  ("/all_face_angles.npy", all_angles),
]

for file_suffix, array in pose_outputs:
  np.save(STORAGE_DIR + file_suffix, array)

# Create boundaries

In [None]:
# Pitch thresholds in degrees (0 to 20 in steps of 5) used when filtering the
# samples and when generating boundaries
degree_thresholds = list(range(0, 21, 5))


In [None]:
# Switch to the interfacegan checkout. Every file accessed in this cell is
# addressed through STORAGE_DIR, so this only keeps the working directory in
# line with the boundary-training cells that follow.
%cd {WORK_DIR}/interfacegan

import numpy as np
import pandas
import math
from IPython.display import Image

def ShowTable(data):
  """Print `data` to stdout, formatted as a pandas DataFrame."""
  table = pandas.DataFrame(data)
  print(table)

# Read back the arrays written by the pose-estimation cell
scores = np.load(STORAGE_DIR + "/face_pitch_scores.npy")
latent_vectors_z = np.load(STORAGE_DIR + "/face_pitch_latents_z.npy")
latent_vectors_w = np.load(STORAGE_DIR + "/face_pitch_latents_w.npy")
face_angles = np.load(STORAGE_DIR + "/all_face_angles.npy")

# First column of the predicted pose is used as the pitch
# (treated as radians: it is converted with math.degrees below)
pitches = face_angles[:, 0]

# Sample counts on each side of zero pitch
print("Total looking up: ", (pitches < 0).sum())
print("Total looking down: ", (pitches >= 0).sum())

# Mean pitch, printed in the stored unit
print(pitches.mean())

# Extremes of the pitch, converted to degrees
print("Max angle (down): ", math.degrees(pitches.max()))
print("Min angle (up)  : ", math.degrees(pitches.min()))

# Filter samples based on thresholds and create separate files.
# For every threshold (in degrees) only the samples whose absolute pitch is
# strictly larger than the threshold are kept; scores and both latent arrays
# are filtered with the same row mask so they stay aligned.
abs_pitches = np.abs(face_angles[:, 0])

for threshold in degree_thresholds:
  # One boolean mask per threshold instead of growing arrays row by row with
  # np.append (which copies the whole array on every call)
  keep = abs_pitches > math.radians(threshold)

  filtered_scores = scores[keep]
  filtered_latent_z = latent_vectors_z[keep]
  filtered_latent_w = latent_vectors_w[keep]

  # The pose-estimation cell stores score 0 for pitch >= 0 and score 1 for
  # pitch < 0, and the statistics printed earlier in this cell call pitch < 0
  # "up" and pitch >= 0 "down". The labels below follow that convention.
  print("Threshold: ", str(threshold))
  print("Samples:   " + str(len(filtered_scores)))
  print("Looking up:  " + str(np.sum(filtered_scores == 1)))
  print("Looking down:  " + str(np.sum(filtered_scores == 0)))

  base_filepath = STORAGE_DIR + "/filter_" + str(threshold)
  np.save(base_filepath + "_face_pitch_scores.npy", filtered_scores)
  np.save(base_filepath + "_face_pitch_latents_z.npy", filtered_latent_z)
  np.save(base_filepath + "_face_pitch_latents_w.npy", filtered_latent_w)


In [None]:
# Generate boundaries for each threshold

%cd {WORK_DIR}/interfacegan

OUT_DIR = "boundaries/stylegan_ffqh_pitch"

!rm -rf {OUT_DIR}

for threshold in degree_thresholds:
  !python train_boundary.py \
      -o {OUT_DIR}_{threshold} \
      -c {STORAGE_DIR}/filter_{threshold}_face_pitch_latents_w.npy \
      -s {STORAGE_DIR}/filter_{threshold}_face_pitch_scores.npy \
      -n 0.2



# Test boundary

In [None]:
import numpy as np
import random
import os

# How many latent codes to pull for the editing test
num_samples = 5
all_w_samples = np.load(os.path.join(IMAGE_DIR, "w.npy"))

# Pull some random W samples (drawn independently, so repeats are possible).
# The index range and the latent width come from the array that was actually
# loaded rather than from NUMBER_OF_IMAGES / a fixed 512, so a setting changed
# after generation can neither index out of range nor ignore part of the data.
# The generator is not seeded: every run picks different samples.
sample_indices = [random.randrange(len(all_w_samples)) for _ in range(num_samples)]
my_w_samples = all_w_samples[sample_indices].astype(np.float64)

print(my_w_samples)
np.save(STORAGE_DIR + "/random_w_samples.npy", my_w_samples)

In [None]:
# Generate images based on all samples from the previous step, and all thresholds.
# For every threshold, edit.py is run with that threshold's boundary on the
# saved random W samples, moving from distance -2.0 to 2.0 in 10 steps.
# Results are written to STORAGE_DIR/pitch_editing_degthresh_<threshold>;
# the image-combining cell reads 10 images per sample from those folders.
%cd {WORK_DIR}/interfacegan

for threshold in degree_thresholds:
  !python edit.py \
      -m stylegan_ffhq \
      -b boundaries/stylegan_ffqh_pitch_{threshold}/boundary.npy \
      -i {STORAGE_DIR}/random_w_samples.npy \
      -o {STORAGE_DIR}/pitch_editing_degthresh_{threshold} \
      -s 'w' \
      --start_distance -2.0 \
      --end_distance 2.0 \
      --steps 10


In [None]:
# Combine generated images into 1 for each sample:
# one row per threshold, one column per editing step.
from PIL import Image

tile_size = 100  # edge length in pixels of every thumbnail
num_steps = 10   # has to match the `--steps` value passed to edit.py

# The sheet dimensions and the number of sheets follow num_samples and
# degree_thresholds, so changing either no longer drops samples or rows.
for subject in range(num_samples):
  blank_image = Image.new("RGB", (num_steps * tile_size, len(degree_thresholds) * tile_size))

  for idy, threshold in enumerate(degree_thresholds):
    fname = STORAGE_DIR + "/pitch_editing_degthresh_" + str(threshold)
    for idx in range(num_steps):
      # File names are <sample index>_<step index>.jpg, both zero-padded to 3 digits
      iname = fname + "/" + str(subject).zfill(3) + "_" + str(idx).zfill(3) + ".jpg"
      # The context manager closes each file once its thumbnail has been pasted
      with Image.open(iname) as image:
        blank_image.paste(image.resize((tile_size, tile_size)), (idx * tile_size, idy * tile_size))

  display(blank_image)
