論文  
https://arxiv.org/abs/2201.13433  
  
GitHub  
https://github.com/yuval-alaluf/stylegan3-editing  
  
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/StyleGAN3_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ランタイムの設定
「ランタイム」→「ランタイムのタイプを変更」→「ハードウェアアクセラレータ」をGPUに変更

# 実行方法
「ランタイム」→「すべてのセルを実行」を選択

# GPU確認

In [None]:
# Print the GPUs visible to this runtime to confirm the GPU accelerator is enabled.
!nvidia-smi

# 環境セットアップ

## ライブラリのインポート

In [None]:
import os
from pathlib import Path

## GitHubからコードを取得

In [None]:
# Work from the Colab content root so the repository is cloned into /content.
%cd /content

# Directory name of the cloned repository; later cells build paths from it.
CODE_DIR = 'stylegan3-editing'
!git clone https://github.com/yuval-alaluf/stylegan3-editing

## ライブラリのインストール

In [None]:
# Download the ninja v1.8.2 binary, unpack it into /usr/local/bin and register
# it as the /usr/bin/ninja alternative.
# NOTE(review): presumably required to build the repository's custom ops - confirm.
!wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
!sudo unzip ninja-linux.zip -d /usr/local/bin/
!sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force

# Python dependencies: pyrallis from PyPI and OpenAI CLIP straight from GitHub.
!pip install pyrallis
!pip install git+https://github.com/openai/CLIP.git

## ライブラリのインポート

In [None]:
%cd /content/{CODE_DIR}

import time
import sys
import pprint
import numpy as np
from PIL import Image
import dataclasses
import torch
import torchvision.transforms as transforms

sys.path.append(".")
sys.path.append("..")

from editing.interfacegan.face_editor import FaceEditor
from editing.styleclip_global_directions import edit as styleclip_edit
from models.stylegan3.model import GeneratorType
from notebooks.notebook_utils import Downloader, ENCODER_PATHS, INTERFACEGAN_PATHS, STYLECLIP_PATHS
from notebooks.notebook_utils import run_alignment, crop_image, compute_transforms
from utils.common import tensor2im
from utils.inference_utils import run_on_batch, load_encoder, get_average_image

%load_ext autoreload
%autoreload 2

# 学習済みモデルのセットアップ

In [None]:
## Downloaderの設定

In [None]:
# Colab form toggle forwarded to Downloader(use_pydrive=...).
download_with_pydrive = False #@param {type:"boolean"}
# Downloader targeting the "pretrained_models" sub-directory of the cloned repo.
# NOTE(review): exact destination layout is defined by notebook_utils.Downloader - confirm.
downloader = Downloader(code_dir=CODE_DIR,
                        use_pydrive=download_with_pydrive,
                        subdir="pretrained_models")

In [None]:
#@markdown 学習済みモデルの選択
# Name of the ReStyle encoder to use; it is the lookup key for both
# EXPERIMENT_DATA_ARGS and ENCODER_PATHS in the following cells.
experiment_type = 'restyle_pSp_ffhq' #@param ['restyle_e4e_ffhq', 'restyle_pSp_ffhq']

## 推論時パラメータ設定

In [None]:
# Per-experiment settings. Both ReStyle encoders use the same sample image and
# the same preprocessing; only the checkpoint file name differs.
EXPERIMENT_DATA_ARGS = {
    name: {
        "model_path": f"./pretrained_models/{name}.pt",
        "image_path": "./notebooks/images/face_image.jpg",
        # Resize to 256x256, convert to a tensor, then normalize every channel
        # with mean 0.5 and std 0.5.
        "transform": transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]),
    }
    for name in ("restyle_pSp_ffhq", "restyle_e4e_ffhq")
}

# Settings of the encoder selected through `experiment_type`.
EXPERIMENT_ARGS = EXPERIMENT_DATA_ARGS[experiment_type]

## 学習済みモデルのダウンロード
選択した学習済みモデルを `/content/stylegan3-editing/pretrained_models/` にダウンロードします（例: `restyle_pSp_ffhq.pt`）。

In [None]:
# A checkpoint smaller than this is treated as a failed or partial download
# (e.g. an error page saved in place of the model).
MIN_MODEL_SIZE_BYTES = 1000000


def _model_is_downloaded(path):
    """Return True when *path* exists and is at least MIN_MODEL_SIZE_BYTES large."""
    return os.path.exists(path) and os.path.getsize(path) >= MIN_MODEL_SIZE_BYTES


# Download the selected ReStyle encoder unless a plausible checkpoint is already on disk.
if _model_is_downloaded(EXPERIMENT_ARGS['model_path']):
    print(f'Model for {experiment_type} already exists!')
else:
    print(f'Downloading ReStyle encoder model: {experiment_type}...')
    try:
        downloader.download_file(file_id=ENCODER_PATHS[experiment_type]['id'],
                                 file_name=ENCODER_PATHS[experiment_type]['name'])
    except Exception as e:
        # Chain the original exception so the real cause stays in the traceback.
        raise ValueError(f"Unable to download model correctly! {e}") from e
    # if google drive receives too many requests, we'll reach the quota limit and be unable to download the model
    # A missing file is reported with the same clear error instead of a raw FileNotFoundError.
    if not _model_is_downloaded(EXPERIMENT_ARGS['model_path']):
        raise ValueError("Pretrained model was unable to be downloaded correctly!")
    print('Done.')


## 学習済みモデルのロード

In [None]:
# Load the encoder checkpoint; load_encoder returns the network together with
# the options object used by the inversion cells.
model_path = EXPERIMENT_ARGS['model_path']
net, opts = load_encoder(checkpoint_path=model_path)
# opts is a dataclass instance (dataclasses.asdict accepts it); dump it for reference.
pprint.pprint(dataclasses.asdict(opts))

# テスト画像のアップロード
縦・横同じサイズの画像をアップロードしてください。

In [None]:
%cd /content/{CODE_DIR}

!mkdir upload_images
%cd upload_images
from google.colab import files
uploaded = files.upload()
uploaded = list(uploaded.keys())
dir_path = "/content/" + CODE_DIR + "/upload_images"
image_path = Path( os.path.join(dir_path, uploaded[0]) )

#image_path = Path(EXPERIMENT_DATA_ARGS[experiment_type]["image_path"]) # ウィル・スミス

original_image = Image.open(image_path).convert("RGB")
original_image = original_image.resize((256, 256))
original_image

# 顔画像の下処理

## 顔部分の整列(alignment)、クロップ(crop)

In [None]:
%cd /content/{CODE_DIR}

import dlib
from utils.alignment_utils import align_face


# Fetch and decompress dlib's 68-point landmark model only when it is not present yet.
if not os.path.exists("./shape_predictor_68_face_landmarks.dat"):
  !wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
  !bzip2 -d shape_predictor_68_face_landmarks.dat.bz2

predictor = dlib.shape_predictor("./shape_predictor_68_face_landmarks.dat")
detector = dlib.get_frontal_face_detector()
print("Aligning image...")
# align_face is called directly here; the run_alignment helper it replaces is
# kept commented out below.
input_image = align_face(filepath=str(image_path), detector=detector, predictor=predictor)

#input_image = run_alignment(image_path)
cropped_image = crop_image(image_path)
# Show the aligned and the cropped face side by side, each resized to 256x256.
joined = np.concatenate([input_image.resize((256, 256)), cropped_image.resize((256, 256))], axis=1)
Image.fromarray(joined)

## LandmarkベースのTransform

In [None]:
# compute_transforms takes file paths, so the cropped and aligned images are
# written to ./images first (the directory is created if needed).
images_dir = Path("./images")
images_dir.mkdir(exist_ok=True, parents=True)
cropped_path = images_dir / f"cropped_{image_path.name}"
aligned_path = images_dir / f"aligned_{image_path.name}"
cropped_image.save(cropped_path)
input_image.save(aligned_path)
# Transform derived from the aligned/cropped pair; it is passed to the
# inversion and editing cells as `landmarks_transform`.
landmarks_transform = compute_transforms(aligned_path=aligned_path, cropped_path=cropped_path)

# Inversion

In [None]:
# Colab form value copied onto the encoder options used by run_on_batch.
# NOTE(review): presumably the number of ReStyle refinement steps per image - confirm.
n_iters_per_batch =  10#@param {type:"integer"}
opts.n_iters_per_batch = n_iters_per_batch
opts.resize_outputs = False  # generate outputs at full resolution

In [None]:
# Preprocess the aligned face with the transform configured for the selected experiment.
img_transforms = EXPERIMENT_ARGS['transform']
transformed_image = img_transforms(input_image)

avg_image = get_average_image(net)

# Inference only, so autograd bookkeeping is disabled for the whole run.
with torch.no_grad():
    tic = time.time()
    # Add a batch dimension and move the image and the landmark transform to
    # the GPU as float tensors (kept inside the timed section, as before).
    batch_inputs = transformed_image.unsqueeze(0).cuda().float()
    transform_tensor = torch.from_numpy(landmarks_transform).cuda().float()
    result_batch, result_latents = run_on_batch(
        inputs=batch_inputs,
        net=net,
        opts=opts,
        avg_image=avg_image,
        landmarks_transform=transform_tensor,
    )
    toc = time.time()
    print(f'Inference took {toc - tic:.4f} seconds.')

In [None]:
def get_coupled_results(result_batch, cropped_image):
    """Return the cropped input and the final reconstruction side by side.

    Reads the notebook-level ``opts``: both halves are resized to 256x256 when
    ``opts.resize_outputs`` is set, otherwise to ``opts.output_size`` squared.

    Args:
        result_batch: Outputs of ``run_on_batch``; only the first batch entry
            is used and its last element is taken as the reconstruction.
        cropped_image: PIL image shown on the left.

    Returns:
        A PIL image with the input on the left and the reconstruction on the right.
    """
    per_image_outputs = result_batch[0]  # the batch holds a single image
    if opts.resize_outputs:
        target_size = (256, 256)
    else:
        target_size = (opts.output_size, opts.output_size)
    reconstruction = tensor2im(per_image_outputs[-1]).resize(target_size)
    source = cropped_image.resize(target_size)
    side_by_side = np.concatenate([np.array(source), np.array(reconstruction)], axis=1)
    return Image.fromarray(side_by_side)

res = get_coupled_results(result_batch, cropped_image)
res.resize((1024, 512))

# 編集
Inversionで得られた潜在コードを編集

## boundaries, styleclipのダウンロード

In [None]:
# Colab form toggle forwarded to Downloader(use_pydrive=...).
download_with_pydrive = False #@param {type:"boolean"}

# download files for interfacegan
# `downloader` is rebound here: this instance targets the InterFaceGAN boundary directory.
downloader = Downloader(code_dir=CODE_DIR,
                        use_pydrive=download_with_pydrive,
                        subdir="editing/interfacegan/boundaries/ffhq")
print("Downloading InterFaceGAN boundaries...")
# Each INTERFACEGAN_PATHS entry provides the file id and the local file name.
for editing_file, params in INTERFACEGAN_PATHS.items():
    print(f"Downloading {editing_file} boundary...")
    downloader.download_file(file_id=params['id'],
                             file_name=params['name'])

# download files for styleclip
# Rebound again: from here on `downloader` targets the StyleCLIP directory.
downloader = Downloader(code_dir=CODE_DIR,
                        use_pydrive=download_with_pydrive,
                        subdir="editing/styleclip_global_directions/sg3-r-ffhq-1024")
print("Downloading StyleCLIP auxiliary files...")
for editing_file, params in STYLECLIP_PATHS.items():
    print(f"Downloading {editing_file}...")
    downloader.download_file(file_id=params['id'],
                             file_name=params['name'])

## 編集パラメータ設定

In [None]:
# InterFaceGAN editor driving the encoder's StyleGAN decoder.
editor = FaceEditor(stylegan_generator=net.decoder, generator_type=GeneratorType.ALIGNED)

#@markdown 編集パラメータ設定
# Direction to edit along; the value is passed to editor.edit(direction=...).
edit_direction = 'Male' #@param ['age', 'smile', 'pose', 'Male']
# Lower and upper end of the factor range passed to editor.edit(factor_range=...).
min_value = -5 #@param {type:"slider", min:-10, max:10, step:1}
max_value = 5 #@param {type:"slider", min:-10, max:10, step:1}

## 編集実行

In [None]:
%%time

print(f"Performing edit for {edit_direction}...")
# Take the last latent of the first (only) image from the inversion result,
# add a batch dimension and move it to the GPU.
input_latent = torch.from_numpy(result_latents[0][-1]).unsqueeze(0).cuda()
# The landmark transform is passed as user_transforms with
# apply_user_transformations enabled.
edit_images, edit_latents = editor.edit(latents=input_latent,
                                        direction=edit_direction,
                                        factor_range=(min_value, max_value),
                                        user_transforms=landmarks_transform,
                                        apply_user_transformations=True)
print("Done!")

## 編集結果表示

In [None]:
def prepare_edited_result(edit_images):
    """Tile the edited images horizontally into one RGB strip.

    Args:
        edit_images: Non-empty sequence of PIL images, or of lists whose first
            element is the PIL image to show.

    Returns:
        A PIL RGB image of height 512 and width ``512 * len(edit_images)``.

    Raises:
        IndexError: If ``edit_images`` is empty.
    """
    # Entries may be wrapped in a list; keep only the first image of each.
    if isinstance(edit_images[0], list):
        edit_images = [image[0] for image in edit_images]
    # Convert every tile once and concatenate in a single call instead of
    # re-copying the growing strip on each iteration.
    tiles = [np.array(image.resize((512, 512))) for image in edit_images]
    return Image.fromarray(np.concatenate(tiles, axis=1)).convert("RGB")

res = prepare_edited_result(edit_images)
res

## 動画にまとめる

In [None]:
%cd /content/{CODE_DIR}
!rm -rf videos
!mkdir -p videos/frames

def create_video(edit_images):
  if type(edit_images[0]) == list:
    edit_images = [image[0] for image in edit_images]
  res = np.array(edit_images[0].resize((512, 512)))
  for i, image in enumerate( edit_images[1:] ):
    work_dir = "/content/" + CODE_DIR
    save_dir = os.path.join(work_dir, "videos/frames")
    filename = edit_direction + "_" + str(i) + ".png"
    filename = os.path.join(save_dir, filename)
    resize_img = image.resize((512, 512))
    resize_img.save(filename)

  framename = edit_direction + "_%d.png"
  framename = os.path.join(save_dir, framename)
  dst_video = os.path.join(work_dir, "videos/result.mp4")
  !!ffmpeg -i {framename} -c:v libx264 -vf "fps=25,format=yuv420p" {dst_video}

create_video(edit_images)

# StyleCLIP

In [None]:
# Default StyleCLIP configuration, used only to build the direction calculator
# for the encoder's StyleGAN decoder.
styleclip_args = styleclip_edit.EditConfig()
global_direction_calculator = styleclip_edit.load_direction_calculator(stylegan_model=net.decoder, opts=styleclip_args)

## 編集パラメータ設定
`neutral_text` はデフォルト値（"a face"）のままにすることを推奨します。

In [None]:
# Text prompts copied onto the StyleCLIP config as neutral_text / target_text.
neutral_text = "a face" #@param {type:"raw"}
target_text = "a smiling face" #@param {type:"raw"}
# Single alpha value used for both alpha_min and alpha_max in the edit cell.
alpha = 4 #@param {type:"slider", min:-5, max:5, step:0.5}
# Single beta value used for both beta_min and beta_max in the edit cell.
# NOTE(review): 0.13 is not a multiple of the slider step 0.1, so the slider
# cannot be moved back to this default - confirm this is intended.
beta = 0.13 #@param {type:"slider", min:-1, max:1, step:0.1}

## 編集実行

In [None]:
%%time

# Use a dedicated name for the StyleCLIP configuration: rebinding the global
# `opts` would replace the encoder options that the inversion cells read and
# break them when they are re-run after this cell.
styleclip_opts = styleclip_edit.EditConfig()
# Collapse the alpha and beta ranges to the single values chosen in the form.
styleclip_opts.alpha_min = alpha
styleclip_opts.alpha_max = alpha
styleclip_opts.num_alphas = 1
styleclip_opts.beta_min = beta
styleclip_opts.beta_max = beta
styleclip_opts.num_betas = 1
styleclip_opts.neutral_text = neutral_text
styleclip_opts.target_text = target_text

# Last latent of the first (only) image from the inversion result.
input_latent = result_latents[0][-1]
# landmarks_transform is already a NumPy array (the inversion cell feeds it to
# torch.from_numpy), so no torch round trip is needed.
input_transforms = landmarks_transform
print(f'Performing edit for: "{styleclip_opts.target_text}"...')
edit_res, edit_latent = styleclip_edit.edit_image(latent=input_latent,
                                                  landmarks_transform=input_transforms,
                                                  stylegan_model=net.decoder,
                                                  global_direction_calculator=global_direction_calculator,
                                                  opts=styleclip_opts,
                                                  image_name=None,
                                                  save=False)
print("Done!")

In [None]:
# Convert the preprocessed input and the first edit result back to PIL images
# at 512x512 each.
input_im = tensor2im(transformed_image).resize((512, 512))
edited_im = tensor2im(edit_res[0]).resize((512, 512))
# Input on the left, edited result on the right.
edit_coupled = np.concatenate([np.array(input_im), np.array(edited_im)], axis=1)
edit_coupled = Image.fromarray(edit_coupled)
# Last expression of the cell, so the notebook displays the 1024x512 image.
edit_coupled.resize((1024, 512))