GitHub  
https://github.com/sberbank-ai/sber-swap/tree/main/models  
論文  
https://habr.com/ru/company/sberbank/blog/645919/  

<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/sber_swap_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 環境セットアップ

## GPUの確認

In [None]:
# Print the GPU status report and the CUDA compiler version so the runtime
# can be checked before installing the CUDA-dependent packages.
!nvidia-smi
!nvcc --version

## GitHubからコードをクローン

In [None]:
# Clone the sber-swap repository under /content and make the checkout the
# working directory; the download and model-loading cells use paths relative
# to the repository root.
%cd /content/
!git clone https://github.com/sberbank-ai/sber-swap.git
%cd sber-swap

## 学習済みモデルのダウンロード

In [None]:

# Download the ArcFace files (backbone weights and the iresnet model definition)
!wget -P ./arcface_model https://github.com/sberbank-ai/sber-swap/releases/download/arcface/backbone.pth
!wget -P ./arcface_model https://github.com/sberbank-ai/sber-swap/releases/download/arcface/iresnet.py

# Download the landmarks detector ONNX models into the 'antelope' model directory
!wget -P ./insightface_func/models/antelope https://github.com/sberbank-ai/sber-swap/releases/download/antelope/glintr100.onnx
!wget -P ./insightface_func/models/antelope https://github.com/sberbank-ai/sber-swap/releases/download/antelope/scrfd_10g_bnkps.onnx

# Download the SberSwap weights (loaded into AEI_Net by the model-build cell)
!wget -P ./weights https://github.com/sberbank-ai/sber-swap/releases/download/sber-swap-v2.0/G_unet_2blocks.pth

# Download the super-resolution weights
!wget -P ./weights https://github.com/sberbank-ai/sber-swap/releases/download/super-res/10_net_G.pth

## ライブラリのインストール

In [None]:
# Install the third-party packages the notebook depends on.
# NOTE(review): mxnet-cu101mkl is the only unpinned package and is presumably
# the CUDA 10.1 build — confirm it matches the CUDA version of the runtime.
!pip install mxnet-cu101mkl
!pip install onnxruntime-gpu==1.8
!pip install insightface==0.2.1
!pip install kornia==0.5.4

# ライブラリのインポート

In [None]:
import cv2
import torch
import time
import os

from utils.inference.image_processing import crop_face, get_final_image, show_images
from utils.inference.video_processing import read_video, get_target, get_final_video, add_audio_from_another_video, face_enhancement
from utils.inference.core import model_inference

from network.AEI_Net import AEI_Net
from coordinate_reg.image_infer import Handler
from insightface_func.face_detect_crop_multi import Face_detect_crop
from arcface_model.iresnet import iresnet100
from models.pix2pix_model import Pix2PixModel
from models.config_sr import TestOptions

from google.colab import files

# Modelのbuild, load

In [None]:
# Face detector / cropper, reading its models from the 'antelope' directory
app = Face_detect_crop(
    name='antelope',
    root='./insightface_func/models',
)
app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640))

# Build the swap network, load its weights onto the CPU first, then move the
# network to the GPU and convert it to half precision
G = AEI_Net(backbone='unet', num_blocks=2, c_id=512)
G.eval()
G.load_state_dict(
    torch.load(
        'weights/G_unet_2blocks.pth',
        map_location=torch.device('cpu'),
    )
)
G = G.cuda().half()

# ArcFace network used for the face embedding (kept in full precision)
netArc = iresnet100(fp16=False)
netArc.load_state_dict(torch.load('arcface_model/backbone.pth'))
netArc = netArc.cuda()
netArc.eval()

# Face landmarks handler
handler = Handler(
    './coordinate_reg/model/2d106det',
    0,
    ctx_id=0,
    det_size=640,
)

# use_sr=True -> apply super resolution (upscaling) to the swapped faces
use_sr = True
if use_sr:
    # NOTE(review): CUDA has already been used above, so setting
    # CUDA_VISIBLE_DEVICES here may have no effect — confirm.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    torch.backends.cudnn.benchmark = True
    opt = TestOptions()
    # Alternative checkpoint, left disabled: opt.which_epoch = '10_7'
    model = Pix2PixModel(opt)
    # NOTE(review): the network is put in train mode although it is only used
    # for inference — presumably intentional; confirm before changing.
    model.netG.train()

# FaceSwap画像セットアップ

In [None]:
%cd /content/sber-swap

#@markdown ファイルをアップロードするか、  
#@markdown サンプルを使用するか選択してください。
#@markdown アップデートする場合初めに使用したい顔の画像をアップロードし、
#@markdown 次に顔を当てはめたい画像、または動画をアップロードしてください。
setting_type = 'upload' #@param ["upload", "sample"]

#@markdown videoか、imageのどちらの顔を交換するか選択してください。
target_type = 'image' #@param ["video", "image"]

source_path = 'examples/images/elon_musk.jpg'
target_path = 'examples/images/beckham.jpg'
path_to_video = 'examples/videos/nggyup.mp4'

if setting_type == 'upload':
  # 初めにsourceをアップロード
  upload_src = files.upload()
  upload_src = list(upload_src.keys())
  source_path = upload_src[0]
  
  upload_target = files.upload()
  upload_target = list(upload_target.keys())
  if target_type=='image':
    target_path = upload_target[0]
  else:
    path_to_video = upload_target[0]

In [None]:
# Load the source image. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so check explicitly.
source_full = cv2.imread(source_path)
if source_full is None:
    raise FileNotFoundError(f"Could not read source image: {source_path}")

OUT_VIDEO_NAME = "examples/results/result.mp4"
crop_size = 224

# Crop the source face. Previously a failure was only printed and execution
# continued, leaving `source` undefined (or stale from an earlier run) for the
# inference cell; now the cell stops here.
try:
    source = crop_face(source_full, app, crop_size)[0]
except TypeError as err:
    # presumably raised when no face is found in the image — confirm against crop_face
    print("Bad source images")
    raise ValueError(f"Could not crop a face from source image: {source_path}") from err
else:
    # Reverse the channel order of the cropped face (last axis).
    source = [source[:, :, ::-1]]
    print("Everything is ok!")

# Read the target: a single image is wrapped in a one-frame list so that the
# image and video paths share the same downstream code.
if target_type == 'image':
    target_full = cv2.imread(target_path)
    if target_full is None:
        raise FileNotFoundError(f"Could not read target image: {target_path}")
    full_frames = [target_full]
else:
    full_frames, fps = read_video(path_to_video)
target = get_target(full_frames, app, crop_size)

# FaceSwap実行
- Source: 交換に使用する顔の画像（この顔をTargetに当てはめます）
- Target: Sourceの顔を当てはめる画像または動画

In [None]:
#@markdown #**Inference**
# Batch size; forwarded to model_inference as its BS argument by the inference cell.
batch_size =  40#@param {type:"integer"}

In [None]:
START_TIME = time.time()

# Run the face swap over all target frames with the models built earlier.
final_frames_list, crop_frames_list, full_frames, tfm_array_list = model_inference(
    full_frames,
    source,
    target,
    netArc,
    G,
    app,
    set_target=False,
    crop_size=crop_size,
    BS=batch_size,
)

# Optional super-resolution pass on the swapped faces.
if use_sr:
    final_frames_list = face_enhancement(final_frames_list, model)

OUT_IMAGE_NAME = 'examples/results/result.png'

if target_type == 'video':
    # Make sure the output directory exists before the video is written.
    os.makedirs(os.path.dirname(OUT_VIDEO_NAME) or '.', exist_ok=True)
    get_final_video(
        final_frames_list,
        crop_frames_list,
        full_frames,
        tfm_array_list,
        OUT_VIDEO_NAME,
        fps,
        handler,
    )

    add_audio_from_another_video(path_to_video, OUT_VIDEO_NAME, "audio")

    print(f'Full pipeline took {time.time() - START_TIME}')
    print(f"Video saved with path {OUT_VIDEO_NAME}")
else:
    result = get_final_image(
        final_frames_list,
        crop_frames_list,
        full_frames[0],
        tfm_array_list,
        handler,
    )
    # cv2.imwrite reports failure through its return value rather than an
    # exception (e.g. when the directory is missing), so create the directory
    # and check the result instead of failing silently.
    os.makedirs(os.path.dirname(OUT_IMAGE_NAME) or '.', exist_ok=True)
    if not cv2.imwrite(OUT_IMAGE_NAME, result):
        raise IOError(f"Failed to write result image: {OUT_IMAGE_NAME}")

# FaceSwap結果の表示

In [None]:
import matplotlib.pyplot as plt

# Side-by-side comparison; only an image run defines target_full and result.
if target_type == 'image':
  # Flip the channel order of the cropped source face back before display.
  source_crop = source[0][:, :, ::-1]
  panel_images = [source_crop, target_full, result]
  panel_titles = ['Source Image', 'Target Image', 'Swapped Image']
  show_images(panel_images, panel_titles, figsize=(20, 15))

In [None]:
# Import only the name that is used instead of a star import, which would
# flood the notebook namespace with every moviepy.editor name.
from moviepy.editor import VideoFileClip

# OUT_VIDEO_NAME is only written by a video run. In image mode the file is
# either missing (error) or left over from an earlier run (stale preview),
# so the preview is built only for video targets.
video_preview = None
if target_type == 'video':
    clip = VideoFileClip(OUT_VIDEO_NAME)
    video_preview = clip.ipython_display()

# Keep the preview as the cell's final expression so the notebook renders it;
# a None value renders nothing.
video_preview