<a href="https://colab.research.google.com/github/sunyeul/ToyProjectLab/blob/feature%2Fdeepfake-like-hands-on-tutorial/DeepFake_like/20230410_DeepFake%E3%81%A3%E3%81%BB%E3%82%9A%E3%81%84%E3%81%AA%E6%8A%80%E8%A1%93%E3%81%AE%E7%B4%B9%E4%BB%8B%E3%83%BB%E3%83%8F%E3%83%B3%E3%82%B9%E3%82%99%E3%82%AA%E3%83%B3%E3%83%81%E3%83%A5%E3%83%BC%E3%83%88%E3%83%AA%E3%82%A2%E3%83%AB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Thin-Plate Spline Motion Model for Image Animation

- コードなしで使用
    - https://replicate.com/yoyo-nb/thin-plate-spline-motion-model
    - https://huggingface.co/spaces/CVPR/Image-Animation-using-Thin-Plate-Spline-Motion-Model

- ソースコード： https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model

- 論文： https://arxiv.org/abs/2203.14367
![](https://i0.wp.com/tt-tsukumochi.com/wp-content/uploads/2022/05/fig_.png?resize=1536%2C531&ssl=1)

## 結果例

![](https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model/blob/main/assets/vox.gif?raw=true)

![](https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model/raw/main/assets/ted.gif)

## コード・モデルダウンロード

In [None]:
import os

REPO_DIR = 'Thin-Plate-Spline-Motion-Model'

# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -qq face_alignment imageio_ffmpeg

# Make the cell safe to re-run: once we are inside the repository, cloning
# again would create a nested copy and %cd would descend into it.
if os.path.basename(os.getcwd()) != REPO_DIR:
    if not os.path.isdir(REPO_DIR):
        !git clone https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model.git
    %cd Thin-Plate-Spline-Motion-Model

# -p: do not fail when the directory is already there from a previous run.
!mkdir -p checkpoints

# URLs are quoted so the shell never treats the '?' as a glob character.
# Only the vox checkpoint is fetched; uncomment a line below for another dataset.
!wget -q -c "https://cloud.tsinghua.edu.cn/f/da8d61d012014b12a9e4/?dl=1" -O checkpoints/vox.pth.tar
#!wget -q -c "https://cloud.tsinghua.edu.cn/f/483ef53650b14ac7ae70/?dl=1" -O checkpoints/ted.pth.tar
#!wget -q -c "https://cloud.tsinghua.edu.cn/f/9ec01fa4aaef423c8c02/?dl=1" -O checkpoints/taichi.pth.tar
#!wget -q -c "https://cloud.tsinghua.edu.cn/f/cd411b334a2e49cdb1e2/?dl=1" -O checkpoints/mgif.pth.tar

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for face_alignment (setup.py) ... [?25l[?25hdone
Cloning into 'Thin-Plate-Spline-Motion-Model'...
remote: Enumerating objects: 112, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 112 (delta 24), reused 18 (delta 18), pack-reused 76[K
Receiving objects: 100% (112/112), 32.65 MiB | 13.43 MiB/s, done.
Resolving deltas: 100% (48/48), done.
/content/Thin-Plate-Spline-Motion-Model


## 設定

![](https://user-images.githubusercontent.com/48593306/197152487-45d5198a-1e7d-4e73-8709-cf7621827d60.png)

In [None]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

dataset_name = 'vox' # one of: 'vox', 'taichi', 'ted', 'mgif'

# Input resolution: 384*384 for ted, 256*256 for vox, taichi and mgif.
pixel = 384 if dataset_name == 'ted' else 256

# Derive both paths from dataset_name so that the config, the checkpoint and
# the resolution cannot drift apart when the dataset is switched.
# NOTE(review): assumes the repository names its configs
# config/<dataset>-<pixel>.yaml for every dataset, as it does for vox - confirm.
# The matching checkpoint must also have been downloaded in the setup cell.
config_path = f'config/{dataset_name}-{pixel}.yaml'
checkpoint_path = f'checkpoints/{dataset_name}.pth.tar'

predict_mode = 'relative' # one of: 'standard', 'relative', 'avd'
# When animating a face in 'relative' mode, find_best_frame=True can give a
# better-quality result.
find_best_frame = True

## ライブラリのインポート

In [None]:
import imageio
import imageio_ffmpeg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML
import warnings
from skimage import img_as_ubyte

from demo import load_checkpoints, make_animation, find_best_frame as _find

# Suppress every warning for the rest of the session to keep cell output clean.
warnings.filterwarnings("ignore")

# Load the four objects returned by demo.load_checkpoints for the configured
# config file, checkpoint and device.
inpainting, kp_detector, dense_motion_network, avd_network = load_checkpoints(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    device=device,
)

## 【オプション】 Colabで動画を録画

In [None]:
#@title
from IPython.display import display, Javascript,HTML
from google.colab.output import eval_js
from base64 import b64decode
 
def record_video(filename):
  """Record a clip from the browser's camera and microphone into `filename`.

  Injects a JavaScript recorder into the Colab output area, waits for the user
  to click "Start Recording" and then "Stop Recording", receives the recording
  as a base64 string and writes the decoded bytes to disk.

  Args:
    filename: Destination path. The recorder is configured with
      "video/webm; codecs=vp9", so the bytes written are WebM data whatever
      extension is used here.

  Returns:
    None. Failures (for example a denied camera permission) are caught and
    printed rather than raised, so the notebook keeps running.
  """
  # Imported locally under a private alias: the preprocessing cell defines its
  # own `display` function, which replaces IPython's in the global namespace
  # and would break this function when the cell is re-run afterwards.
  from IPython.display import display as _ipython_display

  js=Javascript("""
    async function recordVideo() {
      const options = { mimeType: "video/webm; codecs=vp9" };
      const div = document.createElement('div');
      const capture = document.createElement('button');
      const stopCapture = document.createElement("button");

      capture.textContent = "Start Recording";
      capture.style.background = "orange";
      capture.style.color = "white";

      stopCapture.textContent = "Stop Recording";
      stopCapture.style.background = "red";
      stopCapture.style.color = "white";
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';

      const stream = await navigator.mediaDevices.getUserMedia({audio:true, video: {
        facingMode: "environment",
        aspectRatio: { exact: 1 }
      }});

      const recorder = new MediaRecorder(stream, options);
      document.body.appendChild(div);
      div.appendChild(video);

      // live preview; muted so the microphone is not played back
      video.srcObject = stream;
      video.muted = true;

      await video.play();

      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      await new Promise((resolve) => {
        capture.onclick = resolve;
      });
      recorder.start();
      capture.replaceWith(stopCapture);

      await new Promise((resolve) => stopCapture.onclick = resolve);
      // subscribe before stop() so the final chunk cannot be missed
      const recorded = new Promise((resolve) => recorder.ondataavailable = resolve);
      recorder.stop();
      const recData = await recorded;
      const arrBuff = await recData.data.arrayBuffer();

      // release camera AND microphone, then remove the preview element
      stream.getTracks().forEach((track) => track.stop());
      div.remove();

      let binaryString = "";
      const bytes = new Uint8Array(arrBuff);
      bytes.forEach((byte) => {
        binaryString += String.fromCharCode(byte);
      })
    return btoa(binaryString);
    }
  """)
  try:
    _ipython_display(js)
    # Blocks until the JavaScript promise resolves with the base64 payload.
    data=eval_js('recordVideo({})')
    binary=b64decode(data)
    with open(filename,"wb") as video_file:
      video_file.write(binary)
    print(f"Finished recording video at:{filename}")
  except Exception as err:
    # Best effort by design: report the problem without stopping the notebook.
    print(str(err))

# Record a clip from the webcam into assets/capture.mp4.
# NOTE: record_video asks the browser for "video/webm; codecs=vp9", so the
# file contains WebM data even though its name ends in .mp4.
record_video('assets/capture.mp4')

<IPython.core.display.Javascript object>

Finished recording video at:assets/capture.mp4


## 【オプション】 DALL-E 2・Midjourneyで顔画像を生成

生成した顔画像を `assets/source.png` として保存すると、次の前処理セルでソース画像として読み込まれます。

## 前処理

- 画像と動画のサイズ調整

In [None]:
source_image_path = 'assets/source.png'
driving_video_path = 'assets/driving.mp4'

# Source image: resize to the model resolution and keep only the first three
# channels (drops a fourth channel such as alpha when there is one).
source_image = imageio.imread(source_image_path)
source_image = resize(source_image, (pixel, pixel))[..., :3]

# Driving video: read every frame, always releasing the reader afterwards,
# including when decoding fails with an unexpected error.
reader = imageio.get_reader(driving_video_path)
try:
    fps = reader.get_meta_data()['fps']
    driving_frames_raw = []
    try:
        for frame in reader:
            driving_frames_raw.append(frame)
    except RuntimeError:
        # Deliberate best effort: keep whatever frames were read before the error.
        pass
finally:
    reader.close()

# Same resize / channel selection as for the source image, applied per frame.
driving_video = [resize(frame, (pixel, pixel))[..., :3] for frame in driving_frames_raw]

# The original-size frames are no longer needed; free them.
del driving_frames_raw

# NOTE: this helper has the same name as IPython.display.display, which the
# recording cell imports; after this cell runs, `display` refers to this helper.
def display(source, driving, generated=None, interval=50):
    """Build a side-by-side preview animation.

    Every animation frame is the source image, the i-th driving frame and,
    when `generated` is given, the i-th generated frame, concatenated
    horizontally.

    Args:
        source: Source image array, shown unchanged in every frame.
        driving: Sequence of driving frames; its length sets the frame count.
        generated: Optional sequence of generated frames, indexed in step
            with `driving`.
        interval: Delay between animation frames in milliseconds.

    Returns:
        A matplotlib.animation.ArtistAnimation; the figure is closed before
        returning so it is not also rendered as a static plot.
    """
    has_generated = generated is not None
    # 4 inches of width per panel: two panels, or three with generated frames.
    fig = plt.figure(figsize=(8 + 4 * has_generated, 4))
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)

    # One explicit axes, configured once, instead of implicit pyplot state
    # that was re-applied on every frame.
    ax = fig.add_subplot(1, 1, 1)
    ax.axis('off')

    ims = []
    for i, driving_frame in enumerate(driving):
        panels = [source, driving_frame]
        if has_generated:
            panels.append(generated[i])
        im = ax.imshow(np.concatenate(panels, axis=1), animated=True)
        ims.append([im])

    ani = animation.ArtistAnimation(fig, ims, interval=interval, repeat_delay=1000)
    plt.close(fig)
    return ani
    

# Preview the source image next to the driving video as an inline HTML5 video.
input_preview = display(source_image, driving_video)
HTML(input_preview.to_html5_video())

## 推論・結果の出力

In [None]:
output_video_path = 'assets/result.mp4'


def _animate(frames):
    """Run make_animation on `frames` with the source image, networks and settings defined in earlier cells."""
    return make_animation(
        source_image, frames,
        inpainting, kp_detector, dense_motion_network, avd_network,
        device=device, mode=predict_mode,
    )


use_best_frame = predict_mode == 'relative' and find_best_frame
if not use_best_frame:
    predictions = _animate(driving_video)
else:
    best_frame = _find(source_image, driving_video, device == 'cpu')
    print("Best frame: " + str(best_frame))

    # Animate outwards from the best frame in both directions.
    frames_forward = driving_video[best_frame:]
    frames_backward = list(reversed(driving_video[:best_frame + 1]))

    predictions_forward = _animate(frames_forward)
    predictions_backward = _animate(frames_backward)

    # Restore chronological order; both halves start at the best frame, so
    # drop the duplicate from the forward half.
    predictions = predictions_backward[::-1] + predictions_forward[1:]

# save result video
result_frames = [img_as_ubyte(frame) for frame in predictions]
imageio.mimsave(output_video_path, result_frames, fps=fps)

result_preview = display(source_image, driving_video, predictions)
HTML(result_preview.to_html5_video())

Downloading: "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" to /root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth
100%|██████████| 85.7M/85.7M [00:10<00:00, 8.97MB/s]
Downloading: "https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip" to /root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip
100%|██████████| 91.9M/91.9M [00:10<00:00, 9.09MB/s]
169it [00:08, 20.50it/s]


Best frame: 14


100%|██████████| 155/155 [00:12<00:00, 12.67it/s]
100%|██████████| 15/15 [00:00<00:00, 15.04it/s]
