# Demo for paper "First Order Motion Model for Image Animation"

**Load driving video and source image**

In [8]:
from alterego.demo import generate_animation

# Call the packaged helper with the demo source image path and driving video path.
# NOTE(review): the third argument ("new_video") looks like an output name — confirm
# against alterego.demo.generate_animation.
generate_animation('content/02.png', 'content/10-backward.mp4', "new_video")

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:06<00:00, 41.04it/s]


In [2]:
from alterego.demo import load_checkpoints
# Load the generator and keypoint detector from the given config and checkpoint files.
# Both objects are passed to every make_animation call in the cells below.
generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml', 
                            checkpoint_path='models/vox-cpk.pth.tar')

In [4]:
import imageio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML
import warnings
import time

from alterego.demo import make_animation
from skimage import img_as_ubyte

# Install a global "ignore" filter: no warnings are shown from here on in this kernel.
warnings.filterwarnings("ignore")

# Resize image and video to 256x256
# `t` marks the start of loading; later cells also subtract it to compute `elapsed`.
t = time.time()

# Target edge length in pixels; both inputs are resized to (scale, scale).
scale = 256

source_image = imageio.imread('content/02.png')
# memtest="4GB" sets the in-memory size limit imageio enforces while reading all frames.
driving_video = imageio.mimread('content/10-backward.mp4', memtest="4GB")

# [..., :3] keeps only the first three channels (drops a fourth channel if one is present).
source_image = resize(source_image, (scale, scale))[..., :3]
driving_video = [resize(frame, (scale, scale))[..., :3] for frame in driving_video]

# Print the seconds spent loading and resizing.
print(time.time() - t)

3.9460110664367676


In [4]:
# Display source and destination

def display(source, driving, generated=None):
    """Build a side-by-side preview animation of source, driving and generated frames.

    Every animation frame is the horizontal concatenation (``axis=1``) of the
    static ``source`` image, the i-th driving frame and, when ``generated`` is
    given, the i-th generated frame. ``np.concatenate`` requires the arrays to
    agree on every dimension except the concatenation axis.

    Note: the name shadows IPython's built-in ``display`` helper inside the
    notebook; it is kept because the cells below call it by this name.

    Args:
        source: Source image array, repeated unchanged in every frame.
        driving: Sequence of driving frames; its length sets the frame count.
        generated: Optional sequence of generated frames, indexed in step with
            ``driving``. When given, the figure is widened from 8 to 12 inches
            to make room for the third column.

    Returns:
        matplotlib.animation.ArtistAnimation playing at 50 ms per frame with a
        1000 ms pause before each repeat.

    Raises:
        IndexError: If ``generated`` is given but is shorter than ``driving``.
    """
    show_generated = generated is not None
    # Use an explicit figure/axes pair instead of pyplot's implicit "current"
    # figure, so other open figures in the kernel cannot be affected.
    fig, ax = plt.subplots(figsize=(12 if show_generated else 8, 6))
    # Axis visibility belongs to the axes, so it only needs to be set once.
    ax.axis('off')

    ims = []
    for i, driving_frame in enumerate(driving):
        cols = [source, driving_frame]
        if show_generated:
            cols.append(generated[i])
        # ArtistAnimation expects one list of artists per frame.
        ims.append([ax.imshow(np.concatenate(cols, axis=1), animated=True)])

    ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=1000)
    # Close exactly this figure so the notebook does not also render a static
    # copy of it; the animation keeps its own reference to the figure.
    plt.close(fig)
    return ani


# Preview the source image next to each driving frame (no generated column yet),
# rendered inline as an HTML5 video.
HTML(display(source_image, driving_video).to_html5_video())

**Create a model and load checkpoints**

**Perform image animation**

In [5]:
# Animate the source image with the driving video; relative=True selects relative
# keypoint displacement.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Save the resulting video; each frame is converted with img_as_ubyte before writing.
imageio.mimsave('content/generated_file.mp4', [img_as_ubyte(frame) for frame in predictions])
# The video can be downloaded from the /content folder.
# NOTE(review): `t` was set in the loading cell, so `elapsed` spans everything since
# then (including idle time between cell runs); it is not printed anywhere visible.
elapsed = time.time() - t

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:07<00:00, 38.38it/s]


array([[[0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        ...,
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078]],

       [[0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        ...,
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078]],

       [[0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        [0.11372549, 0.10196078, 0.10196078],
        ...,
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078],
        [0.10980392, 0.09019608, 0.10196078]],

       ...,

       [[0.14919881, 0.11780328, 0.13896597],
        [0.12403638, 0.09264085, 0.12006885],
        [0.10202958, 0

[array([[[0.56862745, 0.54509804, 0.57254902],
         [0.55686275, 0.53333333, 0.56078431],
         [0.53333333, 0.51764706, 0.55294118],
         ...,
         [0.51764706, 0.54509804, 0.48235294],
         [0.52941176, 0.54509804, 0.48627451],
         [0.52941176, 0.54509804, 0.48627451]],
 
        [[0.57254902, 0.54901961, 0.57647059],
         [0.56078431, 0.5372549 , 0.56470588],
         [0.54117647, 0.5254902 , 0.56078431],
         ...,
         [0.51372549, 0.54117647, 0.47843137],
         [0.5254902 , 0.54117647, 0.48235294],
         [0.52156863, 0.5372549 , 0.47843137]],
 
        [[0.58431373, 0.56078431, 0.58823529],
         [0.57254902, 0.54901961, 0.57647059],
         [0.54901961, 0.53333333, 0.56862745],
         ...,
         [0.50588235, 0.52156863, 0.4627451 ],
         [0.52156863, 0.52941176, 0.47058824],
         [0.51764706, 0.5254902 , 0.46666667]],
 
        ...,
 
        [[0.20784314, 0.20784314, 0.20784314],
         [0.2       , 0.2       , 0.2    

[array([[[0.1457848 , 0.13985887, 0.12620506],
         [0.12324839, 0.11036432, 0.10660549],
         [0.11916058, 0.10405936, 0.10644794],
         ...,
         [0.12001856, 0.09050936, 0.09884638],
         [0.11929057, 0.09294458, 0.09926532],
         [0.14799516, 0.11659938, 0.12821709]],
 
        [[0.13436368, 0.12288843, 0.10902149],
         [0.12643844, 0.10735502, 0.10912094],
         [0.12630878, 0.10600974, 0.1118809 ],
         ...,
         [0.12000528, 0.09256166, 0.09916157],
         [0.11607368, 0.09446882, 0.09946138],
         [0.12880515, 0.10348073, 0.11018545]],
 
        [[0.12824084, 0.11493948, 0.10541116],
         [0.1281078 , 0.10799076, 0.11430729],
         [0.12811908, 0.10730597, 0.11602279],
         ...,
         [0.12161189, 0.09293306, 0.10639834],
         [0.1159648 , 0.09339532, 0.10583311],
         [0.1230291 , 0.09620722, 0.1059139 ]],
 
        ...,
 
        [[0.12341735, 0.09993003, 0.09980986],
         [0.11022747, 0.08191836, 0.09140

AttributeError: 'NoneType' object has no attribute 'to_html5_video'

In [25]:
# Tag used only to build the output filename below.
file = "dest_2"

# NOTE(review): unlike the dest_4/dest_5 cells, this cell does not reload source_image,
# so the result depends on whatever source_image and driving_video are in the kernel.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Save the resulting video; each frame is converted with img_as_ubyte before writing.
imageio.mimsave('content/_test_3/mix/generated_{}.mp4'.format(file), [img_as_ubyte(frame) for frame in predictions])
# The video can be downloaded from the /content folder.
# NOTE(review): `t` is set in the loading cell, so `elapsed` is not this cell's run time.
elapsed = time.time() - t

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:13<00:00, 27.95it/s]


In [15]:
# Tag used only to build the output filename below.
file = "dest_3"

# NOTE(review): unlike the dest_4/dest_5 cells, this cell does not reload source_image,
# so the result depends on whatever source_image and driving_video are in the kernel.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Save the resulting video; each frame is converted with img_as_ubyte before writing.
imageio.mimsave('content/_test_3/mix/generated_{}.mp4'.format(file), [img_as_ubyte(frame) for frame in predictions])
# The video can be downloaded from the /content folder.
# NOTE(review): `t` is set in the loading cell, so `elapsed` is not this cell's run time.
elapsed = time.time() - t

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 191/191 [00:06<00:00, 28.76it/s]


In [10]:
# Tag used for both the input image name and the output filename.
file = "dest_4"

# Load this run's source image and resize it to (scale, scale), keeping the first three channels.
source_image = imageio.imread('content/_test_3/destination/{}.jpg'.format(file))
source_image = resize(source_image, (scale, scale))[..., :3]

# driving_video is whatever is currently loaded in the kernel; it is not reloaded here.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Save the resulting video; each frame is converted with img_as_ubyte before writing.
imageio.mimsave('content/_test_3/mix/generated_{}.mp4'.format(file), [img_as_ubyte(frame) for frame in predictions])
# The video can be downloaded from the /content folder.
# NOTE(review): `t` is set in the loading cell, so `elapsed` is not this cell's run time.
elapsed = time.time() - t

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 521/521 [00:18<00:00, 28.69it/s]


In [11]:
# Tag used for both the input image name and the output filename.
file = "dest_5"

# Load this run's source image and resize it to (scale, scale), keeping the first three channels.
source_image = imageio.imread('content/_test_3/destination/{}.jpg'.format(file))
source_image = resize(source_image, (scale, scale))[..., :3]

# driving_video is whatever is currently loaded in the kernel; it is not reloaded here.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Save the resulting video; each frame is converted with img_as_ubyte before writing.
imageio.mimsave('content/_test_3/mix/generated_{}.mp4'.format(file), [img_as_ubyte(frame) for frame in predictions])
# The video can be downloaded from the /content folder.
# NOTE(review): `t` is set in the loading cell, so `elapsed` is not this cell's run time.
elapsed = time.time() - t

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 521/521 [00:18<00:00, 27.57it/s]


**In the cell above we use relative keypoint displacement to animate the objects. We can use absolute coordinates instead, but in that case all the object proportions will be inherited from the driving video. For example, Putin's haircut will be extended to match Trump's haircut.**

In [9]:
# Re-run the animation with absolute keypoint coordinates (relative=False);
# adapt_movement_scale=True is passed as well.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=False, adapt_movement_scale=True)
# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|████████████████████████████████████████████████████████████████████████████████| 521/521 [00:17<00:00, 29.81it/s]


## Running on your data

**First we need to crop a face from both the source image and the video. A simple graphics editor such as Paint is enough for cropping the image, but cropping a video is more complicated. You can use ffmpeg for this.**

In [27]:
# Cut an 8-second clip starting at 00:08:57.50 (-ss / -t) and crop it to a 600x600
# window whose top-left corner is at x=760, y=50 (crop=width:height:x:y).
# The result is written to hinton.mp4 in the working directory.
!ffmpeg -i content/custom_input/destination.mp4 -ss 00:08:57.50 -t 00:00:08 -filter:v "crop=600:600:760:50" -async 1 hinton.mp4

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 9.3.1 (GCC) 20200621
  configuration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enab

**Another possibility is to use a screen recording tool. If you need to crop many images at once, use a face detector (https://github.com/1adrianb/face-alignment); see https://github.com/AliaksandrSiarohin/video-preprocessing for the preprocessing of VoxCeleb.**

In [0]:
# NOTE(review): absolute Google Drive path — it only resolves where Drive is mounted
# at /content/gdrive; adjust it when running elsewhere.
source_image = imageio.imread('/content/gdrive/My Drive/first-order-motion-model/09.png')
# hinton.mp4 is the clip written by the ffmpeg cell; memtest=False turns off imageio's
# in-memory size check while reading all frames.
driving_video = imageio.mimread('hinton.mp4', memtest=False)


# Resize image and video to 256x256, keeping only the first three channels.

source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

# Animate with relative keypoint displacement; adapt_movement_scale=True is passed as well.
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True,
                             adapt_movement_scale=True)

# Show source, driving and generated frames side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|██████████| 240/240 [00:08<00:00, 29.00it/s]
