In [2]:
%%capture
# %%capture silences everything this cell prints, so failures of the commands below are not shown.
# Install ffmpeg-python and imageio-ffmpeg into the kernel's environment.
%pip install ffmpeg-python imageio-ffmpeg
# Turn the current working directory into a checkout of first-order-model:
# init an empty repo, register the upstream remote, then pull its master branch.
# NOTE(review): on a re-run `git remote add origin` presumably fails because the
# remote already exists, and %%capture would hide that message — confirm.
!git init .
!git remote add origin https://github.com/AliaksandrSiarohin/first-order-model
!git pull origin master
# Clone the separate first-order-model-demo repository into ./demo.
!git clone https://github.com/graphemecluster/first-order-model-demo demo

In [3]:
import imageio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Read the still image to animate and the driving video (as a list of frames).
# Both paths live under /content/drive/MyDrive; no cell visible here mounts that
# location — presumably Google Drive is mounted beforehand (TODO confirm).
source_image = imageio.imread('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/01.png')
driving_video = imageio.mimread('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/4.mp4')

In [5]:
# Resize the source image and every driving frame to 256x256 and keep only the
# first three channels (the trailing-axis slice drops any extra channel).
# Note: both names are rebound in place, so the raw inputs are no longer
# available after this cell runs.
source_image = resize(source_image, (256,256))[..., :3]
driving_video = [resize(frame, (256,256))[..., :3] for frame in driving_video]

In [6]:
def display(source, driving, generated = None):
  """Build a side-by-side animation of source, driving and (optionally) generated frames.

  Each animation frame is the horizontal concatenation (axis=1) of `source`,
  `driving[i]` and, when given, `generated[i]`, so all of them must be arrays
  that np.concatenate can join along axis 1.

  Args:
    source: Image array shown unchanged as the left column of every frame.
    driving: Sequence of frame arrays; one animation frame is made per entry.
    generated: Optional sequence indexed in step with `driving`. When given,
      it is appended as a third column and the figure is made 4 units wider.

  Returns:
    A matplotlib.animation.ArtistAnimation with interval=50 and
    repeat_delay=1000.

  Raises:
    IndexError: If `generated` is given but has fewer entries than `driving`.
  """
  # NOTE(review): this name presumably shadows IPython's built-in `display`
  # in the notebook namespace — kept because later cells call it by this name.
  fig = plt.figure(figsize = (8+4*(generated is not None), 6))
  # Draw on this figure's own Axes instead of whichever axes pyplot currently
  # considers "current".
  ax = fig.add_subplot(111)
  # Hiding the axis once is enough; it does not need repeating per frame.
  ax.axis('off')

  ims = []
  for i, frame in enumerate(driving):
    cols = [source, frame]
    if generated is not None:
      cols.append(generated[i])
    im = ax.imshow(np.concatenate(cols, axis=1), animated=True)
    # ArtistAnimation expects one list of artists per animation frame.
    ims.append([im])
  ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=1000)
  # Close exactly this figure (not just the current one); presumably this keeps
  # the notebook from also rendering it as a static image.
  plt.close(fig)
  return ani

# Preview the source image next to the driving frames as an HTML5 video in the cell output.
HTML(display(source_image, driving_video).to_html5_video())

In [9]:
from demo import load_checkpoints
# Load the two objects returned by load_checkpoints, bound here to `generator`
# and `kp_detector`, from the vox-256 config and the checkpoint file stored
# under the Drive folder. The config path is relative to the working directory.
generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
                                          checkpoint_path='/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/vox-cpk.pth.tar')

In [11]:
from demo import make_animation
from skimage import img_as_ubyte

# Animate the source image with the driving frames (relative=True is passed to make_animation).
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

# Write the result at the driving clip's own frame rate. Without an explicit
# fps the writer uses its default rate, which generally does not match the
# driving video, so the saved file would play at the wrong speed.
with imageio.get_reader('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/4.mp4') as driving_reader:
  driving_fps = driving_reader.get_meta_data()['fps']
imageio.mimsave('../generated.mp4', [img_as_ubyte(frame) for frame in predictions], fps=driving_fps)

# Preview source, driving and generated frames side by side.
HTML(display(source_image, driving_video, predictions).to_html5_video())

  0%|          | 0/444 [00:00<?, ?it/s]

In [12]:
# Second run, this time with relative=False and adapt_movement_scale=True.
prediction = make_animation(source_image, driving_video, generator, kp_detector, relative=False, adapt_movement_scale=True)
# Show the output of THIS run. The cell previously passed `predictions` (the
# relative=True result from the earlier cell), so the new result was never displayed.
HTML(display(source_image, driving_video, prediction).to_html5_video())

  0%|          | 0/444 [00:00<?, ?it/s]

In [15]:
!pip install playsound

Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: playsound
  Building wheel for playsound (setup.py) ... done
  Created wheel for playsound: filename=playsound-1.3.0-py3-none-any.whl size=7019 sha256=92d8bdef20d91891a76b592c77391ec459794894a5566206c174680e733288fb
  Stored in directory: /root/.cache/pip/wheels/90/89/ed/2d643f4226fc8c7c9156fc28abd8051e2d2c0de37ae51ac45c
Successfully built playsound
Installing collected packages: playsound
Successfully installed playsound-1.3.0


In [19]:
!pip install gtts

Collecting gtts
  Downloading gTTS-2.3.2-py3-none-any.whl (28 kB)
Installing collected packages: gtts
Successfully installed gtts-2.3.2


In [35]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [48]:
from gtts import gTTS
import os
import imageio
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from IPython.display import HTML
import matplotlib.animation as animation
from skimage import img_as_ubyte
from demo import load_checkpoints, make_animation
from pydub import AudioSegment
from pydub.playback import play

In [49]:
# The text that the picture should speak (passed to gTTS in a later cell).
text_to_speak = 'Xin chào các bạn, tôi là một bức ảnh có thể nói chuyện.'

In [50]:
# Convert the text into an audio file with gTTS (lang='vi', tld='com.vn')
# and save it as hello.mp3 in the Drive folder.
tts = gTTS(text_to_speak, tld='com.vn', lang='vi')
tts.save('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/hello.mp3')

In [51]:
# Load the MP3 saved by the previous cell and play it.
# NOTE(review): pydub's play() presumably outputs on the machine running the
# kernel; on a hosted runtime that may be inaudible in the browser — confirm,
# and consider IPython.display.Audio for in-notebook playback.
audio = AudioSegment.from_mp3('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/hello.mp3')
play(audio)

In [52]:
# Paths to the source image and the driving video.
source_image_path = '/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/01.png'
driving_video_path = '/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/4.mp4'

In [53]:
# Read the source image and the driving video (the latter as a list of frames).
source_image = imageio.imread(source_image_path)
driving_video = imageio.mimread(driving_video_path)

In [54]:
# Resize the source image and every driving frame to 256x256 and keep only the
# first three channels. Both names are rebound in place, so the raw inputs are
# no longer available after this cell runs.
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

In [55]:
# Helper used to display the driving video (and, later, the generated one).
# NOTE: this re-defines the `display` helper that an earlier cell already defines.
def display(source, driving, generated=None):
    """Build a side-by-side animation of source, driving and (optionally) generated frames.

    Each animation frame is the horizontal concatenation (axis=1) of `source`,
    `driving[i]` and, when given, `generated[i]`, so all of them must be arrays
    that np.concatenate can join along axis 1.

    Args:
        source: Image array shown unchanged as the left column of every frame.
        driving: Sequence of frame arrays; one animation frame is made per entry.
        generated: Optional sequence indexed in step with `driving`. When
            given, it is appended as a third column and the figure is made
            4 units wider.

    Returns:
        A matplotlib.animation.ArtistAnimation with interval=50 and
        repeat_delay=1000.

    Raises:
        IndexError: If `generated` is given but has fewer entries than `driving`.
    """
    fig = plt.figure(figsize=(8 + 4 * (generated is not None), 6))
    # Draw on this figure's own Axes instead of whichever axes pyplot
    # currently considers "current".
    ax = fig.add_subplot(111)
    # Hiding the axis once is enough; it does not need repeating per frame.
    ax.axis('off')
    ims = []
    for i, frame in enumerate(driving):
        cols = [source, frame]
        if generated is not None:
            cols.append(generated[i])
        im = ax.imshow(np.concatenate(cols, axis=1), animated=True)
        # ArtistAnimation expects one list of artists per animation frame.
        ims.append([im])
    ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=1000)
    # Close exactly this figure (not just the current one); presumably this
    # keeps the notebook from also rendering it as a static image.
    plt.close(fig)
    return ani

# Preview the source image next to the driving frames as an HTML5 video in the cell output.
HTML(display(source_image, driving_video).to_html5_video())

In [56]:
# Load the model checkpoint: load_checkpoints returns two objects, bound here
# to `generator` and `kp_detector`, built from the vox-256 config and the
# checkpoint file stored in the Drive folder.
generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
                                          checkpoint_path='/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/vox-cpk.pth.tar')

In [57]:
# Generate the predicted video frames from the source image and the driving video
# (relative=True is passed to make_animation).
predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True)

  0%|          | 0/444 [00:00<?, ?it/s]

In [58]:
# Save the predicted video at the driving clip's own frame rate. Without an
# explicit fps the writer uses its default rate, which generally does not match
# the driving video, so the saved file would play at the wrong speed.
with imageio.get_reader(driving_video_path) as driving_reader:
    driving_fps = driving_reader.get_meta_data()['fps']
imageio.mimsave('/content/drive/MyDrive/Colab Notebooks/first_order_model_demo/generated.mp4', [img_as_ubyte(frame) for frame in predictions], fps=driving_fps)

In [59]:
# Display the predicted video: source, driving and generated frames side by side as an HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())