# Check your Google Colab settings

In [None]:
#@markdown Check type of GPU and VRAM available.
# Make sure you are using a runtime with a GPU attached.
# You can check under Runtime > Change runtime type in the top bar.

# Print the GPU name, total memory and free memory as header-less CSV.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

Tesla T4, 15109 MiB, 15109 MiB


In [None]:
#@markdown (OPTIONAL) Mount your Google Drive folder and save files locally

# Using Drive for inputs or outputs requires changing the file paths in the
# cells below so that they point under the mount point.
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')


# 1. Sim Swap

This part allows you to create a Deepfake given a source video and a target image.

Code: https://github.com/neuralchen/SimSwap  
Paper: https://arxiv.org/pdf/2106.06340v1.pdf 

In [None]:
#@title Installation

# Clone the SimSwap repository into the current working directory.
!git clone https://github.com/neuralchen/SimSwap
# Pull the latest commits. On a re-run the clone above refuses to overwrite the
# existing checkout, so this is the step that brings it up to date.
!cd SimSwap && git pull

In [None]:
# SimSwap runtime dependencies: insightface is pinned, onnxruntime and moviepy
# are left unpinned.
!pip install insightface==0.2.1 onnxruntime moviepy
# Provides GoogleDriveDownloader, imported by the checkpoint download cell below.
!pip install googledrivedownloader
# NOTE(review): presumably installed last so that this pinned imageio replaces
# whatever version the packages above pulled in -- confirm before reordering.
!pip install imageio==2.4.1

In [None]:
import os
os.chdir("SimSwap")
!ls

from google_drive_downloader import GoogleDriveDownloader

### If google drive link is not permenant, you can use ID from open url.
# GoogleDriveDownloader.download_file_from_google_drive(file_id='1TLNdIufzwesDbyr_nVTR7Zrx9oRHLM_N',
#                                     dest_path='./arcface_model/arcface_checkpoint.tar')
# GoogleDriveDownloader.download_file_from_google_drive(file_id='1PXkRiBUYbu1xWpQyDEJvGKeqqUFthJcI',
#                                     dest_path='./checkpoints.zip')

!wget -P ./arcface_model https://github.com/neuralchen/SimSwap/releases/download/1.0/arcface_checkpoint.tar
!wget https://github.com/neuralchen/SimSwap/releases/download/1.0/checkpoints.zip
!unzip ./checkpoints.zip  -d ./checkpoints
!wget -P ./parsing_model/checkpoint https://github.com/neuralchen/SimSwap/releases/download/1.0/79999_iter.pth

In [None]:
### OneDrive files can now be downloaded in Colab directly.
# Fetch the archive and save it as antelope.zip in the current directory.
# NOTE(review): --no-check-certificate disables TLS certificate validation for
# this download, and the long opaque URL looks like a generated link -- confirm
# it is still valid before relying on it.
!wget --no-check-certificate "https://sh23tw.dm.files.1drv.com/y4mmGiIkNVigkSwOKDcV3nwMJulRGhbtHdkheehR5TArc52UjudUYNXAEvKCii2O5LAmzGCGK6IfleocxuDeoKxDZkNzDRSt4ZUlEt8GlSOpCXAFEkBwaZimtWGDRbpIGpb_pz9Nq5jATBQpezBS6G_UtspWTkgrXHHxhviV2nWy8APPx134zOZrUIbkSF6xnsqzs3uZ_SEX_m9Rey0ykpx9w" -O antelope.zip
# Unpack into the directory that the inference cell passes to Face_detect_crop as `root`.
!unzip ./antelope.zip -d ./insightface_func/models/

In [None]:
#@title Inference

import cv2
import torch
import fractions
import numpy as np
from PIL import Image
import torch.nn.functional as F
from torchvision import transforms
from models.models import create_model
from options.test_options import TestOptions
from insightface_func.face_detect_crop_multi import Face_detect_crop
from util.videoswap import video_swap
from util.add_watermark import watermark_image

In [None]:
# Plain image -> tensor conversion with no normalization applied.
transformer = transforms.Compose([transforms.ToTensor()])

# Tensor conversion followed by per-channel normalization; this is the pipeline
# applied to the cropped identity image before it is fed to the ArcFace network.
transformer_Arcface = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Inverse of the normalization above. Normalize computes (x - mean) / std, so
# dividing by 1/std multiplies by std, and subtracting -mean adds the mean back.
detransformer = transforms.Compose([
    transforms.Normalize(mean=[0, 0, 0], std=[1/0.229, 1/0.224, 1/0.225]),
    transforms.Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1]),
])

In [None]:
#@markdown Path of the source video.
# Relative paths are resolved against the current working directory, which the
# download cell above changes to the SimSwap checkout.
SOURCE_VIDEO = "./demo_file/test.mp4" #@param {type:"string"}

#@markdown Path of the target image.
TARGET_IMAGE = "./demo_file/Iron_man.jpg" #@param {type:"string"}

#@markdown Output file name. Will be saved in the ./output folder.
# Only the file name goes here; later cells prepend './output/' themselves.
OUTPUT_FILE =  "demo.mp4" #@param {type:"string"}


In [None]:
import os

# Build the SimSwap option object from its defaults, then override the fields
# this notebook controls.
opt = TestOptions()
opt.initialize()
# Dummy argument so the option parser accepts the `-f` flag found in a notebook
# kernel's argv instead of rejecting it.
opt.parser.add_argument('-f')
opt = opt.parse()
opt.pic_a_path = TARGET_IMAGE ## or replace it with image from your own google drive
opt.video_path = SOURCE_VIDEO ## or replace it with video from your own google drive
opt.output_path = './output/' + OUTPUT_FILE
opt.temp_path = './tmp'
opt.Arc_path = './arcface_model/arcface_checkpoint.tar'
opt.isTrain = False
opt.use_mask = True  ## new feature up-to-date

# Make sure the destination folder exists before the swap tries to write into it.
os.makedirs(os.path.dirname(opt.output_path), exist_ok=True)

crop_size = opt.crop_size

torch.nn.Module.dump_patches = True
model = create_model(opt)
model.eval()

# Face detector / aligner backed by the models unpacked into ./insightface_func/models.
app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))

with torch.no_grad():
    pic_a = opt.pic_a_path

    # cv2.imread signals an unreadable or missing file by returning None rather
    # than raising, so fail here with a message that names the path.
    img_a_whole = cv2.imread(pic_a)
    if img_a_whole is None:
        raise FileNotFoundError("Could not read target image: " + str(pic_a))

    # NOTE(review): the detector appears to return None when it finds no face --
    # confirm against Face_detect_crop.get. The guard is harmless either way and
    # replaces an opaque tuple-unpacking TypeError with a clear message.
    detection = app.get(img_a_whole, crop_size)
    if detection is None:
        raise RuntimeError("No face detected in target image: " + str(pic_a))
    img_a_align_crop, _ = detection

    # Use the first aligned crop; OpenCV images are BGR, PIL expects RGB.
    img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB))
    img_a = transformer_Arcface(img_a_align_crop_pil)
    # Add a leading batch dimension: (C, H, W) -> (1, C, H, W).
    img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])

    # Move the identity image to the GPU.
    img_id = img_id.cuda()

    # Create the latent identity vector: resize to 112x112, embed with ArcFace,
    # then L2-normalise each row. F.normalize keeps the tensor on its device and
    # avoids the earlier GPU -> CPU -> GPU round trip that divided a torch tensor
    # by a NumPy array.
    img_id_downsample = F.interpolate(img_id, size=(112,112))
    latend_id = model.netArc(img_id_downsample)
    latend_id = F.normalize(latend_id, p=2, dim=1)

    video_swap(opt.video_path, latend_id, model, app, opt.output_path, temp_results_dir=opt.temp_path, use_mask=opt.use_mask)

In [None]:
#@title Display video

from IPython.display import HTML
from base64 import b64encode

output_file = './output/' + OUTPUT_FILE
# Read the video through a context manager so the file handle is closed as soon
# as the bytes are in memory, instead of being left open for the kernel's lifetime.
with open(output_file, 'rb') as video_fh:
    mp4 = video_fh.read()
# Embed the bytes as a base64 data URL so the browser can play them inline.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# The HTML object must stay the last expression of the cell so it gets rendered.
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
#@title Download video
from google.colab import files
# Same './output/' + OUTPUT_FILE path that the inference cell used as its output path.
out_file = './output/' + OUTPUT_FILE
# Hand the file to the Colab download helper.
files.download(out_file) 

# 2. Real Time Voice Cloning

This part allows you to clone a voice by providing a voice sample and an output text.

Code: https://github.com/CorentinJ/Real-Time-Voice-Cloning

Paper: https://arxiv.org/pdf/1806.04558.pdf

In [None]:
#@title Installation

import os
from os.path import exists, join, basename, splitext
from scipy.io.wavfile import write

git_repo_url = 'https://github.com/CorentinJ/Real-Time-Voice-Cloning.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # go to root
  %cd ../content/
  # clone and install
  !git clone -q --recursive {git_repo_url}
  # install dependencies
  !cd {project_name} && pip install -q -r requirements.txt
  !pip install -q --upgrade gdown
  !apt-get install -qq libportaudio2
  !pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

  # download pretrained model
  #!cd {project_name} && wget https://github.com/blue-fish/Real-Time-Voice-Cloning/releases/download/v1.0/pretrained.zip && unzip -o pretrained.zip
  !cd {project_name} && mkdir -p output
  !cd {project_name} && mkdir -p saved_models/default/ 
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu

import sys
sys.path.append(project_name)

from IPython.display import display, Audio, clear_output
from IPython.utils import io
import ipywidgets as widgets
import numpy as np
from dl_colab_notebooks.audio import record_audio, upload_audio

from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder import inference as vocoder
from pathlib import Path

!ls 
encoder.load_model(project_name / Path("saved_models/default/encoder.pt"))
synthesizer = Synthesizer(project_name / Path("saved_models/default/synthesizer.pt"))
vocoder.load_model(project_name / Path("saved_models/default/vocoder.pt"))

In [None]:
#@title Record or Upload
#@markdown * Either record audio from microphone or upload audio from file (.mp3 or .wav) 

SAMPLE_RATE = 22050
record_or_upload = "Upload (.mp3 or .wav)" #@param ["Record", "Upload (.mp3 or .wav)"]
record_seconds =   10#@param {type:"number", min:1, max:10, step:1}

# Speaker embedding of the most recent voice sample; stays None until one is processed.
embedding = None


def _compute_embedding(audio):
  """Play `audio` back and store its speaker embedding in the global `embedding`."""
  global embedding
  display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
  # Reset first so that a failure below leaves `embedding` as None rather than stale.
  embedding = None
  preprocessed = encoder.preprocess_wav(audio, SAMPLE_RATE)
  embedding = encoder.embed_utterance(preprocessed)


def _record_audio(b):
  """Button callback: record from the microphone and embed the result (`b` is unused)."""
  clear_output()
  _compute_embedding(record_audio(record_seconds, sample_rate=SAMPLE_RATE))


def _upload_audio(b):
  """Ask for an audio file upload and embed the result (`b` is unused)."""
  clear_output()
  _compute_embedding(upload_audio(sample_rate=SAMPLE_RATE))


if record_or_upload != "Record":
  # Upload mode runs straight away; the empty string only fills the unused parameter.
  _upload_audio("")
else:
  # Record mode waits for the user to press the button.
  button = widgets.Button(description="Record Your Voice")
  button.on_click(_record_audio)
  display(button)

In [None]:
#@title Synthesize text { run: "auto" }

%cd /content/Real-Time-Voice-Cloning
!pwd

#Path of output audio file. Make sure that its .wav file
OUTPUT_AUDIO = "./output/generatedAudio.wav" #@param {type:"string"}

TEXT = "Hey, this is me saying something completely different" #@param {type:"string"}

def synthesize(embed, text):
  
  print("Synthesizing new audio...")
  #with io.capture_output() as captured:
  global generated_wav
  specs = synthesizer.synthesize_spectrograms([text], [embed])
  generated_wav = vocoder.infer_waveform(specs[0])
  generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant")
  clear_output()
  display(Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True))
  write(OUTPUT_AUDIO, synthesizer.sample_rate, generated_wav)
if embedding is None:
  print("first record a voice or upload a voice file!")
else:
  synthesize(embedding, TEXT)


In [None]:
#@title Download audio
from google.colab import files
# OUTPUT_AUDIO is the path the synthesize cell wrote the generated .wav file to.
files.download(OUTPUT_AUDIO) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 3. Wav2Lip
 
 This code lets you lip-sync your videos to a provided audio file.
 
 Code: https://github.com/Rudrabha/Wav2Lip

 Paper: https://arxiv.org/abs/2008.10010

In [None]:
#@title Installation

%cd ../content/
!pwd
!git clone https://github.com/zabique/Wav2Lip
!cd Wav2Lip

#download the pretrained model
!wget 'https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA' -O '/content/Wav2Lip/checkpoints/wav2lip_gan.pth'
!wget 'https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?e=TBFBVW' -O '/content/Wav2Lip/checkpoints/wav2lip.pth'
!pip install https://raw.githubusercontent.com/AwaleSajil/ghc/master/ghc-1.0-py3-none-any.whl

# !pip uninstall tensorflow tensorflow-gpu
!cd Wav2Lip && pip install -r requirements.txt

#download pretrained model for face detection
!wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "/content/Wav2Lip/face_detection/detection/sfd/s3fd.pth"

!pip install -q youtube-dl
!pip install ffmpeg-python
from IPython.display import clear_output 
clear_output()
print("\nDone")

In [None]:
#@title Inference

# All three paths are absolute, so they do not depend on the working directory.

#@markdown Path of the audio source.
SOURCE_AUDIO = "/content/SimSwap/demo_file/voice.wav" #@param {type:"string"}

#@markdown Path of the video source
# NOTE(review): this default appears to be the SimSwap result for the default
# OUTPUT_FILE ("demo.mp4") -- update it if that name was changed.
TARGET_VIDEO = "/content/SimSwap/output/demo.mp4" #@param {type:"string"}

#@markdown Path of the output video
OUTPUT_VIDEO = "/content/SimSwap/output/demoWav2Lip.mp4" #@param {type:"string"}



In [None]:
#@title Create Wav2Lip video

# Using wav2lip_gan.pth GAN
!cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face {TARGET_VIDEO} --audio {SOURCE_AUDIO} --outfile {OUTPUT_VIDEO}

In [None]:
#@title Display video

from IPython.display import HTML
from base64 import b64encode

# Read the video through a context manager so the file handle is closed as soon
# as the bytes are in memory, instead of being left open for the kernel's lifetime.
with open(OUTPUT_VIDEO, 'rb') as video_fh:
    mp4 = video_fh.read()
# Embed the bytes as a base64 data URL so the browser can play them inline.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# The HTML object must stay the last expression of the cell so it gets rendered.
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
#@title Download video

from google.colab import files
# OUTPUT_VIDEO is the path passed to Wav2Lip's inference script as --outfile.
files.download(OUTPUT_VIDEO) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>