In [None]:
#@title ##**Import necessary libraries and create some helper functions**

import os
import subprocess
from base64 import b64encode

import cv2
import moviepy.editor as mp
from IPython.display import HTML, Audio, clear_output, display

# Clear the sample_data directory
!rm -rf /content/sample_data
# Create a new sample_data directory
!mkdir /content/sample_data

def show_video(path):
    """Display a video from a given file path.

    The whole file is read into memory, base64-encoded and embedded in an
    HTML ``<video>`` element as a ``data:`` URL, which is then rendered with
    ``display``.

    Args:
        path (str): File path of the video to show. The content is declared
            as ``video/mp4`` in the generated HTML.
    """
    # Read through a context manager so the file handle is closed right away
    # instead of lingering until garbage collection.
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    display(HTML("""
    <video width=700 controls>
                <source src="%s" type="video/mp4">
    </video>
    """ % data_url))

def get_video_resolution(video_path):
    """Get the resolution of a video from the provided file path.

    Args:
        video_path (str): File path of the video to inspect.

    Returns:
        tuple: ``(width, height)`` as integers, read from the capture's
        ``CAP_PROP_FRAME_WIDTH`` and ``CAP_PROP_FRAME_HEIGHT`` properties.
    """
    # NOTE(review): the capture is not checked with isOpened(); presumably an
    # unreadable path yields (0, 0) here - confirm before relying on it.
    video = cv2.VideoCapture(video_path)
    try:
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always free the underlying capture, even if a property read fails.
        video.release()
    return (width, height)

def process_video(input_video_path):
    """
    Process a video, resize it to 720p if necessary, and display it.

    The result is always written, without its audio track, to
    /content/sample_data/input_vid.mp4; any previous file at that location is
    removed first. Videos longer than 70 seconds are rejected with a warning
    and nothing is written or displayed.

    Args:
        input_video_path (str): File path to the input video.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    output_video_path = '/content/sample_data/input_vid.mp4'

    # Only the duration is needed, so close the clip as soon as it is read to
    # release the resources it holds.
    clip = mp.VideoFileClip(input_video_path)
    try:
        video_duration = clip.duration
    finally:
        clip.close()

    # Remove previous input video
    if os.path.isfile(output_video_path):
        os.remove(output_video_path)

    if video_duration > 70:
        print("WARNING: Video duration exceeds 70 seconds. Please upload a shorter video.")
        return

    video_resolution = get_video_resolution(input_video_path)
    print(f"Video resolution: {video_resolution}")

    needs_resize = video_resolution[0] >= 1920 or video_resolution[1] >= 1080
    if needs_resize:
        print("Resizing video to 720p...")
        # NOTE(review): a fixed 1280:720 target changes the aspect ratio of
        # inputs that are not 16:9 - confirm this is intended.
        stream_args = ["-vf", "scale=1280:720"]
    else:
        print("No resizing needed")
        stream_args = ["-c", "copy"]

    # Pass the arguments as a list (no shell) so paths containing spaces or
    # shell metacharacters are handled verbatim; -an drops the audio track.
    command = ["ffmpeg", "-y", "-i", input_video_path, *stream_args, "-an", output_video_path]
    completed = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        # Surface the failure here instead of reporting success and failing
        # later when the missing output file is opened.
        error_tail = completed.stderr.decode(errors="replace")[-500:]
        raise RuntimeError(
            f"ffmpeg failed with exit code {completed.returncode}: {error_tail}"
        )

    if needs_resize:
        print("Video resized to 720p")

    show_video(output_video_path)

You can provide a video file with or without an audio track, but make sure it is an edited video that shows only the face of the speaker.

In [None]:
#@markdown ###*Enter the path for video and audio*
input_video_path = '10_pad_normal.mp4' #@param {type:"string"}
process_video(input_video_path)

audio_path = "output10.wav" #@param {type:"string"}
PATH_TO_YOUR_AUDIO = "/content/sample_data/input_aud.wav"

# Copy the audio stream unchanged to the fixed location used later on.
# -y overwrites the file left by an earlier run of this cell (otherwise ffmpeg
# refuses and the stale audio would be reused); the argument list avoids
# shell-quoting problems with paths that contain spaces.
audio_copy = subprocess.run(
    ["ffmpeg", "-y", "-i", audio_path, "-acodec", "copy", PATH_TO_YOUR_AUDIO],
    stdin=subprocess.DEVNULL,
    stdout=subprocess.DEVNULL,
    stderr=subprocess.PIPE,
)
if audio_copy.returncode != 0:
    raise RuntimeError(
        f"ffmpeg failed with exit code {audio_copy.returncode}: "
        f"{audio_copy.stderr.decode(errors='replace')[-500:]}"
    )

# Display the copied audio
display(Audio(PATH_TO_YOUR_AUDIO))

* When `use_hd_model` is enabled, the [Wav2Lip + GAN](https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA?e=n9ljGW) model is used, which produces **slightly inferior lip-sync, but better visual quality**.<br>

* When `use_hd_model` is disabled, the [Wav2Lip](https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?e=TBFBVW) model is used, which produces **highly accurate lip-sync**.

In [None]:
!git clone https://github.com/justinjohn0306/Wav2Lip

%cd /content/Wav2Lip

#download the pretrained model
!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth' -O 'checkpoints/wav2lip.pth'
!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth' -O 'checkpoints/wav2lip_gan.pth'
!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth' -O 'checkpoints/resnet50.pth'
!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth' -O 'checkpoints/mobilenet.pth'
a = !pip install https://raw.githubusercontent.com/AwaleSajil/ghc/master/ghc-1.0-py3-none-any.whl
!pip install git+https://github.com/elliottzheng/batch-face.git@master

#@title **Start Crunching and Preview Output**
#@markdown <b>Note: Only change these, if you have to</b>

%cd /content/Wav2Lip

# Set up paths and variables for the output file
output_file_path = '/content/Wav2Lip/results/result_voice.mp4'

# Delete existing output file before processing, if any
if os.path.exists(output_file_path):
    os.remove(output_file_path)

pad_top =  0#@param {type:"integer"}
pad_bottom =  -5#@param {type:"integer"}
pad_left =  0#@param {type:"integer"}
pad_right =  0#@param {type:"integer"}
rescaleFactor =  1#@param {type:"integer"}
nosmooth = True #@param {type:"boolean"}
#@markdown ___
#@markdown Model selection:
use_hd_model = False #@param {type:"boolean"}
checkpoint_path = 'checkpoints/wav2lip.pth' if not use_hd_model else 'checkpoints/wav2lip_gan.pth'

if nosmooth == False:
  !python inference.py --checkpoint_path $checkpoint_path --face "../sample_data/input_vid.mp4" --audio "../sample_data/input_aud.wav" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor
else:
  !python inference.py --checkpoint_path $checkpoint_path --face "../sample_data/input_vid.mp4" --audio "../sample_data/input_aud.wav" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor --nosmooth

#Preview output video
if os.path.exists(output_file_path):
    clear_output()
    print("Final Video Preview")
    print("Download this video from", output_file_path)
    showVideo(output_file_path)
else:
    print("Processing failed. Output video not found.")


Good results were obtained with `pad_bottom` in the range -10 to -5 for both of the models that we used.