In [None]:
import subprocess
import sys
import os

#@title (1) Setup


# Make sure every third-party package the notebook needs is importable,
# installing whatever is missing into the kernel's own environment.
import importlib
import importlib.util

# (import name, pip requirement). The import name is what Python resolves and it
# often differs from the PyPI name (yt-dlp -> yt_dlp, scikit-image -> skimage).
# Probing with the PyPI name can never succeed, which forced a reinstall on
# every run of this cell.
packages = [
    ('yt_dlp', 'yt-dlp'),
    ('face_alignment', 'face-alignment==1.3.5'),
    ('imageio', 'imageio'),
    ('imageio_ffmpeg', 'imageio-ffmpeg'),  # backend imageio uses for mp4 read/write
    ('skimage', 'scikit-image'),
    ('scipy', 'scipy'),
    ('torch', 'torch'),
]

for import_name, pip_name in packages:
    # find_spec only locates the package; it does not execute it, so the check
    # stays fast even for heavy packages such as torch.
    if importlib.util.find_spec(import_name) is not None:
        print(f"✓ {import_name} already installed")
    else:
        print(f"Installing {pip_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])

# Let the running kernel see packages that were installed a moment ago.
importlib.invalidate_caches()

# Create necessary directories
os.makedirs('./.cache/torch/hub/checkpoints', exist_ok=True)

# Clone first-order-model if not exists
if not os.path.exists('./first-order-model'):
    subprocess.run(['git', 'clone', '--depth', '1', 
                   'https://github.com/eyaler/first-order-model'], check=True)
    print("✓ first-order-model cloned")
else:
    print("✓ first-order-model already exists")

# Download model weights if not exists
weights_to_download = [
    ('vox-adv-cpk.pth.tar', 'https://openavatarify.s3.amazonaws.com/weights/vox-adv-cpk.pth.tar'),
]

for filename, url in weights_to_download:
    filepath = f'./first-order-model/{filename}'
    if not os.path.exists(filepath):
        print(f"Downloading {filename}...")
        try:
            # NOTE(review): --no-check-certificate disables TLS verification for
            # this download; consider dropping it if the host's certificate validates.
            subprocess.run(['wget', '--no-check-certificate', '-nc', url, '-O', filepath], check=True)
        except BaseException:
            # `wget -O` creates the target before any data arrives. If the download
            # fails or is interrupted, remove the partial file so that the next run
            # does not mistake it for a complete checkpoint.
            if os.path.exists(filepath):
                os.remove(filepath)
            raise
    else:
        print(f"✓ {filename} already exists")

print("\n✓ Setup complete! Ready to proceed.")

In [1]:
#@title (2) Get the Driver video and Avatar image from the web
#@markdown 1. You can change the URLs to your **own** stuff from most video platforms!
#@markdown 2. Alternatively, you can upload **local** files in the next cells (2a, 2b)

import subprocess
import urllib.request
import os

import sys  # used to run yt-dlp with the same interpreter as this kernel

video_url = 'https://dai.ly/k7ppSzt4SHe27PCGrY6'
limit_video_height = "1080"
image_url = 'https://www.srugim.co.il/wp-content/uploads/2010/10/%D7%90%D7%97%D7%9E%D7%93-%D7%98%D7%99%D7%91%D7%99.jpg'

if video_url:
  # Remove leftovers from a previous run so a stale file is never picked up.
  for stale_path in ("./video.mp4", "./video"):
    if os.path.exists(stale_path):
      os.remove(stale_path)

  # Prefer non-AV1 mp4 video up to the height limit, with progressively looser fallbacks.
  video_format = f"bestvideo[ext=mp4][vcodec!*=av01][height<={limit_video_height}]+bestaudio[ext=m4a]/mp4[height<={limit_video_height}][vcodec!*=av01]/mp4[vcodec!*=av01]/mp4"

  # Invoke yt-dlp as a module of the current interpreter: the `yt-dlp` console
  # script is not guaranteed to be on PATH when the package was pip-installed
  # into the kernel's environment.
  download = subprocess.run([
    sys.executable, "-m", "yt_dlp", "--no-playlist", "-f", video_format,
    video_url, "--merge-output-format", "mp4", "-o", "./video.mp4"
  ])

  # Fail loudly here instead of letting a later cell trip over a missing file.
  if not os.path.exists("./video.mp4"):
    raise RuntimeError(
      f"yt-dlp did not produce ./video.mp4 (exit code {download.returncode}) for {video_url}"
    )
  # Later cells expect the driver video at the extension-less path ./video.
  os.rename("./video.mp4", "./video")

if image_url:
  urllib.request.urlretrieve(image_url, "./image")

[dailymotion] Extracting URL: https://dai.ly/k7ppSzt4SHe27PCGrY6
[dailymotion] Downloading Access Token
[dailymotion] k7ppSzt4SHe27PCGrY6: Downloading media JSON metadata
[dailymotion] k7ppSzt4SHe27PCGrY6: Downloading metadata JSON
[dailymotion] k7ppSzt4SHe27PCGrY6: Downloading m3u8 information
[info] k7ppSzt4SHe27PCGrY6: Downloading 1 format(s): hls-720
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 3
[download] Destination: ./video.mp4
[download] 100% of    2.26MiB in 00:00:02 at 812.82KiB/s               
[FixupM3u8] Fixing MPEG-TS in MP4 container of "./video.mp4"


In [2]:
#@title (2a) Optionally upload local Driver video
#@markdown Instructions: select a video file from your local system
manually_upload_video = False #@param {type:"boolean"}
if manually_upload_video:
    import shutil
    from tkinter import Tk, filedialog

    # The file dialog needs a Tk root window; keep it hidden.
    root = Tk()
    root.withdraw()
    try:
        video_path = filedialog.askopenfilename(
            title="Select a video file",
            filetypes=[("Video files", "*.mp4 *.avi *.mov *.mkv"), ("All files", "*.*")]
        )
    finally:
        # Always tear the hidden root down; otherwise every run of this cell
        # leaves another Tk instance alive in the kernel.
        root.destroy()

    # An empty result means the dialog was cancelled.
    if video_path:
        shutil.copy(video_path, './video')
        print(f"Video uploaded from: {video_path}")
    else:
        print("No video selected; ./video was left unchanged.")

In [3]:
#@title (2b) Optionally upload local Avatar image
#@markdown Instructions: mark the checkbox + run the cell, and select your image file from a dialog
manually_upload_image = False #@param {type:"boolean"}
if manually_upload_image:
    import shutil
    from tkinter import Tk, filedialog

    # The file dialog needs a Tk root window; keep it hidden.
    root = Tk()
    root.withdraw()
    try:
        image_path = filedialog.askopenfilename(
            title="Select an image file",
            filetypes=[("Image files", "*.jpg *.jpeg *.png *.bmp"), ("All files", "*.*")]
        )
    finally:
        # Always tear the hidden root down; otherwise every run of this cell
        # leaves another Tk instance alive in the kernel.
        root.destroy()

    # An empty result means the dialog was cancelled.
    if image_path:
        shutil.copy(image_path, './image')
        print(f"Image uploaded from: {image_path}")
    else:
        print("No image selected; ./image was left unchanged.")

In [4]:
#@title (3) Optionally (but recommended) shorten Driver video
import shutil
import subprocess
from pathlib import Path

start_seconds = 0 #@param {type:"number"}
duration_seconds = 60 #@param {type:"number"}
start_seconds = max(start_seconds, 0)
duration_seconds = max(duration_seconds, 0)
#@markdown (use duration_seconds = 0 for unrestricted duration)

video_path = Path("./video")
full_video_path = Path("./full_video")

if start_seconds or duration_seconds:
    # Move the video file to full_video to preserve original
    if video_path.exists():
        shutil.move(str(video_path), str(full_video_path))
    else:
        print("Video file not found at ./video")
        full_video_path = None

    if full_video_path and full_video_path.exists():
        # Prepare ffmpeg command. `-y` (overwrite) is a global option and goes first.
        ffmpeg_cmd = ["ffmpeg", "-y", "-ss", str(start_seconds)]
        if duration_seconds:
            # 0 means "unrestricted": only pass -t when a real limit was requested,
            # because `-t 0` would ask ffmpeg for a zero-length clip.
            ffmpeg_cmd += ["-t", str(duration_seconds)]
        ffmpeg_cmd += ["-i", str(full_video_path), "-f", "mp4", str(video_path)]
        try:
            subprocess.run(ffmpeg_cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing ffmpeg executable.
            print("Error occurred during video processing:", e)
            # Put the untrimmed video back so later cells still find ./video.
            if video_path.exists():
                video_path.unlink()
            shutil.move(str(full_video_path), str(video_path))
            print(f"Restored the untrimmed video to {video_path}")
        else:
            print(f"Video shortened and saved to {video_path}")
    else:
        print("Cannot process video as the source file does not exist.")
else:
    print('Using full video.')

ffmpeg version n8.0.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with gcc 15.2.1 (GCC) 20251112
  configuration: --prefix=/usr --disable-debug --disable-static --disable-stripping --enable-amf --enable-avisynth --enable-cuda-llvm --enable-lto --enable-fontconfig --enable-frei0r --enable-gmp --enable-gnutls --enable-gpl --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libdav1d --enable-libdrm --enable-libdvdnav --enable-libdvdread --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgsm --enable-libharfbuzz --enable-libiec61883 --enable-libjack --enable-libjxl --enable-libmodplug --enable-libmp3lame --enable-libopencore_amrnb --enable-libopencore_amrwb --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libplacebo --enable-libpulse --enable-librav1e --enable-librsvg --enable-librubberband --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libsvtav1 --enab

Video shortened and saved to video


[out#0/mp4 @ 0x5642689ba880] video:495KiB audio:150KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 1.973336%
frame=  290 fps=0.0 q=-1.0 Lsize=     659KiB time=00:00:09.59 bitrate= 562.1kbits/s speed=14.8x elapsed=0:00:00.64    
[libx264 @ 0x56426897b180] frame I:2     Avg QP:17.32  size: 19894
[libx264 @ 0x56426897b180] frame P:74    Avg QP:20.64  size:  4247
[libx264 @ 0x56426897b180] frame B:214   Avg QP:25.58  size:   713
[libx264 @ 0x56426897b180] consecutive B-frames:  1.0%  1.4%  1.0% 96.6%
[libx264 @ 0x56426897b180] mb I  I16..4: 24.3% 65.5% 10.2%
[libx264 @ 0x56426897b180] mb P  I16..4:  2.1%  5.6%  0.2%  P16..4: 30.7%  7.4%  2.9%  0.0%  0.0%    skip:51.2%
[libx264 @ 0x56426897b180] mb B  I16..4:  0.1%  0.2%  0.0%  B16..8: 23.2%  0.6%  0.1%  direct: 0.1%  skip:75.8%  L0:45.7% L1:53.2% BI: 1.1%
[libx264 @ 0x56426897b180] 8x8 transform intra:69.3% inter:90.8%
[libx264 @ 0x56426897b180] coded y,uvDC,uvAC intra: 51.6% 62.7% 11.9% inter: 3.1% 3.4% 0.0%
[lib

In [6]:
# (4) Prepare assets
# If you run out of RAM, it may mean your video is too large. 
# You can shorten it above (3), or try to use a video of smaller resolution.

import os

# ---- PARAMETERS ----
# Set to True to attempt to center/crop to the face using face alignment.
center_video_to_head = True
crop_video_to_head = True
video_crop_expansion_factor = 2.5
center_image_to_head = True
crop_image_to_head = False
image_crop_expansion_factor = 2.5
# An expansion factor below 1 would shrink the box inside the face; clamp to 1.
video_crop_expansion_factor = max(video_crop_expansion_factor, 1)
image_crop_expansion_factor = max(image_crop_expansion_factor, 1)

# ---- FILEPATHS ----
# Steps (2), (2a), (2b) and (3) write the avatar to ./image and the driver video
# to ./video (no file extension), so those are the defaults here. Point these at
# other files if you want to bypass the earlier steps.
# The inputs are stored as absolute paths so they stay valid if the working
# directory changes later in the session.
SOURCE_IMAGE_PATH = os.path.abspath('image')
DRIVING_VIDEO_PATH = os.path.abspath('video')
OUT_VIDEO_PATH = 'input_concat.mp4'

import imageio
import numpy as np
from skimage import img_as_ubyte
from skimage.transform import resize
import warnings
warnings.filterwarnings("ignore")
import face_alignment
import torch
import os
import sys

# Display utility for desktop: uses OpenCV if available, else prints path
def show_video_file(video_path):
    """Play ``video_path`` in an OpenCV window, or print its location.

    If OpenCV cannot be imported, only a message with the file path is printed.
    Playback speed follows the frame rate reported by the file itself rather
    than a notebook-level global, so the function works on its own.

    Args:
        video_path: Path of the video file to preview.

    Returns:
        None.
    """
    try:
        import cv2
    except ImportError:
        print(f"Video saved at '{video_path}' (Install OpenCV for preview window.)")
        return

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Cannot open file {video_path}")
        cap.release()
        return

    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if video_fps and video_fps > 0:
            # waitKey(0) blocks until a key press, so never let the delay reach 0.
            delay_ms = max(1, int(1000 // video_fps))
        else:
            # Frame rate unknown: fall back to roughly 25 frames per second.
            delay_ms = 40
        print("Press 'q' in the video window to quit.")
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            cv2.imshow('Output Video Preview', frame)
            if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if display fails midway.
        cap.release()
        cv2.destroyAllWindows()

# Remember the library's own transform() the first time this cell runs, so that
# running the cell again wraps the saved original rather than the wrapper.
if not hasattr(face_alignment.utils, '_original_transform'):
    setattr(face_alignment.utils, '_original_transform', face_alignment.utils.transform)


def patched_transform(point, center, scale, resolution, invert=False):
    """Call the saved face_alignment transform with tensor-typed arguments.

    ``scale`` and ``resolution`` are converted to float32 tensors before being
    forwarded; all other arguments are passed through unchanged.
    (Presumably this works around a type mismatch inside the library — confirm.)
    """
    scale_tensor = torch.tensor(scale, dtype=torch.float32)
    resolution_tensor = torch.tensor(resolution, dtype=torch.float32)
    return face_alignment.utils._original_transform(
        point, center, scale_tensor, resolution_tensor, invert)


face_alignment.utils.transform = patched_transform

# Use GPU if available, else fallback to CPU for FaceAlignment
if torch.cuda.is_available():
    device_type = 'cuda'
else:
    device_type = 'cpu'
fa = face_alignment.FaceAlignment(
    face_alignment.LandmarksType._2D,
    flip_input=True,
    device=device_type,
)

def create_bounding_box(target_landmarks, expansion_factor=1):
    """Compute one expanded bounding box per set of 68 two-dimensional landmarks.

    Args:
        target_landmarks: Array-like that can be reshaped to ``(-1, 68, 2)``.
        expansion_factor: Total growth of the box along each axis; 1 keeps the
            tight box, 2 doubles width and height around the same centre.

    Returns:
        Array with one row ``[x_min, y_min, width, height]`` per landmark set.
    """
    points = np.array(target_landmarks).reshape(-1, 68, 2)
    lower = points.min(axis=1)
    upper = points.max(axis=1)
    # Half of the extra size is added on each side of the box.
    per_side = (expansion_factor - 1) / 2
    margin_x = (upper[:, 0] - lower[:, 0]) * per_side
    margin_y = (upper[:, 1] - lower[:, 1]) * per_side
    lower[:, 0] -= margin_x
    lower[:, 1] -= margin_y
    upper[:, 0] += margin_x
    upper[:, 1] += margin_y
    return np.hstack((lower, upper - lower))

def fix_dims(im):
    """Return ``im`` with at most three channels on its last axis.

    A 2-D array is first copied into three identical channels; any channels
    beyond the third (index 3 and up on the last axis) are dropped.
    """
    if im.ndim == 2:
        im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
    return im[..., :3]

def get_crop(im, center_face=True, crop_face=True, expansion_factor=1, landmarks=None):
    """Choose a square crop window for ``im``.

    Args:
        im: Image array; passed through ``fix_dims`` first.
        center_face: Centre the window on the detected face.
        crop_face: Size the window to the (expanded) face box instead of the
            largest square that fits the image.
        expansion_factor: Forwarded to ``create_bounding_box``.
        landmarks: Pre-computed landmarks; when falsy and a face is needed,
            they are detected with ``fa``.

    Returns:
        ``(x0, x1, y0, y1)`` as ints. With ``crop_face`` the window may extend
        past the image borders.
    """
    im = fix_dims(im)
    face_needed = center_face or crop_face
    if face_needed and not landmarks:
        landmarks = fa.get_landmarks_from_image(im)

    if not (face_needed and landmarks):
        # No face to work with: take the centred, largest square of the image.
        img_h, img_w = im.shape[:2]
        side = min(img_h, img_w)
        left = (img_w - side)//2
        top = (img_h - side)//2
        return int(left), int(left + side), int(top), int(top + side)

    # Use the box with the largest area.
    boxes = create_bounding_box(landmarks, expansion_factor=expansion_factor)
    boxes_by_area = sorted(boxes, key=lambda box: box[2]*box[3])
    left, top, box_w, box_h = boxes_by_area[-1]

    if crop_face:
        # Square window around the face box, sized by its longer edge.
        side = max(box_h, box_w)
        left += (box_w - side)//2
        right = left + side
        top += (box_h - side)//2
        bottom = top + side
    else:
        # Largest square of the image, centred on the face but kept inside the image.
        img_h, img_w = im.shape[:2]
        side = min(img_h, img_w)
        left = min(max(0, left + (box_w - side)//2), img_w - side)
        right = left + side
        top = min(max(0, top + (box_h - side)//2), img_h - side)
        bottom = top + side
    return int(left), int(right), int(top), int(bottom)

def pad_crop_resize(im, x0=None, x1=None, y0=None, y1=None, new_h=256, new_w=256):
    """Crop ``im`` to ``[y0:y1, x0:x1]`` and resize it to ``(new_h, new_w)``.

    Bounds left as ``None`` default to the image edges. When the window extends
    beyond the image, the image is first padded by repeating its edge pixels.
    """
    im = fix_dims(im)
    h, w = im.shape[:2]
    x0 = 0 if x0 is None else x0
    x1 = w if x1 is None else x1
    y0 = 0 if y0 is None else y0
    y1 = h if y1 is None else y1

    window_leaves_image = x0 < 0 or x1 > w or y0 < 0 or y1 > h
    if window_leaves_image:
        rows_pad = (max(-y0, 0), max(y1 - h, 0))
        cols_pad = (max(-x0, 0), max(x1 - w, 0))
        im = np.pad(im, pad_width=[rows_pad, cols_pad, (0, 0)], mode='edge')

    # Padding on the top/left shifts the image, so the end index is moved by the
    # same amount as a negative start.
    row_slice = slice(max(y0, 0), y1 - min(y0, 0))
    col_slice = slice(max(x0, 0), x1 - min(x0, 0))
    return resize(im[row_slice, col_slice], (new_h, new_w))

# Load source image
if not os.path.exists(SOURCE_IMAGE_PATH):
    # Raise instead of sys.exit(): a regular exception gives a clear traceback in a notebook.
    raise FileNotFoundError(f"Source image file not found: {SOURCE_IMAGE_PATH}")
source_image = imageio.imread(SOURCE_IMAGE_PATH)
source_image = pad_crop_resize(source_image, *get_crop(source_image, center_face=center_image_to_head, crop_face=crop_image_to_head, expansion_factor=image_crop_expansion_factor))

# Load driving video
if not os.path.exists(DRIVING_VIDEO_PATH):
    raise FileNotFoundError(f"Driving video file not found: {DRIVING_VIDEO_PATH}")

with imageio.get_reader(DRIVING_VIDEO_PATH, format='mp4') as reader:
    fps = reader.get_meta_data()['fps']
    driving_video = []
    landmarks = None
    i = 0
    im = None
    try:
        # Scan for the first frame in which a face is found (only when cropping
        # to the head); otherwise stop at the first frame.
        for i, im in enumerate(reader):
            if not crop_video_to_head:
                break
            landmarks = fa.get_landmarks_from_image(im)
            if landmarks:
                break
        if im is None:
            raise ValueError(f"No frames could be read from {DRIVING_VIDEO_PATH}")
        # The same crop window is applied to every frame.
        x0, x1, y0, y1 = get_crop(im, center_face=center_video_to_head, crop_face=crop_video_to_head, expansion_factor=video_crop_expansion_factor, landmarks=landmarks)
        reader.set_image_index(0)
        for im in reader:
            driving_video.append(pad_crop_resize(im, x0, x1, y0, y1))
    except RuntimeError as read_error:
        # Best effort: keep the frames decoded so far, but say why reading stopped
        # instead of hiding the error.
        print(f"Warning: stopped reading the driving video after {len(driving_video)} frame(s): {read_error}")

def vid_display(path, source, driving, generated=None):
    """Write a side-by-side comparison video to ``path`` and preview it.

    Each output frame is the source image, the matching driving frame and, when
    ``generated`` is given, the matching generated frame, stacked horizontally.
    The video is written at the notebook-level ``fps``.
    """
    assert len(driving) > 0
    output_frames = []
    for frame_idx in range(len(driving)):
        panels = [source, driving[frame_idx]]
        if generated:
            panels.append(generated[frame_idx])
        output_frames.append(img_as_ubyte(np.hstack(panels)))
    imageio.mimwrite(path, output_frames, fps=fps)
    print(f"Output video saved at '{path}'.")
    show_video_file(path)

# Report whether the scan of the driving video found a face, then write and
# preview the source/driving comparison video.
if not landmarks:
    print('No face landmarks found in driving video (first segment). You may want to check your video or settings.')
else:
    print(f'First found head in frame {i}')

vid_display(OUT_VIDEO_PATH, source_image, driving_video)

RuntimeError: unexpected EOF, expected 998062 more bytes. The file might be corrupted.

In [None]:
# (5) Find best alignment

import os

import sys

import torch

# Make the cloned first-order-model repository importable WITHOUT changing the
# working directory. Calling os.chdir() here made the cell fail on a second run
# (the folder is then looked up inside itself) and silently shifted every
# relative path used by the cells that follow.
first_order_model_path = os.path.abspath("first-order-model")
if not os.path.isdir(first_order_model_path):
    raise FileNotFoundError("first-order-model folder not found in the current directory.")
if first_order_model_path not in sys.path:
    sys.path.insert(0, first_order_model_path)

from demo import load_checkpoints

# Ask for CPU loading only when CUDA is unavailable, so the call is unchanged on
# GPU machines. NOTE(review): assumes load_checkpoints accepts a `cpu` keyword,
# as the upstream first-order-model demo does — confirm for this fork.
checkpoint_kwargs = {} if torch.cuda.is_available() else {'cpu': True}
generator, kp_detector = load_checkpoints(
    config_path=os.path.join(first_order_model_path, 'config/vox-adv-256.yaml'),
    checkpoint_path=os.path.join(first_order_model_path, 'vox-adv-cpk.pth.tar'),
    **checkpoint_kwargs
)

from scipy.spatial import ConvexHull
def normalize_kp(kps):
    """Normalise each keypoint set and return the one with the largest hull.

    Every set is centred on its mean, and its first two columns are divided by
    the square root of the convex-hull area of those columns. The normalised
    set whose scale was largest is returned (``None`` when ``kps`` is empty).
    """
    largest_scale = 0
    chosen = None
    for raw_kp in kps:
        centered = raw_kp - raw_kp.mean(axis=0, keepdims=True)
        # For 2-D points, ConvexHull.volume is the enclosed area.
        scale = np.sqrt(ConvexHull(centered[:, :2]).volume)
        centered[:, :2] = centered[:, :2] / scale
        if scale > largest_scale:
            largest_scale, chosen = scale, centered
    return chosen

from tqdm import tqdm
import numpy as np

# Fail with a clear message instead of an IndexError further down.
if len(driving_video) == 0:
    raise ValueError("driving_video is empty - run step (4) successfully before looking for the best frame.")

# Landmarks of the avatar image. Frames hold floats (see the * 255 below), while
# the detector is given 0-255 values.
kp_source = fa.get_landmarks_from_image(255 * source_image)
if kp_source:
    norm_kp_source = normalize_kp(kp_source)

# Search for the driving frame whose normalised landmarks are closest
# (sum of squared differences) to those of the source image.
norm = float('inf')
best = 0
best_kp_driving = None
# tqdm wraps the list itself (not the enumerate iterator) so that it knows the
# total and can show a real progress bar.
for i, image in enumerate(tqdm(driving_video)):
    kp_driving = fa.get_landmarks_from_image(255 * image)
    if kp_driving:
        norm_kp_driving = normalize_kp(kp_driving)
        if kp_source:
            new_norm = (np.abs(norm_kp_source - norm_kp_driving) ** 2).sum()
            if new_norm < norm:
                norm = new_norm
                best = i
                best_kp_driving = kp_driving
        else:
            # No source landmarks to compare with: keep the first detected face.
            # NOTE(review): `best` stays 0 here, so the keypoints drawn below may
            # come from a later frame than driving_video[best] — confirm intent.
            best_kp_driving = kp_driving
            break

from logger import Visualizer

# Keypoints are rescaled from pixel coordinates to [-1, 1] before drawing.
vis = Visualizer(kp_size=3, colormap='gist_rainbow')
source_with_kp = vis.draw_image_with_kp(
    source_image,
    kp_source[0]*2/np.array(source_image.shape[:2][::-1])[np.newaxis] - 1
) if kp_source else source_image
driving_with_kp = vis.draw_image_with_kp(
    driving_video[best],
    best_kp_driving[0]*2/np.array(driving_video[best].shape[:2][::-1])[np.newaxis] - 1
) if best_kp_driving else driving_video[best]

print('\nbest frame=%d' % best)

# Visualize using OpenCV (cv2.imshow), which works locally
import cv2

# Reverse the channel order for OpenCV display and scale to 8-bit.
show_img = (np.hstack([source_with_kp, driving_with_kp])[..., ::-1] * 255).astype(np.uint8)
cv2.imshow("Source with Keypoints (left) / Driving with Keypoints (right)", show_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# (6) Animate

exaggerate_factor = 1  # You can adjust this between 0.1 and 5
adapt_movement_scale = True
use_relative_movement = True
use_relative_jacobian = True

import torch
import os
import imageio
import numpy as np
# img_as_ubyte is a function exported by the skimage package itself (the same
# import step (4) uses); `skimage.img_as_ubyte` is not a module, so
# `from skimage.img_as_ubyte import ...` fails at import time.
from skimage import img_as_ubyte
from scipy.spatial import ConvexHull
from tqdm import tqdm

def full_normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False,
                      use_relative_movement=False, use_relative_jacobian=False, exaggerate_factor=1):
    """Build the keypoints used to drive the generator for one frame.

    Args:
        kp_source: Keypoint dict of the source image (``'value'``, ``'jacobian'``).
        kp_driving: Keypoint dict of the current driving frame.
        kp_driving_initial: Keypoint dict of the reference driving frame.
        adapt_movement_scale: Scale motion by the ratio of the square roots of
            the source and reference convex-hull areas.
        use_relative_movement: Apply the driving motion relative to the
            reference frame on top of the source keypoints.
        use_relative_jacobian: Also transfer the relative jacobian (only used
            together with ``use_relative_movement``).
        exaggerate_factor: Extra multiplier applied to the relative motion.

    Returns:
        A new dict with the same keys as ``kp_driving``; ``'value'`` and
        ``'jacobian'`` are replaced according to the flags above.
    """
    def _hull_area(keypoints):
        # Convex-hull area of the first batch element's keypoints.
        return ConvexHull(keypoints['value'][0].data.cpu().numpy()).volume

    if adapt_movement_scale:
        movement_scale = np.sqrt(_hull_area(kp_source)) / np.sqrt(_hull_area(kp_driving_initial))
    else:
        movement_scale = 1

    # Shallow copy: entries that are not overwritten are shared with kp_driving.
    kp_new = dict(kp_driving.items())
    if not use_relative_movement:
        return kp_new

    displacement = (kp_driving['value'] - kp_driving_initial['value'])
    displacement *= movement_scale * exaggerate_factor
    kp_new['value'] = displacement + kp_source['value']

    if use_relative_jacobian:
        relative_jacobian = torch.matmul(kp_driving['jacobian'], torch.inverse(kp_driving_initial['jacobian']))
        kp_new['jacobian'] = torch.matmul(relative_jacobian, kp_source['jacobian'])

    return kp_new

def make_animation(source_image, driving_video, generator, kp_detector, adapt_movement_scale=False,
                   use_relative_movement=False, use_relative_jacobian=False, cpu=False, exaggerate_factor=1):
    """Animate ``source_image`` with the motion of ``driving_video``.

    Args:
        source_image: Source image array with channels last.
        driving_video: Sequence of frames with the same layout; the first frame
            is the reference for relative motion.
        generator: Generator network, called once per driving frame.
        kp_detector: Keypoint detector network.
        adapt_movement_scale, use_relative_movement, use_relative_jacobian,
        exaggerate_factor: Forwarded to ``full_normalize_kp``.
        cpu: Force CPU execution. When False, CUDA is used if it is available
            and the CPU otherwise, instead of failing on machines without a GPU.

    Returns:
        List with one predicted frame (channels last) per driving frame;
        empty when ``driving_video`` is empty.
    """
    if len(driving_video) == 0:
        # Nothing to animate; avoids an obscure permute() error on an empty array.
        return []

    device = torch.device('cpu' if cpu or not torch.cuda.is_available() else 'cuda')
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        # The whole clip stays on the CPU; frames are moved to the device one at a time.
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        # Move the reference frame to the same device as the source.
        kp_driving_initial = kp_detector(driving[:, :, 0].to(device))

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx].to(device)
            kp_driving = kp_detector(driving_frame)
            kp_norm = full_normalize_kp(
                kp_source=kp_source, kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial, adapt_movement_scale=adapt_movement_scale,
                use_relative_movement=use_relative_movement, use_relative_jacobian=use_relative_jacobian,
                exaggerate_factor=exaggerate_factor
            )
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            # Reorder the prediction to channels-last and drop the batch axis.
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
    return predictions

# Set local working directory for outputs
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Animations: run forward and backward from the best-aligned frame, so that
# frame is the reference for relative motion in both directions.
predictions_forward = make_animation(
    source_image, driving_video[best:], generator, kp_detector, 
    adapt_movement_scale=adapt_movement_scale, 
    use_relative_movement=use_relative_movement,
    use_relative_jacobian=use_relative_jacobian, exaggerate_factor=exaggerate_factor
)
predictions_backward = make_animation(
    source_image, driving_video[:(best+1)][::-1], generator, kp_detector, 
    adapt_movement_scale=adapt_movement_scale, 
    use_relative_movement=use_relative_movement,
    use_relative_jacobian=use_relative_jacobian, exaggerate_factor=exaggerate_factor
)

# Write video. The backward pass is reversed again; frame `best` appears in both
# passes, so it is dropped from the forward one.
full_frames = predictions_backward[::-1] + predictions_forward[1:]
out_mp4 = os.path.join(output_dir, "generated.mp4")
imageio.mimwrite(out_mp4, [img_as_ubyte(frame) for frame in full_frames], fps=fps)

# If you want to mix audio and/or re-encode:
import subprocess

input_video_path = out_mp4  # Generated video
# The audio track comes from the driving video the frames were read from. Reuse
# the path configured in step (4) when it is defined, instead of a hard-coded
# name that the earlier steps never create.
original_audio_path = globals().get("DRIVING_VIDEO_PATH", "video.mp4")
final_output = os.path.join(output_dir, "final.mp4")

# `-y` (overwrite) is a global option and goes first.
ffmpeg_cmd = ["ffmpeg", "-y", "-i", input_video_path]
has_audio_source = os.path.exists(original_audio_path)
if has_audio_source:
    ffmpeg_cmd += ["-i", original_audio_path]
else:
    # A missing input makes ffmpeg abort, so write a silent video instead.
    print(f"Warning: audio source '{original_audio_path}' not found - the final video will have no audio.")
ffmpeg_cmd += ["-c:v", "libx264"]
if has_audio_source:
    # The trailing '?' makes the audio stream optional if the source has none.
    ffmpeg_cmd += ["-c:a", "aac", "-map", "0:v", "-map", "1:a?"]
else:
    ffmpeg_cmd += ["-map", "0:v"]
ffmpeg_cmd += ["-pix_fmt", "yuv420p", final_output]
subprocess.run(ffmpeg_cmd, check=True)

print(f"Video saved at {final_output}")

# Video display (OpenCV window)
import cv2

# waitKey(0) blocks until a key press, so never let the delay reach 0.
frame_delay_ms = max(1, int(1000//fps)) if fps and fps > 0 else 40
try:
    for frame in full_frames:
        # Reverse the channel order for OpenCV display and scale to 8-bit.
        frame_bgr = (frame[..., ::-1] * 255).astype(np.uint8)
        cv2.imshow("Animation", frame_bgr)
        if cv2.waitKey(frame_delay_ms) & 0xFF == ord('q'):
            break
finally:
    cv2.destroyAllWindows()


In [None]:
# (7) Save and Notify
# Tell the user where the generated video was written. Falls back to
# 'final.mp4' in the current directory when `final_output` is not defined.

if 'final_output' in locals():
    output_path = os.path.abspath(final_output)
else:
    output_path = os.path.abspath('final.mp4')
print(f"\nYour video is saved at: {output_path}")
print("Please open this file using your preferred video player.")

In [None]:
# (8) Optionally apply Wav2Lip post processing (local version, not Colab)

import os
import sys
import subprocess
import shutil

wav2lip_post_processing = True  # Set to False to skip post-processing
smooth_face_detection = True    # Set to False if you don't want smoothing

if wav2lip_post_processing:
    import urllib.request

    # Install required packages if missing
    try:
        import librosa
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "librosa==0.9.2"])
    try:
        import gdown
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"])

    # Set up paths
    work_dir = os.path.abspath("./Wav2Lip_work")
    wav2lip_dir = os.path.join(work_dir, "Wav2Lip")
    checkpoints_dir = os.path.join(wav2lip_dir, "checkpoints")
    fd_sfd_dir = os.path.join(wav2lip_dir, "face_detection", "detection", "sfd")
    temp_dir = os.path.join(wav2lip_dir, "temp")
    inference_py = os.path.join(wav2lip_dir, "inference.py")

    # Use the file written by step (6) when it is defined; otherwise fall back
    # to final.mp4 in the current directory.
    final_mp4_src = os.path.abspath(globals().get("final_output", "final.mp4"))
    if not os.path.exists(final_mp4_src):
        raise FileNotFoundError(f"Input video for Wav2Lip not found: {final_mp4_src}")
    final_wav2lip_out = os.path.join(work_dir, "final_wav2lip.mp4")
    faulty_frame_jpg = os.path.join(temp_dir, "faulty_frame.jpg")

    os.makedirs(work_dir, exist_ok=True)

    # Remove previous output if it exists
    if os.path.exists(final_wav2lip_out):
        os.remove(final_wav2lip_out)

    # Clone Wav2Lip repo if needed. This must happen BEFORE any sub-directory is
    # created: creating checkpoints/ first makes wav2lip_dir exist, and the
    # clone would then be skipped forever.
    if not os.path.exists(inference_py):
        if os.path.exists(wav2lip_dir):
            # Skeleton left by an earlier failed run; git cannot clone into a
            # non-empty directory. Any weights inside are downloaded again below.
            shutil.rmtree(wav2lip_dir)
        subprocess.check_call([
            "git", "clone", "--depth", "1",
            "https://github.com/eyaler/Wav2Lip.git",
            wav2lip_dir
        ])
    os.makedirs(checkpoints_dir, exist_ok=True)
    os.makedirs(fd_sfd_dir, exist_ok=True)

    # Download wav2lip_gan.pth if needed
    wav2lip_gan_path = os.path.join(checkpoints_dir, "wav2lip_gan.pth")
    if not os.path.exists(wav2lip_gan_path):
        # Try google drive (gdown) first
        import gdown
        try:
            gdown.download(
                'https://drive.google.com/uc?id=1dwHujX7RVNCvdR1RR93z0FS2T2yzqup9',
                wav2lip_gan_path, quiet=False)
        except Exception as download_error:
            # Any gdown failure just triggers the fallback host below.
            print(f"gdown download failed ({download_error}); trying fallback host.")
        # If download didn't succeed, try fallback hosting
        if not os.path.exists(wav2lip_gan_path):
            urllib.request.urlretrieve(
                "https://eyalgruss.com/fomm/wav2lip_gan.pth", wav2lip_gan_path)

    # Download s3fd face detector if needed
    s3fd_path = os.path.join(fd_sfd_dir, "s3fd.pth")
    if not os.path.exists(s3fd_path):
        s3fd_url = "https://github.com/clcarwin/sfd_pytorch/releases/download/v1.0/s3fd-619a316812.pth"
        urllib.request.urlretrieve(s3fd_url, s3fd_path)

    # Start from an empty Wav2Lip/temp, and make sure the directory exists again
    # afterwards. NOTE(review): inference.py is assumed to write its scratch
    # files (including faulty_frame.jpg) under ./temp relative to its working
    # directory — confirm against the cloned repo.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir, ignore_errors=True)
    os.makedirs(temp_dir, exist_ok=True)

    # Build inference command
    nosmooth = [] if smooth_face_detection else ["--nosmooth"]
    args = [
        sys.executable, inference_py,
        "--checkpoint_path", wav2lip_gan_path,
        "--face", final_mp4_src,
        "--audio", final_mp4_src,
        "--pads", "0", "20", "0", "0",
        "--outfile", final_wav2lip_out
    ] + nosmooth

    # Run inference from inside the repository. Do not raise on failure yet: a
    # failed run that left faulty_frame.jpg behind is handled by the fallback.
    first_pass = subprocess.run(args, cwd=wav2lip_dir)

    # If faulty_frame.jpg exists, try fallback box inference
    if os.path.exists(faulty_frame_jpg):
        import cv2
        print('\nFace not detected - will use whole frame')
        video_stream = cv2.VideoCapture(final_mp4_src)
        try:
            still_reading, frame = video_stream.read()
        finally:
            video_stream.release()
        if not still_reading:
            raise RuntimeError("Can't read the frame from final.mp4 for bounding box calculation.")
        # Use the whole frame, trimmed to a centred square when it is wider than tall.
        x1 = y1 = 0
        y2, x2 = frame.shape[:2]
        h = y2
        if x2 > h:
            x1 = (x2 - h) // 2
            x2 = x1 + y2
        args_box = [
            sys.executable, inference_py,
            "--checkpoint_path", wav2lip_gan_path,
            "--face", final_mp4_src,
            "--audio", final_mp4_src,
            "--box", str(y1), str(y2), str(x1), str(x2),
            "--pads", "0", "20", "0", "0",
            "--outfile", final_wav2lip_out
        ]
        subprocess.check_call(args_box, cwd=wav2lip_dir)
    elif first_pass.returncode != 0:
        # Failed for some other reason: surface it like check_call would.
        raise subprocess.CalledProcessError(first_pass.returncode, first_pass.args)

    print(f"Wav2Lip post-processing complete. Output: {final_wav2lip_out}")
    print("You can now open the resulting video file locally.")