<a href="https://colab.research.google.com/github/karthik18-lgtm/Lip_sync_models/blob/main/SadTalker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================================================================================
# THE FINAL, ALL-IN-ONE SETUP SCRIPT FOR SADTALKER ON MODERN COLAB
# =========================================================================================

# 1. Apply asyncio patch for Gradio in Colab
!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

# 2. Clone the SadTalker repository and enter it
!git clone https://github.com/OpenTalker/SadTalker.git
%cd SadTalker

# 3. Install a modern, working set of all required libraries
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install basicsr gfpgan gradio dlib-bin face-alignment kornia yacs uvicorn==0.19.0

# 4. Apply all necessary code patches to fix incompatibilities

# PATCH 1 (NEW): Fixes the 'torchvision.transforms.functional_tensor' error inside the 'basicsr' library.
!sed -i "s/from torchvision.transforms.functional_tensor import rgb_to_grayscale/from torchvision.transforms.functional import rgb_to_grayscale/g" /usr/local/lib/python3.12/dist-packages/basicsr/data/degradations.py

# PATCH 2: Fixes the 'np.VisibleDeprecationWarning' error inside the SadTalker code.
!sed -i "s/np.VisibleDeprecationWarning/DeprecationWarning/g" /content/SadTalker/src/face3d/util/preprocess.py

# PATCH 3: Fixes the deprecated 'np.float' error in SadTalker's code.
!sed -i "s/np.float/float/g" /content/SadTalker/src/face3d/util/my_awing_arch.py

# PATCH 4: Fixes the ValueError by ensuring the transform variable is flattened.
!sed -i '101i \    t = t.flatten()' /content/SadTalker/src/face3d/util/preprocess.py


print("\n\n✅✅✅ Final Setup Complete. All libraries are installed and patched. ✅✅✅\n")

Cloning into 'SadTalker'...
remote: Enumerating objects: 1605, done.[K
remote: Counting objects: 100% (545/545), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 1605 (delta 470), reused 445 (delta 445), pack-reused 1060 (from 1)[K
Receiving objects: 100% (1605/1605), 92.20 MiB | 40.17 MiB/s, done.
Resolving deltas: 100% (882/882), done.
/content/SadTalker
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting basicsr
  Downloading basicsr-1.4.2.tar.gz (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.5/172.5 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gfpgan
  Downloading gfpgan-1.3.8-py3-none-any.whl.metadata (12 kB)
Collecting dlib-bin
  Downloading dlib_bin-20.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.2 kB)
Collecting face-alignment
  Downloading face_alignment-1.4.1-py2.py3-none-any.whl.metadata (7.4 kB)
Co

In [None]:
# Run the provided script to download all required model checkpoints
!bash scripts/download_models.sh

--2025-10-20 11:06:35--  https://github.com/OpenTalker/SadTalker/releases/download/v0.0.2-rc/mapping_00109-model.pth.tar
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/569518584/ccc415aa-c6f4-47ee-8250-b10bf440ba62?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-10-20T11%3A42%3A02Z&rscd=attachment%3B+filename%3Dmapping_00109-model.pth.tar&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-10-20T10%3A41%3A28Z&ske=2025-10-20T11%3A42%3A02Z&sks=b&skv=2018-11-09&sig=3JGqRYNMZHD%2BAUXbLXtzLh1%2B7qPnKrEhBXYHv12wZ%2Fs%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2MDk2MTk5NSwibmJmIjoxNzYwOTU4Mzk1LCJwYXRoIjoi

In [None]:
# DISABLED CELL: the whole script below is wrapped in a triple-quoted string literal, so
# none of it executes; as the last expression of the cell the string is merely echoed back
# as the cell output. It is an in-notebook variant of the Gradio app; the
# `%%writefile app.py` cell further down holds the closely matching version that is
# actually run.
'''import gradio as gr
import subprocess
import os
import glob

# --- Main Function to Run SadTalker (with Enhanced Debugging) ---
def run_sadtalker(source_image, driven_audio):
    """
    This function takes an image and audio file, runs the SadTalker inference script,
    and returns the path to the generated video.

    This version has been updated to capture and print detailed error messages.
    """
    if source_image is None or driven_audio is None:
        return None

    command = [
        'python',
        'inference.py',
        '--driven_audio', driven_audio,
        '--source_image', source_image,
        '--result_dir', './results'
    ]

    print("Executing SadTalker command...")
    try:
        # UPDATED: We now capture the output to see any errors from the script
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("Script STDOUT:", result.stdout)

    except subprocess.CalledProcessError as e:
        # This is the crucial part: print the actual error from the script
        print("\n--- ERROR ---")
        print("The SadTalker script failed to run.")
        print("Error message from the script (STDERR):")
        print(e.stderr) # This will show the detailed traceback
        print("-------------")
        return None # Return None if the script fails

    print("SadTalker execution finished successfully.")

    result_files = glob.glob('./results/*.mp4')
    if not result_files:
        print("Error: No video file was generated even though the script ran.")
        return None

    latest_file = max(result_files, key=os.path.getctime)
    print(f"Generated video path: {latest_file}")

    return latest_file


# --- Gradio Interface Construction ---
with gr.Blocks(title="SadTalker UI") as demo:
    gr.Markdown("# 😎 SadTalker Demo")
    gr.Markdown("Upload a source image and an audio file to generate a talking head video.")

    with gr.Row():
        image_input = gr.Image(label="Source Image (Face)", type="filepath")
        audio_input = gr.Audio(label="Driving Audio (WAV/MP3)", type="filepath")

    submit_btn = gr.Button("Generate Video", variant="primary")
    video_output = gr.Video(label="Resulting Video")

    submit_btn.click(
        fn=run_sadtalker,
        inputs=[image_input, audio_input],
        outputs=[video_output]
    )

# Launch the Gradio app
demo.launch(share=True, debug=True)'''

'import gradio as gr\nimport subprocess\nimport os\nimport glob\n\n# --- Main Function to Run SadTalker (with Enhanced Debugging) ---\ndef run_sadtalker(source_image, driven_audio):\n    """\n    This function takes an image and audio file, runs the SadTalker inference script,\n    and returns the path to the generated video.\n    \n    This version has been updated to capture and print detailed error messages.\n    """\n    if source_image is None or driven_audio is None:\n        return None\n\n    command = [\n        \'python\',\n        \'inference.py\',\n        \'--driven_audio\', driven_audio,\n        \'--source_image\', source_image,\n        \'--result_dir\', \'./results\'\n    ]\n\n    print("Executing SadTalker command...")\n    try:\n        # UPDATED: We now capture the output to see any errors from the script\n        result = subprocess.run(command, check=True, capture_output=True, text=True)\n        print("Script STDOUT:", result.stdout)\n\n    except subprocess.Call

In [None]:
%%writefile app.py

import gradio as gr
import subprocess
import os
import glob
import nest_asyncio

# Apply the nest_asyncio patch (the same asyncio patch the setup cell applies for Gradio)
nest_asyncio.apply()

# Change the working directory to SadTalker
# This is crucial for the script to find all necessary files
# (inference.py and ./results are addressed relative to it below).
try:
    os.chdir('/content/SadTalker')
except FileNotFoundError:
    # `exit()` is the interactive helper injected by the `site` module and, called with no
    # argument, reports success (status 0). SystemExit with a message prints it to stderr
    # and terminates with status 1, so the failure is visible to whoever launched the script.
    raise SystemExit(
        "Error: /content/SadTalker directory not found. Please make sure you ran the setup cells."
    ) from None
print("Successfully changed directory to /content/SadTalker")

# --- Main Function to Run SadTalker ---
def run_sadtalker(source_image, driven_audio):
    """Run SadTalker's inference.py on one image/audio pair and return the new video path.

    Args:
        source_image: File path of the source face image, or None if nothing was uploaded.
        driven_audio: File path of the driving audio, or None if nothing was uploaded.

    Returns:
        Path (as produced by glob, e.g. './results/<name>.mp4') of the most recently
        created mp4 that this run added to or rewrote in ./results, or None when an
        input is missing, the inference script exits with an error, or the run
        produced no video.
    """
    import sys  # local import: the surrounding script does not import sys

    if source_image is None or driven_audio is None:
        return None

    results_pattern = './results/*.mp4'
    # Remember what was already in ./results (and when it was last modified) so that a
    # video left over from an earlier request is never returned as this run's output.
    previous = {path: os.path.getmtime(path) for path in glob.glob(results_pattern)}

    command = [
        sys.executable,  # same interpreter (and installed packages) as this script
        'inference.py',
        '--driven_audio', driven_audio,
        '--source_image', source_image,
        '--result_dir', './results'
    ]

    print("Executing SadTalker command...")
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("Script STDOUT:", result.stdout)
    except subprocess.CalledProcessError as e:
        print("\n--- ERROR ---")
        print("The SadTalker script failed to run.")
        print("Error message from the script (STDERR):")
        print(e.stderr)
        print("-------------")
        return None

    print("SadTalker execution finished successfully.")
    # Keep only files that are new, or whose modification time changed during this run.
    result_files = [
        path for path in glob.glob(results_pattern)
        if previous.get(path) != os.path.getmtime(path)
    ]
    if not result_files:
        print("Error: No video file was generated.")
        return None

    latest_file = max(result_files, key=os.path.getctime)
    print(f"Generated video path: {latest_file}")
    return latest_file

# --- Gradio Interface Construction ---
# Layout: two heading lines, one row holding both upload widgets, then the trigger
# button and the video player that shows the result.
with gr.Blocks(title="SadTalker UI") as demo:
    gr.Markdown("# 😎 SadTalker Demo (Standalone Script)")
    gr.Markdown("Upload a source image and an audio file to generate a talking head video.")

    with gr.Row():
        face_picker = gr.Image(type="filepath", label="Source Image (Face)")
        audio_picker = gr.Audio(type="filepath", label="Driving Audio (WAV/MP3)")

    generate_button = gr.Button("Generate Video", variant="primary")
    result_player = gr.Video(label="Resulting Video")

    # Both upload widgets pass file paths to run_sadtalker; its return value feeds the player.
    generate_button.click(
        fn=run_sadtalker,
        inputs=[face_picker, audio_picker],
        outputs=[result_player],
    )

# Start serving the UI.
# share=True is what exposes the public link needed to reach the app from Colab.
print("Launching Gradio App...")
demo.launch(share=True)

Writing app.py


In [None]:
# Start the Gradio app that the `%%writefile app.py` cell wrote, as a separate Python process.
# The cell keeps running while the server is up; interrupt the cell to shut the server down.
!python app.py

Successfully changed directory to /content/SadTalker
Launching Gradio App...
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://9491fd06aeb89ef100.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Executing SadTalker command...
Script STDOUT: using safetensor as default
3DMM Extraction for source image
The generated video is named ./results/2025_10_20_11.08.10/czNmcy1wcml2YXRlL3Jhd3BpeGVsX2ltYWdlcy93ZWJzaXRlX2NvbnRlbnQvbHIvNDc5LW1rLTk2OTAtam9iNTgzLmpwZw.jpg##1760778510297914154jo22mvr-voicemaker.in-speech.mp4
The generated video is named: ./results/2025_10_20_11.08.10.mp4

SadTalker execution finished successfully.
Generated video path: ./results/2025_10_20_11.08.10.mp4
Keyboard interruption in main thread... closing server.
Traceback (most recent call last):
  File "/usr/local/lib/python3

In [None]:
!pip install kornia yacs --upgrade basicsr gfpgan

