In [13]:
import os
import subprocess
from PIL import Image

# Define base directory
# Every working directory below is derived from base_path (a relative path,
# so it is resolved against the current working directory at run time).
base_path = "talking_face_preprocessing"
hdtf_dir = os.path.join(base_path, "HDTF")
dataset_dir = os.path.join(hdtf_dir, "HDTF_dataset")
raw_video_dir = os.path.join(base_path, "raw_videos")  # full-length downloads land here
clips_dir = os.path.join(base_path, "clips")  # scratch location for clips cut by ffmpeg
video_dir = os.path.join(base_path, "assets/video")  # clips are moved here for the processing pipeline

# HDTF dataset files
video_url_path = os.path.join(dataset_dir, "WDA_video_url.txt")
# NOTE(review): only the variable name uses the spelling "annotation"; the file
# name on disk is still spelled "annotion" (sic). Presumably this mirrors the
# file name shipped with the dataset -- verify before "correcting" the literal.
annotation_time_path = os.path.join(dataset_dir, "WDA_annotion_time.txt")

def get_video_annotations(video_name, annotation_file):
    """Return the time-range tokens annotated for ``video_name``.

    Each line of ``annotation_file`` is read as
    ``<video> <range> [<range> ...]`` with fields separated by any run of
    whitespace (spaces or tabs). Lines with fewer than two fields are ignored.
    An entry whose name ends in ``.mp4`` is also indexed without the suffix.

    Lookup order: exact match on the name (with and without ``.mp4``), then a
    case-insensitive match.

    Args:
        video_name: Name to look up, with or without a ``.mp4`` suffix.
        annotation_file: Path to the annotation text file (UTF-8, BOM allowed).

    Returns:
        The list of range tokens exactly as written in the file, or an empty
        list when the file does not exist or holds no entry for the video.
    """
    annotations = {}
    try:
        with open(annotation_file, "r", encoding='utf-8-sig') as file:
            for line in file:
                # split() without an argument collapses repeated spaces and
                # tabs, so no empty "timestamp" tokens can slip through.
                parts = line.split()
                if len(parts) < 2:
                    continue
                video, times = parts[0], parts[1:]
                annotations[video] = times
                if video.endswith('.mp4'):
                    annotations[video[:-4]] = times
    except FileNotFoundError:
        print(f"Annotation file {annotation_file} not found.")
        return []

    # Exact matches take priority over the case-insensitive fallback.
    for candidate in (video_name, video_name + '.mp4', video_name.replace('.mp4', '')):
        if candidate in annotations:
            return annotations[candidate]

    lower_video_name = video_name.lower()
    accepted = (lower_video_name, lower_video_name + '.mp4')
    for key, times in annotations.items():
        if key.lower() in accepted:
            return times
    return []

def _run_tool(args):
    """Run an external tool without a shell; return ``(succeeded, stderr_text)``."""
    try:
        result = subprocess.run(args, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the executable itself cannot be started (e.g. not installed).
        return False, str(exc)
    return result.returncode == 0, result.stderr


def process_hdtf_video(video_name, url):
    """Download one video, cut its annotated clips and move them to ``video_dir``.

    The full video is fetched with yt-dlp into ``raw_video_dir`` (skipped when
    the file is already there). Every ``start-end`` token returned by
    ``get_video_annotations`` is then cut out with ffmpeg (stream copy) into
    ``clips_dir`` and finally moved into ``video_dir``.

    Args:
        video_name: Name of the video; used for the download file name, the
            annotation lookup and the ``<video_name>_<idx>.mp4`` clip names.
        url: Source URL handed to yt-dlp.

    Returns:
        A list with the file names (not full paths) of the clips now located
        in ``video_dir``, or ``None`` when the download failed, no usable
        annotation exists, or a clip could not be extracted.
    """
    # Neither ffmpeg nor os.replace creates missing parent directories.
    for directory in (raw_video_dir, clips_dir, video_dir):
        os.makedirs(directory, exist_ok=True)

    downloaded_video = os.path.join(raw_video_dir, f"{video_name}.mp4")
    if not os.path.exists(downloaded_video):
        print(f"Downloading HDTF video {video_name} from {url}")
        # Argument list instead of a shell string: names and URLs come from a
        # data file and must never be interpreted by a shell.
        ok, stderr = _run_tool([
            "yt-dlp",
            "-f", "bestvideo[height<=1080]+bestaudio/best",
            "-o", downloaded_video,
            "--merge-output-format", "mp4",
            "--",  # end of options, so a URL starting with '-' is not read as a flag
            url,
        ])
        if not ok:
            print(f"Failed to download {video_name}: {stderr}")
            return None

    if not os.path.exists(downloaded_video):
        print(f"Download failed for {video_name}")
        return None

    annotations = get_video_annotations(video_name, annotation_time_path)
    if not annotations:
        print(f"No annotations found for {video_name}. Skipping.")
        return None

    clip_outputs = []
    for idx, timestamp in enumerate(annotations):
        bounds = timestamp.split('-')
        if len(bounds) != 2 or not all(bounds):
            print(f"Skipping malformed annotation '{timestamp}' for {video_name}")
            continue
        start, end = bounds
        clip_output = os.path.join(clips_dir, f"{video_name}_{idx}.mp4")
        if not os.path.exists(clip_output):
            ok, stderr = _run_tool([
                "ffmpeg", "-y",
                "-i", downloaded_video,
                "-ss", start, "-to", end,
                "-c", "copy",
                clip_output,
            ])
            if not ok:
                print(f"Failed to extract clip: {stderr}")
                # Do not leave a partial file behind: the existence check above
                # would treat it as a finished clip on the next run.
                if os.path.exists(clip_output):
                    os.remove(clip_output)
                return None
        clip_outputs.append(clip_output)

    if not clip_outputs:
        print(f"No usable annotations for {video_name}. Skipping.")
        return None

    # Move clips to video_dir for further processing
    video_files = []
    for clip in clip_outputs:
        video_file = os.path.join(video_dir, os.path.basename(clip))
        # os.replace overwrites an existing target on every platform.
        os.replace(clip, video_file)
        video_files.append(os.path.basename(video_file))
    return video_files

def resize_images_to_256x256(directory):
    """Resize every image in ``directory`` to 256x256 in place.

    Each image is converted to RGB, scaled with LANCZOS resampling so that its
    shorter side becomes 256 pixels (aspect ratio preserved), and the centered
    256x256 region is kept. The result overwrites the original file.

    Files are selected by extension (.png, .jpg, .jpeg, .bmp, .tiff, case
    insensitive). A file that cannot be processed is reported and skipped so
    that one bad image does not stop the batch.

    Args:
        directory: Folder whose images are resized. If it does not exist a
            message is printed and nothing else happens.
    """
    side = 256
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    if not os.path.isdir(directory):
        print(f"Directory {directory} not found. Nothing to resize.")
        return

    for filename in os.listdir(directory):
        if not filename.lower().endswith(image_suffixes):
            continue
        file_path = os.path.join(directory, filename)
        try:
            # The context manager closes the source file before it is overwritten.
            with Image.open(file_path) as source:
                img = source if source.mode == 'RGB' else source.convert('RGB')

                orig_width, orig_height = img.size
                aspect_ratio = orig_width / orig_height

                # Scale so the shorter side is exactly `side`; the longer side
                # therefore ends up >= `side` and is trimmed by the crop below.
                if aspect_ratio > 1:
                    new_height = side
                    new_width = int(new_height * aspect_ratio)
                else:
                    new_width = side
                    new_height = int(new_width / aspect_ratio)

                img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

            # Center crop; for an already-square result the box is the full image.
            left = (new_width - side) // 2
            top = (new_height - side) // 2
            new_img = img_resized.crop((left, top, left + side, top + side))

            new_img.save(file_path, quality=95)
            print(f"Resized {filename} to 256x256")
        except Exception as e:
            # Deliberately broad: best-effort per file, keep going with the rest.
            print(f"Error resizing {filename}: {e}")

def process_videos(video_files, base_dir):
    """Run the extraction scripts over the clips staged under ``base_dir``.

    Steps:
      1. ``extract_raw_video_data.py`` once, over the whole ``assets/video``
         folder (audio and frames are written under ``processed_data``).
      2. For every entry in ``video_files``: ``extract_cropped_faces.py`` on
         that clip's frames, then resize the resulting face images to 256x256.
      3. ``extract_frame_landmarks.py`` once, over the whole frames folder.

    The scripts are invoked by relative name, so they are looked up in the
    current working directory.

    Args:
        video_files: Clip file names (e.g. ``"name_0.mp4"``); the extension is
            stripped to obtain the per-clip folder name.
        base_dir: Root folder that contains ``assets/video`` and receives
            ``processed_data``.

    Raises:
        subprocess.CalledProcessError: If any script exits with a non-zero status.
    """
    video_dir = os.path.join(base_dir, "assets/video")
    audio_dir = os.path.join(base_dir, "processed_data/audio")
    frames_dir = os.path.join(base_dir, "processed_data/frames")
    faces_dir = os.path.join(base_dir, "processed_data/faces")
    landmarks_dir = os.path.join(base_dir, "processed_data/landmarks")

    # First command: Extract raw video data
    # Argument lists (no shell) keep paths containing spaces intact.
    subprocess.run(
        [
            "python", "extract_raw_video_data.py",
            "--source_folder", f"{video_dir}/",
            "--audio_target_folder", f"{audio_dir}/",
            "--frames_target_folder", f"{frames_dir}/",
            "--extract_frames", "True",
        ],
        check=True,
    )

    # Output folders are loop-invariant: create them once, even for an empty list.
    os.makedirs(faces_dir, exist_ok=True)
    os.makedirs(landmarks_dir, exist_ok=True)

    # Process each video
    for video_file in video_files:
        video_name = os.path.splitext(video_file)[0]  # Remove .mp4 extension

        # Second command: Extract cropped faces
        subprocess.run(
            [
                "python", "extract_cropped_faces.py",
                "--from_dir_prefix", f"{frames_dir}/{video_name}",
                "--output_dir_prefix", f"{faces_dir}/{video_name}/",
            ],
            check=True,
        )

        # Resize faces to 256x256
        faces_video_dir = os.path.join(faces_dir, video_name)
        print(f"Resizing images in {faces_video_dir} to 256x256...")
        resize_images_to_256x256(faces_video_dir)

    # Third command: Extract frame landmarks.
    # Runs once after the loop: it is given the whole frames folder, not one clip.
    subprocess.run(
        [
            "python", "extract_frame_landmarks.py",
            "--from_dir", frames_dir,
            "--lmd_output_dir", f"{landmarks_dir}/",
        ],
        check=True,
    )

def main(num_videos=1):
    """Download, clip and process up to ``num_videos`` videos from the URL list.

    Reads ``video_url_path`` (one ``<video_name> <url>`` pair per non-empty
    line), walks the entries in file order and keeps going past failures until
    ``num_videos`` videos have been handled successfully or the list is
    exhausted. All resulting clips are then sent through ``process_videos``.

    Args:
        num_videos: Number of videos to process successfully; must be >= 1.
            Values above the number of listed videos are clamped.

    Returns:
        None. Problems are reported on stdout.
    """
    # Read video URLs from HDTF dataset
    try:
        with open(video_url_path, 'r', encoding='utf-8-sig') as f:
            lines = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Video URL file {video_url_path} not found.")
        return

    if not lines:
        print(f"No videos found in {video_url_path}")
        return

    if num_videos < 1:
        print(f"Invalid num_videos: {num_videos}. Must be at least 1.")
        return

    total_videos = len(lines)
    if num_videos > total_videos:
        print(f"Requested {num_videos} videos, but only {total_videos} available. Processing all available videos.")
        num_videos = total_videos

    print(f"Processing {num_videos} HDTF videos...")
    successful_videos = 0
    all_video_files = []

    for line in lines:
        if successful_videos >= num_videos:
            break
        parts = line.split()
        if len(parts) < 2:
            print(f"Skipping malformed line in {video_url_path}: {line!r}")
            continue
        video_name, url = parts[0], parts[1]
        video_files = process_hdtf_video(video_name, url)
        if video_files:
            successful_videos += 1
            all_video_files.extend(video_files)
            print(f"Successfully processed video {successful_videos}/{num_videos}")
        else:
            print(f"Skipping failed video {video_name}, trying next...")

    if not all_video_files:
        print("No videos were successfully processed. Exiting.")
        return

    if successful_videos < num_videos:
        print(f"Only {successful_videos} of {num_videos} requested videos could be processed.")

    # Process all clips through the pipeline
    print("Processing videos through pipeline...")
    process_videos(all_video_files, base_path)
    print("Processing complete!")

if __name__ == "__main__":
    # Fail fast when an external tool is missing. The probes go through the
    # shell on purpose: a missing executable then surfaces as a non-zero exit
    # status (CalledProcessError) rather than an OSError. The command strings
    # are constants, so no untrusted text reaches the shell.
    try:
        for version_probe in ("yt-dlp --version", "ffmpeg -version"):
            subprocess.run(version_probe, shell=True, check=True)
    except subprocess.CalledProcessError:
        print("Please install the following dependencies:")
        print("  - yt-dlp: 'pip install yt-dlp'")
        print("  - ffmpeg: Install via your package manager (e.g., 'sudo apt install ffmpeg')")
        # exit() is a helper meant for interactive sessions; raising SystemExit
        # works everywhere and yields the same exit status.
        raise SystemExit(1)

    num_videos_to_process = 1  # Change this to process more videos
    main(num_videos=num_videos_to_process)

2025.03.25
ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzim

ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena

Resizing images in talking_face_preprocessing/processed_data/faces/CarolynMaloney2_0 to 256x256...
Resized 000005.png to 256x256
Resized 000004.png to 256x256
Resized 000003.png to 256x256
Resized 000002.png to 256x256
Resized 000010.png to 256x256
Resized 000007.png to 256x256
Resized 000009.png to 256x256
Resized 000008.png to 256x256
Resized 000006.png to 256x256
Resized 000001.png to 256x256


Processing clips:   0%|          | 0/2 [00:00<?, ?it/s]
  0%|          | 0/10 [00:00<?, ?it/s][A

✅ Found 10 images in talking_face_preprocessing/processed_data/frames/CarolynMaloney2_0



 20%|██        | 2/10 [00:00<00:00, 14.77it/s][A
 40%|████      | 4/10 [00:00<00:00, 15.87it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.75it/s][A
Processing clips:   0%|          | 0/2 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/home/hrithik-raj/Project/talking_face_preprocessing/extract_frame_landmarks.py", line 98, in <module>
    main(args.from_dir, args.lmd_output_dir, args.skip_existing, args.check_and_padding)
  File "/home/hrithik-raj/Project/talking_face_preprocessing/extract_frame_landmarks.py", line 68, in main
    landmarks, bboxes = torchlm.runtime.forward(frame)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/hrithik-raj/myenv/lib/python3.12/site-packages/torchlm/runtime/_wrappers.py", line 120, in forward
    return RuntimeWrapper.forward(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/hrithik-raj/myenv/lib/python3.12/site-packages/torchlm/runtime/_wrappers.py", line 50, in forward
    bboxes = cls.face_base.apply_detecting(ima

KeyboardInterrupt: 