In [13]:
import cv2
import numpy as np
import imageio
import os
import subprocess

def get_video_info(video_path):
    """Read the basic stream properties of a video through OpenCV.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        A ``(width, height, fps, frame_count)`` tuple. Width, height and
        frame count are truncated to ``int``; fps is returned exactly as
        OpenCV reports it.

    Raises:
        ValueError: If OpenCV cannot open ``video_path``.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise ValueError(f"无法打开视频: {video_path}")

    # Integer-valued properties are read together; fps stays a float.
    integer_props = (
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FRAME_COUNT,
    )
    width, height, frame_count = [int(capture.get(prop)) for prop in integer_props]
    fps = capture.get(cv2.CAP_PROP_FPS)

    capture.release()
    return width, height, fps, frame_count

def make_even(number):
    """Return ``number`` unchanged if it is even, otherwise ``number + 1``."""
    if number % 2:
        return number + 1
    return number

def check_has_audio(video_path):
    """Report whether a video file contains an audio stream.

    Runs ``ffprobe`` restricted to the first audio stream (``a:0``) and
    checks whether the word "audio" appears in its standard output.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        True if ffprobe reports an audio stream. False otherwise, including
        when ffprobe cannot be launched (e.g. not installed), does not
        finish within 5 seconds, or produces undecodable output.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_type',
        '-of', 'default=nw=1:nk=1',
        video_path
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
    except (OSError, ValueError, subprocess.SubprocessError):
        # Best effort: a probe that cannot run is treated as "no audio".
        # Only launch/timeout/decoding failures are caught, so Ctrl-C
        # (KeyboardInterrupt) still interrupts the program.
        return False
    return 'audio' in result.stdout.lower()

def extract_audio(video_path, audio_path):
    """Extract the audio of a video into a separate AAC file with ffmpeg.

    Args:
        video_path: Source video file.
        audio_path: Destination audio file (overwritten if present).

    Returns:
        True when ffmpeg exits with status 0 and ``audio_path`` exists
        afterwards. False otherwise; if ffmpeg cannot be run or exceeds the
        30 second timeout a warning is printed as well.
    """
    ffmpeg_args = [
        'ffmpeg', '-y',        # overwrite an existing output file
        '-i', video_path,
        '-vn',                 # drop the video stream
        '-acodec', 'aac',
        '-b:a', '192k',
        audio_path,
    ]
    try:
        completed = subprocess.run(ffmpeg_args, capture_output=True, timeout=30)
        if completed.returncode != 0:
            return False
        return os.path.exists(audio_path)
    except Exception as e:
        print(f"  ⚠ 音频提取失败: {e}")
        return False

def merge_video_audio(video_path, audio_path, output_path):
    """Mux an audio file into a video file with ffmpeg.

    The video stream is copied without re-encoding, the audio is encoded as
    192k AAC, and the output stops at the end of the shorter stream.

    Args:
        video_path: Video-only input file.
        audio_path: Audio input file.
        output_path: Destination file (overwritten if present).

    Returns:
        True when ffmpeg exits with status 0, False otherwise. If ffmpeg
        cannot be run or exceeds the 60 second timeout a warning is printed.
    """
    ffmpeg_args = [
        'ffmpeg', '-y',
        '-i', video_path,
        '-i', audio_path,
        '-c:v', 'copy',    # copy the video stream as-is
        '-c:a', 'aac',
        '-b:a', '192k',
        '-shortest',       # end with the shorter of the two streams
        output_path,
    ]
    try:
        completed = subprocess.run(ffmpeg_args, capture_output=True, timeout=60)
    except Exception as e:
        print(f"  ⚠ 音频合并失败: {e}")
        return False
    return completed.returncode == 0

def _remove_if_exists(path):
    """Delete ``path`` if it exists; report other failures without raising."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"  ⚠ 无法删除临时文件 {path}: {e}")


def resize_and_pad_video(input_path, output_path, target_height, target_width):
    """Scale a video to a target height and pad it with white side bars.

    Every frame is resized to ``target_height`` (keeping the aspect ratio),
    centred horizontally on a white canvas of ``target_width`` and encoded
    with libx264. If the source has an audio stream it is extracted first and
    muxed back into the result; when extraction or muxing fails the output is
    written without audio.

    Args:
        input_path: Source video file.
        output_path: Destination video file.
        target_height: Output height in pixels (rounded up to an even number).
        target_width: Output width in pixels (rounded up to an even number).

    Raises:
        ValueError: If the video cannot be opened, reports a non-positive
            frame size, or its scaled width does not fit in ``target_width``.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise ValueError(f"无法打开视频: {input_path}")

    writer = None
    temp_video = temp_audio = None
    audio_extracted = False
    finished = False
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if orig_width <= 0 or orig_height <= 0:
            # Fail early instead of hitting a ZeroDivisionError below.
            raise ValueError(f"无法读取视频尺寸: {input_path}")

        # libx264 with yuv420p needs even dimensions.
        target_height = make_even(target_height)
        target_width = make_even(target_width)

        # Width after scaling to the target height, aspect ratio preserved.
        scale = target_height / orig_height
        new_width = make_even(int(orig_width * scale))
        if new_width > target_width:
            # The scaled frame would not fit on the canvas; previously this
            # only surfaced as a NumPy broadcast error on the first frame.
            raise ValueError(
                f"缩放后宽度 {new_width} 超过目标宽度 {target_width}: {input_path}"
            )

        # Both widths are even, so the left and right bars are equal.
        x_offset = (target_width - new_width) // 2

        print(f"\n{'='*60}")
        print(f"处理: {os.path.basename(input_path)}")
        print(f"原始尺寸: {orig_width}x{orig_height}")
        print(f"目标尺寸: {target_width}x{target_height} (已调整为偶数)")
        print(f"缩放后宽度: {new_width}px (白边: 左右各{x_offset}px)")
        print(f"帧率: {fps:.2f}fps, 总帧数: {total_frames}")

        has_audio = check_has_audio(input_path)
        if has_audio:
            print("✓ 检测到音频，将保留音频")
        else:
            print("  无音频")

        print(f"{'='*60}")

        # Derive temp names from the real extension. A plain
        # str.replace('.mp4', ...) leaves the path unchanged for other
        # extensions, so the temp files would collide with output_path.
        base, ext = os.path.splitext(output_path)
        temp_video = f"{base}_temp_noaudio{ext or '.mp4'}"
        temp_audio = f"{base}_temp_audio.aac"

        if has_audio:
            print("正在提取音频...")
            audio_extracted = extract_audio(input_path, temp_audio)
            if audio_extracted:
                print("✓ 音频提取成功")
            else:
                print("⚠ 音频提取失败，将输出无音频视频")
                # ffmpeg may have left a partial file behind.
                _remove_if_exists(temp_audio)

        # With audio, encode to a temp file first and mux afterwards.
        output_file = temp_video if audio_extracted else output_path

        writer = imageio.get_writer(
            output_file,
            fps=fps,
            codec='libx264',
            quality=8,  # quality scale where 10 is best
            pixelformat='yuv420p',
            macro_block_size=1
        )

        # The white bars never change, so one canvas is reused and only the
        # centre region is overwritten for each frame.
        canvas = np.full((target_height, target_width, 3), 255, dtype=np.uint8)
        frame_count = 0

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; the writer expects RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                resized_frame = cv2.resize(frame_rgb, (new_width, target_height),
                                           interpolation=cv2.INTER_LINEAR)
                canvas[:, x_offset:x_offset + new_width] = resized_frame

                writer.append_data(canvas)
                frame_count += 1

                if frame_count % 30 == 0 or frame_count == total_frames:
                    progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
                    print(f"进度: {frame_count}/{total_frames} ({progress:.1f}%)", end='\r')

            print(f"\n✓ 视频处理完成！共处理 {frame_count} 帧")
        except Exception as e:
            print(f"\n✗ 写入视频时出错: {e}")
            raise
        finished = True
    finally:
        cap.release()
        if writer is not None:
            writer.close()
        if not finished and audio_extracted:
            # Do not leave temp files behind after a failure.
            _remove_if_exists(temp_video)
            _remove_if_exists(temp_audio)

    if audio_extracted:
        merged = False
        if os.path.exists(temp_audio) and os.path.exists(temp_video):
            print("正在合并音频...")
            merged = merge_video_audio(temp_video, temp_audio, output_path)
        if merged:
            print("✓ 音频合并成功")
            _remove_if_exists(temp_video)
        else:
            print("⚠ 音频合并失败，使用无音频版本")
            if os.path.exists(temp_video):
                # os.replace overwrites a partial output left by the failed
                # merge on every platform; os.rename fails on Windows.
                os.replace(temp_video, output_path)
        _remove_if_exists(temp_audio)

    print(f"✓ 输出文件: {output_path}\n")

def process_two_videos(video1_path, video2_path, output1_path, output2_path):
    """Bring two videos to one common frame size.

    Both videos are scaled to the smaller of the two heights and padded with
    white side bars to the larger of the two scaled widths (all dimensions
    rounded up to even numbers). Errors are printed rather than raised.

    Args:
        video1_path: First input video.
        video2_path: Second input video.
        output1_path: Destination for the processed first video.
        output2_path: Destination for the processed second video.

    Returns:
        None. The function returns early if an input file is missing or an
        output path is the same as an input path.
    """
    import traceback

    print("\n" + "="*60)
    print("视频处理工具 - 统一高度并添加白边 (保留音频)")
    print("="*60)
    print("注意: 输出尺寸会调整为偶数以符合视频编码要求")

    # ffmpeg is only needed to keep audio; processing continues without it.
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True, timeout=5)
        print("✓ 检测到 ffmpeg，可以保留音频")
    except (OSError, subprocess.SubprocessError):
        print("⚠ 未检测到 ffmpeg，音频将无法保留")
        print("  请安装: https://ffmpeg.org/download.html")

    for path in (video1_path, video2_path):
        if not os.path.exists(path):
            print(f"✗ 错误: 找不到文件 {path}")
            return

    # Writing onto an input would truncate the file while it is being read.
    input_paths = {os.path.abspath(video1_path), os.path.abspath(video2_path)}
    for path in (output1_path, output2_path):
        if os.path.abspath(path) in input_paths:
            print(f"✗ 错误: 输出路径不能与输入路径相同 {path}")
            return

    try:
        print("\n获取视频信息...")
        w1, h1, fps1, frames1 = get_video_info(video1_path)
        w2, h2, fps2, frames2 = get_video_info(video2_path)

        print(f"\n视频1: {w1}x{h1}, {fps1:.2f}fps, {frames1}帧")
        print(f"视频2: {w2}x{h2}, {fps2:.2f}fps, {frames2}帧")

        # Common height: the smaller one, rounded up to an even number.
        target_height = make_even(min(h1, h2))

        # Width of each video after scaling to the common height.
        new_w1 = make_even(int(w1 * target_height / h1))
        new_w2 = make_even(int(w2 * target_height / h2))

        # The wider one defines the canvas; it is already even.
        target_width = max(new_w1, new_w2)

        print(f"\n最终目标尺寸: {target_width}x{target_height} (已调整为偶数)")

        for path in (output1_path, output2_path):
            output_dir = os.path.dirname(path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        print("\n开始处理...")
        resize_and_pad_video(video1_path, output1_path, target_height, target_width)
        resize_and_pad_video(video2_path, output2_path, target_height, target_width)

        print("\n" + "="*60)
        print("✓ 全部完成！")
        print("="*60)
        print(f"输出文件:")
        print(f"  1. {output1_path}")
        print(f"  2. {output2_path}")
        print()

    except Exception as e:
        # Top-level boundary of the tool: report the failure with a traceback
        # instead of propagating it to the caller.
        print(f"\n✗ 错误: {e}")
        traceback.print_exc()

# Usage example: pad/resize two hard-coded videos to a common frame size.
if __name__ == "__main__":
    # Replace these with the paths of your own input videos.
    video1 = "/Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid2_concatenated.mp4"
    video2 = "/Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid6_pose_retarget_concatenated.mp4"
    
    # Outputs are written next to the inputs with a "_processed" suffix.
    output1 = "/Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid2_concatenated_processed.mp4"
    output2 = "/Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid6_pose_retarget_concatenated_processed.mp4"
    
    process_two_videos(video1, video2, output1, output2)



视频处理工具 - 统一高度并添加白边 (保留音频)
注意: 输出尺寸会调整为偶数以符合视频编码要求
✓ 检测到 ffmpeg，可以保留音频

获取视频信息...

视频1: 1728x1152, 60.00fps, 600帧
视频2: 1728x1024, 24.00fps, 225帧

最终目标尺寸: 1728x1024 (已调整为偶数)

开始处理...

处理: vid2_concatenated.mp4
原始尺寸: 1728x1152
目标尺寸: 1728x1024 (已调整为偶数)
缩放后宽度: 1536px (白边: 左右各96px)
帧率: 60.00fps, 总帧数: 600
  无音频
进度: 600/600 (100.0%)
✓ 视频处理完成！共处理 600 帧
✓ 输出文件: /Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid2_concatenated_processed.mp4


处理: vid6_pose_retarget_concatenated.mp4
原始尺寸: 1728x1024
目标尺寸: 1728x1024 (已调整为偶数)
缩放后宽度: 1728px (白边: 左右各0px)
帧率: 24.00fps, 总帧数: 225
✓ 检测到音频，将保留音频
正在提取音频...
✓ 音频提取成功
进度: 225/225 (100.0%)
✓ 视频处理完成！共处理 225 帧
正在合并音频...
✓ 音频合并成功
✓ 输出文件: /Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid6_pose_retarget_concatenated_processed.mp4


✓ 全部完成！
输出文件:
  1. /Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid2_concatenated_processed.mp4
  2. /Users/shijun/temp业务/wan-animate-main/content/aligned pose driven/vid6_pose_re

In [3]:
!pip3 install moviepy

Collecting moviepy
  Downloading moviepy-2.2.1-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 KB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting imageio_ffmpeg>=0.2.0
  Downloading imageio_ffmpeg-0.6.0-py3-none-macosx_11_0_arm64.whl (21.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting proglog<=1.0.0
  Downloading proglog-0.1.12-py3-none-any.whl (6.3 kB)
Collecting python-dotenv>=0.10
  Downloading python_dotenv-1.2.1-py3-none-any.whl (21 kB)
Collecting imageio<3.0,>=2.5
  Downloading imageio-2.37.2-py3-none-any.whl (317 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.6/317.6 KB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: python-dotenv, proglog, imageio_ffmpeg, imageio, moviepy
Successfully installed imageio-2.37.2 imageio_ffmpeg-0.6.0 

In [8]:
ls "content/aligned pose driven"

vid1.mp4           vid2.mp4           vid3.mp4           vid4_from1.3b.mp4
vid1_input.jpg     vid2_input.jpg     vid3_input.jpg     vid4_input.png


In [11]:
import cv2
import numpy as np
import imageio
import os
import subprocess
from pathlib import Path

def make_even(number):
    """Round an odd number up to the next even one; even numbers pass through."""
    is_odd = number % 2 != 0
    return number + 1 if is_odd else number

def get_video_info(video_path):
    """Return ``(width, height, fps, frame_count)`` for a video file.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        Tuple of frame width and height (``int``), frames per second as
        reported by OpenCV, and the frame count (``int``).

    Raises:
        ValueError: If OpenCV cannot open ``video_path``.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise ValueError(f"无法打开视频: {video_path}")

    info = (
        int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        capture.get(cv2.CAP_PROP_FPS),
        int(capture.get(cv2.CAP_PROP_FRAME_COUNT)),
    )
    capture.release()
    return info

def center_crop_by_aspect_ratio(img, target_aspect_ratio):
    """Crop the centre of an image so that width / height matches a ratio.

    Only one dimension is cropped: the width when the image is wider than the
    target ratio, otherwise the height. A one-line summary of the crop is
    printed.

    Args:
        img: Image array whose first two axes are (height, width).
        target_aspect_ratio: Desired ``width / height`` of the result.

    Returns:
        The centred slice of ``img`` (not a copy).
    """
    src_h, src_w = img.shape[:2]

    # Start from the full frame and shrink exactly one side.
    crop_w, crop_h = src_w, src_h
    if src_w / src_h > target_aspect_ratio:
        # Too wide: trim columns.
        crop_w = int(src_h * target_aspect_ratio)
    else:
        # Too tall (or already matching): trim rows.
        crop_h = int(src_w / target_aspect_ratio)

    left = (src_w - crop_w) // 2
    top = (src_h - crop_h) // 2
    result = img[top:top + crop_h, left:left + crop_w]

    print(f"  Center crop: {src_w}x{src_h} -> {crop_w}x{crop_h} (目标比例: {target_aspect_ratio:.3f})")

    return result

def check_has_audio(video_path):
    """Report whether a video file contains an audio stream.

    Runs ``ffprobe`` restricted to the first audio stream (``a:0``) and
    checks whether the word "audio" appears in its standard output.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        True if ffprobe reports an audio stream. False otherwise, including
        when ffprobe cannot be launched (e.g. not installed), does not
        finish within 5 seconds, or produces undecodable output.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_type',
        '-of', 'default=nw=1:nk=1',
        video_path
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
    except (OSError, ValueError, subprocess.SubprocessError):
        # Best effort: a probe that cannot run is treated as "no audio".
        # Only launch/timeout/decoding failures are caught, so Ctrl-C
        # (KeyboardInterrupt) still interrupts the program.
        return False
    return 'audio' in result.stdout.lower()

def extract_audio(video_path, audio_path):
    """Write the audio of ``video_path`` to ``audio_path`` as AAC via ffmpeg.

    Args:
        video_path: Source video file.
        audio_path: Destination audio file (overwritten if present).

    Returns:
        True only if ffmpeg exits with status 0 and ``audio_path`` exists
        afterwards. Any exception while running ffmpeg (including the
        30 second timeout) is printed as a warning and yields False.
    """
    command = ['ffmpeg', '-y', '-i', video_path]
    command += ['-vn']                                # no video stream
    command += ['-acodec', 'aac', '-b:a', '192k']
    command.append(audio_path)

    try:
        proc = subprocess.run(command, capture_output=True, timeout=30)
        succeeded = proc.returncode == 0
        return succeeded and os.path.exists(audio_path)
    except Exception as e:
        print(f"  ⚠ 音频提取失败: {e}")
        return False

def merge_video_audio(video_path, audio_path, output_path):
    """Combine a video file and an audio file into ``output_path`` via ffmpeg.

    The video stream is copied, the audio is encoded as 192k AAC and the
    result ends with the shorter of the two inputs.

    Args:
        video_path: Video-only input file.
        audio_path: Audio input file.
        output_path: Destination file (overwritten if present).

    Returns:
        True if ffmpeg exits with status 0, False otherwise. Any exception
        while running ffmpeg (including the 60 second timeout) is printed as
        a warning and yields False.
    """
    inputs = ['-i', video_path, '-i', audio_path]
    codecs = [
        '-c:v', 'copy',    # keep the encoded video untouched
        '-c:a', 'aac',
        '-b:a', '192k',
    ]
    command = ['ffmpeg', '-y'] + inputs + codecs + ['-shortest', output_path]

    try:
        proc = subprocess.run(command, capture_output=True, timeout=60)
    except Exception as e:
        print(f"  ⚠ 音频合并失败: {e}")
        return False
    return proc.returncode == 0

def _remove_if_exists(path):
    """Delete ``path`` if it exists; report other failures without raising."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"  ⚠ 无法删除临时文件 {path}: {e}")


def concatenate_image_video(image_path, video_path, output_path):
    """Place a still image to the left of every frame of a video.

    The image is centre-cropped to half of the video's aspect ratio, i.e. the
    code assumes the video consists of two side-by-side panels of width ``w``
    so that the result is three panels wide. The cropped image is scaled to
    the (even) video height and the combined frames are encoded with libx264.
    If the video has an audio stream it is extracted first and muxed back
    in; when extraction or muxing fails the output has no audio.

    Args:
        image_path: Still image shown on the left.
        video_path: Source video shown on the right.
        output_path: Destination video file.

    Raises:
        ValueError: If the image cannot be read, the video cannot be opened
            or reports a non-positive frame size, or the crop is empty.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"无法读取图片: {image_path}")

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_h, img_w = img_rgb.shape[:2]

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"无法打开视频: {video_path}")

    writer = None
    temp_video = temp_audio = None
    audio_extracted = False
    finished = False
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        vid_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        vid_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if vid_w <= 0 or vid_h <= 0:
            # Fail early instead of hitting a ZeroDivisionError below.
            raise ValueError(f"无法读取视频尺寸: {video_path}")

        print(f"\n{'='*60}")
        print(f"处理: {os.path.basename(image_path)} + {os.path.basename(video_path)}")
        print(f"原始图片尺寸: {img_w}x{img_h}")
        print(f"视频尺寸: {vid_w}x{vid_h}, {fps:.2f}fps, {total_frames}帧")

        has_audio = check_has_audio(video_path)
        if has_audio:
            print("✓ 检测到音频，将保留音频")
        else:
            print("  无音频")

        # The video is treated as two panels wide (2w x h), so a single
        # panel, and therefore the image, has half the video's aspect ratio.
        video_aspect_ratio = vid_w / vid_h
        target_img_aspect_ratio = video_aspect_ratio / 2.0

        print(f"视频宽高比: {video_aspect_ratio:.3f} (w/h)")
        print(f"目标图片宽高比: {target_img_aspect_ratio:.3f} (w/h)")

        cropped_img = center_crop_by_aspect_ratio(img_rgb, target_img_aspect_ratio)
        crop_h, crop_w = cropped_img.shape[:2]
        if crop_h == 0 or crop_w == 0:
            raise ValueError(f"图片尺寸过小，无法裁剪: {image_path}")

        # libx264 with yuv420p needs even dimensions.
        target_height = make_even(vid_h)

        # Scale the cropped image to the output height.
        scale_img = target_height / crop_h
        new_img_w = make_even(int(crop_w * scale_img))
        resized_img = cv2.resize(cropped_img, (new_img_w, target_height),
                                 interpolation=cv2.INTER_LINEAR)

        # The video only needs rescaling when its height was odd.
        if vid_h != target_height:
            scale_vid = target_height / vid_h
            new_vid_w = make_even(int(vid_w * scale_vid))
        else:
            new_vid_w = make_even(vid_w)

        # Both parts are even, so the sum is even as well.
        final_width = new_img_w + new_vid_w

        print(f"目标高度: {target_height}")
        print(f"裁剪并调整后图片宽度: {new_img_w} (应该约为视频宽度的一半)")
        print(f"调整后视频宽度: {new_vid_w}")
        print(f"最终尺寸: {final_width}x{target_height} (约 {final_width/target_height:.2f}:1)")
        print(f"理论比例: 图片占 {new_img_w/final_width*100:.1f}%, 视频占 {new_vid_w/final_width*100:.1f}%")
        print(f"{'='*60}")

        # Derive temp names from the real extension. A plain
        # str.replace('.mp4', ...) leaves the path unchanged for other
        # extensions, so the temp files would collide with output_path.
        base, ext = os.path.splitext(output_path)
        temp_video = f"{base}_temp_novideo{ext or '.mp4'}"
        temp_audio = f"{base}_temp_audio.aac"

        if has_audio:
            print("正在提取音频...")
            audio_extracted = extract_audio(video_path, temp_audio)
            if audio_extracted:
                print("✓ 音频提取成功")
            else:
                print("⚠ 音频提取失败，将输出无音频视频")
                # ffmpeg may have left a partial file behind.
                _remove_if_exists(temp_audio)

        # With audio, encode to a temp file first and mux afterwards.
        output_file = temp_video if audio_extracted else output_path

        writer = imageio.get_writer(
            output_file,
            fps=fps,
            codec='libx264',
            quality=8,
            pixelformat='yuv420p',
            macro_block_size=1
        )

        needs_resize = vid_h != target_height or vid_w != new_vid_w

        # The image on the left never changes: paint it once and only
        # overwrite the video region for each frame.
        canvas = np.zeros((target_height, final_width, 3), dtype=np.uint8)
        canvas[:, :new_img_w] = resized_img
        frame_count = 0

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; the writer expects RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if needs_resize:
                    frame_rgb = cv2.resize(frame_rgb, (new_vid_w, target_height),
                                           interpolation=cv2.INTER_LINEAR)

                canvas[:, new_img_w:new_img_w + new_vid_w] = frame_rgb

                writer.append_data(canvas)
                frame_count += 1

                if frame_count % 30 == 0 or frame_count == total_frames:
                    progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
                    print(f"进度: {frame_count}/{total_frames} ({progress:.1f}%)", end='\r')

            print(f"\n✓ 视频处理完成！共 {frame_count} 帧")
        except Exception as e:
            print(f"\n✗ 处理时出错: {e}")
            raise
        finished = True
    finally:
        cap.release()
        if writer is not None:
            writer.close()
        if not finished and audio_extracted:
            # Do not leave temp files behind after a failure.
            _remove_if_exists(temp_video)
            _remove_if_exists(temp_audio)

    if audio_extracted:
        merged = False
        if os.path.exists(temp_audio) and os.path.exists(temp_video):
            print("正在合并音频...")
            merged = merge_video_audio(temp_video, temp_audio, output_path)
        if merged:
            print("✓ 音频合并成功")
            _remove_if_exists(temp_video)
        else:
            print("⚠ 音频合并失败，使用无音频版本")
            if os.path.exists(temp_video):
                # os.replace overwrites a partial output left by the failed
                # merge on every platform; os.rename fails on Windows.
                os.replace(temp_video, output_path)
        _remove_if_exists(temp_audio)

    print(f"✓ 输出: {output_path}\n")

def process_directory(input_dir, output_dir=None):
    """Concatenate every ``vid*.mp4`` in a directory with its input image.

    For a video ``<name>.mp4`` the image is looked up as
    ``<name>_input.{jpg,png,jpeg}``; if none exists, the part of the name
    before the first underscore is tried (``vid4_from1.3b`` -> ``vid4``).
    The result is written as ``<name>_concatenated.mp4``. Files that this
    tool produced itself (``*_concatenated.mp4`` and ``*_temp_novideo.mp4``)
    are skipped so that re-running on the same directory does not process
    its own outputs again.

    Args:
        input_dir: Directory containing the videos and images.
        output_dir: Destination directory (created if missing). Defaults to
            ``input_dir``.

    Returns:
        None. Progress and a success/failure summary are printed.
    """
    import traceback

    input_path = Path(input_dir)

    if output_dir is None:
        output_path = input_path
    else:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

    print("\n" + "="*60)
    print("批量拼接图片和视频（Center Crop + 保留音频）")
    print("="*60)

    # ffmpeg is only needed to keep audio; processing continues without it.
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True, timeout=5)
        print("✓ 检测到 ffmpeg")
    except (OSError, subprocess.SubprocessError):
        print("⚠ 未检测到 ffmpeg，音频可能无法保留")
        print("  请安装: https://ffmpeg.org/download.html")

    # Exclude this tool's own outputs and temp files from the inputs.
    generated_suffixes = ("_concatenated", "_temp_novideo")
    video_files = sorted(
        path for path in input_path.glob("vid*.mp4")
        if not path.stem.endswith(generated_suffixes)
    )

    if not video_files:
        print("✗ 未找到视频文件")
        return

    print(f"找到 {len(video_files)} 个视频文件\n")

    processed = 0
    failed = 0

    for video_file in video_files:
        base_name = video_file.stem  # e.g. vid1, vid4_from1.3b

        # Exact name first, then the prefix before the first underscore.
        short_name = base_name.split('_')[0]
        candidate_names = [base_name]
        if short_name != base_name:
            candidate_names.append(short_name)

        # Names are the outer loop so that an exact-name match with any
        # extension wins over a short-name match.
        image_file = None
        for name in candidate_names:
            for ext in ['.jpg', '.png', '.jpeg']:
                candidate = input_path / f"{name}_input{ext}"
                if candidate.exists():
                    image_file = candidate
                    break
            if image_file is not None:
                break

        if image_file is None:
            print(f"⚠ 跳过 {video_file.name}: 未找到对应的输入图片")
            failed += 1
            continue

        output_file = output_path / f"{base_name}_concatenated.mp4"

        try:
            concatenate_image_video(str(image_file), str(video_file), str(output_file))
            processed += 1
        except Exception as e:
            # One bad pair must not abort the batch: report it and continue.
            print(f"✗ 处理失败 {video_file.name}: {e}")
            traceback.print_exc()
            failed += 1

    print("\n" + "="*60)
    print(f"✓ 完成! 成功: {processed}, 失败: {failed}")
    print("="*60)

# Usage example: batch-process a directory of videos and input images.
if __name__ == "__main__":
    # Process every matching video/image pair in the directory; outputs are
    # written into the same directory because no output_dir is given.
    input_directory = "content/aligned pose driven"
    process_directory(input_directory)
    
    # Alternatively, process a single pair (useful for testing):
    # image_path = "content/aligned pose driven/vid1_input.jpg"
    # video_path = "content/aligned pose driven/vid1.mp4"
    # output_path = "content/aligned pose driven/vid1_concatenated.mp4"
    # concatenate_image_video(image_path, video_path, output_path)


批量拼接图片和视频（Center Crop + 保留音频）
✓ 检测到 ffmpeg
找到 6 个视频文件


处理: vid1_input.jpg + vid1.mp4
原始图片尺寸: 1080x1473
视频尺寸: 960x640, 60.00fps, 600帧
  无音频
视频宽高比: 1.500 (w/h)
目标图片宽高比: 0.750 (w/h)
  Center crop: 1080x1473 -> 1080x1440 (目标比例: 0.750)
目标高度: 640
裁剪并调整后图片宽度: 480 (应该约为视频宽度的一半)
调整后视频宽度: 960
最终尺寸: 1440x640 (约 2.25:1)
理论比例: 图片占 33.3%, 视频占 66.7%
进度: 600/600 (100.0%)
✓ 视频处理完成！共 600 帧
✓ 输出: content/aligned pose driven/vid1_concatenated.mp4


处理: vid2_input.jpg + vid2.mp4
原始图片尺寸: 1080x2179
视频尺寸: 1152x1152, 60.00fps, 600帧
  无音频
视频宽高比: 1.000 (w/h)
目标图片宽高比: 0.500 (w/h)
  Center crop: 1080x2179 -> 1080x2160 (目标比例: 0.500)
目标高度: 1152
裁剪并调整后图片宽度: 576 (应该约为视频宽度的一半)
调整后视频宽度: 1152
最终尺寸: 1728x1152 (约 1.50:1)
理论比例: 图片占 33.3%, 视频占 66.7%
进度: 600/600 (100.0%)
✓ 视频处理完成！共 600 帧
✓ 输出: content/aligned pose driven/vid2_concatenated.mp4


处理: vid3_input.jpg + vid3.mp4
原始图片尺寸: 1080x1515
视频尺寸: 960x848, 29.00fps, 300帧
  无音频
视频宽高比: 1.132 (w/h)
目标图片宽高比: 0.566 (w/h)
  Center crop: 1080x1515 -> 857x1515 (目标比例: 0.566)
目标高度: 8