# VHAP + GaussianAvatars 全流程自动化

本 notebook 实现以下流程：  
1. 依次运行 VHAP 的 Preprocess、Track、Export 三个阶段  
2. 运行 GaussianAvatars 的训练脚本  
3. 对训练好的模型进行测试（渲染），并输出测试视频  

注意！请将你的视频素材放在“/root/autodl-tmp/datasets/”目录下（可以放在其子文件夹中，例如下文 DATA_FOLDER 示例中的 ai_video）。

注意！请根据实际情况修改参数设置：  
DATA_FOLDER为你的输入视频存储路径（视频序列的父路径）；  
SEQUENCE为视频序列名（无.mp4的后缀）；  
DOWNSAMPLE_SCALES为视频下采样倍数（下采样后的分辨率为700左右即可，如1440分辨率的视频，取下采样倍数为2）。

注意！VHAP和GaussianAvatars两个阶段使用的内核不同：  
运行VHAP阶段时需要选择VHAP内核；  
运行GaussianAvatars阶段时需要选择gaussian-avatars内核。

In [1]:
# Pipeline parameters -- adjust these to match your own data.
DATA_FOLDER = "/root/autodl-tmp/datasets/ai_video"  # parent folder that contains the input video
SEQUENCE = "ai_wyz_croped_150_20s"                  # video sequence name (file name without ".mp4")
DOWNSAMPLE_SCALES = "2"                             # frame downsampling factor
GPU_IDS = "0"                                       # passed to the stages as CUDA_VISIBLE_DEVICES

# The GaussianAvatars stage runs in a different kernel than the VHAP stage, so the
# shared settings are persisted to a JSON file here and read back after the kernel switch.
import json

variables = dict(DATA_FOLDER=DATA_FOLDER, SEQUENCE=SEQUENCE, GPU_IDS=GPU_IDS)

# Serialise the settings dictionary to disk.
with open('/root/autodl-tmp/variables.json', 'w') as f:
    f.write(json.dumps(variables))

## 1. VHAP 三阶段依次运行

参照 VHAP 的 run_monocular.sh 脚本的流程，依次调用 Preprocess、Track、Export 三个阶段对应的 Python 脚本。
### 1.1. Preprocess预处理

In [None]:
import os
from datetime import datetime
import subprocess

# Stage 1.1: run VHAP's video preprocessing script on the raw input video.
# Relies on DATA_FOLDER, SEQUENCE, DOWNSAMPLE_SCALES and GPU_IDS from the parameter cell.
os.chdir('/root/autodl-tmp/VHAP')   # the script path and log paths below are relative to this folder
DATE_TAG = datetime.now().strftime("%Y%m%d")    # current date, used as a log-file prefix
DATA_FOLDER_NAME = os.path.basename(DATA_FOLDER.rstrip('/'))    # dataset name = last component of DATA_FOLDER

RAW_VIDEO_PATH = f"{DATA_FOLDER}/{SEQUENCE}.mp4"
preprocess_command = [
    "python", "vhap/preprocess_video.py",
    "--input", RAW_VIDEO_PATH,
    "--downsample_scales", DOWNSAMPLE_SCALES,
    "--target_fps", "25",
    "--matting_method", "rembg"
]

os.makedirs(f"logs/{DATA_FOLDER_NAME}", exist_ok=True)
PREPROCESS_LOG_PATH = f"logs/{DATA_FOLDER_NAME}/{DATE_TAG}_{SEQUENCE}_preprocess_log.txt"
print("开始预处理阶段...")
try:
    # stdout and stderr of the child process are both captured in the log file.
    with open(PREPROCESS_LOG_PATH, "w") as logf:
        subprocess.run(
            preprocess_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True,     # raise on a non-zero exit code instead of silently continuing
        )
except subprocess.CalledProcessError as e:
    print(f"预处理阶段失败，错误信息：{e}，详见日志：{PREPROCESS_LOG_PATH}")
else:
    # Only report success when the script actually exited with code 0.
    print("预处理阶段完成")
    print(f"预处理后的结果保存在{DATA_FOLDER}/{SEQUENCE}")

### 1.2. Track跟踪

In [None]:
# Stage 1.2: run VHAP's tracking script on the preprocessed sequence.
# Relies on DATA_FOLDER_NAME and DATE_TAG set by the preprocessing cell.
os.makedirs(f"/root/autodl-tmp/VHAP_track/{DATA_FOLDER_NAME}/output", exist_ok=True)
TRACK_OUTPUT_FOLDER = f"/root/autodl-tmp/VHAP_track/{DATA_FOLDER_NAME}/output/{SEQUENCE}"
track_command = [
    "python", "vhap/track.py",
    "--data.root_folder", DATA_FOLDER,
    "--exp.output_folder", TRACK_OUTPUT_FOLDER,
    "--data.sequence", SEQUENCE,
    "--data.n_downsample_rgb", DOWNSAMPLE_SCALES,
    "--data.landmark_source", "dslpt"
]

os.makedirs(f"logs/{DATA_FOLDER_NAME}", exist_ok=True)
TRACK_LOG_PATH = f"logs/{DATA_FOLDER_NAME}/{DATE_TAG}_{SEQUENCE}_track_log.txt"
print("开始跟踪阶段...")
try:
    # stdout and stderr of the child process are both captured in the log file.
    with open(TRACK_LOG_PATH, "w") as logf:
        subprocess.run(
            track_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True,     # raise on a non-zero exit code instead of silently continuing
        )
except subprocess.CalledProcessError as e:
    print(f"跟踪阶段失败，错误信息：{e}，详见日志：{TRACK_LOG_PATH}")
else:
    # Only report success when the script actually exited with code 0.
    print("跟踪阶段完成")

### 1.3. 导出阶段

In [None]:
# Stage 1.3: export the tracking result with VHAP's NeRF-dataset export script.
# Relies on TRACK_OUTPUT_FOLDER from the tracking cell and on DATA_FOLDER_NAME / DATE_TAG
# from the preprocessing cell.
os.makedirs(f"/root/autodl-tmp/VHAP_track/{DATA_FOLDER_NAME}/export", exist_ok=True)
# NOTE(review): the training cell reads from ".../export/{SEQUENCE}_epoch0"; presumably the
# export script appends the "_epoch0" suffix to --tgt_folder -- confirm against the script.
EXPORT_OUTPUT_FOLDER = f"/root/autodl-tmp/VHAP_track/{DATA_FOLDER_NAME}/export/{SEQUENCE}"
export_command = [
    "python", "vhap/export_as_nerf_dataset.py",
    "--src_folder", TRACK_OUTPUT_FOLDER,
    "--tgt_folder", EXPORT_OUTPUT_FOLDER,
    "--background-color", "white",
    "--epoch", "0"
]

EXPORT_LOG_PATH = f"logs/{DATA_FOLDER_NAME}/{DATE_TAG}_{SEQUENCE}_export_log.txt"
print("开始导出阶段...")
try:
    # stdout and stderr of the child process are both captured in the log file.
    with open(EXPORT_LOG_PATH, "w") as logf:
        subprocess.run(
            export_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True,     # raise on a non-zero exit code instead of silently continuing
        )
except subprocess.CalledProcessError as e:
    print(f"导出阶段失败，错误信息：{e}，详见日志：{EXPORT_LOG_PATH}")
else:
    # Only report success when the script actually exited with code 0.
    print("导出阶段完成")
    print(f"导出阶段完成，导出结果存储在：{EXPORT_OUTPUT_FOLDER}")


## 2. GaussianAvatars 训练

进行 GaussianAvatars 的训练。注意：运行前请先将内核切换为 gaussian-avatars。

In [4]:
import os
import random
import json
from datetime import datetime
import subprocess

# Stage 2: train GaussianAvatars on the dataset exported by VHAP.
# This cell runs in a different kernel than the VHAP cells, so the shared settings
# are reloaded from the JSON file written by the parameter cell.
with open('/root/autodl-tmp/variables.json', 'r') as f:
    variables = json.load(f)

# Restore the shared settings as notebook globals.
DATA_FOLDER = variables["DATA_FOLDER"]
SEQUENCE = variables["SEQUENCE"]
GPU_IDS = variables["GPU_IDS"]
DATA_FOLDER_NAME = os.path.basename(DATA_FOLDER.rstrip('/'))
DATE_TAG = datetime.now().strftime("%Y%m%d")
# NOTE(review): the export cell passes ".../export/{SEQUENCE}" as --tgt_folder, while this
# path carries an "_epoch0" suffix; presumably the export script appends it -- confirm.
EXPORT_OUTPUT_FOLDER = f"/root/autodl-tmp/VHAP_track/{DATA_FOLDER_NAME}/export/{SEQUENCE}_epoch0"

# train.py is invoked relative to the GaussianAvatars checkout.
os.chdir('/root/autodl-tmp/GaussianAvatars')

# Output folder for the trained model.
GAUSSIANAVATARS_MODEL_FOLDER = f"/root/autodl-tmp/gaussian_avatars_output/{DATA_FOLDER_NAME}/{SEQUENCE}"
os.makedirs(GAUSSIANAVATARS_MODEL_FOLDER, exist_ok=True)
# Random port in 60001..60100, passed to train.py via --port.
PORT = 60000 + random.randint(1, 100)

# Training command line.
train_command = [
    "python", "train.py",
    "-s", EXPORT_OUTPUT_FOLDER,
    "-m", GAUSSIANAVATARS_MODEL_FOLDER,
    "--bind_to_mesh",
    "--white_background",
    "--port", str(PORT),
    "--resolution", "1",
    "--data_device", "cpu",
    "--sh_degree", "0",
    "--iteration", "60000"
]

print("开始训练阶段...")

# Run the training; stdout and stderr of the child process go to the log file.
try:
    with open(f"/root/autodl-tmp/gaussian_avatars_output/{DATA_FOLDER_NAME}/{SEQUENCE}.log", "w") as logf:
        subprocess.run(
            train_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True
        )
except subprocess.CalledProcessError as e:
    print(f"训练阶段失败，错误信息：{e}")
else:
    # Only report success when train.py actually exited with code 0.
    print("训练阶段完成")
    print(f"训练阶段完成，高斯模型存储在：{GAUSSIANAVATARS_MODEL_FOLDER}")


开始训练阶段...
训练阶段完成
训练阶段完成，高斯模型存储在：/root/autodl-tmp/gaussian_avatars_output/ai_video/ai_wyz_croped_150_20s


## 3. GaussianAvatars 测试（渲染）并输出视频

### 3.1. 自驱动渲染

In [7]:
# Stage 3.1: self-driven rendering -- render the trained model using the training
# dataset itself as the driving target (-t), then preview the resulting video.
render_command = [
    "python", "render.py",
    "-s", EXPORT_OUTPUT_FOLDER,
    "-m", GAUSSIANAVATARS_MODEL_FOLDER,
    "-t", EXPORT_OUTPUT_FOLDER,
    "--select_camera_id", "0",
    "--iteration", "60000"
]

print("开始自驱动渲染阶段...")

# Run the renderer; stdout and stderr of the child process go to the log file.
try:
    with open(f"/root/autodl-tmp/gaussian_avatars_output/{DATA_FOLDER_NAME}/{SEQUENCE}_self_driven_render.log", "w") as logf:
        subprocess.run(
            render_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True
        )
except subprocess.CalledProcessError as e:
    print(f"渲染阶段失败，错误信息：{e}")
else:
    # Report success and preview only when render.py exited with code 0; otherwise a
    # stale video left over from an earlier run could be mistaken for the new result.
    print("自驱动渲染阶段完成")
    # Path of the rendered video inside the model folder.
    render_output_video = f"{GAUSSIANAVATARS_MODEL_FOLDER}/{SEQUENCE}_epoch0_0/ours_60000/renders_no_audio.mp4"

    if os.path.exists(render_output_video):
        # Show the rendered video inline in the notebook.
        from IPython.display import Video
        print("渲染结果预览:")
        display(Video(render_output_video))
    else:
        print(f"未找到渲染结果视频: {render_output_video}")


开始自驱动渲染阶段...
自驱动渲染阶段完成
渲染结果预览:


### 3.2. diffposetalk驱动渲染

#### 先调整diffposetalk的flame和相机transforms参数

In [9]:
# Stage 3.2 (preparation): adjust the diffposetalk FLAME parameters and camera transforms,
# using the first exported FLAME frame and the training transforms as reference.
DIFFPOSETALK_DATA = "/root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust"
REFERENCE_FLAME_PATH = f"{EXPORT_OUTPUT_FOLDER}/flame_param/00000.npz"
REFERENCE_JSON_PATH = f"{EXPORT_OUTPUT_FOLDER}/transforms_train.json"

# Command line of the conversion script.
command = [
    "python", "/root/autodl-tmp/test_diffposetalk_data_for_gs_render/process_flame_transforms.py",
    "--base_path", DIFFPOSETALK_DATA,
    "--reference_flame_path", REFERENCE_FLAME_PATH,
    "--reference_json_path", REFERENCE_JSON_PATH,
    "--adjust-neck-mean"
]

print("开始处理 FLAME和Transforms 转换...")

# The script's output is not redirected, so it appears directly in the cell output.
try:
    subprocess.run(command, check=True)
except subprocess.CalledProcessError as e:
    print(f"处理 FLAME和Transforms 转换失败，错误信息：{e}")
else:
    # Only report success when the script actually exited with code 0.
    print("FLAME和Transforms 转换处理完成")

开始处理 FLAME和Transforms 转换...
步骤1: 开始组合flame_param数据...
no_head: False
adjust_neck_mean: True
计算得到的coef neck_pose均值: [[ 0.06217049 -0.00764011 -0.01637743]]


 77%|███████▋  | 193/250 [00:00<00:00, 317.65it/s]

步骤1完成: 已生成250帧flame_param数据到 /root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust/flame_param
步骤2: 开始复制transforms json文件...
已复制 /root/autodl-fs/test_diffposetalk_data_for_gs_render/transforms_train.json 到 /root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust/transforms_train.json
已复制 /root/autodl-fs/test_diffposetalk_data_for_gs_render/transforms_test.json 到 /root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust/transforms_test.json
已复制 /root/autodl-fs/test_diffposetalk_data_for_gs_render/transforms_val.json 到 /root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust/transforms_val.json
步骤2完成: 已复制transforms json文件到 /root/autodl-tmp/test_diffposetalk_data_for_gs_render/default-peter-TH050-small-head-movements-adjust
步骤3: 开始更新transforms_train.json配置...
步骤3完成: 已更新JSON文件，共包含250帧数据
已使用来自 /root/autodl-tmp/V

100%|██████████| 250/250 [00:00<00:00, 318.42it/s]


In [12]:
# Stage 3.2: render the trained model driven by the diffposetalk data (-t), then
# preview the resulting video.
render_command = [
    "python", "render.py",
    "-s", EXPORT_OUTPUT_FOLDER,
    "-m", GAUSSIANAVATARS_MODEL_FOLDER,
    "-t", DIFFPOSETALK_DATA,
    "--select_camera_id", "0",
    "--iteration", "60000"
]

print("开始diffposetalk-peter驱动渲染阶段...")

# Run the renderer; stdout and stderr of the child process go to the log file.
try:
    with open(f"/root/autodl-tmp/gaussian_avatars_output/{DATA_FOLDER_NAME}/{SEQUENCE}_diffposetalk_driven_render.log", "w") as logf:
        subprocess.run(
            render_command,
            env=dict(os.environ, CUDA_VISIBLE_DEVICES=GPU_IDS),
            stdout=logf,
            stderr=subprocess.STDOUT,
            check=True
        )
except subprocess.CalledProcessError as e:
    print(f"diffposetalk-peter驱动渲染阶段失败，错误信息：{e}")
else:
    # Report success and preview only when render.py exited with code 0; otherwise a
    # stale video left over from an earlier run could be mistaken for the new result.
    print("diffposetalk-peter驱动渲染阶段完成")

    # Video is used to show the result inline in the notebook.
    from IPython.display import Video

    # The output sub-folder is derived from the driving-data folder name instead of being
    # hard-coded, so it stays correct when DIFFPOSETALK_DATA is changed. The "_0" suffix
    # matches the folder naming seen in both render cells of this notebook for camera id 0.
    driving_data_name = os.path.basename(DIFFPOSETALK_DATA.rstrip('/'))
    render_video_path = f"{GAUSSIANAVATARS_MODEL_FOLDER}/{driving_data_name}_0/ours_60000/renders_no_audio.mp4"

    if os.path.exists(render_video_path):
        print("正在展示渲染视频...")
        display(Video(render_video_path))
    else:
        print(f"未找到渲染视频文件: {render_video_path}")

开始diffposetalk-peter驱动渲染阶段...
diffposetalk-peter驱动渲染阶段完成
正在展示渲染视频...


## 4. 模型导出

导出各个文件并打包

注意：  
你需要设置呼吸态（idle）片段的开始和结束时间（IDLE_START_TIME / IDLE_END_TIME）；  
你需要设置独白（monologue）片段的开始和结束时间（MONOLOGUE_START_TIME / MONOLOGUE_END_TIME）。

In [None]:
IDLE_START_TIME = "0"
IDLE_END_TIME = "3"

MONOLOGUE_START_TIME = "0"
MONOLOGUE_END_TIME = "20"

os.makedirs("/root/autodl-tmp/export_ios_client/{DATA_FOLDER_NAME}/{SEQUENCE}", exist_ok=True)
os.makedirs("/root/autodl-tmp/export_ios_client/{DATA_FOLDER_NAME}/{SEQUENCE}/idle_flame", exist_ok=True)
GSMODEL_EXPORT_FOLDER = "/root/autodl-tmp/export_ios_client/{DATA_FOLDER_NAME}/{SEQUENCE}"

# 导出3dgs点云
!cp -r {GAUSSIANAVATARS_MODEL_FOLDER}/point_cloud/iteration_600000/point_cloud.ply {GSMODEL_EXPORT_FOLDER}

# 导出训练时的训练时候的首帧flame
!cp -r {EXPORT_OUTPUT_FOLDER}/flame_param/00000.npz {GSMODEL_EXPORT_FOLDER}

# 提取呼吸态flame（正序+倒序）
!python root/autodl-tmp/scripts/extract_flames_split.py \
    --src_dir /path/to/source \
    --dst_dir /path/to/destination/idle \
    --start_time IDLE_START_TIME \
    --end_time IDLE_END_TIME \
    --fps 25 \
    --mode idle

# 提取独白flame（仅正序）
!python root/autodl-tmp/scripts/extract_flames_split.py \
    --src_dir /path/to/source \
    --dst_dir /path/to/destination/monologue \
    --start_time MONOLOGUE_START_TIME \
    --end_time MONOLOGUE_END_TIME \
    --fps 25 \
    --mode monologue

