## 以下为数据集构建

创建json文件

转换成obj格式，old是原始的glb转成的obj，new是提取了行的obj

In [None]:
!pip install trimesh

In [None]:
import os
import trimesh

# Convert the .glb files found under a folder tree into .obj files.
def convert_glb_to_obj(glb_folder, obj_folder, max_count=None):
    """Export every ``.glb`` file under *glb_folder* as an ``.obj`` in *obj_folder*.

    The source tree is walked recursively, but all outputs land directly in
    *obj_folder* under the source file's base name.  Outputs that already
    exist are skipped, so the function can be re-run to resume.

    Args:
        glb_folder: Root folder searched for ``.glb`` files.
        obj_folder: Destination folder; created if it does not exist.
        max_count: Optional limit.  Both counters start at 1 and the walk
            stops once their sum exceeds this value, so at most
            ``max_count - 1`` files are converted or skipped.  Failed
            conversions do not count.  ``None`` means no limit.
    """
    if not os.path.exists(obj_folder):
        os.makedirs(obj_folder)
        print(f"创建文件夹: {obj_folder}")

    # 1-based so the values can be printed directly as running indices.
    converted_idx = 1
    skipped_idx = 1

    def limit_reached():
        return max_count is not None and converted_idx + skipped_idx > max_count

    for current_dir, _subdirs, names in os.walk(glb_folder):
        for name in names:
            if not name.endswith('.glb'):
                continue
            if limit_reached():
                break

            source = os.path.join(current_dir, name)
            target = os.path.join(obj_folder, os.path.splitext(name)[0] + '.obj')

            if os.path.exists(target):
                print(f"{target} 已存在，跳过转换。[{skipped_idx}]")
                skipped_idx += 1
                continue

            print(f"正在将 {source} 转换为 {target}...")
            try:
                loaded = trimesh.load(source)
                loaded.export(target, file_type='obj')
                print(f"[{converted_idx}] 成功将 {source} 转换为 {target}")
                converted_idx += 1
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(f"将 {source} 转换为 {target} 时出错: {e}")
        # The inner break only leaves the current directory; stop the walk too.
        if limit_reached():
            break

# Rewrite one OBJ face record so it only references data kept in the output.
def _rewrite_face_line(face_line, keep_vt):
    """Return *face_line* with indices to dropped records removed.

    Each corner of a face is ``v``, ``v/vt``, ``v//vn`` or ``v/vt/vn``.  The
    normal index is always removed because ``vn`` records are never written
    to the processed file; the texture index is kept only when *keep_vt* is
    true and the corner actually has one.
    """
    corners = []
    for corner in face_line.split()[1:]:
        indices = corner.split('/')
        if keep_vt and len(indices) > 1 and indices[1]:
            corners.append(f"{indices[0]}/{indices[1]}")
        else:
            corners.append(indices[0])
    return f"f {' '.join(corners)}\n"

# Keep only the v / vt / f records of an OBJ file.
def _filter_obj_lines(lines, remove_vt):
    """Return the output lines for one OBJ file: all ``v``, then ``vt``, then ``f``.

    Every other record (comments, ``vn``, groups, materials, ...) is dropped.
    When *remove_vt* is true the ``vt`` records are dropped as well.
    """
    v_lines = []
    vt_lines = []
    f_lines = []
    for line in lines:
        if line.startswith('v '):
            v_lines.append(line)
        elif line.startswith('vt '):
            if not remove_vt:
                vt_lines.append(line)
        elif line.startswith('f '):
            f_lines.append(_rewrite_face_line(line, keep_vt=not remove_vt))
    return v_lines + vt_lines + f_lines

# Process obj files, optionally dropping vt records and the matching face indices.
def process_obj_files(obj_folder, processed_obj_folder, max_count=None, remove_vt=False):
    """Write a reduced copy of every ``.obj`` file under *obj_folder*.

    The reduced file contains only vertex (``v``), optionally texture
    coordinate (``vt``) and face (``f``) records.  Face corners are rewritten
    so they never point at records that were dropped: the normal index is
    always removed, and the texture index is removed when *remove_vt* is true.
    Outputs that already exist are skipped, so the function can be re-run to
    resume.

    Args:
        obj_folder: Root folder searched recursively for ``.obj`` files.
        processed_obj_folder: Destination folder; created if it does not
            exist.  Outputs are written flat, named after the source file.
        max_count: Optional limit.  Both counters start at 1 and the walk
            stops once their sum exceeds this value, so at most
            ``max_count - 1`` files are processed or skipped.  Files that
            fail do not count.  ``None`` means no limit.
        remove_vt: When true, drop ``vt`` records and texture indices.
    """
    if not os.path.exists(processed_obj_folder):
        os.makedirs(processed_obj_folder)
        print(f"创建文件夹: {processed_obj_folder}")

    # 1-based so success_count can be printed directly as a running index.
    success_count = 1
    skip_count = 1

    for root, _dirs, files in os.walk(obj_folder):
        for file in files:
            if not file.endswith('.obj'):
                continue
            if max_count is not None and (success_count + skip_count) > max_count:
                break
            obj_file_path = os.path.join(root, file)
            processed_obj_file_path = os.path.join(processed_obj_folder, file)

            if os.path.exists(processed_obj_file_path):
                skip_count += 1
                continue

            print(f"正在处理 {obj_file_path}...")
            try:
                with open(obj_file_path, 'r') as f_in:
                    filtered_lines = _filter_obj_lines(f_in, remove_vt)

                with open(processed_obj_file_path, 'w') as f_out:
                    f_out.writelines(filtered_lines)
                print(f"[{success_count}] 成功处理 {obj_file_path} 并保存到 {processed_obj_file_path}")
                success_count += 1
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(f"处理 {obj_file_path} 时出错: {e}")
        # The inner break only leaves the current directory; stop the walk too.
        if max_count is not None and (success_count + skip_count) > max_count:
            break

# Count the files contained in a folder tree.
def count_files_in_folder(folder):
    """Return the number of files under *folder*, including all subfolders.

    Directories themselves are not counted.  A folder that does not exist
    yields 0 because ``os.walk`` produces nothing for it.
    """
    return sum(len(names) for _, _, names in os.walk(folder))

if __name__ == "__main__":
    # The three folder paths are empty here and must be filled in before running.
    glb_folder = ""
    obj_folder = ""
    processed_obj_folder = ""

    max_count = 60000  # Number of files to handle; set to None to process all files
    # Whether to drop vt records (and the texture indices in face records)
    remove_vt = True  # Change to False to keep them

    # Both functions start their counters at 1 and stop once the counter sum
    # exceeds the limit, so a limit of N handles N - 1 files.  Passing
    # max_count + 1 makes them handle exactly max_count files.  None must be
    # passed through unchanged: adding 1 to it would raise TypeError.
    limit = None if max_count is None else max_count + 1

    print("开始将 glb 文件转换为 obj 文件...")
    convert_glb_to_obj(glb_folder, obj_folder, limit)
    print("glb 到 obj 转换完成。")

    print("开始处理 obj 文件...")
    process_obj_files(obj_folder, processed_obj_folder, limit, remove_vt)
    print("obj 文件处理完成。")

    file_count = count_files_in_folder(processed_obj_folder)
    print(f"{processed_obj_folder} 中的文件数量为 {file_count}")