In [None]:
import os
import random
import shutil
import re
from collections import defaultdict

# Maps the label token found in a file name to an integer class id.
# Tokens, in class-id order:
#   "0"   -> 0  (static)
#   "0_2" -> 1  (slightly_move)
#   "1"   -> 2  (move)
#   "4"   -> 3  (intensely_move)
label_mapping = {
    token: class_id
    for class_id, token in enumerate(("0", "0_2", "1", "4"))
}

# Folder configuration.
# NOTE(review): the source folder name says "3500" while both target folders
# say "3600" -- confirm this is the intended source directory.
data_folder = "202_packet_json_new_3500"  # folder holding the source JSON files
train_folder = "202_packet_json_new_3600_train"  # destination for the training split
eval_folder = "202_packet_json_new_3600_eval"  # destination for the evaluation split

# Make sure both destination folders exist (no error if they already do).
for _target_dir in (train_folder, eval_folder):
    os.makedirs(_target_dir, exist_ok=True)

def get_label_from_filename(filename):
    """Return the integer class id encoded in *filename*, or -1 if unknown.

    The label token is whatever remains after dropping the first 9
    characters and the last 5 characters of the name (for a name ending in
    ".json", those last 5 characters are the extension). The token is then
    looked up in ``label_mapping``.
    """
    label_token = filename[9:-5]
    if label_token in label_mapping:
        return label_mapping[label_token]
    return -1

def load_files_and_labels(folder_path):
    """Collect the labelled ``.json`` files directly inside *folder_path*.

    Returns a pair ``(file_list, labels)`` of equal-length lists: the joined
    path of each kept file and its integer label. Entries that do not end in
    ".json", or whose label resolves to -1, are left out.
    """
    # Pair every JSON entry with the label derived from its file name.
    candidates = [
        (os.path.join(folder_path, name), get_label_from_filename(name))
        for name in os.listdir(folder_path)
        if name.endswith(".json")
    ]
    # Keep only the entries whose label was recognised.
    kept = [(path, label) for path, label in candidates if label != -1]
    file_list = [path for path, _ in kept]
    labels = [label for _, label in kept]
    return file_list, labels

# Load every labelled JSON file from the source folder.
file_list, labels = load_files_and_labels(data_folder)
print(f"总共有 {len(file_list)} 个文件")

# Group file paths by class label so each class is split on its own
# (stratified split).
files_by_label = defaultdict(list)
for file, label in zip(file_list, labels):
    files_by_label[label].append(file)

# Fraction of each class that goes to the training set (the rest is eval).
train_ratio = 0.7

# Fixed seed for the split. The destination folders are reused between runs
# and nothing is deleted from them, so an unseeded shuffle would pick a
# different partition on every run and leave the same file in BOTH the train
# and eval folders. A seeded, order-independent shuffle makes a re-run copy
# exactly the same files to the same places.
split_seed = 42
split_rng = random.Random(split_seed)

# Split each class separately.
train_files = []
eval_files = []

# Visit labels in sorted order so the result does not depend on the order in
# which the directory listing happened to return the files.
for label in sorted(files_by_label):
    files = files_by_label[label]
    # os.listdir order is arbitrary; sort first so the seeded shuffle always
    # starts from the same sequence.
    files.sort()
    split_rng.shuffle(files)
    train_size = int(len(files) * train_ratio)
    train_files.extend(files[:train_size])  # first part -> training set
    eval_files.extend(files[train_size:])  # remainder -> evaluation set

# Copy each file into the folder of the split it was assigned to.
for file in train_files:
    shutil.copy(file, os.path.join(train_folder, os.path.basename(file)))

for file in eval_files:
    shutil.copy(file, os.path.join(eval_folder, os.path.basename(file)))

print(f"训练集包含 {len(train_files)} 个文件")
print(f"评估集包含 {len(eval_files)} 个文件")

总共有 3600 个文件
训练集包含 2520 个文件
评估集包含 1080 个文件
