In [3]:
import os
import json
import shutil
import random

In [None]:
# Root directory of the local COCO dataset (note the trailing slash) and the
# path of its train2017 instance-annotation file underneath it.
coco_root = '/mnt/data/dfodl/datasets/coco/'
train_annotations = f'{coco_root}annotations/instances_train2017.json'

In [4]:
def split_dataset_by_coco_annotations(
    coco_root: str,
    images_dir_name: str,
    annotation_file_name: str,
    labels_dir_name: str = None,
    num_splits: int = 10,
    shuffle: bool = True,
    seed: int = None
):
    """
    Split the image list of one COCO annotation file into ``num_splits`` subsets.

    Only the annotation JSON is read here; no image or label file is touched.
    The first ``num_splits - 1`` subsets each hold ``len(images) // num_splits``
    images and the last subset additionally receives the remainder.

    :param coco_root: COCO dataset root directory (str)
    :param images_dir_name: image folder name (train2017 or val2017) (str);
        accepted for interface compatibility, not used by this function
    :param annotation_file_name: annotation file name inside
        ``<coco_root>/annotations`` (e.g. instances_train2017.json) (str)
    :param labels_dir_name: sub-folder under labels/ matching images_dir_name,
        or None (str); accepted for interface compatibility, not used here
    :param num_splits: number of subsets to produce, must be >= 1 (int)
    :param shuffle: whether to shuffle the image list before splitting (bool)
    :param seed: optional seed for a reproducible shuffle; with None (default)
        the module-level ``random`` state is used, as before (int)
    :return: (subsets, annotations_map)
        subsets: List[List[dict]], each inner list holds the image records
            (entries of coco_annotations["images"]) of one subset
        annotations_map: dict containing
          {
            "all_annotations": List[dict], # all original annotations
            "categories": List[dict],      # original categories ([] if absent)
            "info": dict,                  # original info ({} if absent)
            "licenses": List[dict]         # original licenses ([] if absent)
          }
    :raises ValueError: if num_splits is smaller than 1
    """
    # Fail early with a clear message instead of a ZeroDivisionError (0) or a
    # silently empty result (negative values).
    if num_splits < 1:
        raise ValueError(f"num_splits must be >= 1, got {num_splits}")

    # 1. Read the original COCO annotations
    annotation_path = os.path.join(coco_root, "annotations", annotation_file_name)
    with open(annotation_path, "r", encoding="utf-8") as f:
        coco_anno = json.load(f)

    all_images = coco_anno["images"]
    if shuffle:
        if seed is None:
            random.shuffle(all_images)
        else:
            # A private generator keeps the global random state untouched.
            random.Random(seed).shuffle(all_images)

    # Approximate number of images per subset
    total_images = len(all_images)
    split_size = total_images // num_splits

    # 2. Slice the image list; the last subset absorbs the remainder
    subsets = []
    for i in range(num_splits):
        start_idx = i * split_size
        end_idx = (i + 1) * split_size if i < num_splits - 1 else total_images
        subsets.append(all_images[start_idx:end_idx])

    # 3. Bundle everything needed later to write per-subset annotation files
    annotations_map = {
        "all_annotations": coco_anno["annotations"],
        "categories": coco_anno.get("categories", []),
        "info": coco_anno.get("info", {}),
        "licenses": coco_anno.get("licenses", [])
    }

    return subsets, annotations_map


def export_subset(
    subset_idx: int,
    subset_images: list,
    annotations_map: dict,
    images_dir_name: str,
    annotation_file_name: str,
    labels_dir_name: str,
    output_root: str,
    coco_root: str
):
    """
    Write one subset (images, label files, annotation JSON) to its own folder.

    The output goes to ``<output_root>/subset_<subset_idx + 1>/`` with the
    sub-folders ``annotations/``, ``images/<images_dir_name>/`` and, when
    ``labels_dir_name`` is truthy, ``labels/<labels_dir_name>/``.

    :param subset_idx: zero-based subset number (folder names are one-based)
    :param subset_images: image records of this subset (List[dict]); each
        record is read for its "id" and "file_name" keys
    :param annotations_map: dict with the keys "all_annotations", "info",
        "licenses" and "categories"
    :param images_dir_name: source image folder (train2017 / val2017)
    :param annotation_file_name: file name of the annotation JSON to write
    :param labels_dir_name: sub-folder under labels/; falsy to skip labels
    :param output_root: root directory receiving all subsets
    :param coco_root: root directory of the original COCO dataset
    """
    # 1. Create the directory layout of this subset
    subset_root = os.path.join(output_root, f"subset_{subset_idx + 1}")
    out_annotations = os.path.join(subset_root, "annotations")
    out_images = os.path.join(subset_root, "images", images_dir_name)
    out_labels = None
    if labels_dir_name:
        out_labels = os.path.join(subset_root, "labels", labels_dir_name)

    for directory in (out_annotations, out_images, out_labels):
        if directory:
            os.makedirs(directory, exist_ok=True)

    # 2. Copy each image and, if labels are requested, its label file
    src_images = os.path.join(coco_root, "images", images_dir_name)
    src_labels = None
    if out_labels:
        src_labels = os.path.join(coco_root, "labels", labels_dir_name)

    kept_ids = set()
    for image in subset_images:
        kept_ids.add(image["id"])
        file_name = image["file_name"]

        image_src = os.path.join(src_images, file_name)
        if not os.path.exists(image_src):
            print(f"[警告] {image_src} 不存在，跳过。")
        else:
            shutil.copy(image_src, os.path.join(out_images, file_name))

        if not out_labels:
            continue

        # The label file is assumed to share the image's base name with a
        # .txt extension (image.jpg -> image.txt); a missing label file is
        # skipped silently.
        label_name = os.path.splitext(file_name)[0] + ".txt"
        label_src = os.path.join(src_labels, label_name)
        if os.path.exists(label_src):
            shutil.copy(label_src, os.path.join(out_labels, label_name))

    # 3. Keep only the annotations that belong to this subset's images
    kept_annotations = []
    for annotation in annotations_map["all_annotations"]:
        if annotation["image_id"] in kept_ids:
            kept_annotations.append(annotation)

    # 4. Write the subset's own COCO annotation file
    payload = {
        "info": annotations_map["info"],
        "licenses": annotations_map["licenses"],
        "images": subset_images,
        "annotations": kept_annotations,
        "categories": annotations_map["categories"]
    }
    target_json = os.path.join(out_annotations, annotation_file_name)
    with open(target_json, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)

    print(f"[子集 {subset_idx + 1}] {images_dir_name} -> 图像数: {len(subset_images)}, 标注数: {len(kept_annotations)}")


def split_coco_train_val_into_subsets(
    coco_root: str,
    output_root: str,
    num_splits: int = 10,
    shuffle: bool = True
):
    """
    Split both train2017 and val2017 into ``num_splits`` parts and write them
    to ``<output_root>/subset_i/``, keeping the layout of the original COCO
    tree (annotations, images, labels), where images and labels each contain
    a train2017 and a val2017 folder.

    Assumed source layout:
    coco/
    ├── annotations/
    │   ├── instances_train2017.json
    │   └── instances_val2017.json
    ├── images/
    │   ├── train2017/
    │   └── val2017/
    └── labels/
        ├── train2017/
        └── val2017/

    Resulting layout (example with 2 parts):
    coco_splits/
    ├── subset_1/
    │   ├── annotations/
    │   │   ├── instances_train2017.json
    │   │   └── instances_val2017.json
    │   ├── images/
    │   │   ├── train2017/
    │   │   └── val2017/
    │   └── labels/
    │       ├── train2017/
    │       └── val2017/
    └── subset_2/
        ├── annotations/
        ├── images/
        └── labels/

    :param coco_root: root directory of the original COCO dataset
    :param output_root: output directory (e.g. "coco_splits")
    :param num_splits: number of parts (default 10)
    :param shuffle: whether the train / val image lists are each shuffled
    """
    # 1) Split train2017 first, then val2017. The labels folder of each split
    #    is assumed to carry the same name as its images folder.
    prepared = []
    for split_name in ("train2017", "val2017"):
        annotation_file = f"instances_{split_name}.json"
        split_subsets, split_annos_map = split_dataset_by_coco_annotations(
            coco_root=coco_root,
            images_dir_name=split_name,
            annotation_file_name=annotation_file,
            labels_dir_name=split_name,
            num_splits=num_splits,
            shuffle=shuffle
        )
        prepared.append((split_name, annotation_file, split_subsets, split_annos_map))

    # Make sure the output root exists
    os.makedirs(output_root, exist_ok=True)

    # 2) Export subset by subset: train2017 first, then val2017
    for part in range(num_splits):
        for split_name, annotation_file, split_subsets, split_annos_map in prepared:
            export_subset(
                subset_idx=part,
                subset_images=split_subsets[part],
                annotations_map=split_annos_map,
                images_dir_name=split_name,
                annotation_file_name=annotation_file,
                labels_dir_name=split_name,
                output_root=output_root,
                coco_root=coco_root
            )
    print(f"已完成 train2017 & val2017 的 {num_splits} 份拆分，输出至: {output_root}")



In [5]:
# Source dataset root (bound under both names used in this notebook) and the
# directory that receives the generated subsets.
coco_root = coco_root_path = '/mnt/data/dfodl/datasets/coco/'
output_root_path = '/mnt/data/dfodl/datasets/coco_split/'

# Split train2017 and val2017 into 10 subsets each.
split_coco_train_val_into_subsets(coco_root_path, output_root_path, num_splits=10)


[子集 1] train2017 -> 图像数: 11828, 标注数: 84693
[子集 1] val2017 -> 图像数: 500, 标注数: 3532
[子集 2] train2017 -> 图像数: 11828, 标注数: 85509
[子集 2] val2017 -> 图像数: 500, 标注数: 3666
[子集 3] train2017 -> 图像数: 11828, 标注数: 85760
[子集 3] val2017 -> 图像数: 500, 标注数: 3780
[子集 4] train2017 -> 图像数: 11828, 标注数: 85673
[子集 4] val2017 -> 图像数: 500, 标注数: 3558
[子集 5] train2017 -> 图像数: 11828, 标注数: 85184
[子集 5] val2017 -> 图像数: 500, 标注数: 3965
[子集 6] train2017 -> 图像数: 11828, 标注数: 85186
[子集 6] val2017 -> 图像数: 500, 标注数: 3708
[子集 7] train2017 -> 图像数: 11828, 标注数: 87313
[子集 7] val2017 -> 图像数: 500, 标注数: 3617
[子集 8] train2017 -> 图像数: 11828, 标注数: 86948
[子集 8] val2017 -> 图像数: 500, 标注数: 3535
[子集 9] train2017 -> 图像数: 11828, 标注数: 86182
[子集 9] val2017 -> 图像数: 500, 标注数: 3798
[子集 10] train2017 -> 图像数: 11835, 标注数: 87553
[子集 10] val2017 -> 图像数: 500, 标注数: 3622
已完成 train2017 & val2017 的 10 份拆分，输出至: /mnt/data/dfodl/datasets/coco_split/


In [1]:
import torch

In [25]:
def load_weights_and_avg(model_paths, owner_idx=0):
    """
    Loads multiple YOLOv5 .pt files, performing partial FedAvg on only
    the parameters whose names end with '.weight' or '.bias'.

    All other entries (e.g. BatchNorm running_mean, num_batches_tracked,
    or any custom parameters) are kept from the 'owner' checkpoint (owner_idx).

    Floating-point tensors are summed in at least float32 and the mean is cast
    back to the tensor's stored dtype, so half-precision checkpoints neither
    lose precision nor overflow while being accumulated.

    :param model_paths: List of checkpoint paths for the group (neighbors + self).
                        Each checkpoint must be a dict whose "model" entry
                        exposes ``state_dict()``.
    :param owner_idx: Index in 'model_paths' corresponding to the node's own model.
                     That checkpoint is the "owner" for non-(weight|bias) entries.
    :return: aggregated_state_dict
    :raises ValueError: if model_paths is empty.
    """
    import torch

    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("model_paths must contain at least one checkpoint path")

    # 1) Load all models
    all_sd = []
    for mp in model_paths:
        # SECURITY: weights_only=False unpickles arbitrary Python objects, which
        # is required because the checkpoint stores a full model object rather
        # than a plain state dict. Only load checkpoints from trusted sources.
        ckpt = torch.load(mp, map_location="cpu", weights_only=False)
        all_sd.append(ckpt["model"].state_dict())

    # 2) Use the owner node's checkpoint as the base; entries that are not
    #    averaged below therefore stay at the owner's values.
    base_sd = {k: v.clone() for k, v in all_sd[owner_idx].items()}

    count = len(all_sd)

    # 3) Average every '.weight' / '.bias' entry across all checkpoints
    for param_name in base_sd:
        if not param_name.endswith((".weight", ".bias")):
            continue

        first = all_sd[0][param_name]
        if first.is_floating_point():
            # Accumulate in float32 (or wider) to avoid fp16/bf16 rounding and
            # overflow, then return to the dtype of the first checkpoint's tensor.
            acc_dtype = torch.promote_types(first.dtype, torch.float32)
            summed = torch.zeros_like(first, dtype=acc_dtype)
            for sd in all_sd:
                summed += sd[param_name].to(acc_dtype)
            base_sd[param_name] = (summed / count).to(first.dtype)
        else:
            # Non-floating tensors keep the plain sum-then-divide behaviour.
            summed = first.clone()
            for sd in all_sd[1:]:
                summed += sd[param_name]
            base_sd[param_name] = summed / count

    return base_sd

In [5]:
# Collect the round-0 checkpoint path of each of the 4 nodes (node0 .. node3).
model_paths = []
for i in range(4):
    mp = f'/mnt/data/dfodl/dfl/dfl_output/node{i}_round0/model_round0_node{i}.pt'
    model_paths.append(mp)
# NOTE: `mp` stays bound to the last path (node3) after the loop; the later
# `torch.load(mp, ...)` cell relies on that leftover variable.

In [20]:
# Load one checkpoint onto the CPU for inspection. weights_only=False makes
# torch.load unpickle the full saved object (ckpt["model"] is used as a model
# below), so this should only be done with trusted checkpoint files.
ckpt = torch.load(mp, map_location="cpu", weights_only=False)

In [24]:
# Show the state-dict entry names of the loaded model; the suffixes
# ('.weight', '.bias', '.running_mean', ...) are what the averaging keys on.
ckpt["model"].state_dict().keys()

odict_keys(['model.0.conv.weight', 'model.0.bn.weight', 'model.0.bn.bias', 'model.0.bn.running_mean', 'model.0.bn.running_var', 'model.0.bn.num_batches_tracked', 'model.1.conv.weight', 'model.1.bn.weight', 'model.1.bn.bias', 'model.1.bn.running_mean', 'model.1.bn.running_var', 'model.1.bn.num_batches_tracked', 'model.2.cv1.conv.weight', 'model.2.cv1.bn.weight', 'model.2.cv1.bn.bias', 'model.2.cv1.bn.running_mean', 'model.2.cv1.bn.running_var', 'model.2.cv1.bn.num_batches_tracked', 'model.2.cv2.conv.weight', 'model.2.cv2.bn.weight', 'model.2.cv2.bn.bias', 'model.2.cv2.bn.running_mean', 'model.2.cv2.bn.running_var', 'model.2.cv2.bn.num_batches_tracked', 'model.2.cv3.conv.weight', 'model.2.cv3.bn.weight', 'model.2.cv3.bn.bias', 'model.2.cv3.bn.running_mean', 'model.2.cv3.bn.running_var', 'model.2.cv3.bn.num_batches_tracked', 'model.2.m.0.cv1.conv.weight', 'model.2.m.0.cv1.bn.weight', 'model.2.m.0.cv1.bn.bias', 'model.2.m.0.cv1.bn.running_mean', 'model.2.m.0.cv1.bn.running_var', 'model.2.m

In [26]:
# Aggregate the four checkpoints with index 0 of model_paths as the owner; as
# the cell's last expression, the returned state dict is displayed in full.
load_weights_and_avg(model_paths,0)

{'model.0.conv.weight': tensor([[[[-0.00202,  0.05011, -0.07983, -0.07135, -0.03714,  0.02542],
           [-0.00427,  0.07410, -0.01005,  0.02411, -0.02960, -0.01906],
           [-0.09412, -0.06598, -0.04156,  0.00210,  0.03717,  0.05664],
           [-0.06689, -0.04370,  0.03302,  0.07867, -0.02112,  0.07007],
           [-0.01727,  0.00813,  0.08545, -0.09027, -0.06158, -0.02571],
           [-0.03864,  0.08167, -0.06372, -0.04562, -0.06854, -0.09143]],
 
          [[-0.05936,  0.07922,  0.03976,  0.04388,  0.00265, -0.05222],
           [ 0.01226, -0.09363, -0.07294, -0.05322,  0.05795,  0.05383],
           [-0.04681, -0.00743,  0.05750,  0.09186,  0.03488,  0.00991],
           [ 0.06070, -0.06030,  0.01419, -0.07751, -0.06982, -0.05255],
           [ 0.03995,  0.03528, -0.05963,  0.02658,  0.05038, -0.01431],
           [ 0.00055,  0.01956,  0.05698,  0.08984, -0.07587, -0.03662]],
 
          [[ 0.03143,  0.07336,  0.07800,  0.07990,  0.01496, -0.08783],
           [ 0.00223, 