In [5]:
import json
import random
import uuid


def generate_node(node_type):
    """Build the randomized resource description of a single compute node.

    Args:
        node_type: Tier of the node; one of "cloud", "edge" or "terminal".

    Returns:
        dict with the keys ``node_id``, ``cpu_cores``, ``memory`` (GB),
        ``gpu_count``, ``gpu_model``, ``gpus`` (one ``{"gpu_id", "gpu_type"}``
        entry per GPU), ``gpu_memory`` (GB per GPU) and ``ip_address``.

    Raises:
        KeyError: If ``node_type`` is not one of the known tiers.

    Note:
        ``node_id`` is drawn from only 9000 values per tier and the IP address
        from a limited range, so two calls may return the same id or address.
        Callers that need uniqueness must check for it themselves.
    """
    # Allowed GPU counts per tier (terminal nodes may carry a single GPU).
    gpu_configs = {
        "cloud": [4, 8],      # cloud nodes: 4 or 8 GPUs
        "edge": [2, 4],       # edge nodes: 2 or 4 GPUs
        "terminal": [1, 2]    # terminal nodes: 1 or 2 GPUs
    }

    # CPU core counts per tier (typical server sizes, multiples of 8).
    cpu_configs = {
        "cloud": [32, 48, 64],
        "edge": [16, 24, 32],
        "terminal": [8, 16, 24]
    }

    # System memory per tier in GB (typical server sizes, multiples of 16).
    memory_configs = {
        "cloud": [128, 256, 512],
        "edge": [64, 128, 256],
        "terminal": [32, 64, 128]
    }

    # GPU model installed in each tier and its on-board memory in GB.
    gpu_specs = {
        "cloud": {"model": "V100", "memory": 32},      # V100 32GB
        "edge": {"model": "P100", "memory": 16},       # P100 16GB
        "terminal": {"model": "T4", "memory": 16}      # T4 16GB
    }

    # Fail with an explicit message instead of a bare KeyError from whichever
    # table happens to be indexed first.
    if node_type not in gpu_configs:
        raise KeyError(
            f"unknown node_type {node_type!r}; expected one of {sorted(gpu_configs)}"
        )

    gpu_count = random.choice(gpu_configs[node_type])
    gpu_model = gpu_specs[node_type]["model"]

    # Every GPU gets its own UUID; all GPUs of a node share the tier's model.
    gpu_list = [
        {"gpu_id": str(uuid.uuid4()), "gpu_type": gpu_model}
        for _ in range(gpu_count)
    ]

    return {
        "node_id": f"{node_type}_node_{random.randint(1000, 9999)}",
        "cpu_cores": random.choice(cpu_configs[node_type]),
        "memory": random.choice(memory_configs[node_type]),
        "gpu_count": gpu_count,
        "gpu_model": gpu_model,
        "gpus": gpu_list,
        "gpu_memory": gpu_specs[node_type]["memory"],
        # Host octet is capped at 254: x.x.x.255 is a broadcast address and
        # must not be handed out as a node address.
        "ip_address": f"192.168.{random.randint(1, 255)}.{random.randint(1, 254)}"
    }


In [6]:
def _generate_unique_node(node_type, used_node_ids, used_ips, max_attempts=1000):
    """Draw nodes from generate_node until one has an unused id and IP address.

    The accepted node's id and address are recorded in ``used_node_ids`` and
    ``used_ips`` (both sets are mutated in place).

    Raises:
        RuntimeError: If no collision-free node is produced within
            ``max_attempts`` draws.
    """
    for _ in range(max_attempts):
        node = generate_node(node_type)
        if node["node_id"] in used_node_ids or node["ip_address"] in used_ips:
            continue  # collision with an earlier node: draw again
        used_node_ids.add(node["node_id"])
        used_ips.add(node["ip_address"])
        return node
    raise RuntimeError(
        f"could not generate a unique {node_type} node after {max_attempts} attempts"
    )


def generate_cluster_config():
    """Generate a complete randomized cloud/edge/terminal cluster configuration.

    Returns:
        dict of the form ``{"clusters": {"cloud_clusters": [...],
        "edge_clusters": [...], "terminal_clusters": [...]}}``. Every cluster
        entry carries ``cluster_id``, ``cluster_name``, ``cluster_type``,
        ``nodes`` (as produced by ``generate_node``),
        ``intra_domain_bandwidth`` and ``inter_domain_bandwidth``.

    Node ids and IP addresses are unique across the whole returned
    configuration; a node that collides with an earlier one is redrawn.
    """
    # How many clusters to create per tier (inclusive range).
    cluster_counts = {
        "cloud": {"min": 1, "max": 2},
        "edge": {"min": 2, "max": 3},
        "terminal": {"min": 3, "max": 4},
    }

    # How many nodes each cluster of a tier contains (inclusive range).
    nodes_per_cluster = {
        "cloud": {"min": 10, "max": 15},
        "edge": {"min": 4, "max": 8},
        "terminal": {"min": 2, "max": 4},
    }

    # Bandwidth inside a cluster, in Mbps.
    intra_domain_bandwidth = {
        "cloud": {"min": 40000, "max": 100000},  # 40-100 Gbps
        "edge": {"min": 10000, "max": 40000},    # 10-40 Gbps
        "terminal": {"min": 1000, "max": 10000}, # 1-10 Gbps
    }

    # Bandwidth between domains, same range for every tier.
    inter_domain_bandwidth = {"min": 200, "max": 300}

    cluster_config = {"clusters": {"cloud_clusters": [], "edge_clusters": [], "terminal_clusters": []}}

    # Shared across all tiers so ids/addresses never repeat within the config.
    used_node_ids = set()
    used_ips = set()

    # Tiers are processed in a fixed order; within a cluster the random draws
    # happen as: node count, the nodes, intra bandwidth, inter bandwidth.
    for node_type in ("cloud", "edge", "terminal"):
        count_range = cluster_counts[node_type]
        size_range = nodes_per_cluster[node_type]
        intra_range = intra_domain_bandwidth[node_type]
        tier_clusters = cluster_config["clusters"][f"{node_type}_clusters"]

        for i in range(random.randint(count_range["min"], count_range["max"])):
            node_total = random.randint(size_range["min"], size_range["max"])
            nodes = [
                _generate_unique_node(node_type, used_node_ids, used_ips)
                for _ in range(node_total)
            ]
            tier_clusters.append(
                {
                    "cluster_id": f"{node_type}_cluster_{i+1}",
                    # "cloud" -> "CloudCluster-1", "edge" -> "EdgeCluster-1", ...
                    "cluster_name": f"{node_type.capitalize()}Cluster-{i+1}",
                    "cluster_type": node_type,
                    "nodes": nodes,
                    "intra_domain_bandwidth": random.randint(intra_range["min"], intra_range["max"]),
                    "inter_domain_bandwidth": random.randint(
                        inter_domain_bandwidth["min"], inter_domain_bandwidth["max"]
                    ),
                }
            )

    return cluster_config

In [7]:
def _training_task_catalog(version):
    """Return a fresh per-model task table for the requested workload size.

    ``version == "light"`` selects the small table; any other value selects
    the heavy table. ``dataset_size`` and ``model_size`` are emitted by
    ``generate_fs_config`` under the ``size_mb`` keys.
    """
    if version == "light":
        return {
            "ResNet50": {
                "task_type": "Image Classification",
                "batch_sizes": [16, 32, 64],
                "dataset_size": 5000,  # 5 GB (CIFAR / partial ImageNet)
                "model_size": 98  # 98 MB
            },
            "VGG19": {
                "task_type": "Image Classification",
                "batch_sizes": [16, 32],
                "dataset_size": 6000,  # 6 GB (partial dataset)
                "model_size": 549  # 549 MB
            },
            "InceptionV3": {
                "task_type": "Image Classification",
                "batch_sizes": [16, 32],
                "dataset_size": 8000,  # 8 GB (partial dataset)
                "model_size": 92  # 92 MB
            },
            "DenseNet161": {
                "task_type": "Image Classification",
                "batch_sizes": [8, 16],
                "dataset_size": 4000,  # 4 GB (partial dataset)
                "model_size": 110  # 110 MB
            },
            "DCGAN": {
                "task_type": "Image-to-Image Translation",
                "batch_sizes": [64, 128, 256],
                "dataset_size": 3000,  # 3 GB (e.g. CelebA)
                "model_size": 45  # 45 MB
            },
            "LSTM": {
                "task_type": "Language Modeling",
                "batch_sizes": [10, 20, 40, 80],
                "dataset_size": 1500,  # 1.5 GB (e.g. a Wikipedia subset)
                "model_size": 35  # 35 MB
            },
            "Transformer": {
                "task_type": "Language Modeling",
                "batch_sizes": [16, 32, 64, 128],
                "dataset_size": 3500,  # 3.5 GB (small language dataset)
                "model_size": 550  # 550 MB
            }
        }

    # NOTE(review): the original annotations described the heavy dataset sizes
    # as 15/20/18/12/8/5/30 GB, but the values below are emitted as size_mb and
    # read as MB are ten times larger (150/200/180/120/80/50/300 GB). The
    # values are kept unchanged -- confirm which magnitude is intended.
    return {
        "ResNet50": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32, 64],
            "dataset_size": 150000,  # full ImageNet
            "model_size": 980  # 980 MB (with pretrained weights)
        },
        "VGG19": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32],
            "dataset_size": 200000,  # large-scale dataset
            "model_size": 5490  # 5.49 GB (with pretrained weights)
        },
        "InceptionV3": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32],
            "dataset_size": 180000,  # full dataset
            "model_size": 920  # 920 MB
        },
        "DenseNet161": {
            "task_type": "Image Classification",
            "batch_sizes": [8, 16],
            "dataset_size": 120000,  # full dataset
            "model_size": 1100  # 1.1 GB
        },
        "DCGAN": {
            "task_type": "Image-to-Image Translation",
            "batch_sizes": [64, 128, 256],
            "dataset_size": 80000,  # high-resolution image dataset
            "model_size": 450  # 450 MB
        },
        "LSTM": {
            "task_type": "Language Modeling",
            "batch_sizes": [10, 20, 40, 80],
            "dataset_size": 50000,  # large text corpus
            "model_size": 350  # 350 MB
        },
        "Transformer": {
            "task_type": "Language Modeling",
            "batch_sizes": [16, 32, 64, 128],
            "dataset_size": 300000,  # large language-model dataset
            "model_size": 5500  # 5.5 GB (mid-sized Transformer)
        }
    }


def _collect_node_ids(cluster_config):
    """Return every distinct node_id in cluster_config, in first-seen order."""
    distinct = {}
    for clusters in cluster_config["clusters"].values():
        for cluster in clusters:
            for node in cluster["nodes"]:
                distinct.setdefault(node["node_id"], None)
    return list(distinct)


def generate_fs_config(cluster_config, version="light", replica_count=2):
    """Generate the training-task data layout for a cluster configuration.

    For every model in the task table, the dataset and the model weights are
    each assigned to ``replica_count`` distinct, randomly chosen storage nodes.

    Args:
        cluster_config: Mapping with a ``"clusters"`` dict whose values are
            lists of clusters, each holding a ``"nodes"`` list of dicts with a
            ``"node_id"`` (the shape returned by ``generate_cluster_config``).
        version: ``"light"`` selects the small task table; any other value
            selects the heavy one.
        replica_count: Number of storage nodes per dataset and per model.

    Returns:
        ``{"tasks": [...]}`` with one entry per model containing
        ``model_name``, ``task_type``, ``batch_sizes``, ``dataset`` and
        ``model``; the last two each hold ``size_mb`` and ``storage_nodes``.

    Raises:
        ValueError: If the configuration holds fewer distinct node ids than
            ``replica_count``.
    """
    training_tasks = _training_task_catalog(version)

    # De-duplicate ids so a repeated node_id can never be picked twice as the
    # "two" replicas of the same dataset or model.
    all_nodes = _collect_node_ids(cluster_config)
    if len(all_nodes) < replica_count:
        raise ValueError(
            f"need at least {replica_count} distinct nodes to place replicas, "
            f"found {len(all_nodes)}"
        )

    training_distribution = {
        "tasks": []
    }

    for model_name, task_info in training_tasks.items():
        # Dataset placement is drawn before model placement for each task.
        data_nodes = random.sample(all_nodes, replica_count)
        model_nodes = random.sample(all_nodes, replica_count)

        training_distribution["tasks"].append(
            {
                "model_name": model_name,
                "task_type": task_info["task_type"],
                "batch_sizes": task_info["batch_sizes"],
                "dataset": {
                    "size_mb": task_info["dataset_size"],
                    "storage_nodes": data_nodes
                },
                "model": {
                    "size_mb": task_info["model_size"],
                    "storage_nodes": model_nodes
                }
            }
        )

    return training_distribution

In [8]:
if __name__ == "__main__":
    # Build the cluster resource description first; both layouts refer to its nodes.
    cluster_config = generate_cluster_config()

    # Derive the light and the heavy data layout from that same cluster.
    light_fs_config = generate_fs_config(cluster_config, version="light")
    heavy_fs_config = generate_fs_config(cluster_config, version="heavy")

    # Persist each configuration as indented UTF-8 JSON, cluster config first.
    for output_path, payload in (
        ("cluster_config.json", cluster_config),
        ("fs_config_light.json", light_fs_config),
        ("fs_config_heavy.json", heavy_fs_config),
    ):
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)