In [8]:
import json
import random


def generate_node(node_type):
    """Generate a randomized hardware description for a single node.

    Args:
        node_type: Tier of the node; one of "cloud", "edge" or "terminal".

    Returns:
        dict with the keys "node_id", "cpu_cores", "memory", "gpu_count",
        "gpu_model", "gpu_memory" and "ip_address".

    Raises:
        KeyError: If ``node_type`` is not one of the three known tiers.

    Note:
        "node_id" and "ip_address" are drawn at random and are NOT guaranteed
        to be unique across calls; callers needing uniqueness must de-duplicate.
    """
    # Allowed GPU counts per tier (terminal nodes may have a single GPU).
    gpu_configs = {
        "cloud": [4, 8],
        "edge": [2, 4],
        "terminal": [1, 2],
    }

    # Allowed CPU core counts per tier (all multiples of 8).
    cpu_configs = {
        "cloud": [32, 48, 64],
        "edge": [16, 24, 32],
        "terminal": [8, 16, 24],
    }

    # Allowed system memory sizes per tier, in GB (all multiples of 16).
    memory_configs = {
        "cloud": [128, 256, 512],
        "edge": [64, 128, 256],
        "terminal": [32, 64, 128],
    }

    # GPU model fixed per tier, with its on-board memory in GB.
    gpu_specs = {
        "cloud": {"model": "V100", "memory": 32},
        "edge": {"model": "T4", "memory": 16},
        "terminal": {"model": "P100", "memory": 16},
    }

    # Drawn first so the sequence of random calls matches the historical order.
    gpu_count = random.choice(gpu_configs[node_type])

    return {
        "node_id": f"{node_type}_node_{random.randint(1000, 9999)}",
        "cpu_cores": random.choice(cpu_configs[node_type]),
        "memory": random.choice(memory_configs[node_type]),
        "gpu_count": gpu_count,
        "gpu_model": gpu_specs[node_type]["model"],
        "gpu_memory": gpu_specs[node_type]["memory"],
        # Host octet is limited to 1..254: .255 is the broadcast address of a
        # /24 network and must not be handed out as a node address.
        "ip_address": f"192.168.{random.randint(1, 255)}.{random.randint(1, 254)}"
    }


In [9]:
def generate_cluster_config(cluster_counts=None, nodes_per_cluster=None):
    """Generate a complete randomized cluster configuration.

    For each tier ("cloud", "edge", "terminal", in that order) a random number
    of clusters is created, and each cluster is filled with a random number of
    nodes produced by ``generate_node``.

    Args:
        cluster_counts: Optional mapping ``tier -> {"min": int, "max": int}``
            giving the inclusive range for the number of clusters of that
            tier. Tiers that are omitted keep the built-in defaults; keys
            other than the three known tiers are ignored.
        nodes_per_cluster: Optional mapping with the same shape giving the
            inclusive range for the number of nodes in each cluster.

    Returns:
        dict of the form ``{"clusters": {"cloud_clusters": [...],
        "edge_clusters": [...], "terminal_clusters": [...]}}`` where every
        cluster is a dict with "cluster_id", "cluster_name", "cluster_type"
        and "nodes".
    """
    # Built-in inclusive ranges for how many clusters exist per tier.
    default_cluster_counts = {
        "cloud": {"min": 2, "max": 3},
        "edge": {"min": 3, "max": 5},
        "terminal": {"min": 4, "max": 6},
    }
    # Built-in inclusive ranges for how many nodes each cluster holds.
    default_nodes_per_cluster = {
        "cloud": {"min": 6, "max": 8},
        "edge": {"min": 4, "max": 6},
        "terminal": {"min": 2, "max": 4},
    }

    # Caller overrides replace the defaults tier by tier.
    cluster_counts = {**default_cluster_counts, **(cluster_counts or {})}
    nodes_per_cluster = {**default_nodes_per_cluster, **(nodes_per_cluster or {})}

    cluster_config = {"clusters": {}}

    # One data-driven loop replaces three copy-pasted per-tier loops. The tier
    # order and the order of random draws are kept, so output for a given
    # random state is unchanged when no overrides are passed.
    for node_type in ("cloud", "edge", "terminal"):
        count_range = cluster_counts[node_type]
        size_range = nodes_per_cluster[node_type]
        cluster_total = random.randint(count_range["min"], count_range["max"])

        tier_clusters = []
        for index in range(1, cluster_total + 1):
            node_total = random.randint(size_range["min"], size_range["max"])
            tier_clusters.append({
                "cluster_id": f"{node_type}_cluster_{index}",
                # capitalize() yields "Cloud" / "Edge" / "Terminal".
                "cluster_name": f"{node_type.capitalize()}Cluster-{index}",
                "cluster_type": node_type,
                "nodes": [generate_node(node_type) for _ in range(node_total)],
            })
        cluster_config["clusters"][f"{node_type}_clusters"] = tier_clusters

    return cluster_config

In [10]:
def generate_fs_config(cluster_config):
    """Generate the training-task data layout on top of a cluster config.

    For every predefined training task, a random subset of edge-node IP
    addresses is chosen to hold the dataset (2-3 nodes) and the model
    (1-2 nodes). When fewer distinct edge nodes exist than requested, all
    available ones are used instead.

    Args:
        cluster_config: Mapping shaped like the result of
            ``generate_cluster_config``; only
            ``cluster_config["clusters"]["edge_clusters"][i]["nodes"][j]["ip_address"]``
            is read.

    Returns:
        dict ``{"tasks": [...]}`` with one entry per training task containing
        "model_name", "task_type", "batch_sizes", "dataset" and "model"; the
        last two each hold "size_mb" and "storage_nodes".

    Raises:
        ValueError: If ``cluster_config`` contains no edge nodes.
        KeyError: If the expected keys are missing from ``cluster_config``.
    """
    # Catalogue of training tasks. "model_size" is in MB. "dataset_size" is
    # annotated in the original source as a sample count (images/sentences).
    training_tasks = {
        "ResNet50": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32, 64],
            "dataset_size": 1000000,  # 1 million images
            "model_size": 98  # 98 MB
        },
        "VGG19": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32],
            "dataset_size": 1200000,
            "model_size": 549  # 549 MB
        },
        "InceptionV3": {
            "task_type": "Image Classification",
            "batch_sizes": [16, 32],
            "dataset_size": 1500000,
            "model_size": 92  # 92 MB
        },
        "DenseNet161": {
            "task_type": "Image Classification",
            "batch_sizes": [8, 16],
            "dataset_size": 800000,
            "model_size": 110  # 110 MB
        },
        "DCGAN": {
            "task_type": "Image-to-Image Translation",
            "batch_sizes": [64, 128, 256],
            "dataset_size": 500000,
            "model_size": 45  # 45 MB
        },
        "LSTM": {
            "task_type": "Language Modeling",
            "batch_sizes": [10, 20, 40, 80],
            "dataset_size": 2000000,  # 2 million sentences
            "model_size": 35  # 35 MB
        },
        "Transformer": {
            "task_type": "Language Modeling",
            "batch_sizes": [16, 32, 64, 128],
            "dataset_size": 3000000,  # 3 million sentences
            "model_size": 550  # 550 MB
        }
    }

    # Collect edge-node IPs. Node IPs are generated randomly and may collide,
    # so de-duplicate (order preserved); otherwise random.sample could return
    # the same address twice within one storage_nodes list.
    edge_nodes = list(dict.fromkeys(
        node["ip_address"]
        for cluster in cluster_config["clusters"]["edge_clusters"]
        for node in cluster["nodes"]
    ))
    if not edge_nodes:
        raise ValueError("cluster_config contains no edge nodes to store datasets or models on")

    training_distribution = {
        "tasks": []
    }

    for model_name, task_info in training_tasks.items():
        # Dataset replicas: 2-3 edge nodes, capped by what is available so a
        # small cluster does not make random.sample raise.
        data_nodes = random.sample(
            edge_nodes, min(random.randint(2, 3), len(edge_nodes)))
        # Model replicas: 1-2 edge nodes, capped the same way.
        model_nodes = random.sample(
            edge_nodes, min(random.randint(1, 2), len(edge_nodes)))

        task_config = {
            "model_name": model_name,
            "task_type": task_info["task_type"],
            "batch_sizes": task_info["batch_sizes"],
            "dataset": {
                # NOTE(review): dataset_size looks like a sample count but is
                # emitted under "size_mb"; key kept for schema compatibility -
                # confirm the intended unit with consumers of fs_config.json.
                "size_mb": task_info["dataset_size"],
                "storage_nodes": data_nodes
            },
            "model": {
                "size_mb": task_info["model_size"],
                "storage_nodes": model_nodes
            }
        }
        training_distribution["tasks"].append(task_config)

    return training_distribution

In [11]:
if __name__ == "__main__":
    # Build the cluster inventory first; the data-layout config is derived from it.
    cluster_config = generate_cluster_config()
    fs_config = generate_fs_config(cluster_config)

    # Persist both configurations as pretty-printed UTF-8 JSON in the current
    # working directory: cluster resources first, then the training-task layout.
    for output_path, payload in (
        ("cluster_config.json", cluster_config),
        ("fs_config.json", fs_config),
    ):
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)