# 03 · Simulación Federada con Flower (CPU, CUDA y OpenVINO)

Este notebook ejecuta varios escenarios de simulación federada en una sola máquina Windows utilizando Flower (`flwr.simulation.run_simulation`) con modelos PyTorch y post-fit de inferencia con OpenVINO. Se registran métricas por ronda y cliente en `metrics/03_flower_rounds.csv` para su análisis en el notebook 04.

In [7]:
# Silenciar warnings y bajar verbosidad de Flower solo en este notebook
import warnings, logging, os
warnings.filterwarnings("ignore", category=DeprecationWarning)
os.environ.setdefault("PYTHONWARNINGS", "ignore::DeprecationWarning")
logging.getLogger("flwr").setLevel(logging.ERROR)
logging.getLogger("ray").setLevel(logging.ERROR)


In [1]:
# Configuración general y matriz de escenarios
import os, sys, json, math, time, datetime as dt
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import flwr as fl
from flwr.common import ndarrays_to_parameters, parameters_to_ndarrays

# Reutilizamos utilidades del proyecto
sys.path.append(str(Path('..').resolve()))
from utils.logging_utils import get_logger
from utils.data_utils import get_cifar10_dataloaders
from utils.train_utils import train_model, evaluate_model
from utils.model_utils import count_params
from utils.device_utils import get_available_devices as get_torch_devices, get_gpu_name_and_driver, get_os_string
from utils.ov_utils import get_available_devices as get_ov_devices
from utils.infer_openvino import benchmark_numpy

logger = get_logger('nb03')

# Archivo CSV de salida
METRICS_CSV = str(Path('..').resolve().joinpath('metrics', '03_flower_rounds.csv'))
os.makedirs(Path(METRICS_CSV).parent, exist_ok=True)

# Semillas
SEED = 42
np.random.seed(SEED)

def lib_versions():
    vers = {
        'torch': getattr(torch, '__version__', 'N/A'),
        'flwr': getattr(fl, '__version__', 'N/A'),
    }
    try:
        import onnxruntime as ort
        vers['onnxruntime'] = getattr(ort, '__version__', 'N/A')
    except Exception:
        vers['onnxruntime'] = 'N/A'
    try:
        import openvino as ov
        vers['openvino'] = getattr(ov, '__version__', 'N/A')
    except Exception:
        vers['openvino'] = 'N/A'
    return vers

VERS = lib_versions()
print('Library versions:', VERS)

# Parámetros base del experimento
CFG = {
    'dataset': 'cifar10',
    'num_classes': 10,
    'model_name': 'cnn',
    'rounds': 3,
    'local_epochs': 1,
    'batch_size': 128,
    'num_clients': 4,
    'num_workers': 2,
    'cpu_threads_per_client': 2,
}

# Escenarios
SCENARIOS = {
    'A_cpu_only': {
        'desc': 'Todos en CPU (PyTorch).',
        'cuda_clients_per_round': 0,
        'postfit_ov': False,
        'partition': 'iid',
    },
    'B_cpu_cuda': {
        'desc': '1 cliente en CUDA, resto CPU. NVML energía solo CUDA.',
        'cuda_clients_per_round': 1,
        'postfit_ov': False,
        'partition': 'iid',
    },
    'C_cpu_cuda_postfit_ov': {
        'desc': 'Como B + post-fit inferencia OV (CPU/GPU/NPU).',
        'cuda_clients_per_round': 1,
        'postfit_ov': True,
        'partition': 'iid',
    },
    'D_noniid_dirichlet': {
        'desc': 'No-IID Dirichlet α=0.1 + B.',
        'cuda_clients_per_round': 1,
        'postfit_ov': False,
        'partition': 'dirichlet:0.1',
    }
}

# Mapa de dispositivos por cliente (estático)
# Reservamos el primer cliente como CUDA si disponible en escenarios B/C/D
TORCH_DEVS = get_torch_devices('pytorch')
HAS_CUDA = any(d['type']=='GPU' for d in TORCH_DEVS)

def client_device_map(num_clients: int, scenario_key: str):
    mapping = {}
    cuda_assigned = 0
    for cid in range(num_clients):
        if SCENARIOS[scenario_key]['cuda_clients_per_round']>0 and HAS_CUDA and cuda_assigned < 1 and cid==0:
            mapping[str(cid)] = 'cuda'
            cuda_assigned += 1
        else:
            mapping[str(cid)] = 'cpu'
    return mapping

# Helper: cabecera CSV (columnas mínimas + extras útiles)
CSV_COLUMNS = [
    'ts', 'round', 'cid', 'role', 'scenario', 'device_tag',
    't_train_s', 'energy_train_j', 'bytes_up', 'bytes_down', 'params_bytes', 'n_train',
    't_agg_s', 'acc_local', 'loss_local',
    'ov_cpu_lat_ms', 'ov_cpu_thr_ips',
    'ov_gpu_lat_ms', 'ov_gpu_thr_ips',
    'ov_npu_lat_ms', 'ov_npu_thr_ips',
    'torch_ver', 'onnxruntime_ver', 'openvino_ver', 'flwr_ver', 'os', 'gpu_name', 'gpu_driver'
]
if not os.path.exists(METRICS_CSV):
    pd.DataFrame(columns=CSV_COLUMNS).to_csv(METRICS_CSV, index=False)

# Dispositivos OV para post-fit
OV_DEVS = get_ov_devices()
OV_HAS_CPU = any('CPU' in d for d in OV_DEVS)
OV_HAS_GPU = any('GPU' in d for d in OV_DEVS)
OV_HAS_NPU = any('NPU' in d for d in OV_DEVS)
print('OpenVINO devices:', OV_DEVS)

gpu_name, gpu_driver = get_gpu_name_and_driver()
os_str = get_os_string()


Library versions: {'torch': '2.8.0+cu129', 'flwr': '1.20.0', 'onnxruntime': '1.22.0', 'openvino': '2025.2.0-19140-c01cd93e24d-releases/2025/2'}
OpenVINO devices: ['CPU', 'GPU', 'NPU']


In [2]:
# Carga de datos y particionado (IID y Dirichlet)
from torch.utils.data import Subset


def load_data(cfg):
    train_loader, test_loader = get_cifar10_dataloaders(
        batch_size=cfg['batch_size'], num_workers=cfg['num_workers'], data_augmentation=True, download=False,
    )
    # Extraemos dataset subyacente para particionar índices
    train_ds = train_loader.dataset
    test_ds = test_loader.dataset
    return train_ds, test_ds


def partition_indices_iid(n_samples: int, num_clients: int, seed: int = 0):
    rng = np.random.default_rng(seed)
    idx = np.arange(n_samples)
    rng.shuffle(idx)
    parts = np.array_split(idx, num_clients)
    return [p.tolist() for p in parts]


def partition_indices_dirichlet(y: np.ndarray, num_clients: int, alpha: float = 0.1, seed: int = 0):
    rng = np.random.default_rng(seed)
    K = y.max() + 1
    idx_per_class = [np.where(y == k)[0] for k in range(K)]
    for arr in idx_per_class:
        rng.shuffle(arr)
    quotas = rng.dirichlet([alpha] * num_clients, K)  # K x C
    client_indices = [[] for _ in range(num_clients)]
    for k in range(K):
        n_k = len(idx_per_class[k])
        sizes = (quotas[k] / quotas[k].sum() * n_k).astype(int)
        # Ajuste para asegurar cubrir todos los elementos
        while sizes.sum() < n_k:
            sizes[rng.integers(0, num_clients)] += 1
        splits = np.split(idx_per_class[k], np.cumsum(sizes)[:-1])
        for c in range(num_clients):
            client_indices[c].extend(splits[c].tolist())
    return client_indices


def make_client_loaders(train_ds, client_parts, batch_size: int, num_workers: int):
    from torch.utils.data import DataLoader
    loaders = {}
    for cid, idxs in enumerate(client_parts):
        loaders[str(cid)] = DataLoader(Subset(train_ds, idxs), batch_size=batch_size, shuffle=True, num_workers=num_workers)
    return loaders


In [3]:
# Cliente Flower (NumPyClient) con PyTorch y métricas
from typing import Dict, List, Tuple
from flwr.client import NumPyClient
from flwr.common import Code, Status
from collections import OrderedDict

from models import make_model
from torch import nn, optim
from torch.utils.data import DataLoader

from utils.energy import GpuEnergyMeterNVML


def get_model_and_opt(name: str, num_classes: int, device: torch.device):
    model = make_model(name, num_classes)
    model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    return model, optimizer


def _ndarrays_size_bytes(params: List[np.ndarray]) -> int:
    return int(sum(p.nbytes for p in params))


class CifarClient(NumPyClient):
    def __init__(self, cid: str, train_loader: DataLoader, test_loader: DataLoader, cfg: dict,
                 device_tag: str, postfit_ov: bool, ov_ir_dir: str | None = None):
        self.cid = cid
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.cfg = cfg
        self.device_tag = device_tag  # 'cpu' or 'cuda'
        self.postfit_ov = postfit_ov
        self.ov_ir_dir = ov_ir_dir
        self.model, self.optimizer = get_model_and_opt(cfg['model_name'], cfg['num_classes'], self._torch_device())
        self.criterion = nn.CrossEntropyLoss()
        self.params_bytes = count_params(self.model) * 4  # float32
        self.last_round = 0
        # Energy meter only for CUDA
        self.meter = GpuEnergyMeterNVML(0) if (self.device_tag == 'cuda') else None
        torch.set_num_threads(int(cfg.get('cpu_threads_per_client', 2)))

    def _torch_device(self):
        if self.device_tag == 'cuda' and torch.cuda.is_available():
            return torch.device('cuda:0')
        return torch.device('cpu')

    # Flower API
    def get_parameters(self, config):
        params = [v.cpu().detach().numpy() for _, v in self.model.state_dict().items()]
        return params

    def set_parameters(self, parameters: List[np.ndarray]):
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in zip(self.model.state_dict().keys(), parameters)})
        self.model.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        t0 = time.perf_counter()
        if parameters is not None:
            self.set_parameters(parameters)
        # Local train
        device = self._torch_device()
        self.model.to(device)
        measure_energy = bool(self.device_tag == 'cuda')
        hist = train_model(
            self.model, self.optimizer, self.train_loader, device,
            criterion=self.criterion, num_epochs=self.cfg['local_epochs'], verbose=False,
            measure_energy=measure_energy,
        )
        t_train = float(hist.get('epoch_time', [0.0])[-1])
        energy = float(hist.get('epoch_energy_j', [-1.0])[-1]) if measure_energy else -1.0
        # Payload sizes
        params_after = [v.cpu().detach().numpy() for _, v in self.model.state_dict().items()]
        bytes_up = _ndarrays_size_bytes(params_after)
        bytes_down = _ndarrays_size_bytes(parameters) if parameters is not None else 0
        # Acc/Loss local
        eval_res = evaluate_model(self.model, self.test_loader, device, criterion=self.criterion, verbose=False)
        acc_local = float(eval_res['accuracy'])
        loss_local = float(eval_res['loss'])
        # OpenVINO post-fit benchmark (lat/throughput). Energía N/D
        ov_cpu_lat = ov_cpu_thr = ov_gpu_lat = ov_gpu_thr = ov_npu_lat = ov_npu_thr = 'N/D'
        if self.postfit_ov and self.ov_ir_dir is not None:
            try:
                x_dummy = np.random.randn(32, 3, 32, 32).astype('float32')
                ir_cpu = os.path.join(self.ov_ir_dir, 'cnn_cifar10.xml')  # IR existente
                if OV_HAS_CPU and os.path.exists(ir_cpu):
                    m = benchmark_numpy(ir_cpu, x_dummy, device='CPU', runs=20)
                    ov_cpu_lat, ov_cpu_thr = round(m['lat_ms_mean'], 3), round(m['thr_ips'], 2)
                if OV_HAS_GPU and os.path.exists(ir_cpu):
                    m = benchmark_numpy(ir_cpu, x_dummy, device='GPU', runs=20)
                    ov_gpu_lat, ov_gpu_thr = round(m['lat_ms_mean'], 3), round(m['thr_ips'], 2)
                if OV_HAS_NPU and os.path.exists(ir_cpu):
                    x1 = np.random.randn(1, 3, 32, 32).astype('float32')
                    m = benchmark_numpy(ir_cpu, x1, device='NPU', runs=50)
                    ov_npu_lat, ov_npu_thr = round(m['lat_ms_mean'], 3), round(m['thr_ips'], 2)
            except Exception as ex:
                logger.warning('OV post-fit failed: %s', ex)
        # Registro parcial (el servidor añade t_agg)
        rec = {
            'ts': dt.datetime.now().isoformat(timespec='seconds'),
            'round': int(config.get('current_round', 0)),
            'cid': self.cid,
            'role': 'client',
            'scenario': str(config.get('scenario', 'NA')),
            'device_tag': self.device_tag,
            't_train_s': round(t_train, 4),
            'energy_train_j': round(energy, 6) if energy >= 0 else 'N/D',
            'bytes_up': int(bytes_up),
            'bytes_down': int(bytes_down),
            'params_bytes': int(self.params_bytes),
            'n_train': int(len(self.train_loader.dataset)),
            't_agg_s': 'N/D',
            'acc_local': round(acc_local, 4),
            'loss_local': round(loss_local, 4),
            'ov_cpu_lat_ms': ov_cpu_lat,
            'ov_cpu_thr_ips': ov_cpu_thr,
            'ov_gpu_lat_ms': ov_gpu_lat,
            'ov_gpu_thr_ips': ov_gpu_thr,
            'ov_npu_lat_ms': ov_npu_lat,
            'ov_npu_thr_ips': ov_npu_thr,
            'torch_ver': VERS['torch'],
            'onnxruntime_ver': VERS['onnxruntime'],
            'openvino_ver': VERS['openvino'],
            'flwr_ver': VERS['flwr'],
            'os': os_str,
            'gpu_name': gpu_name,
            'gpu_driver': gpu_driver,
        }
        df = pd.DataFrame([rec])
        df.to_csv(METRICS_CSV, mode='a', header=False, index=False)
        return params_after, len(self.train_loader.dataset), {}

    def evaluate(self, parameters, config):
        if parameters is not None:
            self.set_parameters(parameters)
        device = self._torch_device()
        res = evaluate_model(self.model, self.test_loader, device, criterion=self.criterion, verbose=False)
        return float(res['loss']), len(self.test_loader.dataset), {"accuracy": float(res['accuracy'])}


In [4]:
# Estrategia: FedAvg con medición de tiempo de agregación y anotación CSV
from flwr.server.strategy import FedAvg
from flwr.common import FitRes

class TimedFedAvg(FedAvg):
    def __init__(self, scenario_key: str, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.scenario_key = scenario_key
        self._round = 0

    def aggregate_fit(self, rnd: int, results: List[Tuple], failures: List):
        t0 = time.perf_counter()
        agg = super().aggregate_fit(rnd, results, failures)
        t_agg = time.perf_counter() - t0
        self._round = rnd
        # Post-proceso: insertar t_agg en filas recientes de este round para el escenario
        try:
            df = pd.read_csv(METRICS_CSV)
            mask = (df['round'] == rnd) & (df['scenario'] == self.scenario_key) & (df['t_agg_s'].astype(str) == 'N/D')
            df.loc[mask, 't_agg_s'] = round(float(t_agg), 6)
            df.to_csv(METRICS_CSV, index=False)
        except Exception as ex:
            logger.warning('CSV agg annotate failed: %s', ex)
        return agg


In [5]:
# Ejecución de escenarios con start_simulation/run_simulation (API compatible)
# Preferimos la API legacy start_simulation (acepta client_fn); si no está, usamos run_simulation equivalente
try:
    from flwr.simulation import start_simulation as _run_sim
except Exception:
    from flwr.simulation import run_simulation as _run_sim

from flwr.server import ServerConfig
from copy import deepcopy
from collections import OrderedDict as _OD

# Para serializar uso de GPU: sólo un cliente CUDA total, el resto CPU. El mapeo se respetará creando clientes fijos.

def make_client_fn(train_loaders: Dict[str, DataLoader], test_loader: DataLoader, cfg: dict,
                   scen_key: str, ov_ir_dir: str | None):
    mapping = client_device_map(cfg['num_clients'], scen_key)

    def client_fn(cid: str):
        dev_tag = mapping.get(cid, 'cpu')
        return CifarClient(cid, train_loaders[cid], test_loader, cfg, dev_tag, SCENARIOS[scen_key]['postfit_ov'], ov_ir_dir)

    return client_fn


def build_partitions(train_ds, cfg, partition: str):
    num_clients = cfg['num_clients']
    if partition == 'iid':
        parts = partition_indices_iid(len(train_ds), num_clients, seed=SEED)
    elif partition.startswith('dirichlet:'):
        alpha = float(partition.split(':',1)[1])
        y = np.array(train_ds.targets)
        parts = partition_indices_dirichlet(y, num_clients, alpha=alpha, seed=SEED)
    else:
        raise ValueError('Unknown partition')
    return parts

# Definimos siempre la evaluación central aquí para evitar NameError

def make_evaluate_fn(test_loader: torch.utils.data.DataLoader, cfg: dict):
    base_model = make_model(cfg['model_name'], cfg['num_classes'])
    def evaluate(server_round: int, parameters: fl.common.NDArrays, config):
        model = deepcopy(base_model)
        state_dict = _OD({k: torch.tensor(v) for k, v in zip(model.state_dict().keys(), parameters)})
        model.load_state_dict(state_dict, strict=True)
        device = torch.device('cpu')
        res = evaluate_model(model, test_loader, device, verbose=False)
        rec = {
            'ts': dt.datetime.now().isoformat(timespec='seconds'),
            'round': int(server_round),
            'cid': 'server',
            'role': 'server',
            'scenario': str(config.get('scenario', 'NA')) if isinstance(config, dict) else 'NA',
            'device_tag': 'cpu',
            't_train_s': 'N/D',
            'energy_train_j': 'N/D',
            'bytes_up': 'N/D',
            'bytes_down': 'N/D',
            'params_bytes': count_params(model) * 4,
            'n_train': 'N/D',
            't_agg_s': 'N/D',
            'acc_local': round(float(res['accuracy']), 4),
            'loss_local': round(float(res['loss']), 4),
            'ov_cpu_lat_ms': 'N/D', 'ov_cpu_thr_ips': 'N/D',
            'ov_gpu_lat_ms': 'N/D', 'ov_gpu_thr_ips': 'N/D',
            'ov_npu_lat_ms': 'N/D', 'ov_npu_thr_ips': 'N/D',
            'torch_ver': VERS['torch'], 'onnxruntime_ver': VERS['onnxruntime'], 'openvino_ver': VERS['openvino'], 'flwr_ver': VERS['flwr'],
            'os': os_str, 'gpu_name': gpu_name, 'gpu_driver': gpu_driver,
        }
        try:
            pd.DataFrame([rec]).to_csv(METRICS_CSV, mode='a', header=False, index=False)
        except Exception as ex:
            logger.warning('Server CSV append failed: %s', ex)
        return float(res['loss']), {"accuracy": float(res['accuracy'])}
    return evaluate


def run_scenario(scen_key: str, cfg: dict):
    print(f'\n=== Escenario {scen_key}: {SCENARIOS[scen_key]["desc"]} ===')
    train_ds, test_ds = load_data(cfg)
    parts = build_partitions(train_ds, cfg, SCENARIOS[scen_key]['partition'])
    train_loaders = make_client_loaders(train_ds, parts, cfg['batch_size'], cfg['num_workers'])
    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=cfg['batch_size'], shuffle=False, num_workers=cfg['num_workers'])
    ov_ir_dir = str(Path('..').resolve().joinpath('models_saved', 'openvino_ir'))

    # Estrategia con evaluate_fn y callbacks de config por ronda
    strategy = TimedFedAvg(
        scenario_key=scen_key,
        fraction_fit=1.0,
        fraction_evaluate=0.0,
        min_fit_clients=cfg['num_clients'],
        min_available_clients=cfg['num_clients'],
        evaluate_fn=make_evaluate_fn(test_loader, cfg),
        on_fit_config_fn=lambda rnd: {'current_round': rnd, 'scenario': scen_key},
        on_evaluate_config_fn=lambda rnd: {'current_round': rnd, 'scenario': scen_key},
    )

    # Configuración de simulación (legacy ServerConfig)
    sim_cfg = ServerConfig(num_rounds=cfg['rounds'])

    # Limitar threads BLAS
    os.environ['OMP_NUM_THREADS'] = str(cfg['cpu_threads_per_client'])
    os.environ['MKL_NUM_THREADS'] = str(cfg['cpu_threads_per_client'])

    # Ejecutar
    hist = _run_sim(
        client_fn=make_client_fn(train_loaders, test_loader, cfg, scen_key, ov_ir_dir),
        num_clients=cfg['num_clients'],
        config=sim_cfg,
        strategy=strategy,
        client_resources={'num_cpus': cfg['cpu_threads_per_client'], 'num_gpus': 0.0},
    )
    return hist

for key in ['A_cpu_only', 'B_cpu_cuda', 'C_cpu_cuda_postfit_ov', 'D_noniid_dirichlet']:
    run_scenario(key, CFG)

print('Listo. CSV generado en:', METRICS_CSV)


INFO:utils.data_utils:Dataset CIFAR-10: using cached data at C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\data



=== Escenario A_cpu_only: Todos en CPU (PyTorch). ===


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
2025-08-15 15:39:22,561	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initiali


=== Escenario B_cpu_cuda: 1 cliente en CUDA, resto CPU. NVML energía solo CUDA. ===


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
[36m(ClientAppActor pid=43296)[0m 2025-08-15 15:42:10 | INFO | train_utils | Eval | Loss: 1.5286 | Acc: 0.4463 | Time: 16.4s | Avg/sample: 1.64ms[32m [repeated 3x across cluster][0m
2025-08-15 15:42:22,218	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 22.0, 'memory': 8067133440.0, 'node:127.0.0.1': 1.0, 'object_store_memory': 4033566720.0, 'GPU': 1.0}
[92mINFO [0m:      


=== Escenario C_cpu_cuda_postfit_ov: Como B + post-fit inferencia OV (CPU/GPU/NPU). ===


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
[36m(ClientAppActor pid=37820)[0m 2025-08-15 15:45:01 | INFO | train_utils | Eval | Loss: 1.5025 | Acc: 0.4463 | Time: 13.8s | Avg/sample: 1.38ms
2025-08-15 15:45:12,705	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'accelerator_type:G': 1.0, 'node:__internal_head__': 1.0, 'CPU': 22.0, 'object_store_memory': 4031931187.0, 'node:127.0.0.1': 1.0, 'memory': 8063862375.0, 'GPU': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower V


=== Escenario D_noniid_dirichlet: No-IID Dirichlet α=0.1 + B. ===


	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
[36m(ClientAppActor pid=16112)[0m 2025-08-15 15:48:01 | INFO | infer_openvino | OV metrics: {'lat_ms_mean': 0.6697720009833574, 'lat_ms_p95': 0.7501549887820147, 'thr_ips': 1493.0453923600908, 'acc': 0.0}[32m [repeated 6x across cluster][0m
[36m(ClientAppActor pid=16112)[0m 2025-08-15 15:48:01 | INFO | infer_openvino | Reading IR: C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\models_saved\openvino_ir\cnn_cifar10.xml[32m [repeated 6x across cluster][0m
[36m(ClientAppActor

Listo. CSV generado en: C:\Users\padul\OneDrive\Universidad\Doctorado\Desarrollo\federated-lab-multihw\metrics\03_flower_rounds.csv


In [6]:
# Vista previa del CSV (si existe)
import pandas as _pd
if os.path.exists(METRICS_CSV):
    display(_pd.read_csv(METRICS_CSV).head())

Unnamed: 0,ts,round,cid,role,scenario,device_tag,t_train_s,energy_train_j,bytes_up,bytes_down,...,ov_gpu_thr_ips,ov_npu_lat_ms,ov_npu_thr_ips,torch_ver,onnxruntime_ver,openvino_ver,flwr_ver,os,gpu_name,gpu_driver
0,2025-08-15T15:39:43,0,server,server,,cpu,N/D,N/D,N/D,N/D,...,N/D,N/D,N/D,2.8.0+cu129,1.22.0,2025.2.0-19140-c01cd93e24d-releases/2025/2,1.20.0,Windows-10-10.0.26100-SP0,NVIDIA GeForce RTX 4080 Laptop GPU,580.97
1,2025-08-15T15:40:22,1,3,client,A_cpu_only,cpu,20.7569,N/D,378312,378312,...,N/D,N/D,N/D,2.8.0+cu129,1.22.0,2025.2.0-19140-c01cd93e24d-releases/2025/2,1.20.0,Windows-10-10.0.26100-SP0,NVIDIA GeForce RTX 4080 Laptop GPU,580.97
2,2025-08-15T15:40:28,1,0,client,A_cpu_only,cpu,23.585,N/D,378312,378312,...,N/D,N/D,N/D,2.8.0+cu129,1.22.0,2025.2.0-19140-c01cd93e24d-releases/2025/2,1.20.0,Windows-10-10.0.26100-SP0,NVIDIA GeForce RTX 4080 Laptop GPU,580.97
3,2025-08-15T15:40:29,1,2,client,A_cpu_only,cpu,24.0552,N/D,378312,378312,...,N/D,N/D,N/D,2.8.0+cu129,1.22.0,2025.2.0-19140-c01cd93e24d-releases/2025/2,1.20.0,Windows-10-10.0.26100-SP0,NVIDIA GeForce RTX 4080 Laptop GPU,580.97
4,2025-08-15T15:40:30,1,1,client,A_cpu_only,cpu,24.3428,N/D,378312,378312,...,N/D,N/D,N/D,2.8.0+cu129,1.22.0,2025.2.0-19140-c01cd93e24d-releases/2025/2,1.20.0,Windows-10-10.0.26100-SP0,NVIDIA GeForce RTX 4080 Laptop GPU,580.97
