In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
import pandas as pd
from collections import defaultdict
import os
import torch
import torch.optim as optim
from tqdm import tqdm


from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights


In [22]:
from AggregationStrategy import sync_aggregate,average_weights,sync_aggregate_norm,sync_aggregate_softmax, fedavgm_update

In [23]:
# def average_weights(weights_list):
#     """Averages model weights provided as a list of get_weights outputs."""
#     avg_weights = []
#     num_models = len(weights_list)
#     for layer_weights in zip(*weights_list):
#         avg_layer = np.mean(np.array(layer_weights), axis=0)
#         avg_weights.append(avg_layer)
#     return avg_weights

In [24]:
df = pd.read_feather("train_final.feather")

In [25]:
df.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,primary_use,air_temperature
7593144,0,0,2016-05-21 01:00:00,72.221012,Education,25.6
7593145,1,0,2016-05-21 01:00:00,39.611586,Education,25.6
7593146,2,0,2016-05-21 01:00:00,1.920567,Education,25.6
7593147,3,0,2016-05-21 01:00:00,111.532464,Education,25.6
7593148,4,0,2016-05-21 01:00:00,456.734799,Education,25.6


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11712248 entries, 7593144 to 20216099
Data columns (total 6 columns):
 #   Column           Dtype         
---  ------           -----         
 0   building_id      int64         
 1   meter            int64         
 2   timestamp        datetime64[ns]
 3   meter_reading    float64       
 4   primary_use      object        
 5   air_temperature  float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(1)
memory usage: 625.5+ MB


In [27]:

def cluster_buildings_top3_primary_use(df: pd.DataFrame) -> dict:
    """
    Clusters building_ids by top 3 primary_use categories.
    Remaining categories are grouped into 'Other'.

    Args:
        df (pd.DataFrame): Input DataFrame with 'building_id' and 'primary_use'.

    Returns:
        dict: {'cluster_0': [...], 'cluster_1': [...], 'cluster_2': [...], 'other': [...]}
    """
    # Step 1: Get top 3 most common primary_use values
    top3_uses = df['primary_use'].value_counts().nlargest(3).index.tolist()

    # Step 2: Create a mapping of cluster name -> list of building_ids
    clusters = defaultdict(list)

    # Drop duplicate building_id-primary_use pairs to avoid counting duplicates
    unique_buildings = df[['building_id', 'primary_use']].drop_duplicates()

    for _, row in unique_buildings.iterrows():
        bldg_id = row['building_id']
        use = row['primary_use']
        if use == top3_uses[0]:
            clusters['cluster_0'].append(bldg_id)
        elif use == top3_uses[1]:
            clusters['cluster_1'].append(bldg_id)
        elif use == top3_uses[2]:
            clusters['cluster_2'].append(bldg_id)
        else:
            clusters['other'].append(bldg_id)

    return dict(clusters)


In [28]:
top3_uses = df['primary_use'].value_counts().nlargest(3).index.tolist()

In [29]:
top3_uses

['Education', 'Office', 'Entertainment/public assembly']

In [30]:
clusters = cluster_buildings_top3_primary_use(df)

for name, ids in clusters.items():
    print(f"{name}: {len(ids)} buildings {ids[:5]}...")


cluster_0: 537 buildings [0, 1, 2, 3, 4]...
other: 428 buildings [6, 12, 27, 33, 34]...
cluster_1: 269 buildings [9, 15, 17, 19, 21]...
cluster_2: 179 buildings [10, 59, 87, 88, 40]...


In [31]:


# Config
# List of models to experiment with
MODEL_NAMES = ["lstm", "gru", "moe_lstm", "moe_gru"]

# Config
NUM_CLIENTS = 1410
CLIENT_FRAC = 0.15
NUM_ROUNDS = 50
LOCAL_EPOCHS = 5
LR = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_FILE ="train_final.feather" # "meter_0_data_cleaned.feather"


### Naive Model :
Returns values by lag-24 hr (Acts as Lower Bound)

### Global Models
All data, no Fedeerated Aggregation

In [15]:
for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)
    global_model = model_fn(model_name).to(DEVICE)

    sampled_clients = list(range(NUM_CLIENTS))
    

    for cid in tqdm(sampled_clients, desc="Training clients"):
        
        train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

        updated_weights, fin_loss = train_model(
            global_model, train_loader,
            device=DEVICE,
            learning_rate=LR,
            loss_fn=None,
            optimizer_class=optim.Adam,
            epochs=LOCAL_EPOCHS
        )
    checkpoint_path = os.path.join(model_dir, f"{model_name}_global_model.pt")
    torch.save(global_model.state_dict(), checkpoint_path)
    print(f"Saved global model to {checkpoint_path}")

Starting experiment with model: lstm


Training clients: 100%|██████████| 1410/1410 [10:34<00:00,  2.22it/s]


Saved global model to results/lstm/lstm_global_model.pt
Starting experiment with model: gru


Training clients: 100%|██████████| 1410/1410 [10:28<00:00,  2.24it/s]


Saved global model to results/gru/gru_global_model.pt
Starting experiment with model: moe_lstm


Training clients: 100%|██████████| 1410/1410 [12:42<00:00,  1.85it/s]


Saved global model to results/moe_lstm/moe_lstm_global_model.pt
Starting experiment with model: moe_gru


Training clients: 100%|██████████| 1410/1410 [12:38<00:00,  1.86it/s]

Saved global model to results/moe_gru/moe_gru_global_model.pt





## Federated Learning Without Clustering

### FedAvg

In [None]:

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, fin_loss = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

        # Federated averaging
        global_weights = average_weights(local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedAvg_lr.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: lstm
Round 1/50


Training clients:  24%|██▎       | 50/211 [00:22<01:12,  2.22it/s]

### FedAvg-diff

In [32]:
import numpy as np

# Initialize difficulty scores and sampling probabilities
difficulty_scores = np.ones(NUM_CLIENTS)
sampling_probs = np.ones(NUM_CLIENTS) / NUM_CLIENTS  # Start uniform

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        
        # Sample clients using difficulty-aware probability
        sampled_clients = np.random.choice(
            NUM_CLIENTS,
            size=int(CLIENT_FRAC * NUM_CLIENTS),
            replace=False,
            p=sampling_probs
        ).tolist()

        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, loss_history = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

            # === Update difficulty score ===
            final_loss = loss_history[-1]  # Or use a more complex difficulty tracker
            # Optionally apply EMA smoothing
            alpha = 0.9
            difficulty_scores[cid] = alpha * difficulty_scores[cid] + (1 - alpha) * final_loss

        # Update sampling probabilities: lower prob for higher difficulty
        inv_difficulty = 1.0 / (np.array(difficulty_scores) + 1e-8)
        sampling_probs = inv_difficulty / inv_difficulty.sum()

        # Federated averaging
        global_weights = average_weights(local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedAvg_diffsample.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: lstm
Round 1/50


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.22it/s]


Saved global model to results/lstm/lstm_round_1_fedAvg_diffsample.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.20it/s]


Saved global model to results/lstm/lstm_round_2_fedAvg_diffsample.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.20it/s]


Saved global model to results/lstm/lstm_round_3_fedAvg_diffsample.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.19it/s]


Saved global model to results/lstm/lstm_round_4_fedAvg_diffsample.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/lstm/lstm_round_5_fedAvg_diffsample.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_6_fedAvg_diffsample.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_7_fedAvg_diffsample.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_8_fedAvg_diffsample.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_9_fedAvg_diffsample.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_10_fedAvg_diffsample.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_11_fedAvg_diffsample.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_12_fedAvg_diffsample.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_13_fedAvg_diffsample.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_14_fedAvg_diffsample.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_15_fedAvg_diffsample.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_16_fedAvg_diffsample.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_17_fedAvg_diffsample.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_18_fedAvg_diffsample.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_19_fedAvg_diffsample.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_20_fedAvg_diffsample.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_21_fedAvg_diffsample.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_22_fedAvg_diffsample.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  1.99it/s]


Saved global model to results/lstm/lstm_round_23_fedAvg_diffsample.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_24_fedAvg_diffsample.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_25_fedAvg_diffsample.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_26_fedAvg_diffsample.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_27_fedAvg_diffsample.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_28_fedAvg_diffsample.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_29_fedAvg_diffsample.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_30_fedAvg_diffsample.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/lstm/lstm_round_31_fedAvg_diffsample.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_32_fedAvg_diffsample.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_33_fedAvg_diffsample.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_34_fedAvg_diffsample.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_35_fedAvg_diffsample.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_36_fedAvg_diffsample.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_37_fedAvg_diffsample.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/lstm/lstm_round_38_fedAvg_diffsample.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_39_fedAvg_diffsample.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_40_fedAvg_diffsample.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_41_fedAvg_diffsample.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_42_fedAvg_diffsample.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_43_fedAvg_diffsample.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_44_fedAvg_diffsample.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_45_fedAvg_diffsample.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_46_fedAvg_diffsample.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_47_fedAvg_diffsample.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_48_fedAvg_diffsample.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_49_fedAvg_diffsample.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/lstm/lstm_round_50_fedAvg_diffsample.pt
Starting experiment with model: gru
Round 1/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_1_fedAvg_diffsample.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_2_fedAvg_diffsample.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_3_fedAvg_diffsample.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_4_fedAvg_diffsample.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_5_fedAvg_diffsample.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/gru/gru_round_6_fedAvg_diffsample.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_7_fedAvg_diffsample.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_8_fedAvg_diffsample.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_9_fedAvg_diffsample.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_10_fedAvg_diffsample.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_11_fedAvg_diffsample.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/gru/gru_round_12_fedAvg_diffsample.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_13_fedAvg_diffsample.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_14_fedAvg_diffsample.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_15_fedAvg_diffsample.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_16_fedAvg_diffsample.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/gru/gru_round_17_fedAvg_diffsample.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_18_fedAvg_diffsample.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_19_fedAvg_diffsample.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/gru/gru_round_20_fedAvg_diffsample.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_21_fedAvg_diffsample.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_22_fedAvg_diffsample.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [01:46<00:00,  1.98it/s]


Saved global model to results/gru/gru_round_23_fedAvg_diffsample.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/gru/gru_round_24_fedAvg_diffsample.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_25_fedAvg_diffsample.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [01:46<00:00,  1.99it/s]


Saved global model to results/gru/gru_round_26_fedAvg_diffsample.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/gru/gru_round_27_fedAvg_diffsample.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_28_fedAvg_diffsample.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/gru/gru_round_29_fedAvg_diffsample.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_30_fedAvg_diffsample.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_31_fedAvg_diffsample.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_32_fedAvg_diffsample.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_33_fedAvg_diffsample.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_34_fedAvg_diffsample.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/gru/gru_round_35_fedAvg_diffsample.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.03it/s]


Saved global model to results/gru/gru_round_36_fedAvg_diffsample.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  1.99it/s]


Saved global model to results/gru/gru_round_37_fedAvg_diffsample.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_38_fedAvg_diffsample.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_39_fedAvg_diffsample.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.01it/s]


Saved global model to results/gru/gru_round_40_fedAvg_diffsample.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [01:46<00:00,  1.97it/s]


Saved global model to results/gru/gru_round_41_fedAvg_diffsample.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [01:49<00:00,  1.93it/s]


Saved global model to results/gru/gru_round_42_fedAvg_diffsample.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [01:49<00:00,  1.93it/s]


Saved global model to results/gru/gru_round_43_fedAvg_diffsample.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.90it/s]


Saved global model to results/gru/gru_round_44_fedAvg_diffsample.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.91it/s]


Saved global model to results/gru/gru_round_45_fedAvg_diffsample.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [01:49<00:00,  1.92it/s]


Saved global model to results/gru/gru_round_46_fedAvg_diffsample.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.90it/s]


Saved global model to results/gru/gru_round_47_fedAvg_diffsample.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.92it/s]


Saved global model to results/gru/gru_round_48_fedAvg_diffsample.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.91it/s]


Saved global model to results/gru/gru_round_49_fedAvg_diffsample.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_50_fedAvg_diffsample.pt
Starting experiment with model: moe_lstm
Round 1/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_1_fedAvg_diffsample.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_2_fedAvg_diffsample.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_3_fedAvg_diffsample.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_4_fedAvg_diffsample.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [02:00<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_5_fedAvg_diffsample.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_6_fedAvg_diffsample.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [02:00<00:00,  1.75it/s]


Saved global model to results/moe_lstm/moe_lstm_round_7_fedAvg_diffsample.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_8_fedAvg_diffsample.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_9_fedAvg_diffsample.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_10_fedAvg_diffsample.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_11_fedAvg_diffsample.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_12_fedAvg_diffsample.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [02:00<00:00,  1.75it/s]


Saved global model to results/moe_lstm/moe_lstm_round_13_fedAvg_diffsample.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_14_fedAvg_diffsample.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_15_fedAvg_diffsample.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_lstm/moe_lstm_round_16_fedAvg_diffsample.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_17_fedAvg_diffsample.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.71it/s]


Saved global model to results/moe_lstm/moe_lstm_round_18_fedAvg_diffsample.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.72it/s]


Saved global model to results/moe_lstm/moe_lstm_round_19_fedAvg_diffsample.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_20_fedAvg_diffsample.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_21_fedAvg_diffsample.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.72it/s]


Saved global model to results/moe_lstm/moe_lstm_round_22_fedAvg_diffsample.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_23_fedAvg_diffsample.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.73it/s]


Saved global model to results/moe_lstm/moe_lstm_round_24_fedAvg_diffsample.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.70it/s]


Saved global model to results/moe_lstm/moe_lstm_round_25_fedAvg_diffsample.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_26_fedAvg_diffsample.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_27_fedAvg_diffsample.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_28_fedAvg_diffsample.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_29_fedAvg_diffsample.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.69it/s]


Saved global model to results/moe_lstm/moe_lstm_round_30_fedAvg_diffsample.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_31_fedAvg_diffsample.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_32_fedAvg_diffsample.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_33_fedAvg_diffsample.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_34_fedAvg_diffsample.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_35_fedAvg_diffsample.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_36_fedAvg_diffsample.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_37_fedAvg_diffsample.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_38_fedAvg_diffsample.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_39_fedAvg_diffsample.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_40_fedAvg_diffsample.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_41_fedAvg_diffsample.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_42_fedAvg_diffsample.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_43_fedAvg_diffsample.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_44_fedAvg_diffsample.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_lstm/moe_lstm_round_45_fedAvg_diffsample.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_46_fedAvg_diffsample.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_47_fedAvg_diffsample.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_lstm/moe_lstm_round_48_fedAvg_diffsample.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_49_fedAvg_diffsample.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_50_fedAvg_diffsample.pt
Starting experiment with model: moe_gru
Round 1/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_1_fedAvg_diffsample.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_2_fedAvg_diffsample.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_3_fedAvg_diffsample.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_4_fedAvg_diffsample.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_5_fedAvg_diffsample.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_6_fedAvg_diffsample.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_7_fedAvg_diffsample.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_8_fedAvg_diffsample.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_9_fedAvg_diffsample.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_10_fedAvg_diffsample.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_11_fedAvg_diffsample.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_12_fedAvg_diffsample.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_13_fedAvg_diffsample.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_14_fedAvg_diffsample.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_15_fedAvg_diffsample.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_gru/moe_gru_round_16_fedAvg_diffsample.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_17_fedAvg_diffsample.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_18_fedAvg_diffsample.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_19_fedAvg_diffsample.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_gru/moe_gru_round_20_fedAvg_diffsample.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_21_fedAvg_diffsample.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_22_fedAvg_diffsample.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_23_fedAvg_diffsample.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_24_fedAvg_diffsample.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_25_fedAvg_diffsample.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_26_fedAvg_diffsample.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_27_fedAvg_diffsample.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_28_fedAvg_diffsample.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_29_fedAvg_diffsample.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_30_fedAvg_diffsample.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_31_fedAvg_diffsample.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/moe_gru/moe_gru_round_32_fedAvg_diffsample.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/moe_gru/moe_gru_round_33_fedAvg_diffsample.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_34_fedAvg_diffsample.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_gru/moe_gru_round_35_fedAvg_diffsample.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_gru/moe_gru_round_36_fedAvg_diffsample.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_37_fedAvg_diffsample.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_38_fedAvg_diffsample.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_gru/moe_gru_round_39_fedAvg_diffsample.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/moe_gru/moe_gru_round_40_fedAvg_diffsample.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_41_fedAvg_diffsample.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_42_fedAvg_diffsample.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_gru/moe_gru_round_43_fedAvg_diffsample.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.72it/s]


Saved global model to results/moe_gru/moe_gru_round_44_fedAvg_diffsample.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_45_fedAvg_diffsample.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_46_fedAvg_diffsample.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_47_fedAvg_diffsample.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/moe_gru/moe_gru_round_48_fedAvg_diffsample.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/moe_gru/moe_gru_round_49_fedAvg_diffsample.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.70it/s]

Saved global model to results/moe_gru/moe_gru_round_50_fedAvg_diffsample.pt





### FedAvgM 

In [None]:

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    velocity = [np.zeros_like(w) for w in global_weights]

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, fin_loss = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

        # Federated averaging
        global_weights, velocity = fedavgm_update(global_weights,local_weights,velocity)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedAvgM.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: lstm
Round 1/10


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.21it/s]


Saved global model to results/lstm/lstm_round_1_fedAvgM.pt
Round 2/10


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.19it/s]


Saved global model to results/lstm/lstm_round_2_fedAvgM.pt
Round 3/10


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.20it/s]


Saved global model to results/lstm/lstm_round_3_fedAvgM.pt
Round 4/10


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.21it/s]


Saved global model to results/lstm/lstm_round_4_fedAvgM.pt
Round 5/10


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.18it/s]


Saved global model to results/lstm/lstm_round_5_fedAvgM.pt
Round 6/10


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.19it/s]


Saved global model to results/lstm/lstm_round_6_fedAvgM.pt
Round 7/10


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.20it/s]


Saved global model to results/lstm/lstm_round_7_fedAvgM.pt
Round 8/10


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.20it/s]


Saved global model to results/lstm/lstm_round_8_fedAvgM.pt
Round 9/10


Training clients: 100%|██████████| 211/211 [01:35<00:00,  2.22it/s]


Saved global model to results/lstm/lstm_round_9_fedAvgM.pt
Round 10/10


Training clients: 100%|██████████| 211/211 [01:36<00:00,  2.19it/s]


Saved global model to results/lstm/lstm_round_10_fedAvgM.pt
Starting experiment with model: gru
Round 1/10


Training clients: 100%|██████████| 211/211 [01:34<00:00,  2.22it/s]


Saved global model to results/gru/gru_round_1_fedAvgM.pt
Round 2/10


Training clients:  79%|███████▊  | 166/211 [01:14<00:21,  2.09it/s]

### FedProx

In [13]:
def train_model_fedprox(
    model,
    train_loader,
    global_weights,
    device,
    learning_rate,
    loss_fn,
    optimizer_class,
    epochs,
    mu=0.01
):
    model.train()
    model.to(device)
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    loss_fn = loss_fn or torch.nn.MSELoss()

    original_weights = [w.clone().detach().to(device) for w in model.parameters()]
    loss_history = []

    for _ in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            x, y = batch[0].to(device), batch[1].to(device)

            y_pred = model(x)
            loss = loss_fn(y_pred, y)

            # FedProx proximal term
            prox_term = 0.0
            for param, orig in zip(model.parameters(), global_weights):
                prox_term += ((param - orig.to(device)) ** 2).sum()
            prox_term *= (mu / 2)

            total_loss = loss + prox_term
            total_loss.backward()
            optimizer.step()

            loss_history.append(total_loss.item())

    return get_weights(model), loss_history


In [16]:
def train_model_fedprox(
    model,
    train_loader,
    global_weights,
    device=None,
    learning_rate=0.001,
    loss_fn=None,
    optimizer_class=optim.Adam,
    epochs=50,
    mu=0.01
):
    """Train model using FedProx loss (with proximal term)."""
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    loss_fn = loss_fn or nn.MSELoss()
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    loss_history = []

    # Clone global weights as tensors on the correct device
    # prox_ref = [torch.tensor(w, dtype=torch.float32, device=device) for w in global_weights]
    prox_ref = [w.detach().clone().to(device) for w in global_weights]

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            if y_batch.dim() == 3 and y_batch.shape[-1] == 1:
                y_batch = y_batch.squeeze(-1)

            optimizer.zero_grad()
            output = model(X_batch)
            task_loss = loss_fn(output, y_batch)

            # Compute proximal term
            prox_term = 0.0
            for p, w_ref in zip(model.parameters(), prox_ref):
                prox_term += torch.norm(p - w_ref) ** 2
            prox_term = (mu / 2) * prox_term

            total_loss = task_loss + prox_term
            total_loss.backward()
            optimizer.step()

            epoch_loss += total_loss.item()

        loss_history.append(epoch_loss / len(train_loader))

    return get_weights(model), loss_history

In [17]:
for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, loss_history = train_model_fedprox(
                model=local_model,
                train_loader=train_loader,
                global_weights=[torch.tensor(w).to(DEVICE) for w in global_weights],
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS,
                mu=0.01  # optional, default used if not passed
            )
            local_weights.append(updated_weights)

        # FedProx uses FedAvg-style aggregation
        global_weights = average_weights(local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedProx.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: lstm
Round 1/10


Training clients: 100%|██████████| 211/211 [02:46<00:00,  1.27it/s]


Saved global model to results/lstm/lstm_round_1_fedProx.pt
Round 2/10


Training clients: 100%|██████████| 211/211 [02:51<00:00,  1.23it/s]


Saved global model to results/lstm/lstm_round_2_fedProx.pt
Round 3/10


Training clients: 100%|██████████| 211/211 [02:52<00:00,  1.22it/s]


Saved global model to results/lstm/lstm_round_3_fedProx.pt
Round 4/10


Training clients: 100%|██████████| 211/211 [02:49<00:00,  1.24it/s]


Saved global model to results/lstm/lstm_round_4_fedProx.pt
Round 5/10


Training clients: 100%|██████████| 211/211 [02:48<00:00,  1.25it/s]


Saved global model to results/lstm/lstm_round_5_fedProx.pt
Round 6/10


Training clients: 100%|██████████| 211/211 [02:51<00:00,  1.23it/s]


Saved global model to results/lstm/lstm_round_6_fedProx.pt
Round 7/10


Training clients: 100%|██████████| 211/211 [02:52<00:00,  1.22it/s]


Saved global model to results/lstm/lstm_round_7_fedProx.pt
Round 8/10


Training clients: 100%|██████████| 211/211 [02:49<00:00,  1.25it/s]


Saved global model to results/lstm/lstm_round_8_fedProx.pt
Round 9/10


Training clients: 100%|██████████| 211/211 [02:49<00:00,  1.25it/s]


Saved global model to results/lstm/lstm_round_9_fedProx.pt
Round 10/10


Training clients: 100%|██████████| 211/211 [02:56<00:00,  1.19it/s]


Saved global model to results/lstm/lstm_round_10_fedProx.pt
Starting experiment with model: gru
Round 1/10


Training clients: 100%|██████████| 211/211 [02:52<00:00,  1.22it/s]


Saved global model to results/gru/gru_round_1_fedProx.pt
Round 2/10


Training clients: 100%|██████████| 211/211 [02:52<00:00,  1.22it/s]


Saved global model to results/gru/gru_round_2_fedProx.pt
Round 3/10


Training clients: 100%|██████████| 211/211 [02:52<00:00,  1.22it/s]


Saved global model to results/gru/gru_round_3_fedProx.pt
Round 4/10


Training clients: 100%|██████████| 211/211 [02:49<00:00,  1.24it/s]


Saved global model to results/gru/gru_round_4_fedProx.pt
Round 5/10


Training clients: 100%|██████████| 211/211 [02:53<00:00,  1.21it/s]


Saved global model to results/gru/gru_round_5_fedProx.pt
Round 6/10


Training clients: 100%|██████████| 211/211 [02:50<00:00,  1.24it/s]


Saved global model to results/gru/gru_round_6_fedProx.pt
Round 7/10


Training clients: 100%|██████████| 211/211 [02:54<00:00,  1.21it/s]


Saved global model to results/gru/gru_round_7_fedProx.pt
Round 8/10


Training clients: 100%|██████████| 211/211 [02:48<00:00,  1.25it/s]


Saved global model to results/gru/gru_round_8_fedProx.pt
Round 9/10


Training clients: 100%|██████████| 211/211 [02:50<00:00,  1.23it/s]


Saved global model to results/gru/gru_round_9_fedProx.pt
Round 10/10


Training clients: 100%|██████████| 211/211 [02:55<00:00,  1.20it/s]


Saved global model to results/gru/gru_round_10_fedProx.pt
Starting experiment with model: moe_lstm
Round 1/10


Training clients: 100%|██████████| 211/211 [03:32<00:00,  1.01s/it]


Saved global model to results/moe_lstm/moe_lstm_round_1_fedProx.pt
Round 2/10


Training clients: 100%|██████████| 211/211 [03:39<00:00,  1.04s/it]


Saved global model to results/moe_lstm/moe_lstm_round_2_fedProx.pt
Round 3/10


Training clients: 100%|██████████| 211/211 [03:34<00:00,  1.01s/it]


Saved global model to results/moe_lstm/moe_lstm_round_3_fedProx.pt
Round 4/10


Training clients: 100%|██████████| 211/211 [03:33<00:00,  1.01s/it]


Saved global model to results/moe_lstm/moe_lstm_round_4_fedProx.pt
Round 5/10


Training clients: 100%|██████████| 211/211 [03:36<00:00,  1.03s/it]


Saved global model to results/moe_lstm/moe_lstm_round_5_fedProx.pt
Round 6/10


Training clients: 100%|██████████| 211/211 [03:38<00:00,  1.04s/it]


Saved global model to results/moe_lstm/moe_lstm_round_6_fedProx.pt
Round 7/10


Training clients: 100%|██████████| 211/211 [03:32<00:00,  1.01s/it]


Saved global model to results/moe_lstm/moe_lstm_round_7_fedProx.pt
Round 8/10


Training clients: 100%|██████████| 211/211 [03:38<00:00,  1.04s/it]


Saved global model to results/moe_lstm/moe_lstm_round_8_fedProx.pt
Round 9/10


Training clients: 100%|██████████| 211/211 [03:32<00:00,  1.01s/it]


Saved global model to results/moe_lstm/moe_lstm_round_9_fedProx.pt
Round 10/10


Training clients: 100%|██████████| 211/211 [03:23<00:00,  1.04it/s]


Saved global model to results/moe_lstm/moe_lstm_round_10_fedProx.pt
Starting experiment with model: moe_gru
Round 1/10


Training clients: 100%|██████████| 211/211 [03:26<00:00,  1.02it/s]


Saved global model to results/moe_gru/moe_gru_round_1_fedProx.pt
Round 2/10


Training clients: 100%|██████████| 211/211 [03:29<00:00,  1.01it/s]


Saved global model to results/moe_gru/moe_gru_round_2_fedProx.pt
Round 3/10


Training clients: 100%|██████████| 211/211 [03:32<00:00,  1.01s/it]


Saved global model to results/moe_gru/moe_gru_round_3_fedProx.pt
Round 4/10


Training clients: 100%|██████████| 211/211 [03:30<00:00,  1.00it/s]


Saved global model to results/moe_gru/moe_gru_round_4_fedProx.pt
Round 5/10


Training clients: 100%|██████████| 211/211 [03:27<00:00,  1.01it/s]


Saved global model to results/moe_gru/moe_gru_round_5_fedProx.pt
Round 6/10


Training clients: 100%|██████████| 211/211 [03:35<00:00,  1.02s/it]


Saved global model to results/moe_gru/moe_gru_round_6_fedProx.pt
Round 7/10


Training clients: 100%|██████████| 211/211 [03:28<00:00,  1.01it/s]


Saved global model to results/moe_gru/moe_gru_round_7_fedProx.pt
Round 8/10


Training clients: 100%|██████████| 211/211 [03:30<00:00,  1.00it/s]


Saved global model to results/moe_gru/moe_gru_round_8_fedProx.pt
Round 9/10


Training clients: 100%|██████████| 211/211 [03:30<00:00,  1.00it/s]


Saved global model to results/moe_gru/moe_gru_round_9_fedProx.pt
Round 10/10


Training clients: 100%|██████████| 211/211 [03:32<00:00,  1.01s/it]

Saved global model to results/moe_gru/moe_gru_round_10_fedProx.pt





### Kuramoto FedAvg

In [None]:

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, fin_loss = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

        # Federated averaging
        global_weights = sync_aggregate(global_weights,local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_kr.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


### Kuramoto-Norm FedAvg

In [None]:

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, fin_loss = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

        # Federated averaging
        global_weights = sync_aggregate_norm(global_weights,local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_kr_norm.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


### Kuramoto-Softmax

In [None]:

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, fin_loss = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

        # Federated averaging
        global_weights = sync_aggregate_softmax(global_weights,local_weights)
        set_weights(global_model, global_weights)

        # Save model checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_kr_sft.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


### DiffAware FedAvg

In [12]:
class TimeSeriesDifficultyWeight:
    def __init__(self, num_clients, accumulate_iters=20):
        self.num_clients = num_clients
        self.last_loss = torch.ones(num_clients).float().to(DEVICE)
        self.learn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.unlearn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.ema_difficulty = torch.ones(num_clients).float().to(DEVICE)
        self.accumulate_iters = accumulate_iters

    def update(self, cid: int, loss_history: List[float]) -> float:
        """
        Update difficulty based on loss trend for a client.
        Expects a list of per-epoch losses.
        """
        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32).to(DEVICE)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        learn = torch.where(delta < 0, -delta * ratio, torch.tensor(0.0, device=current_loss.device))
        unlearn = torch.where(delta >= 0, delta * ratio, torch.tensor(0.0, device=current_loss.device))

        # EMA update
        momentum = (self.accumulate_iters - 1) / self.accumulate_iters
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty score
        diff_ratio = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        difficulty = diff_ratio #torch.pow(diff_ratio, 1 / 5)

        # Smooth difficulty over rounds
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty

        self.last_loss[cid] = current_loss
        return self.ema_difficulty[cid].item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        weights = [self.ema_difficulty[cid].item() for cid in client_ids]
        total = sum(weights)
        if total == 0:
            return [1.0 / len(client_ids)] * len(client_ids)
        return [w / total for w in weights]


In [14]:
difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)

for rnd in range(NUM_ROUNDS):
    sampled_clients = random.sample(range(NUM_CLIENTS), int(CLIENT_FRAC * NUM_CLIENTS))
    local_weights = []
    difficulty_scores = []

    for cid in tqdm(sampled_clients):
        model = model_fn(model_name).to(DEVICE)
        set_weights(model, global_weights)
        train_loader, _ = load_energy_data_feather(cid, filepath=DATA_FILE)
        updated_weights, loss_history = train_model(
            model, train_loader,
            device=DEVICE, learning_rate=LR,
            loss_fn=None, optimizer_class=optim.Adam,
            epochs=LOCAL_EPOCHS
        )
        local_weights.append(updated_weights)

        # Update difficulty
        difficulty = difficulty_tracker.update(cid, loss_history)
        difficulty_scores.append(difficulty)

    # Normalize difficulty scores
    normalized_weights = difficulty_tracker.get_normalized_weights(sampled_clients)

    # Difficulty-aware weighted aggregation
    global_weights = average_weights(local_weights, client_weights=normalized_weights)
    set_weights(global_model, global_weights)


  0%|          | 0/211 [00:00<?, ?it/s]


NameError: name 'model_name' is not defined

## Federated Learning With Clustering

In [14]:
import os
import torch
import torch.optim as optim
from tqdm import tqdm
import random
import numpy as np

CLUSTERS = cluster_buildings_top3_primary_use(df)

for model_name in MODEL_NAMES:
    for cluster_name in CLUSTERS:
        os.makedirs(os.path.join("results", model_name, cluster_name), exist_ok=True)


### FedAvg

In [None]:

# Main experiment loop
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    # Initialize per-cluster model weights
    cluster_models = {}
    cluster_weights = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = get_weights(model)

    for rnd in range(NUM_ROUNDS):
        print(f"\Round {rnd+1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            # Sample a fraction of clients from the cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f"Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, fin_loss = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # Aggregate and update cluster model
            updated_cluster_weights = average_weights(local_weights)
            set_weights(cluster_models[cluster_name], updated_cluster_weights)
            cluster_weights[cluster_name] = updated_cluster_weights

            # Save checkpoint
            ckpt_path = os.path.join("results", model_name, cluster_name, f"{model_name}_{cluster_name}_round_{rnd+1}.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


### FedAvgM

In [15]:
import os
import torch
import torch.optim as optim
from tqdm import tqdm

# Main experiment loop
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    # Initialize per-cluster model weights and velocity
    cluster_models = {}
    cluster_weights = {}
    cluster_velocities = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        weights = get_weights(model)
        velocity = [np.zeros_like(w) for w in weights]

        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = weights
        cluster_velocities[cluster_name] = velocity

    for rnd in range(NUM_ROUNDS):
        print(f"\nRound {rnd+1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            # Sample a fraction of clients from the cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f"Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, fin_loss = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # ---- FedAvgM Aggregation ----
            new_weights, new_velocity = fedavgm_update(
                cluster_weights[cluster_name],
                local_weights,
                cluster_velocities[cluster_name]
            )

            # Update model, weights, and velocity
            set_weights(cluster_models[cluster_name], new_weights)
            cluster_weights[cluster_name] = new_weights
            cluster_velocities[cluster_name] = new_velocity

            # Save checkpoint
            ckpt_dir = os.path.join("results", model_name, cluster_name)
            os.makedirs(ckpt_dir, exist_ok=True)
            ckpt_path = os.path.join(ckpt_dir, f"{model_name}_{cluster_name}_round_{rnd+1}_fedAvgM.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


Starting experiments for model: lstm

Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:35<00:00,  2.23it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_1_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/lstm/other/lstm_other_round_1_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.19it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_1_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.22it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_1_fedAvgM.pt

Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_2_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.13it/s]


Saved model: results/lstm/other/lstm_other_round_2_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.13it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_2_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.15it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_2_fedAvgM.pt

Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.19it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_3_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]


Saved model: results/lstm/other/lstm_other_round_3_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_3_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.19it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_3_fedAvgM.pt

Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_4_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:30<00:00,  2.13it/s]


Saved model: results/lstm/other/lstm_other_round_4_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_4_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.21it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_4_fedAvgM.pt

Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.20it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_5_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.17it/s]


Saved model: results/lstm/other/lstm_other_round_5_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.14it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_5_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.17it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_5_fedAvgM.pt

Round 6/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:37<00:00,  2.15it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_6_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.15it/s]


Saved model: results/lstm/other/lstm_other_round_6_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.17it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_6_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.10it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_6_fedAvgM.pt

Round 7/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:37<00:00,  2.15it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_7_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:30<00:00,  2.13it/s]


Saved model: results/lstm/other/lstm_other_round_7_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.14it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_7_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.22it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_7_fedAvgM.pt

Round 8/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:37<00:00,  2.14it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_8_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.20it/s]


Saved model: results/lstm/other/lstm_other_round_8_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_8_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.04it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_8_fedAvgM.pt

Round 9/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:37<00:00,  2.11it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_9_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.15it/s]


Saved model: results/lstm/other/lstm_other_round_9_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_9_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.12it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_9_fedAvgM.pt

Round 10/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_10_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]


Saved model: results/lstm/other/lstm_other_round_10_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_10_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.17it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_10_fedAvgM.pt
Starting experiments for model: gru

Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.19it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_1_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]


Saved model: results/gru/other/gru_other_round_1_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_1_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.24it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_1_fedAvgM.pt

Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_2_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]


Saved model: results/gru/other/gru_other_round_2_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_2_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.18it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_2_fedAvgM.pt

Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.20it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_3_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.25it/s]


Saved model: results/gru/other/gru_other_round_3_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_3_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.20it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_3_fedAvgM.pt

Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_4_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.24it/s]


Saved model: results/gru/other/gru_other_round_4_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_4_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.17it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_4_fedAvgM.pt

Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.20it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_5_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.24it/s]


Saved model: results/gru/other/gru_other_round_5_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_5_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.13it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_5_fedAvgM.pt

Round 6/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.20it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_6_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/gru/other/gru_other_round_6_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.19it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_6_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.19it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_6_fedAvgM.pt

Round 7/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_7_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.21it/s]


Saved model: results/gru/other/gru_other_round_7_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_7_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.13it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_7_fedAvgM.pt

Round 8/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.21it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_8_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/gru/other/gru_other_round_8_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_8_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.20it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_8_fedAvgM.pt

Round 9/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_9_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/gru/other/gru_other_round_9_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.26it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_9_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.17it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_9_fedAvgM.pt

Round 10/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_10_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/gru/other/gru_other_round_10_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_10_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.17it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_10_fedAvgM.pt
Starting experiments for model: moe_lstm

Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_1_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:32<00:00,  1.96it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_1_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.93it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_1_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_1_fedAvgM.pt

Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_2_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.90it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_2_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_2_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_2_fedAvgM.pt

Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_3_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:34<00:00,  1.88it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_3_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_3_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.86it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_3_fedAvgM.pt

Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_4_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_4_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_4_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_4_fedAvgM.pt

Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.93it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_5_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_5_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_5_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_5_fedAvgM.pt

Round 6/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_6_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.90it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_6_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_6_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.86it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_6_fedAvgM.pt

Round 7/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_7_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.92it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_7_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_7_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_7_fedAvgM.pt

Round 8/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_8_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_8_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_8_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_8_fedAvgM.pt

Round 9/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.87it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_9_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.92it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_9_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_9_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.94it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_9_fedAvgM.pt

Round 10/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_10_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.90it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_10_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_10_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.87it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_10_fedAvgM.pt
Starting experiments for model: moe_gru

Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_1_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_1_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_1_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_1_fedAvgM.pt

Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_2_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_2_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_2_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.91it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_2_fedAvgM.pt

Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_3_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_3_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_3_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_3_fedAvgM.pt

Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_4_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_4_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.87it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_4_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.95it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_4_fedAvgM.pt

Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_5_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_5_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.96it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_5_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_5_fedAvgM.pt

Round 6/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_6_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_6_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.91it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_6_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_6_fedAvgM.pt

Round 7/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.87it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_7_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:32<00:00,  1.94it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_7_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_7_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.86it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_7_fedAvgM.pt

Round 8/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_8_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_8_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_8_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.91it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_8_fedAvgM.pt

Round 9/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.92it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_9_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_9_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_9_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.85it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_9_fedAvgM.pt

Round 10/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_10_fedAvgM.pt
 Processing other with 428 clients
Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_10_fedAvgM.pt
 Processing cluster_1 with 269 clients
Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_10_fedAvgM.pt
 Processing cluster_2 with 179 clients
Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.92it/s]

Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_10_fedAvgM.pt





### FedAdam

In [17]:
from AggregationStrategy import fed_adam_update

In [18]:
from tqdm import tqdm
import os
import random
import torch

# Main experiment loop using FedAdam
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    cluster_models = {}
    cluster_weights = {}
    cluster_m_t = {}
    cluster_v_t = {}

    # Initialize model, weights, m_t, v_t per cluster
    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        weights = get_weights(model)
        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = weights

        # Initialize m_t and v_t as zero arrays matching model shape
        cluster_m_t[cluster_name] = [np.zeros_like(w) for w in weights]
        cluster_v_t[cluster_name] = [np.zeros_like(w) for w in weights]

    # Federated training rounds
    for rnd in range(NUM_ROUNDS):
        print(f"\nRound {rnd + 1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f" Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, _ = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # Perform FedAdam update
            updated_weights, new_m_t, new_v_t = fed_adam_update(
                global_weights=cluster_weights[cluster_name],
                local_weights_list=local_weights,
                m_t=cluster_m_t[cluster_name],
                v_t=cluster_v_t[cluster_name],
                eta=0.001,       # or your server LR
                beta1=0.9,
                beta2=0.999,
                eps=1e-8
            )

            # Update weights and moments
            set_weights(cluster_models[cluster_name], updated_weights)
            cluster_weights[cluster_name] = updated_weights
            cluster_m_t[cluster_name] = new_m_t
            cluster_v_t[cluster_name] = new_v_t

            # Save checkpoint
            ckpt_dir = os.path.join("results", model_name, cluster_name)
            os.makedirs(ckpt_dir, exist_ok=True)
            ckpt_path = os.path.join(ckpt_dir, f"{model_name}_{cluster_name}_round_{rnd + 1}_fedAdam.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


Starting experiments for model: lstm

Round 1/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0:   0%|          | 0/80 [00:00<?, ?it/s]

Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.22it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_1_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.23it/s]


Saved model: results/lstm/other/lstm_other_round_1_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_1_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.23it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_1_fedAdam.pt

Round 2/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_2_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.23it/s]


Saved model: results/lstm/other/lstm_other_round_2_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  2.00it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_2_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:15<00:00,  1.63it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_2_fedAdam.pt

Round 3/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:48<00:00,  1.64it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_3_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:30<00:00,  2.08it/s]


Saved model: results/lstm/other/lstm_other_round_3_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_3_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.13it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_3_fedAdam.pt

Round 4/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:37<00:00,  2.15it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_4_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.18it/s]


Saved model: results/lstm/other/lstm_other_round_4_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_4_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.86it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_4_fedAdam.pt

Round 5/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:48<00:00,  1.65it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_5_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:31<00:00,  2.03it/s]


Saved model: results/lstm/other/lstm_other_round_5_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_5_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.22it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_5_fedAdam.pt

Round 6/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:40<00:00,  1.99it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_6_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:38<00:00,  1.65it/s]


Saved model: results/lstm/other/lstm_other_round_6_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.78it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_6_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.10it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_6_fedAdam.pt

Round 7/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:35<00:00,  2.23it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_7_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.17it/s]


Saved model: results/lstm/other/lstm_other_round_7_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_7_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.16it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_7_fedAdam.pt

Round 8/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.19it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_8_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.20it/s]


Saved model: results/lstm/other/lstm_other_round_8_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.19it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_8_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.19it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_8_fedAdam.pt

Round 9/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.19it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_9_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.25it/s]


Saved model: results/lstm/other/lstm_other_round_9_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_9_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.15it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_9_fedAdam.pt

Round 10/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model: results/lstm/cluster_0/lstm_cluster_0_round_10_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.25it/s]


Saved model: results/lstm/other/lstm_other_round_10_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:19<00:00,  2.02it/s]


Saved model: results/lstm/cluster_1/lstm_cluster_1_round_10_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.98it/s]


Saved model: results/lstm/cluster_2/lstm_cluster_2_round_10_fedAdam.pt
Starting experiments for model: gru

Round 1/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:40<00:00,  1.98it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_1_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.19it/s]


Saved model: results/gru/other/gru_other_round_1_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_1_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.25it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_1_fedAdam.pt

Round 2/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.22it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_2_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.23it/s]


Saved model: results/gru/other/gru_other_round_2_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_2_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.22it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_2_fedAdam.pt

Round 3/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:35<00:00,  2.25it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_3_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.24it/s]


Saved model: results/gru/other/gru_other_round_3_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.24it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_3_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.17it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_3_fedAdam.pt

Round 4/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:35<00:00,  2.23it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_4_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.21it/s]


Saved model: results/gru/other/gru_other_round_4_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.22it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_4_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.22it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_4_fedAdam.pt

Round 5/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.22it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_5_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.26it/s]


Saved model: results/gru/other/gru_other_round_5_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:19<00:00,  2.02it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_5_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.05it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_5_fedAdam.pt

Round 6/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:40<00:00,  1.98it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_6_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.18it/s]


Saved model: results/gru/other/gru_other_round_6_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_6_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.20it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_6_fedAdam.pt

Round 7/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.16it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_7_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.21it/s]


Saved model: results/gru/other/gru_other_round_7_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_7_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.19it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_7_fedAdam.pt

Round 8/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.19it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_8_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:29<00:00,  2.20it/s]


Saved model: results/gru/other/gru_other_round_8_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.22it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_8_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.20it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_8_fedAdam.pt

Round 9/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.18it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_9_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.21it/s]


Saved model: results/gru/other/gru_other_round_9_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.24it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_9_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.24it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_9_fedAdam.pt

Round 10/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.22it/s]


Saved model: results/gru/cluster_0/gru_cluster_0_round_10_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]


Saved model: results/gru/other/gru_other_round_10_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s]


Saved model: results/gru/cluster_1/gru_cluster_1_round_10_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.24it/s]


Saved model: results/gru/cluster_2/gru_cluster_2_round_10_fedAdam.pt
Starting experiments for model: moe_lstm

Round 1/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.94it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_1_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.90it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_1_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_1_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_1_fedAdam.pt

Round 2/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:44<00:00,  1.81it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_2_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:46<00:00,  1.38it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_2_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:25<00:00,  1.58it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_2_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_2_fedAdam.pt

Round 3/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_3_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_3_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.86it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_3_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_3_fedAdam.pt

Round 4/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_4_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:32<00:00,  1.96it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_4_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_4_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.94it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_4_fedAdam.pt

Round 5/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_5_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_5_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_5_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.95it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_5_fedAdam.pt

Round 6/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.88it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_6_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_6_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_6_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.88it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_6_fedAdam.pt

Round 7/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_7_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_7_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_7_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.90it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_7_fedAdam.pt

Round 8/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.93it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_8_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.94it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_8_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_8_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.92it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_8_fedAdam.pt

Round 9/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.91it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_9_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:39<00:00,  1.60it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_9_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_9_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:18<00:00,  1.42it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_9_fedAdam.pt

Round 10/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]


Saved model: results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_10_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:45<00:00,  1.42it/s]


Saved model: results/moe_lstm/other/moe_lstm_other_round_10_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:29<00:00,  1.36it/s]


Saved model: results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_10_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:18<00:00,  1.43it/s]


Saved model: results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_10_fedAdam.pt
Starting experiments for model: moe_gru

Round 1/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_1_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:36<00:00,  1.73it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_1_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:23<00:00,  1.70it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_1_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:15<00:00,  1.67it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_1_fedAdam.pt

Round 2/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.74it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_2_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.89it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_2_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_2_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_2_fedAdam.pt

Round 3/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_3_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.89it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_3_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_3_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_3_fedAdam.pt

Round 4/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.92it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_4_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:32<00:00,  1.94it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_4_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_4_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.86it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_4_fedAdam.pt

Round 5/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_5_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.91it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_5_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_5_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_5_fedAdam.pt

Round 6/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.94it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_6_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.93it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_6_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.89it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_6_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.91it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_6_fedAdam.pt

Round 7/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_7_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.92it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_7_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.85it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_7_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.84it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_7_fedAdam.pt

Round 8/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:43<00:00,  1.86it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_8_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:34<00:00,  1.83it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_8_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.85it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_8_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.87it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_8_fedAdam.pt

Round 9/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.88it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_9_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_9_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:24<00:00,  1.62it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_9_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.87it/s]


Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_9_fedAdam.pt

Round 10/10
 Processing cluster_0 with 537 clients
 Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:42<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_0/moe_gru_cluster_0_round_10_fedAdam.pt
 Processing other with 428 clients
 Sampling 64 Clients


Training other: 100%|██████████| 64/64 [00:33<00:00,  1.89it/s]


Saved model: results/moe_gru/other/moe_gru_other_round_10_fedAdam.pt
 Processing cluster_1 with 269 clients
 Sampling 40 Clients


Training cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]


Saved model: results/moe_gru/cluster_1/moe_gru_cluster_1_round_10_fedAdam.pt
 Processing cluster_2 with 179 clients
 Sampling 26 Clients


Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.88it/s]

Saved model: results/moe_gru/cluster_2/moe_gru_cluster_2_round_10_fedAdam.pt





### Kuramoto FedAvg

In [None]:

# Main experiment loop
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    # Initialize per-cluster model weights
    cluster_models = {}
    cluster_weights = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = get_weights(model)

    for rnd in range(NUM_ROUNDS):
        print(f"\Round {rnd+1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            # Sample a fraction of clients from the cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f"Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, fin_loss = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # Aggregate and update cluster model
            updated_cluster_weights = sync_aggregate(cluster_weights[cluster_name],local_weights)
            set_weights(cluster_models[cluster_name], updated_cluster_weights)
            cluster_weights[cluster_name] = updated_cluster_weights

            # Save checkpoint
            ckpt_path = os.path.join("results", model_name, cluster_name, f"{model_name}_{cluster_name}_round_{rnd+1}_kr.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


### Kuramoto Softmax FedAvg

In [None]:

# Main experiment loop
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    # Initialize per-cluster model weights
    cluster_models = {}
    cluster_weights = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = get_weights(model)

    for rnd in range(NUM_ROUNDS):
        print(f"\Round {rnd+1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            # Sample a fraction of clients from the cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f"Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, fin_loss = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # Aggregate and update cluster model
            updated_cluster_weights = sync_aggregate_softmax(cluster_weights[cluster_name],local_weights)
            set_weights(cluster_models[cluster_name], updated_cluster_weights)
            cluster_weights[cluster_name] = updated_cluster_weights

            # Save checkpoint
            ckpt_path = os.path.join("results", model_name, cluster_name, f"{model_name}_{cluster_name}_round_{rnd+1}_kr_softmax.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


Starting experiments for model: lstm
\Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:39<00:00,  2.05it/s]


SYNC Weights:[0.01714586280286312, 0.007341246586292982, 0.005376106593757868, 0.009026425890624523, 0.013748370110988617, 0.003524855012074113, 0.015615459531545639, 0.028545405715703964, 0.00037552270805463195, 0.0026444564573466778, 0.015169553458690643, 0.016310110688209534, 0.0002666755754034966, 0.024659018963575363, 0.010934737510979176, 0.005372896324843168, 0.008823621086776257, 0.01801506243646145, 0.03618618845939636, 0.0002804785908665508, 0.007796133868396282, 0.02834409847855568, 0.0006704466068185866, 0.003171334508806467, 0.009118565358221531, 0.013552341610193253, 0.010257434099912643, 0.006262031849473715, 0.025177812203764915, 0.01611536741256714, 0.004576415289193392, 0.0010064546950161457, 0.01566484570503235, 0.006497932132333517, 0.017817247658967972, 0.009647764265537262, 0.018062923103570938, 0.026207640767097473, 0.004003540147095919, 0.020688243210315704, 0.003593906294554472, 0.00176674232352525, 0.0036300714127719402, 0.02171650156378746, 0.0176354553550481

Training other: 100%|██████████| 64/64 [00:32<00:00,  1.99it/s]


SYNC Weights:[3.994156591010501e-25, 1.9981471268692985e-05, 0.000170990388141945, 1.1178276360102046e-10, 2.834427300073966e-13, 0.10563703626394272, 9.04088331976638e-11, 1.2971519254278974e-06, 0.0042074513621628284, 4.567755895745762e-10, 1.4396679860430196e-17, 1.2953025154208153e-12, 2.2268265790353903e-13, 4.590889091105055e-08, 1.1056963111484401e-14, 4.6250628656707704e-05, 4.2638656006562664e-14, 0.02651219815015793, 1.591702675796114e-05, 1.1942501032535802e-07, 8.364056157006416e-06, 2.479996510373894e-05, 1.0171514451757102e-21, 2.4791791730649493e-08, 4.764723144035088e-06, 0.0011091399937868118, 8.503096095074589e-14, 0.007965074852108955, 2.867164994313498e-08, 8.559239358874038e-05, 5.805446591121075e-14, 0.000249513570452109, 1.8039070710074157e-05, 1.2116474490397877e-09, 4.764858507755321e-10, 0.19458331167697906, 0.03818589821457863, 0.17832297086715698, 2.3084856248800634e-10, 3.0443703114002574e-11, 1.6708211203475543e-10, 0.06018122285604477, 0.00239556422457098

Training cluster_1: 100%|██████████| 40/40 [00:19<00:00,  2.04it/s]


SYNC Weights:[0.021405456587672234, 9.466892159082363e-09, 0.1109171211719513, 0.017973661422729492, 0.010668179951608181, 0.007095228880643845, 0.11654827743768692, 0.06396946310997009, 0.03303733468055725, 0.07812351733446121, 2.6365278245066293e-06, 0.060069482773542404, 0.035770233720541, 0.056680068373680115, 0.03316447511315346, 0.018185317516326904, 0.020312946289777756, 0.005112232640385628, 0.010138009674847126, 0.029710428789258003, 5.339424660633085e-06, 0.007267991546541452, 0.0062730396166443825, 0.008765684440732002, 2.5349963794774055e-10, 0.0005733942380174994, 0.08985995501279831, 1.823302581710636e-15, 0.04575058072805405, 0.029619786888360977, 4.146520904413364e-09, 9.541915915178834e-07, 0.020783375948667526, 0.011416907422244549, 0.0015772890765219927, 5.566255837057953e-16, 5.351192058911103e-11, 0.009685173630714417, 0.006550424266606569, 0.0329861156642437] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_1_kr_softmax.pt
 Processing cluster_2 with 179 c

Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.99it/s]


SYNC Weights:[0.0, 0.0, 0.039645805954933167, 0.0, 0.0, 0.0, 0.0, 0.0, 3.156996966138998e-31, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9603541493415833, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_1_kr_softmax.pt
\Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:39<00:00,  2.00it/s]


SYNC Weights:[1.5692940105693622e-06, 0.00014865295195824014, 0.0001511560635544205, 9.077804975142985e-05, 7.938102445614325e-05, 0.1067751331054015, 0.020155718953744753, 0.010380569484483923, 0.00016106533699055688, 0.02336761532409789, 0.0001978276415982943, 0.0003925863141005969, 0.00914500997702444, 0.02503499334513163, 1.1386952421196476e-07, 0.003947799240936813, 0.005525318497569001, 0.0210889038258793, 0.011959034461530599, 5.805308852170572e-05, 0.017975670418242388, 0.005738053259604552, 0.0005121702003172078, 0.00034106371719521577, 0.10873642292188, 0.0002617637606907212, 0.003942935260129184, 3.903131549468162e-05, 0.004389373239842825, 0.0009310327760057465, 0.00022665142431750786, 0.04061096355898936, 0.005495361988955749, 0.00013083421478287055, 0.003204339260778199, 6.581109422041218e-05, 0.0017088949370226473, 2.7724124028368987e-05, 0.0001319575265328038, 0.0005101214990135726, 0.004772415089007284, 2.591162632005135e-07, 0.0026901662139149378, 0.001901116386178080

Training other: 100%|██████████| 64/64 [00:31<00:00,  2.04it/s]


SYNC Weights:[3.601988454154218e-11, 4.3472347454241015e-16, 3.1979330586228056e-05, 2.099329669151854e-17, 1.3602257144091929e-09, 5.91010558314237e-16, 1.3487424639869996e-13, 1.1362707953520222e-23, 1.9428226741447438e-28, 0.0034913349740084074, 0.09404140892781052, 1.4232388636907856e-05, 4.639373432197254e-20, 4.6654921478134734e-08, 9.702405567357372e-06, 9.239382404681646e-18, 4.025499076780274e-09, 5.6587847158708295e-09, 0.0005790210204617013, 7.024308868356554e-17, 0.002454508532450068, 4.916186758062512e-13, 2.1121897578066094e-08, 9.391285278003515e-26, 0.4595661885015488, 3.7917895082117947e-16, 4.5558315336029463e-07, 4.106333658328584e-24, 1.2539928626099741e-32, 1.5932731700262203e-12, 2.5864413866617656e-07, 0.0005035490692478816, 2.3639122068844363e-12, 1.0530102996123041e-05, 1.747410193169896e-08, 2.8218539007170646e-12, 7.495947502947359e-07, 1.817207060828411e-23, 1.1193857435511834e-07, 0.3574361438950604, 7.695680335295569e-15, 6.357889076560096e-08, 4.294259963

Training cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.97it/s]


SYNC Weights:[3.6156768696713206e-09, 0.15157355218828075, 2.407400156934541e-06, 0.00437756581082615, 0.023812564724356215, 2.463281910993613e-07, 4.567548762331003e-09, 0.019951832509857905, 0.15815440125455343, 0.054392023394552985, 0.0032480694214531993, 0.03450451180019956, 2.45693279739197e-17, 0.021016370835000887, 0.01583427820757094, 0.0036921597506434133, 1.1776918710699487e-13, 0.00023196335714871616, 0.17171125691795255, 0.0004079039073783329, 0.08022388266153231, 4.2407640316849826e-13, 7.41039252300914e-08, 0.00022441504453499556, 0.02136536513652872, 0.000195945578872394, 2.159392117966059e-09, 0.00218578476189039, 0.005772006408399909, 0.029813135661771125, 0.0034615809413151293, 0.00021660344238541888, 9.110362367628102e-08, 0.15846405216437617, 0.010070545071724949, 8.981935568881614e-08, 4.590412308279879e-18, 0.005362233804525241, 0.019287721844079047, 0.00044535258276436393] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_2_kr_softmax.pt
 Processing clust

Training cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.96it/s]


SYNC Weights:[0.698261345828447, 0.04900568871561125, 1.6756162485705287e-10, 7.963161391747661e-07, 3.906903720138182e-06, 8.545806816431385e-19, 4.499495121253197e-08, 0.037946046213360604, 6.865081759945577e-17, 5.9456088215139635e-06, 1.2072079688416604e-06, 0.19629194972098077, 0.0011923980374804112, 3.374326089151393e-09, 4.764545305029606e-05, 4.284316816328194e-06, 0.002093788033904626, 1.489574633047522e-06, 8.68600797874767e-09, 4.414492152154982e-08, 9.458805276386944e-06, 0.01507950826014939, 1.606627693386796e-20, 1.1484824625044246e-05, 3.9695404388524184e-05, 3.2524242448407194e-06] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_2_kr_softmax.pt
\Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0:  48%|████▊     | 38/80 [00:19<00:20,  2.04it/s]

### Kuramoto Norm FedAvg

In [None]:
# DEVICE
# Main experiment loop
for model_name in MODEL_NAMES:
    print(f"Starting experiments for model: {model_name}")

    # Initialize per-cluster model weights
    cluster_models = {}
    cluster_weights = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = get_weights(model)

    for rnd in range(NUM_ROUNDS):
        print(f"\Round {rnd+1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f" Processing {cluster_name} with {len(client_ids)} clients")

            # Sample a fraction of clients from the cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            print(f"Sampling {len(sampled_clients)} Clients")
            local_weights = []

            for cid in tqdm(sampled_clients, desc=f"Training {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, cluster_weights[cluster_name])

                train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, fin_loss = train_model(
                    local_model, train_loader,
                    device=DEVICE,
                    learning_rate=LR,
                    loss_fn=None,
                    optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

            # Aggregate and update cluster model
            updated_cluster_weights = sync_aggregate_softmax(cluster_weights[cluster_name],local_weights)
            set_weights(cluster_models[cluster_name], updated_cluster_weights)
            cluster_weights[cluster_name] = updated_cluster_weights

            # Save checkpoint
            ckpt_path = os.path.join("results", model_name, cluster_name, f"{model_name}_{cluster_name}_round_{rnd+1}_kr_norm.pt")
            torch.save(cluster_models[cluster_name].state_dict(), ckpt_path)
            print(f"Saved model: {ckpt_path}")


Starting experiments for model: lstm
\Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.77it/s]


SYNC Weights:[0.011088981293141842, 0.0005276454612612724, 0.009006999433040619, 0.0010065635433420539, 0.02894829772412777, 0.0005409520817920566, 0.002419608412310481, 0.03965654596686363, 0.01275147870182991, 0.0060289776884019375, 0.0029398140031844378, 0.026724137365818024, 0.0005258911987766623, 0.001337263616733253, 0.005358441732823849, 0.007517630700021982, 0.031638406217098236, 0.023924464359879494, 0.009779023006558418, 0.012516401708126068, 0.010698819532990456, 0.008907420560717583, 0.02152569591999054, 0.018377719447016716, 0.000958866614382714, 0.015023786574602127, 0.00016531653818674386, 0.01078857947140932, 0.00011684942728606984, 0.014073467813432217, 0.017146386206150055, 0.010822692885994911, 0.026125604286789894, 0.0010200496762990952, 0.020381156355142593, 0.0013889539986848831, 0.015629161149263382, 0.013110995292663574, 0.0012778245145455003, 0.035792287439107895, 4.69821497972589e-05, 0.00017756293527781963, 0.01291549764573574, 0.0010281517170369625, 0.006933

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.78it/s]


SYNC Weights:[0.07462282478809357, 0.00028802879387512803, 0.00030799463274888694, 0.0001894784509204328, 0.1856829822063446, 0.0013856367440894246, 0.006020246539264917, 0.00015760045789647847, 0.0006232321029528975, 0.034824274480342865, 1.9031719489248644e-07, 5.8000339777208865e-05, 0.001954455394297838, 0.0020411398727446795, 0.0001238178665516898, 3.321071562822908e-05, 0.0008578212582506239, 0.3715418577194214, 0.00020522030536085367, 0.00010857664165087044, 3.190814459230751e-05, 3.9604579797014594e-05, 1.5161933788476745e-06, 0.0057764663361012936, 0.07607322186231613, 2.0615650300152139e-13, 5.689662430086173e-07, 0.0014188580680638552, 0.00011375695612514392, 1.9329167599835273e-08, 1.045159070400814e-09, 1.5019485317679937e-06, 0.0910891517996788, 1.0546632438490633e-05, 0.0007252946961671114, 4.5648281229659915e-05, 3.2138357865816106e-14, 0.0001444595109205693, 0.004598150495439768, 1.1947526301625544e-09, 1.9924131393502853e-21, 0.0003511750837787986, 0.00096852390561252

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.75it/s]


SYNC Weights:[6.544785719597712e-05, 7.233626092784107e-05, 0.006061752792447805, 6.067440239121424e-08, 0.06841098517179489, 2.4495477191521786e-05, 0.24966132640838623, 0.00010738746641436592, 4.547088039430491e-09, 1.3054019149852088e-16, 1.2762859345760713e-13, 3.01448399113724e-05, 1.4362441791426761e-11, 0.008814383298158646, 1.7345676558289174e-18, 1.4914592389617328e-30, 0.09377839416265488, 0.4656314253807068, 1.0919143278442789e-05, 0.005405639298260212, 0.0009363763965666294, 5.583502274896998e-14, 4.245172385708429e-05, 0.004579662811011076, 7.850286259106676e-19, 1.13192678806067e-16, 0.04814663529396057, 0.001987472642213106, 2.4812827632558765e-06, 5.4442076361738145e-05, 0.0014324609655886889, 0.03760834038257599, 0.005090861115604639, 0.0008369539282284677, 0.0002628778456710279, 3.24338524033667e-16, 0.0009442894952371716, 1.9349813092962853e-13, 7.039120805338306e-13, 4.35070723005704e-15] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_1_kr_norm.pt
 Proces

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.78it/s]


SYNC Weights:[0.7009902596473694, 0.0, 3.6794728657696396e-05, 0.022127574309706688, 0.0, 0.04154526814818382, 1.0632530808863876e-32, 9.734602447047935e-13, 7.524082497385623e-17, 0.018217485398054123, 2.1920966730704095e-07, 3.84337863579276e-06, 0.004118343815207481, 3.664712762180057e-29, 0.0, 1.4530882573582371e-11, 2.286676283315066e-17, 2.6271892796014913e-20, 0.0, 0.02523532323539257, 5.587682009797201e-13, 0.0, 3.556105730240233e-05, 0.18458621203899384, 0.0031031910330057144, 4.1679192445300056e-27] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_1_kr_norm.pt
\Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.77it/s]


SYNC Weights:[0.0051833231278448, 0.00769807719075495, 0.007507908374740979, 0.0162548833674811, 0.0016609927828375175, 0.023403138494082612, 0.018980740372679314, 0.0032581521500735775, 0.008097144689884634, 0.009932346660793012, 0.03024027225928262, 0.0076274970835577965, 0.007895615505298666, 0.005978641010762433, 0.015191204969001655, 0.015137093003741525, 0.014134449182821746, 0.0005417743270793262, 0.02166207104568662, 0.003963194036386427, 0.0008265072213782368, 0.005568218735843369, 0.012349753437257447, 0.014085690796868664, 0.02160672050480737, 0.012465585123457495, 0.017113252821819608, 0.033262819747507215, 0.008305482772277703, 0.022373712143038734, 0.010393514100781813, 0.012212374711891832, 0.0014349978835064034, 0.02286061345486316, 0.019587573559834055, 0.008655716664430041, 0.008119500212187582, 0.023304268722722017, 0.012599891679783821, 0.0026042839942204275, 0.008242185119002985, 0.033347129174328166, 0.016813526946632937, 0.01589567219206841, 0.03267221209711756, 

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.76it/s]


SYNC Weights:[5.247684359067735e-07, 0.027877854561395504, 0.013886959676176971, 0.0023023102190991435, 0.07768370672889605, 0.06969558491214357, 0.012007381987352829, 0.0035371690708607944, 6.9576014385086734e-09, 0.0019563172204802325, 0.03149475628383987, 0.0008834240538856031, 0.00026184626740880206, 0.006940411637066655, 0.0009254373713536608, 0.002289552939217573, 4.456567487670814e-08, 0.051009524162843295, 0.032158833486860126, 0.014236157566476539, 0.03391559593025862, 6.315334440178684e-06, 0.021941433155352043, 0.024534910087506984, 0.040912092318853906, 0.0014235561327265998, 0.007890499561779915, 0.010520018638288999, 0.01867650566159459, 0.020617085135922444, 0.05098663668540898, 0.04114116863673065, 1.2429878028986497e-05, 0.0005586483253516549, 0.0028906138459210795, 6.819646625220227e-06, 0.005114918921226653, 0.00022941303472677452, 0.012910461548110604, 0.0006691798379907076, 0.00016887402873309897, 0.000594431326253468, 0.00165214545623434, 0.031348685803513264, 0.0

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


SYNC Weights:[2.76637523769766e-12, 0.09019431946262378, 2.6381538844330002e-09, 9.54521391124605e-23, 0.0024627866661229574, 0.16212705856927023, 1.248393566176846e-08, 8.460825985406839e-12, 1.986364581451871e-09, 0.030859738213531343, 2.000935254238018e-05, 1.562254599189035e-10, 5.994493416103449e-13, 0.1930938584829723, 0.18490478813555125, 5.731721609200364e-19, 3.99988947501976e-12, 2.13603412389036e-15, 4.1460343908862527e-19, 0.00036805231112074804, 0.09019928421989555, 1.6610722930449728e-08, 2.671398509775786e-05, 2.309824129890557e-13, 0.015615637817004044, 1.9715849175827175e-15, 1.0036010113489734e-07, 1.2779599454180876e-22, 7.019262210089072e-11, 3.563862518974842e-08, 0.000991173819628223, 0.18627710957180849, 1.2059495843389682e-12, 1.8741959027984768e-05, 0.0021725397131433575, 0.0011193061659107348, 0.03954757102799371, 2.6446699752715478e-21, 1.1386342275940856e-06, 1.0764478180334503e-31] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_2_kr_norm.pt
 Proc

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.74it/s]


SYNC Weights:[2.3006631220704924e-87, 1.5093435977641497e-37, 1.7692580810401676e-35, 3.351653001652562e-47, 1.768051095949142e-17, 2.0507163393982273e-73, 5.481575586175181e-54, 5.510314868225175e-60, 0.9999999858646114, 6.833827982315204e-33, 1.4233562733557102e-65, 1.5842624793846847e-23, 5.78791984605439e-121, 3.236519575956531e-13, 1.7730364146036888e-09, 2.3238185014159593e-72, 2.3620287978298106e-09, 3.7844991846653985e-57, 1.1794534423927993e-108, 4.061085035267167e-37, 6.2656605059060246e-52, 2.5439329265205904e-41, 1.7607694126204114e-38, 3.4091280494241944e-30, 1.4326072459310612e-48, 1.881523394858432e-58] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_2_kr_norm.pt
\Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


SYNC Weights:[0.029906948886053875, 0.007467947815345978, 0.04093254776069243, 0.04357402233472768, 0.03592546765629756, 0.0002508472611444826, 0.005037047669931573, 0.04520532640414048, 0.026846844377419046, 0.008403582452025929, 0.00022276153775900085, 0.004460452648712106, 7.59678503293102e-05, 0.02468880118855555, 0.00336461084574221, 0.0373326252294478, 0.0017118684551654581, 1.7235394911205432e-05, 0.002034677438245313, 0.028802867452136723, 0.005351210801785962, 0.004408731341634964, 0.008430690487253112, 0.00020103741996238338, 0.0065450806503366335, 0.015746963266042757, 0.0005745367257418736, 0.030041467958594165, 0.00278949836259695, 0.02776411106836444, 0.026758472861647342, 0.0387378051303011, 0.001870195496978842, 0.0042756595925827375, 3.157435010905293e-07, 0.020416530438039007, 0.0014306921310695277, 0.007008839773057883, 0.04663383027696253, 0.011174700488380583, 0.009505368650824452, 0.0016887119686381173, 1.3109631143828426e-06, 0.011435534495044442, 1.8806331791967

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.80it/s]


SYNC Weights:[0.014385845121931607, 4.673277083118516e-08, 8.2155856619688e-06, 3.988365185198354e-05, 7.812540132381074e-09, 0.0031449027856001732, 0.010721576728827335, 0.00011019092637198535, 0.005551752702298138, 0.0012835852910623937, 2.0928105167538925e-06, 0.022434569021673494, 0.030064765145627823, 0.061526683617302576, 2.7924083567523177e-09, 1.0820184484622883e-07, 0.00030137697290356225, 9.461123442373808e-07, 0.013634716425397122, 0.005522633462885341, 1.076070149880066e-10, 0.0008356896619812308, 0.008060152789645303, 0.002856040496654081, 4.711984282203562e-05, 0.16783999517191003, 0.08991302094122279, 0.005791119997278167, 0.0011822269213227557, 3.4304034336508e-05, 2.5747824159658036e-07, 1.6744355070111845e-10, 0.0019456616434768133, 7.810642037791086e-10, 6.052779616266093e-05, 0.13746314212849575, 0.014577022972001795, 0.03359262530736279, 0.0021418832863601394, 0.004533103096565596, 0.0013436980303981698, 1.1801297131172062e-09, 0.010812903260626273, 1.9162137202399

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.80it/s]


SYNC Weights:[1.1113920010365843e-39, 0.013651101790136184, 0.010533434060225258, 0.01756029610785483, 0.3813260220966207, 1.0069988317677004e-06, 2.1083349380876977e-31, 5.428306252743537e-23, 1.5671698423612542e-08, 4.7080836097394816e-11, 0.0013139075726655886, 6.278982145835862e-06, 0.262496104703761, 2.3793716791783463e-10, 0.00027136133327076197, 7.692431767447225e-10, 1.7157624357826033e-10, 0.08432325176380306, 0.021313131025502113, 2.8372210479076194e-05, 1.0284702847913857e-05, 4.751024932725019e-09, 0.06537876724186173, 0.02063548117240607, 0.06833003915572258, 0.0005600600322134939, 0.0002253669650138907, 2.3152310043834435e-05, 1.1991535047139697e-26, 3.292693880165387e-38, 0.04721656974851871, 6.41322323558104e-11, 5.039700007796559e-22, 0.0016479179264761344, 0.0030980428365769844, 4.9973494479936155e-05, 2.4370558644489256e-09, 2.8533128073865158e-11, 2.0271675205993365e-20, 4.777700038296879e-08] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_3_kr_norm.pt
 P

Training cluster_2: 100%|██████████| 26/26 [00:15<00:00,  1.71it/s]


SYNC Weights:[3.138447129735609e-17, 8.85525343247988e-12, 0.0007953758849282652, 1.0677481186024077e-08, 2.2532770319558237e-64, 6.655006692880088e-32, 5.184844533527423e-07, 0.9992034461750134, 1.005270715361556e-32, 6.27299045929273e-07, 1.5369045920411756e-18, 1.2724489577825527e-73, 2.105228264705478e-10, 4.193949286251201e-11, 1.0791675735627515e-08, 3.2375473168267e-14, 8.849492689625443e-44, 3.3807975757846826e-10, 2.9349617877949623e-11, 8.515404714402934e-19, 1.675474465598207e-52, 6.658835881520818e-11, 6.630895896034547e-19, 2.2076307656690245e-18, 2.5353210278116922e-18, 7.234489879173576e-49] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_3_kr_norm.pt
\Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.75it/s]


SYNC Weights:[3.7534895847565393e-11, 0.00018909266742124064, 0.027604699304730227, 4.437938882198298e-10, 1.8394136569203665e-08, 4.032925885722123e-09, 2.4159566514587966e-07, 0.08170087772525882, 1.5258948177942836e-06, 2.1016239482527844e-08, 0.04562098768936708, 5.525004015602678e-10, 0.014873135076032457, 1.6077022994309274e-07, 1.8022975996262845e-09, 0.010036969536231675, 0.0007307582617420667, 0.1060648563082944, 0.00511298306201217, 0.010351315837270988, 0.04195884841347416, 0.17054228245898023, 0.002345276700765804, 8.722348187349103e-09, 7.77813993389024e-09, 8.04526533135592e-05, 0.0018118265755137075, 3.1919042763913643e-09, 0.0009485000371594986, 5.9301788499666995e-06, 0.042019942178603685, 0.03003386789578644, 0.00016569179907029418, 0.034273993022506416, 2.142687173324175e-08, 0.0017541064393350847, 2.294159602322244e-09, 0.0006335824203254123, 6.51600750013476e-06, 3.675319808760102e-08, 1.912859461028279e-05, 0.028308841133331965, 3.269274052552075e-08, 0.0851071904

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.78it/s]


SYNC Weights:[1.9831327258785797e-13, 1.4040076638324031e-05, 2.626865647951807e-09, 0.051752792564279976, 2.9788887196011247e-06, 1.7913625694972565e-05, 0.005267197881140104, 3.3971063682574185e-08, 4.89825437791962e-05, 0.005311333564103986, 2.442086505116658e-05, 6.518798904992247e-07, 0.03115838489187588, 0.01516305485337023, 2.7446096801687344e-06, 0.00014584252363624757, 3.54236979193063e-08, 0.00014207697023392948, 3.0546776746978224e-06, 5.719077245084852e-05, 4.730392800601576e-06, 5.856768206028634e-13, 1.7040611901378707e-09, 0.0003073448397460228, 0.0038871052200531724, 4.465146643710883e-05, 8.925772411399259e-08, 0.0023215974675010655, 0.029923679810415302, 0.00016585513461640905, 0.004113703633572379, 0.022772604286087197, 0.0001232394275823727, 9.820914877239651e-05, 3.6844411197084547e-08, 7.047860089308489e-09, 0.35793305562574207, 0.0007788212447415124, 0.006956095594410748, 3.6815969194284855e-07, 5.0249474487048535e-06, 0.0026736739818948824, 1.306985512997166e-06

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.79it/s]


SYNC Weights:[6.685953320700608e-08, 1.6543382565167286e-10, 5.609738629864971e-40, 1.1598347508435242e-11, 8.551264407124981e-59, 2.5787729889931922e-09, 6.30764098508922e-08, 9.481486746040825e-07, 1.2147132897382718e-14, 4.1333448490869794e-35, 2.800599136549732e-18, 1.5479350234599853e-19, 9.93756276398827e-30, 1.5667390979896473e-40, 5.9282099315987916e-21, 2.009563789875657e-09, 3.421574822969105e-10, 7.956917462632837e-36, 0.9994524568529921, 1.0818475316321857e-10, 2.9635629624953114e-06, 2.5257383071971023e-19, 5.3226476521863745e-06, 1.2728835841183354e-18, 2.2186993349246028e-20, 0.0005377499026431344, 2.1730258867567555e-09, 2.979948793126124e-07, 9.867182727683179e-08, 9.294546683607833e-11, 4.0403434885255255e-28, 1.4229907961509886e-08, 9.691997181391189e-32, 3.628528587592126e-13, 5.75931814070365e-10, 3.6877057544575373e-38, 4.020423147042692e-15, 1.7746057081146014e-31, 1.0584280085207152e-34, 5.0788581009248294e-21] 
Saved model: results/lstm/cluster_1/lstm_cluster_1

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.80it/s]


SYNC Weights:[7.924514589052297e-19, 0.0004311635119544665, 1.7658124600775445e-19, 2.2058879388614888e-16, 0.04069161498347618, 2.2327670362986613e-05, 0.020227328363894608, 0.004461210222870256, 0.005137382396674044, 0.03639063153069308, 4.653793187598196e-19, 0.08433060316159387, 1.075606222856567e-07, 1.1576696833042373e-05, 8.690539238597374e-08, 1.97512446015445e-09, 0.0035959407853862944, 0.009261292476193086, 0.000163140894338393, 0.5329973161347767, 0.015623507378734951, 0.0031612898299647747, 0.23991447228629703, 0.0005073466801002085, 7.44205241444562e-11, 0.0030716531503229188] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_4_kr_norm.pt
\Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.75it/s]


SYNC Weights:[0.0036115785366338834, 0.023331907832179245, 1.8275907936880575e-08, 6.897076470558818e-07, 0.07537895836061188, 0.0003000174104592894, 2.225795067162786e-10, 1.1746290600880533e-11, 0.0006951746821500421, 2.586702177131851e-07, 1.3434938787698699e-06, 1.4178167112207678e-11, 0.005783777987636702, 2.223314893255007e-07, 1.0706427424234618e-06, 1.2105678804045182e-06, 2.337608775207908e-05, 2.8899512248830115e-06, 1.308539942399909e-22, 8.59453748152465e-16, 0.0003390551138263548, 1.289255350739423e-09, 5.747540194097301e-06, 9.836028188172803e-09, 3.3022885896412827e-09, 1.450727730322193e-10, 5.7230671517214225e-09, 0.0017091361148604675, 2.040971335904041e-06, 1.6899405490332147e-09, 5.16338486289055e-17, 6.75010093467738e-10, 1.8360009836155178e-06, 0.027013818769209483, 1.3021955557254594e-05, 1.0755726142939438e-06, 2.8216421250773246e-19, 1.0777415646712325e-08, 6.866861396670026e-09, 6.378027896633545e-12, 0.018246170303080282, 0.00016876808393464674, 9.08118998234

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.81it/s]


SYNC Weights:[3.4324048347652514e-06, 0.0004194547520184463, 4.043715272660517e-12, 0.00039816096176564976, 0.020056542383612222, 0.00024520320550920963, 0.00044725547292944135, 9.13071586521889e-05, 6.380615628680972e-07, 0.0013569374662890069, 1.8277094010005652e-08, 3.630514799355498e-06, 0.0030235168918381775, 2.1861057908320332e-16, 4.909309821548892e-05, 1.349518540244016e-07, 0.02553723155872791, 9.140837608185599e-07, 7.971543161204873e-16, 0.0010972399353603912, 3.670154217964478e-11, 0.5099944352255973, 9.852117281749498e-16, 2.163788043221721e-13, 0.039320363722963686, 0.01672542175880599, 4.902119206208485e-12, 0.006689178067148095, 0.0005944486348702101, 1.3999052710927175e-05, 1.1934511256849557e-06, 3.7840467906478344e-05, 4.506490802355511e-06, 1.4851881950792024e-11, 0.00024599376099228244, 0.0003646408151754971, 2.1983141075652903e-11, 0.0027789213998504546, 0.0006112646945248903, 0.012521118919269896, 0.044459806165302505, 1.8066536238692244e-12, 0.000294048444419107

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


SYNC Weights:[1.2896689201631631e-14, 5.550425313869651e-21, 3.7394249397608476e-05, 1.009870130193444e-17, 1.9375150655795862e-41, 2.5441881000920746e-09, 9.911805454557208e-10, 0.4465426520368932, 4.921899096502289e-06, 2.2770207184123816e-28, 8.635925162667331e-16, 1.2262906691774857e-14, 7.41388890176054e-22, 6.158512962148e-06, 3.5688565444081345e-13, 1.846895093845564e-19, 3.6520139422290456e-13, 5.749451895212838e-13, 1.815015734557538e-07, 6.390384967039325e-12, 7.407983367373007e-18, 8.252750122139169e-10, 2.7244099217489237e-07, 4.209873517183364e-08, 1.4876629170764851e-15, 0.00014647167459047822, 0.5532550494351863, 1.5181129809038699e-30, 1.7600161793960243e-66, 6.603523028055114e-17, 4.461947411205637e-09, 3.7996491982114825e-59, 1.6972218912458288e-13, 3.397899341958706e-06, 1.5852454497729972e-16, 3.4774147467624384e-39, 3.4432370717451597e-06, 4.874887786692807e-15, 6.306586512262207e-10, 2.0469164414815823e-11] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.76it/s]


SYNC Weights:[5.0391018383980976e-24, 3.883374266765043e-08, 3.522061580685893e-28, 0.006279009779893937, 6.188103014684122e-16, 9.176305154358382e-20, 0.009869519804404156, 1.8423622709374665e-05, 3.727821143287729e-17, 6.670065903496241e-14, 1.8425654903001643e-24, 3.0274390011731625e-38, 5.101293504292447e-05, 1.9574095806920249e-13, 5.886532744171802e-18, 3.726343709731995e-15, 0.9834009015553387, 8.166936563110122e-11, 5.049451181169719e-65, 4.9401980302819314e-20, 1.8075118622594027e-43, 5.230797993420357e-17, 1.0217552487677333e-11, 0.00038108354270535293, 1.4349407016771138e-16, 1.5062828089118388e-51] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_5_kr_norm.pt
\Round 6/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:44<00:00,  1.78it/s]


SYNC Weights:[1.876284539501318e-05, 2.7197996024549677e-10, 0.0005870280750087651, 3.1844832658066923e-11, 1.7657302504740936e-05, 7.389261704009959e-11, 1.5059995472723991e-07, 1.0517511763955104e-10, 0.0009244002615638036, 3.4810599089650806e-07, 7.891267209113233e-08, 5.439973108022466e-08, 1.6833248401705447e-17, 2.4571686589628134e-11, 6.543622343385109e-08, 0.003920800372728773, 1.3643587868537745e-09, 0.00994667884038642, 0.0020896198933846866, 4.0251122048056153e-10, 4.742773014452618e-19, 2.6880068282423876e-10, 0.0002969026006540483, 0.010354305102847993, 8.751866596745962e-19, 1.063310898986083e-05, 8.857615191189129e-19, 1.5640664758847543e-08, 0.005813187020511499, 2.0246468505074527e-07, 3.1196773602934465e-06, 1.2804570791517207e-06, 6.840743774616058e-11, 2.945637202805621e-05, 9.767701097903166e-06, 1.5422429584575288e-09, 1.1980061678081564e-09, 2.6157331510648e-10, 1.0674469962598148e-07, 3.052524560079378e-07, 0.04044218716461023, 1.2499077217178315e-05, 8.43532843

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.78it/s]


SYNC Weights:[0.003239287853991067, 2.4738442324221234e-06, 9.967116478952816e-12, 3.125226262442332e-05, 2.550206144796052e-09, 1.0086342580929771e-08, 2.1616302731420283e-27, 0.002438544139355496, 7.800435645566724e-24, 1.4801772650090689e-08, 2.4970961913154435e-07, 8.365622575946066e-24, 3.7630223895572994e-11, 3.064699528123794e-10, 4.897449171165864e-08, 7.901722522023232e-05, 4.774152816643893e-07, 1.1363831059168467e-06, 6.454045647385171e-09, 1.2575311178174557e-23, 0.0023919364742881685, 2.6681200766090704e-06, 2.6521278633293053e-05, 0.09041832871999093, 0.03250275735195298, 0.0023494037822541156, 1.1884203486587062e-08, 5.09351320774463e-22, 9.738223595085859e-13, 2.1208493938110624e-06, 3.804390857173682e-05, 0.027394146303907317, 6.401658388395584e-07, 4.203050855380067e-06, 5.092673209952227e-25, 2.0865234502255074e-09, 7.824890522855146e-18, 1.5770186030529189e-50, 2.3286736493279573e-08, 1.1034247279743879e-07, 4.3839990863753173e-07, 1.1790010032273222e-14, 1.28023244

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.79it/s]


SYNC Weights:[0.00016526013996476802, 0.3075992828759909, 3.0076220328330157e-05, 1.3504993540359715e-05, 6.292121830616403e-05, 0.0017012381442235993, 2.8010410639924873e-14, 0.019063795773301324, 2.201701543939147e-06, 2.345616068257858e-20, 1.2457681703849995e-06, 0.014772735374968315, 0.0011811539669098815, 1.7002875591518892e-31, 0.1225381706345691, 0.00018947937267529914, 1.6615487701354093e-05, 0.0011957662942310712, 0.005229894172393495, 0.11056100130433577, 4.30708317668257e-13, 0.0005750111109523577, 3.068080274683006e-14, 1.0316050169455328e-12, 0.012649190771589552, 1.6887190872125782e-05, 0.21989237036235015, 0.0011188440504362738, 0.0038144508137840445, 3.852990934317097e-13, 0.014386264706498699, 0.06082776772905518, 0.0003368525309857574, 0.004750875357872457, 0.008905898997533515, 0.04190227644805139, 0.0008766470297800574, 0.029718855741202657, 0.014866005777415325, 0.001037454860567308] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_6_kr_norm.pt
 Processin

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.73it/s]


SYNC Weights:[1.4703023101754698e-31, 2.086623898626014e-09, 1.1595563197117013e-170, 2.740745836358487e-35, 4.042774485710099e-52, 4.854049450370923e-155, 2.226203693508455e-11, 3.50724236639778e-25, 1.676459112605204e-99, 8.684528535237921e-23, 1.5664303602722198e-31, 1.4102609596891999e-22, 0.9999954350375123, 8.138907841297406e-38, 7.558395236120818e-34, 2.3191321209559445e-22, 4.552853647536414e-06, 9.160885640362897e-20, 5.992611622018047e-23, 3.5908835257554227e-41, 6.644514387728095e-26, 1.843726847710775e-23, 3.247659480607447e-46, 4.66507470379381e-27, 1.6044254233281743e-28, 5.673045085099248e-25] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_6_kr_norm.pt
\Round 7/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


SYNC Weights:[0.000355159761966427, 0.0005929199737812936, 2.3367366831018344e-08, 3.6481575420459634e-06, 0.004616541137086879, 1.4006280272151224e-07, 6.307725273236874e-05, 0.004828103256869573, 4.10710936161601e-06, 0.007649787431446878, 0.0008857538527665934, 0.00018427929875858515, 2.346590165885707e-05, 0.010260672906501747, 1.8819221392016727e-08, 4.088604948797553e-06, 0.0006034109139473145, 0.0011065094781254504, 0.2891396386330112, 0.0009798393026062666, 0.0022685859187815227, 5.0127194205512304e-06, 0.00024252085949132233, 0.004462256451478426, 3.280593545850422e-05, 2.373545532405725e-06, 0.04094637998701831, 0.3500824466770666, 0.019659045601626082, 4.729541029829973e-17, 0.0006879413513651207, 0.005100768624478987, 0.003710700016888442, 5.802201865113398e-06, 0.005499813513047515, 7.282629807339802e-26, 3.132731357296559e-29, 0.0009471284218052772, 0.006010301350843763, 6.898175787704291e-07, 0.0011815538757812508, 0.0004788748720335368, 0.021146889871716458, 0.006730910

Training other: 100%|██████████| 64/64 [00:35<00:00,  1.78it/s]


SYNC Weights:[9.613314098069852e-22, 9.859035165163924e-07, 4.1139197099288317e-16, 1.764068723299241e-06, 4.857401000218517e-15, 1.7738217663350408e-12, 5.635992810536248e-09, 3.5307583602073517e-13, 1.2964832040447411e-05, 4.243370214134921e-09, 7.276566226053827e-11, 2.9811792386566764e-10, 2.1043927471941825e-24, 8.374831030556027e-56, 4.9401419233359546e-17, 3.059128761709927e-17, 1.085417190524302e-07, 9.513283282157808e-09, 6.001151332267265e-15, 0.999673064172484, 4.203742852154061e-18, 6.671873693624007e-11, 9.59113781433615e-21, 2.831270167945415e-10, 1.277093827504404e-16, 2.1387151288763433e-10, 3.2167065755962923e-11, 1.0386679479333177e-15, 9.74425446236411e-11, 3.1931538781129385e-07, 1.0676729610476497e-12, 1.7744363413109372e-55, 2.394620198329568e-08, 4.467014210046181e-12, 3.615113113227549e-62, 4.810430342804658e-11, 4.706103781700004e-12, 6.716455878562314e-07, 3.368732190443979e-05, 6.797310995588536e-61, 1.7151708959548e-13, 5.633724894767645e-15, 9.5549916437254

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.80it/s]


SYNC Weights:[9.794536905989684e-12, 8.511875570002192e-67, 2.1604340199894716e-17, 1.8777180522373073e-14, 1.8061364759986322e-12, 2.345816675680161e-69, 3.4192006062171703e-08, 6.084039753880571e-10, 8.781974331676087e-11, 1.5405467117529525e-08, 7.599949406269134e-05, 3.1726169283408506e-09, 1.4026523936197587e-15, 6.051110485443361e-11, 1.813440614917242e-25, 3.184641538634922e-17, 2.4636675046330004e-13, 1.303098922207823e-16, 2.9841901095428175e-10, 8.350036704182802e-10, 8.088585906263236e-13, 7.895379775437461e-08, 7.996996548071082e-12, 8.588359849150273e-24, 7.266013261603082e-08, 9.018155945524927e-12, 9.889528726019526e-11, 1.9864966642109107e-18, 1.0916961047314085e-14, 2.6429220358697336e-69, 1.3268136452542911e-09, 4.489686624301992e-15, 1.6191567134850523e-11, 1.8575871672731225e-72, 1.5363609463104702e-10, 1.4952003436539196e-70, 4.290506792691105e-14, 0.9999237826072351, 3.6299638582180796e-72, 1.5688421514093373e-23] 
Saved model: results/lstm/cluster_1/lstm_cluster_

Training cluster_2: 100%|██████████| 26/26 [00:15<00:00,  1.71it/s]


SYNC Weights:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9999999900000002, 0.0, 0.0] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_7_kr_norm.pt
\Round 8/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.77it/s]


SYNC Weights:[9.769252566509687e-05, 0.0009118711541355924, 0.016555803613449487, 0.004492855424642452, 0.0017749738581360186, 0.028292934017885057, 0.021750775626197066, 0.043452360375773734, 0.019275749441229666, 0.0007120342419867444, 0.03782583860946122, 0.0224025190223156, 0.03333394870429971, 0.009522475185470594, 2.06506977507969e-05, 0.025897861026499255, 0.015727271634731352, 0.007939444844836486, 0.00807735745761352, 0.003540733071503744, 0.009857762144428689, 0.011763439972488096, 0.005335571167127941, 2.041749062589013e-11, 4.642134092598966e-08, 0.024088396761207317, 0.0009414318980711556, 2.9152147805543834e-20, 2.3357352914262542e-06, 0.005141579826429978, 0.010477194034495145, 0.005375471517280458, 0.0016968349859976248, 0.040196530105454924, 0.007063115523243186, 0.025104134953461782, 0.0002392792793626677, 0.008158362237045937, 2.2883535584791067e-07, 0.0005870739383186053, 0.004026202880641361, 2.2361098859785983e-08, 0.000853446965211471, 0.00034686487950404994, 0.0

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.74it/s]


SYNC Weights:[1.3288260276653644e-12, 3.819254015135646e-13, 1.3595540447848768e-06, 0.0005272288933099369, 4.085265034494914e-19, 2.0168647496571608e-19, 4.170591295520206e-12, 1.1747080223677403e-21, 9.802283019849971e-22, 4.5679409177451133e-32, 1.778878203971488e-17, 1.2279929592528782e-09, 2.1080489491119256e-05, 2.6263577964942385e-23, 2.5489744084534435e-11, 1.5184808782355782e-14, 4.077869491495287e-27, 1.947855129989231e-09, 1.7199783032007705e-24, 2.4713306097280024e-14, 7.169820193612201e-07, 3.377525311469276e-09, 1.5126567367131124e-10, 5.921663830378265e-06, 2.777949139337848e-19, 7.371801094682166e-18, 3.511552181154302e-16, 9.861615245079972e-18, 2.8264728935356944e-13, 6.072592917111227e-10, 1.1913230513115793e-05, 6.994120041000837e-11, 3.002354919759847e-11, 4.549620426434151e-12, 2.2637333738538078e-10, 3.867938191502849e-10, 5.871156820965296e-07, 3.3310376097609204e-07, 2.1216296512492156e-11, 1.4605519059815947e-08, 1.2735808523786992e-07, 2.7789594139605976e-23,

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.78it/s]


SYNC Weights:[7.150873004355437e-21, 1.4894352143514076e-22, 4.473609241020146e-63, 5.449999715778572e-10, 0.0031922341940500217, 1.5835906015555987e-31, 1.8573991697426499e-09, 1.715866574890252e-11, 4.8920850542962425e-15, 1.217111204839422e-07, 1.4241499061392182e-10, 0.0004018795826989434, 7.886522603496386e-42, 1.602881512431532e-06, 1.2994766757279439e-11, 1.0721096491182672e-12, 2.57404650744867e-06, 7.425128628717752e-05, 5.036488005375366e-12, 2.433665792773345e-92, 0.16598906892042975, 6.226826454456335e-91, 0.000235940802661382, 1.923734241981892e-33, 1.159115976524799e-10, 9.585117847635656e-15, 5.071551249949014e-12, 0.32330071834774443, 5.958626699717646e-10, 9.525558717987384e-08, 1.5090705067147445e-09, 0.4983588664610006, 4.2776305217728616e-98, 0.008407889784737287, 3.47375280481723e-05, 1.375347363782848e-11, 6.756778016550114e-12, 7.535164083018006e-09, 1.1185979423229402e-13, 1.8512329160164853e-09] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_8_kr_nor

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.74it/s]


SYNC Weights:[1.6049680304201125e-32, 0.0017869314604786613, 1.4791458360570712e-18, 1.5563633712456214e-38, 4.881440170312725e-36, 1.811045395127479e-21, 3.7822229521780704e-24, 9.677904005701608e-216, 1.0553119000040361e-18, 1.4982179650721755e-16, 1.530767780121497e-06, 2.431824526174103e-13, 1.0437661647789119e-23, 1.736334239221423e-21, 1.549726936483016e-17, 8.190648493259779e-19, 1.842535482497856e-15, 5.500955163534096e-20, 1.5240514766410432e-26, 3.4971549542582787e-10, 3.604328883287518e-23, 0.9982115274396655, 1.3907463078297855e-17, 2.0323461249764228e-36, 3.6172520699944026e-21, 9.79470070018422e-25] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_8_kr_norm.pt
\Round 9/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


SYNC Weights:[1.5244145033577993e-07, 0.03825929701871654, 0.029867072066521918, 0.014030230217823535, 0.02575360314394582, 0.011888753044332465, 0.02670957816823613, 0.013711847922957446, 0.009262823418505747, 0.0007817993490402119, 1.8275544916704967e-05, 0.006788579536153131, 0.00933422780703982, 0.016648627831494095, 0.023960771377447763, 0.004894627670742642, 0.006854803834140041, 0.012132568138220938, 0.029701067482993435, 0.018293929780107153, 0.01923282796886081, 0.013162643089354893, 0.012392473443372604, 0.004078048117402965, 0.002336063240674498, 0.016740840808936865, 0.009289707093980085, 0.018227834536102362, 0.0032176768414747866, 0.010440459360946897, 0.02326124565568647, 0.012361348810958734, 0.00853549416127963, 0.030253289108337193, 0.017517401027253025, 2.594552570911497e-07, 0.00821432152393952, 0.022462662337850114, 0.010828935226145038, 5.533186593955479e-12, 0.00841856411450475, 0.018904197523515437, 0.011576349137247796, 0.004964845897720604, 0.01135084383590288

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.76it/s]


SYNC Weights:[0.00015835367439643935, 0.0008112202549797017, 0.0009955623355766725, 6.893554651796546e-06, 0.002700881568445189, 9.16658166907867e-05, 0.00824452242846419, 0.0008542081929025597, 0.00023568625993040904, 0.12771390324770193, 0.022654070521865296, 1.3186573248739904e-06, 2.1192499243703574e-07, 0.0022601483752199784, 0.020588843114521943, 0.0025290409196853754, 0.0031119319128331715, 0.004636478302743936, 0.004952727771023697, 4.051114418250976e-06, 8.908479715454863e-06, 0.000687444504093499, 1.969869001203815e-20, 0.00021707013087150404, 2.031448273841186e-20, 0.05612446785852018, 6.402923682947029e-39, 0.004595341076043877, 3.347317608122369e-19, 0.0004928262235357548, 0.0007768013138669694, 2.1808057388234918e-19, 3.828401243464297e-05, 0.00012771685085688337, 4.122757568351839e-05, 0.010005169621018828, 1.182888382795791e-19, 2.6537418747081655e-06, 0.0005071971379155735, 0.004335885289106992, 0.005831996596932776, 0.08253252006935412, 8.47150805112612e-21, 4.0223939

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


SYNC Weights:[0.0031327849809199824, 0.0010964943824883129, 9.98964572724057e-24, 0.04435815644425842, 0.0035536328130637306, 4.022796557120239e-07, 0.07262873738779005, 0.015208730054346023, 0.02615366578195295, 8.872526180079964e-09, 0.029663372443889925, 0.0010088262357126267, 0.008165370366506386, 9.121356682665161e-48, 5.020589476064664e-24, 0.013714559517762067, 7.31889286185313e-18, 1.5251741122754042e-24, 0.012084398807435129, 0.1736939026939311, 0.021219460369352384, 0.055231112267428266, 0.011445439063745245, 0.0004405154633156292, 0.003683169567699242, 6.71464444013131e-07, 0.05958228517104477, 0.02262424468633731, 0.037922786901264015, 0.0021812091929741804, 0.091501152071605, 0.0939517768933718, 0.000212017395878113, 0.0010515645320420485, 0.0032911736059114067, 0.14794960351322473, 0.0010000916652869042, 0.0003706289531996446, 0.034718670565842144, 0.00715938185685561] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_9_kr_norm.pt
 Processing cluster_2 with 179 cl

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.76it/s]


SYNC Weights:[1.0747920132661257e-29, 2.0067908631650854e-11, 9.082045211504637e-31, 1.0139360217438281e-10, 7.040982785485408e-08, 4.326673899493259e-11, 1.7636643717730437e-31, 3.6440223048914024e-48, 4.954692046378651e-16, 3.9419570949899613e-13, 8.329170883671299e-08, 1.4043378022356941e-19, 2.2721961364756264e-40, 1.0512516897923599e-11, 2.56371954451975e-10, 9.495273910697818e-153, 3.1788711198507657e-09, 7.490299748219234e-12, 0.9999740664596817, 1.1316200929207541e-21, 2.767462715462843e-17, 2.550778510697796e-05, 3.861136796847337e-220, 2.584355653286915e-07, 9.418927975519024e-18, 2.6965308706818407e-25] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_9_kr_norm.pt
\Round 10/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.75it/s]


SYNC Weights:[0.0274708180237924, 0.01054737176457349, 0.011219374115668465, 2.973704261185511e-07, 0.010368162883137902, 0.007858136710581986, 0.012754629444046423, 0.013065251383295257, 1.2541996949896569e-11, 0.03486587278723386, 0.00656772315244483, 0.0037801260988116624, 0.01709284058409319, 0.01875777119710352, 4.4746961297866255e-07, 0.022277696417602074, 0.012833469251911483, 0.0015396279797010028, 0.007536326724768026, 0.015950550079915245, 0.022521235510847105, 0.019924002132350183, 0.025964741015214395, 0.01664316393984506, 0.015014643083268876, 0.022370933035898172, 0.009861904808920446, 3.597338082547816e-07, 0.01537636618370163, 0.028066754086003256, 9.300587631165878e-09, 0.00922635520604748, 0.012220802766113991, 0.021112349065510523, 0.01299991886395765, 0.008740335536861953, 0.001621742559124206, 0.01526396322565081, 0.013855631029508127, 0.004593219428121075, 3.8014709595846905e-07, 0.016841491455066485, 0.009881074302504986, 4.528988833118467e-12, 0.0173245877451860

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.76it/s]


SYNC Weights:[5.213096615879112e-07, 0.03307434832211056, 3.11489219810528e-05, 1.0282199759160774e-05, 3.1433470687883493e-28, 0.005211894434143563, 6.741281635044958e-42, 1.0877361503454726e-06, 8.976811943949202e-06, 0.00028240426780276406, 2.424333013266151e-06, 5.999366653403432e-12, 0.00014216954813130463, 0.0015152462852957062, 8.197105380216535e-18, 3.5555378899433374e-08, 8.371678947502305e-28, 0.00033348079429265507, 6.871734315060009e-40, 2.5066702591255014e-06, 0.00021158340647817154, 0.001360489808196827, 3.422988898390975e-07, 3.816998838956359e-06, 0.004965403649239909, 6.39439781328665e-27, 0.0005966527467828649, 0.6257490961425657, 0.0016376817791456558, 5.2797547279797077e-08, 0.0007933207608721376, 0.0006010740226429468, 0.000525819356030473, 3.3812784937834876e-07, 0.011048183648539508, 0.0011504983685110656, 6.276679023643827e-05, 7.371890062418344e-10, 2.282112582534662e-28, 1.1541928203044206e-26, 0.001423030120481795, 5.720680386541172e-05, 1.8299534975772526e-2

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.74it/s]


SYNC Weights:[0.06936716397465932, 3.886857801914165e-79, 2.905533735354056e-75, 8.74580765882328e-06, 0.013932361094312349, 0.017493596663767284, 3.2284275698233966e-08, 7.9484324146943875e-22, 5.833870479490605e-78, 0.004760609214771058, 9.83933995651476e-06, 2.6666467622951793e-05, 0.00011186574765839514, 5.615756642687614e-07, 2.5056294814062257e-17, 8.878553356762081e-06, 1.4122089643675397e-05, 3.326448071194607e-06, 0.00014452023762964752, 8.983129271894254e-08, 1.864511239133545e-08, 0.00011531284623222765, 6.967826437672197e-07, 2.6050110144611346e-10, 6.579992153224386e-08, 1.3850660655704008e-07, 3.4580221922318444e-07, 7.707458705653031e-21, 0.0034176327912569904, 0.06130065860048616, 1.4024685252995572e-06, 2.469902889995742e-77, 1.064487655896009e-13, 2.3708603371193746e-07, 2.705712856454463e-08, 1.3360561691043366e-06, 9.625569885079237e-05, 5.057352381890272e-06, 0.8291784132270332, 1.3396666044299147e-08] 
Saved model: results/lstm/cluster_1/lstm_cluster_1_round_10_kr

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.78it/s]


SYNC Weights:[0.2047221882276828, 0.02478835714391418, 0.011993601010726206, 0.008673624216555206, 0.00758012432128339, 0.007485884424726557, 0.12210332895329189, 0.09994657300759732, 0.020800971837867943, 9.595650027297712e-23, 0.04953406482216942, 8.647624418412357e-07, 0.11074121542244494, 4.2730885040163905e-17, 4.849410374640951e-48, 0.06510938068281363, 0.044960084096214555, 0.0005782736688324472, 0.08818659952065014, 3.869753359234674e-05, 0.010142044574407337, 0.09292292148246967, 0.028730703738978243, 0.0003885422891101316, 3.429105066999832e-06, 0.0005685231099408211] 
Saved model: results/lstm/cluster_2/lstm_cluster_2_round_10_kr_norm.pt
Starting experiments for model: gru
\Round 1/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


SYNC Weights:[0.02298681065440178, 0.00015860056737437844, 0.004489279817789793, 0.007596696726977825, 0.0002749465929809958, 0.010987086221575737, 0.006184590980410576, 0.012236553244292736, 0.013955758884549141, 0.017609713599085808, 0.015744561329483986, 0.01643647439777851, 0.01523723267018795, 0.010016319341957569, 0.0030217624735087156, 0.020732155069708824, 0.015958908945322037, 0.009993139654397964, 0.014833493158221245, 0.01722734235227108, 0.0009547802619636059, 0.01225161924958229, 0.017422081902623177, 0.016025684773921967, 0.011290466412901878, 0.019496120512485504, 0.007311878260225058, 0.010573968291282654, 0.001023990334942937, 0.0015920560108497739, 0.020032593980431557, 0.015033279545605183, 0.02028472349047661, 0.014591703191399574, 0.01479724608361721, 0.014196078293025494, 0.022121066227555275, 0.007923822849988937, 0.009152588434517384, 5.776746911578812e-05, 0.0072640241123735905, 0.0041080559603869915, 0.011804776266217232, 0.01973649673163891, 0.003559138625860

Training other: 100%|██████████| 64/64 [00:37<00:00,  1.73it/s]


SYNC Weights:[0.007329103071242571, 0.004489659331738949, 0.00017598099657334387, 0.03490704298019409, 0.012955659069120884, 0.04461120814085007, 0.009394476190209389, 0.013583706691861153, 0.009805993176996708, 0.009250194765627384, 0.013930865563452244, 0.002642575418576598, 0.021708428859710693, 0.014157671481370926, 0.021007079631090164, 0.01679585874080658, 0.0025746638420969248, 0.004218836314976215, 0.0036713159643113613, 1.6667750969645567e-05, 0.002413596725091338, 0.014839735813438892, 0.009775703772902489, 0.04009859636425972, 0.007169324439018965, 0.00931684672832489, 0.0015753464540466666, 0.04038064554333687, 0.018730884417891502, 0.05048125237226486, 0.0460929311811924, 0.0002992035879287869, 0.013469302095472813, 0.008243810385465622, 0.0101089496165514, 0.016132239252328873, 0.014687278307974339, 5.704087016056292e-07, 0.010000327602028847, 0.00406549172475934, 0.04739401862025261, 0.0013215483631938696, 0.00858102273195982, 0.04004543274641037, 0.00011344051745254546,

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


SYNC Weights:[0.08286959677934647, 0.008072319440543652, 0.07070609927177429, 8.86442358023487e-05, 0.014442050829529762, 4.0526234101889713e-07, 0.02674274705350399, 0.048545729368925095, 0.01764874905347824, 9.43264240049757e-06, 0.06319446116685867, 0.026111681014299393, 0.0003036884299945086, 0.07553574442863464, 5.183236498851329e-05, 2.6419753051953876e-09, 2.480595640008687e-07, 0.0713081955909729, 0.010530192404985428, 0.003200277453288436, 0.05515296384692192, 0.0038108990993350744, 0.004904693458229303, 3.210906651673895e-09, 0.06859401613473892, 0.04642276465892792, 0.008398983627557755, 0.008648974820971489, 0.0573258213698864, 0.006606647279113531, 0.010002847760915756, 0.014301379211246967, 0.044117990881204605, 0.04364694654941559, 0.0866878479719162, 0.00483859283849597, 0.004296382889151573, 0.004681376740336418, 2.7757268981076777e-06, 0.008196073584258556] 
Saved model: results/gru/cluster_1/gru_cluster_1_round_1_kr_norm.pt
 Processing cluster_2 with 179 clients
Samp

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.75it/s]


SYNC Weights:[0.07028742879629135, 0.06501562148332596, 0.06809645891189575, 0.027538644149899483, 0.03077019937336445, 0.019193340092897415, 0.063536636531353, 0.07045262306928635, 0.06955909729003906, 0.06225583329796791, 0.019799334928393364, 0.009895936585962772, 0.04461360350251198, 0.01619780994951725, 0.01777016744017601, 0.02710195630788803, 0.00030602002516388893, 0.04362880811095238, 0.02717437781393528, 0.03227395936846733, 0.04368653893470764, 4.318400169722736e-05, 0.0127869863063097, 0.040016256272792816, 0.07451674342155457, 0.04348241165280342] 
Saved model: results/gru/cluster_2/gru_cluster_2_round_1_kr_norm.pt
\Round 2/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.74it/s]


SYNC Weights:[0.00327815646651486, 0.009779904089468465, 0.029733652648187888, 0.02146854210462036, 0.009982695846772598, 9.515979883116774e-06, 0.004193121653918597, 0.018024762282362713, 0.01858990803500022, 0.02853961600688935, 0.010442360762568626, 0.007840057893628731, 0.02279701561263233, 0.013708151679665359, 0.025933313624584745, 0.018119740720051607, 0.0004428991351766008, 0.009571537120239426, 0.015271187630380429, 0.021429898304910457, 0.008692049291826444, 0.01737394205694162, 0.011324872990971532, 0.02627170545017262, 0.02388817641806068, 0.014701714016626599, 0.002974746316198525, 0.017911951045444807, 0.019844369790576022, 0.017662682139127927, 0.010437163964094467, 0.001137346837100938, 0.02131745262730609, 0.0022861428412113425, 0.0007060089719554845, 0.0250725245200441, 0.0181494434158489, 0.01414222812714966, 0.017946897015661267, 0.01820285661577942, 0.00025833080248217715, 0.011782586428531234, 0.024230975492886515, 0.02964491172749862, 0.0004684717621735348, 0.006

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.77it/s]


SYNC Weights:[0.03826186778969388, 0.027216718707600108, 0.009158033917168615, 0.035164449683498494, 0.01326065201980083, 0.004421525154164068, 0.018765652816308383, 0.002397345549545445, 0.007624593229565637, 0.004132749384261277, 0.022017075884110122, 0.010469200423816551, 0.002303028881453136, 0.00942924805303178, 0.0061608468652120715, 4.156704247147989e-06, 0.015167579080520015, 0.030168066440331466, 0.016545865838445688, 0.006214274854018703, 0.02927156888098128, 0.027422920384816172, 0.03392257010828923, 0.012414291012146503, 0.03629257141045568, 0.017865678641170047, 0.011938454297535775, 0.03441189347476559, 0.024045951894883875, 0.02521355852662603, 0.020836847192253698, 0.013399596522397219, 0.036000601710220285, 0.006090995938410852, 0.016115670412499086, 0.029122533147541235, 0.02143732186806802, 0.00874466456549359, 0.02675931478775067, 0.02464698298039763, 0.01819378947062014, 0.0001901135828116825, 0.03175394087585831, 0.0016100499908810158, 0.01794354058536509, 0.03665

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


SYNC Weights:[0.13126189903158955, 0.005793404948016389, 0.04184238597437201, 0.0002789179263735844, 0.04262366941434668, 0.11244203319869985, 0.016329436842756916, 0.005291787023994988, 7.659102274846014e-05, 1.4946007958256862e-05, 0.029969106119997837, 0.0796215338072988, 4.590707282665635e-11, 0.04285023617818094, 0.00039307145063849906, 0.0001822701159536858, 0.03657516957110752, 0.011930660913822501, 0.029498584346109812, 0.0006629676538479945, 0.00782948259237686, 0.002827866134266398, 0.01022799962441006, 0.015869675476264868, 0.002025867895386899, 0.0769590490280495, 0.0026172706854992282, 0.027694118791868984, 0.005502171339228651, 4.647213874525121e-05, 0.01450524022538643, 0.005414996798959256, 0.12379528680313849, 0.01802369898450648, 0.008315080438646313, 0.04496302700548342, 1.0520221885935658e-05, 0.003789825265000235, 6.4361571674107494e-12, 0.041943677638120215] 
Saved model: results/gru/cluster_1/gru_cluster_1_round_2_kr_norm.pt
 Processing cluster_2 with 179 clients

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.81it/s]


SYNC Weights:[4.634433970551156e-06, 3.610103783969167e-14, 8.883068712461089e-13, 0.00026054146861718156, 1.9605487154698786e-21, 0.9539828726861272, 0.001332704881674555, 3.564354845022326e-16, 3.4304478744336387e-12, 2.3359380444314244e-17, 0.006658952930219284, 7.808272204927821e-10, 6.390659088112543e-18, 1.1536138135885593e-05, 6.732934771150177e-14, 1.807376397046128e-09, 0.024216177871621093, 9.841758948925406e-21, 9.639614205158934e-35, 1.2898185781041876e-10, 4.3449292002899e-29, 1.1072849293042428e-06, 0.00015161585829162749, 2.8000230936561376e-07, 1.015303383491603e-17, 0.013379564182667362] 
Saved model: results/gru/cluster_2/gru_cluster_2_round_2_kr_norm.pt
\Round 3/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


SYNC Weights:[0.01089012525529706, 0.006458798292424528, 0.02590155588555827, 0.000982113222386417, 0.0010272094420806, 0.012282216509506099, 0.004271902364373585, 0.022613441801337224, 0.009804631100475266, 0.017636820839398847, 0.024505844293674025, 0.00308542236934405, 0.011707898113322904, 0.02779272748832448, 0.002626853860985836, 0.026650742858537305, 0.029827558332422628, 0.019482143895854554, 0.005417708757296053, 0.000550825813064287, 0.02447572032323057, 0.026924638358575146, 0.004910592535022054, 0.010166257727689332, 1.4430968238450867e-06, 0.0055291043550384535, 0.0012043863862907043, 0.0019712445307664837, 0.004310881535262795, 0.01333548086237276, 8.149447410381249e-05, 0.03744489130154524, 0.006976271728541349, 1.4309866332252907e-05, 0.038750480204029256, 0.016502430183571233, 0.0029540960020705814, 0.030018667385739904, 0.03922515213538123, 0.016001708031239785, 0.025434469473427715, 0.024437645837221307, 0.004314493594062507, 0.02369513309354538, 0.006193436545648563

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.74it/s]


SYNC Weights:[0.0048061632655667375, 6.8486118780585725e-22, 1.1585301215056e-05, 0.19904696287382717, 5.272794402385542e-09, 0.00016775944650708363, 7.08924005228437e-08, 8.207777190892949e-10, 0.002387791814924401, 9.837533138359994e-07, 2.9404261368716507e-07, 8.961816728229881e-05, 3.571076407560694e-05, 0.06827022424657943, 0.012356105408325952, 4.238486807197601e-13, 3.623270694539822e-10, 0.10804340220314168, 0.1592665377943179, 2.7061215975044706e-11, 7.028016447175046e-06, 1.4318401439406922e-06, 1.1870155250581628e-07, 7.227342685232277e-05, 1.533793428646506e-05, 6.199264228513159e-05, 0.0003396227556816766, 2.0662579091976307e-06, 2.2861058626943978e-05, 1.2639610308640596e-14, 4.3774033686862095e-05, 1.107585978194389e-20, 1.4217237636031098e-07, 2.796116628128415e-06, 7.293833599351467e-07, 2.3145626067395825e-08, 4.974397475655004e-07, 6.300271037228742e-08, 3.001215578526299e-11, 4.988369117654017e-08, 4.741879960152097e-09, 0.1573439757609907, 0.0002978431704994599, 0.

Training cluster_1: 100%|██████████| 40/40 [00:23<00:00,  1.71it/s]


SYNC Weights:[0.2780498348227976, 2.9921307167803177e-05, 2.118757398532109e-15, 1.8320222711662212e-10, 3.5400348103517426e-10, 2.8564094493302897e-09, 0.012072109045876482, 0.07803271905257221, 3.22173263223759e-05, 0.19854732833178113, 0.025451589960722523, 2.3106541867240096e-15, 0.00040533771144983955, 2.0615565785413912e-11, 2.660048701898514e-06, 5.097400882504742e-05, 1.4885177283422096e-05, 0.0018793905884240615, 0.27148853711481086, 3.9154190507911616e-05, 3.624588928091289e-11, 0.030888367563655834, 6.0902427891386623e-05, 6.999536004422084e-06, 0.047057021248715976, 3.0240432723239973e-10, 4.931297954713708e-19, 0.0013492901398064988, 0.00041622633516811116, 9.98750842743712e-12, 0.027580999835429727, 0.00010889137259071753, 0.0035196469212231555, 2.6554407406960434e-07, 0.002421205954431097, 0.0006762821533455482, 0.0004244865616524971, 0.0002218793055430379, 9.736564118896381e-08, 0.019170772504212334] 
Saved model: results/gru/cluster_1/gru_cluster_1_round_3_kr_norm.pt
 

Training cluster_2: 100%|██████████| 26/26 [00:15<00:00,  1.73it/s]


SYNC Weights:[0.00011388347692446912, 0.015159493873580595, 0.0007674920393875842, 0.05237895453212171, 4.422762106417307e-06, 7.24008845883013e-10, 1.1533747142227756e-05, 1.2953165357270427e-05, 2.42790005937043e-05, 4.9159329343900406e-05, 2.06254130739497e-06, 0.0001521252216983466, 3.297673997095157e-10, 0.09991540183419934, 3.559399536690558e-19, 2.178353845575824e-08, 2.086931720920337e-22, 0.23393182458387063, 0.3480936396622655, 0.020349259315671785, 7.362745394697381e-09, 4.375882206522225e-19, 0.0342751676944125, 0.15971207363749898, 0.03458225823953652, 0.00046398166198468867] 
Saved model: results/gru/cluster_2/gru_cluster_2_round_3_kr_norm.pt
\Round 4/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0: 100%|██████████| 80/80 [00:46<00:00,  1.73it/s]


SYNC Weights:[0.02291755358651784, 4.742629542401768e-05, 8.444587995409171e-05, 0.007140847402401427, 4.4799756072412813e-07, 1.574270189777558e-06, 0.0006146731119080289, 4.198735863547668e-07, 0.002276974695027244, 0.0218458936645115, 0.004326851461118609, 0.017179627467345204, 0.00576968605997732, 8.605908260459355e-06, 3.2226548970685093e-07, 0.002041225919946854, 0.0004921981296961806, 0.032680326375194926, 3.792433444747782e-05, 0.04624942314833293, 0.013312988954892347, 0.0034492695715573757, 0.005510518945943844, 0.021700441554174443, 0.002235259061352313, 0.027739091219289348, 0.0025757274835883585, 0.001829468095115252, 0.0043066879681775624, 1.0324824186341774e-06, 0.007222266387593681, 0.0003839007952656567, 0.0017057002329000893, 5.76608207463362e-05, 1.0405114137230743e-05, 0.11864236955247791, 6.422465793614286e-06, 0.02695577213062468, 0.000186080991620861, 0.002492291751219023, 0.016266295463361065, 2.2370730274927617e-08, 0.032154029993358924, 0.006041811086466599, 0

Training other: 100%|██████████| 64/64 [00:36<00:00,  1.76it/s]


SYNC Weights:[0.0015350520091775156, 0.0018273094566708592, 0.0046788589936195245, 0.00303654348993193, 0.006231960395920066, 0.007483093676663472, 0.03259290557861305, 0.009097404118825578, 0.01235370834183865, 5.12404474296929e-06, 0.005640001182855856, 0.012679614236536397, 0.004325131148752181, 0.00525370869995341, 0.002878263652759263, 0.0405547219545553, 0.015025989450034376, 0.0007375151971398329, 0.02538819994159658, 0.06556374987084676, 0.0023902430781531955, 0.0009403322511453965, 0.0002781286706660476, 0.04443457035416806, 1.6161978801474794e-05, 0.028082261088601637, 0.052488052799759564, 0.05944281562691961, 0.009321689092536774, 1.1605231235702283e-05, 0.030600237352722735, 0.03874433737294929, 0.009932193786333391, 0.0001856752879041716, 0.0018437428957606239, 0.01868856571874571, 0.026615668395923317, 0.007511965028261713, 0.03914291883015527, 0.04318359670483984, 0.037166553178038284, 0.00048031109633351525, 0.0036439543476664144, 4.621223666624478e-05, 0.0043452158873

Training cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.75it/s]


SYNC Weights:[0.014699674648086965, 1.2582828521198813e-05, 0.005710630953538031, 0.0033764451547953073, 0.017942139873202076, 0.02504463671239415, 1.686642816380579e-06, 0.00022320883287946835, 0.004667675165671961, 0.09030122976228626, 0.035755672242407624, 3.565133092615024e-05, 0.029339260513795238, 0.023507350320063063, 0.033774325282017396, 0.008941449912009156, 0.019018252616502175, 0.020708851440195984, 0.06288285927977907, 0.0330256393156277, 0.05824745733733166, 0.04660567300817488, 0.019136486750623755, 9.209454891748386e-05, 0.029975622930677744, 0.011862491619755135, 0.008600741231225307, 0.017212682929970396, 0.12140029734910307, 0.004977366248882371, 2.1251402384250566e-06, 0.021833262255366373, 0.024432688008199088, 0.02474519205273004, 0.024642845037726402, 0.057534864629546725, 0.03536397393604041, 5.6922613866958825e-06, 0.0032554282490209457, 0.061103790433564766] 
Saved model: results/gru/cluster_1/gru_cluster_1_round_4_kr_norm.pt
 Processing cluster_2 with 179 cli

Training cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.75it/s]


SYNC Weights:[8.175430601066606e-105, 2.908272360250939e-163, 6.369173081823506e-151, 3.4030753823157673e-74, 2.4022690517037835e-55, 3.1303208597061415e-40, 5.595839008732146e-134, 2.9262235613824243e-134, 1.0279549655462605e-129, 2.415450556482585e-129, 2.204440335747829e-75, 1.361276129976628e-134, 1.4743179206554549e-174, 7.143436488861361e-142, 1.127590788455467e-75, 2.547726016629489e-133, 4.217097925158844e-146, 3.804723652910579e-158, 0.9999999900000002, 1.2071680837615078e-56, 3.409698392356127e-96, 5.321674145677865e-103, 5.067189627587751e-209, 1.3015043786000759e-101, 7.876063959569107e-267, 4.243257671402249e-201] 
Saved model: results/gru/cluster_2/gru_cluster_2_round_4_kr_norm.pt
\Round 5/10
 Processing cluster_0 with 537 clients
Sampling 80 Clients


Training cluster_0:  76%|███████▋  | 61/80 [00:35<00:10,  1.78it/s]

### Clustered

### Kuramoto FedAvg

In [23]:


# Config
# List of models to experiment with
MODEL_NAMES = ["lstm", "gru", "moe_lstm", "moe_gru"]

# Config
NUM_CLIENTS = 1500
CLIENT_FRAC = 0.15
NUM_ROUNDS = 10
LOCAL_EPOCHS = 10
LR = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_FILE ="train_cleaned_reindex.feather" # "meter_0_data_cleaned.feather"


### Diff-Aware Fed Avg

In [12]:
class TimeSeriesDifficultyWeight:
    def __init__(self, num_clients, accumulate_iters=20):
        self.num_clients = num_clients
        self.last_loss = torch.ones(num_clients).float().to(DEVICE)
        self.learn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.unlearn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.ema_difficulty = torch.ones(num_clients).float().to(DEVICE)
        self.accumulate_iters = accumulate_iters

    def update(self, cid: int, loss_history: List[float]) -> float:
        """
        Update difficulty based on loss trend for a client.
        Expects a list of per-epoch losses.
        """
        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32).to(DEVICE)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        learn = torch.where(delta < 0, -delta * ratio, torch.tensor(0.0, device=current_loss.device))
        unlearn = torch.where(delta >= 0, delta * ratio, torch.tensor(0.0, device=current_loss.device))

        # EMA update
        momentum = (self.accumulate_iters - 1) / self.accumulate_iters
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty score
        diff_ratio = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        difficulty = diff_ratio #torch.pow(diff_ratio, 1 / 5)

        # Smooth difficulty over rounds
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty

        self.last_loss[cid] = current_loss
        return self.ema_difficulty[cid].item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        weights = [self.ema_difficulty[cid].item() for cid in client_ids]
        total = sum(weights)
        if total == 0:
            return [1.0 / len(client_ids)] * len(client_ids)
        return [w / total for w in weights]


In [15]:
1/np.array([0.1,0.2,2])

array([10. ,  5. ,  0.5])

In [18]:
MODEL_NAMES = ['gru','lstm']

In [19]:
for model_name in MODEL_NAMES:
    difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS,accumulate_iters=10)
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        sampled_clients = random.sample(range(NUM_CLIENTS), int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []
        difficulty_scores = []

        for cid in tqdm(sampled_clients):
            model = model_fn(model_name).to(DEVICE)
            set_weights(model, global_weights)
            train_loader, _ = load_energy_data_feather(cid, filepath=DATA_FILE)
            updated_weights, loss_history = train_model(
                model, train_loader,
                device=DEVICE, learning_rate=LR,
                loss_fn=None, optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

            # Update difficulty
            difficulty = difficulty_tracker.update(cid, loss_history)
            difficulty_scores.append(difficulty)

        # Normalize difficulty scores
        normalized_weights = difficulty_tracker.get_normalized_weights(sampled_clients)

        # Difficulty-aware weighted aggregation
        global_weights = average_weights(local_weights, client_weights=normalized_weights)
        set_weights(global_model, global_weights)

        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_diff_lr2.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: gru


100%|██████████| 211/211 [01:35<00:00,  2.21it/s]


Saved global model to results/gru/gru_round_1_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_2_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_3_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_4_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_5_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_6_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_7_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_8_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_9_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_10_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_11_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_12_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_13_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_14_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_15_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_16_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_17_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_18_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_19_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_20_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_21_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_22_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_23_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_24_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_25_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_26_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.27it/s]


Saved global model to results/gru/gru_round_27_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_28_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_29_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_30_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_31_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_32_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_33_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_34_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.22it/s]


Saved global model to results/gru/gru_round_35_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_36_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_37_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_38_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_39_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_40_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_41_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_42_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_43_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_44_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_45_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_46_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_47_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_48_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_49_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_50_diff_lr2.pt
Starting experiment with model: lstm


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_1_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_2_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_3_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_4_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/lstm/lstm_round_5_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_6_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_7_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_8_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_9_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_10_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_11_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_12_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_13_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_14_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_15_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_16_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.22it/s]


Saved global model to results/lstm/lstm_round_17_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_18_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_19_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_20_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_21_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_22_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_23_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_24_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_25_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_26_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_27_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_28_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_29_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_30_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/lstm/lstm_round_31_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_32_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_33_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_34_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_35_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_36_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_37_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_38_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_39_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_40_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_41_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_42_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_43_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_44_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_45_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_46_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/lstm/lstm_round_47_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_48_diff_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/lstm/lstm_round_49_diff_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]

Saved global model to results/lstm/lstm_round_50_diff_lr2.pt





In [None]:
class TimeSeriesDifficultyWeightRev:
    def __init__(self, num_clients, accumulate_iters=20):
        self.num_clients = num_clients
        self.last_loss = torch.ones(num_clients).float().to(DEVICE)
        self.learn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.unlearn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.ema_difficulty = torch.ones(num_clients).float().to(DEVICE)
        self.accumulate_iters = accumulate_iters

    def update(self, cid: int, loss_history: List[float]) -> float:
        """
        Update difficulty based on loss trend for a client.
        Expects a list of per-epoch losses.
        """
        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32).to(DEVICE)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        learn = torch.where(delta < 0, -delta * ratio, torch.tensor(0.0, device=current_loss.device))
        unlearn = torch.where(delta >= 0, delta * ratio, torch.tensor(0.0, device=current_loss.device))

        # EMA update
        momentum = (self.accumulate_iters - 1) / self.accumulate_iters
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty score
        diff_ratio = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        difficulty = diff_ratio #torch.pow(diff_ratio, 1 / 5)

        # Smooth difficulty over rounds
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty

        self.last_loss[cid] = current_loss
        return self.ema_difficulty[cid].item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        weights = [1/self.ema_difficulty[cid].item() for cid in client_ids]
        total = sum(weights)
        if total == 0:
            return [1.0 / len(client_ids)] * len(client_ids)
        return [w / total for w in weights]


In [20]:
for model_name in MODEL_NAMES:
    difficulty_tracker = TimeSeriesDifficultyWeightRev(num_clients=NUM_CLIENTS,accumulate_iters=20)
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        sampled_clients = random.sample(range(NUM_CLIENTS), int(CLIENT_FRAC * NUM_CLIENTS))
        local_weights = []
        difficulty_scores = []

        for cid in tqdm(sampled_clients):
            model = model_fn(model_name).to(DEVICE)
            set_weights(model, global_weights)
            train_loader, _ = load_energy_data_feather(cid, filepath=DATA_FILE)
            updated_weights, loss_history = train_model(
                model, train_loader,
                device=DEVICE, learning_rate=LR,
                loss_fn=None, optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            local_weights.append(updated_weights)

            # Update difficulty
            difficulty = difficulty_tracker.update(cid, loss_history)
            difficulty_scores.append(difficulty)

        # Normalize difficulty scores
        normalized_weights = difficulty_tracker.get_normalized_weights(sampled_clients)

        # Difficulty-aware weighted aggregation
        global_weights = average_weights(local_weights, client_weights=normalized_weights)
        set_weights(global_model, global_weights)

        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_diff_rev_lr2.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: gru


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_1_diff_rev_lr2.pt


100%|██████████| 211/211 [01:35<00:00,  2.21it/s]


Saved global model to results/gru/gru_round_2_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.22it/s]


Saved global model to results/gru/gru_round_3_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_4_diff_rev_lr2.pt


100%|██████████| 211/211 [02:00<00:00,  1.76it/s]


Saved global model to results/gru/gru_round_5_diff_rev_lr2.pt


100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_6_diff_rev_lr2.pt


100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_7_diff_rev_lr2.pt


100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_8_diff_rev_lr2.pt


100%|██████████| 211/211 [02:05<00:00,  1.69it/s]


Saved global model to results/gru/gru_round_9_diff_rev_lr2.pt


100%|██████████| 211/211 [02:02<00:00,  1.72it/s]


Saved global model to results/gru/gru_round_10_diff_rev_lr2.pt


100%|██████████| 211/211 [02:01<00:00,  1.73it/s]


Saved global model to results/gru/gru_round_11_diff_rev_lr2.pt


100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_12_diff_rev_lr2.pt


100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/gru/gru_round_13_diff_rev_lr2.pt


100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_14_diff_rev_lr2.pt


100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_15_diff_rev_lr2.pt


100%|██████████| 211/211 [02:05<00:00,  1.68it/s]


Saved global model to results/gru/gru_round_16_diff_rev_lr2.pt


100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/gru/gru_round_17_diff_rev_lr2.pt


100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/gru/gru_round_18_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_19_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_20_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_21_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_22_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_23_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_24_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_25_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_26_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_27_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_28_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_29_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_30_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_31_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_32_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_33_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_34_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_35_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_36_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_37_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_38_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_39_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_40_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/gru/gru_round_41_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.26it/s]


Saved global model to results/gru/gru_round_42_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.24it/s]


Saved global model to results/gru/gru_round_43_diff_rev_lr2.pt


100%|██████████| 211/211 [01:32<00:00,  2.27it/s]


Saved global model to results/gru/gru_round_44_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_45_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_46_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.27it/s]


Saved global model to results/gru/gru_round_47_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.27it/s]


Saved global model to results/gru/gru_round_48_diff_rev_lr2.pt


100%|██████████| 211/211 [01:33<00:00,  2.25it/s]


Saved global model to results/gru/gru_round_49_diff_rev_lr2.pt


100%|██████████| 211/211 [01:35<00:00,  2.21it/s]


Saved global model to results/gru/gru_round_50_diff_rev_lr2.pt
Starting experiment with model: lstm


100%|██████████| 211/211 [01:36<00:00,  2.18it/s]


Saved global model to results/lstm/lstm_round_1_diff_rev_lr2.pt


100%|██████████| 211/211 [01:38<00:00,  2.15it/s]


Saved global model to results/lstm/lstm_round_2_diff_rev_lr2.pt


100%|██████████| 211/211 [01:39<00:00,  2.12it/s]


Saved global model to results/lstm/lstm_round_3_diff_rev_lr2.pt


100%|██████████| 211/211 [01:34<00:00,  2.23it/s]


Saved global model to results/lstm/lstm_round_4_diff_rev_lr2.pt


100%|██████████| 211/211 [01:39<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_5_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_6_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_7_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_8_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_9_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_10_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_11_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_12_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_13_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_14_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_15_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_16_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_17_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_18_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_19_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_20_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_21_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_22_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_23_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_24_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_25_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_26_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_27_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_28_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_29_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_30_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_31_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_32_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_33_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_34_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_35_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_36_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_37_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.03it/s]


Saved global model to results/lstm/lstm_round_38_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_39_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_40_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/lstm/lstm_round_41_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_42_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_43_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_44_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_45_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_46_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_47_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/lstm/lstm_round_48_diff_rev_lr2.pt


100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/lstm/lstm_round_49_diff_rev_lr2.pt


100%|██████████| 211/211 [01:42<00:00,  2.06it/s]

Saved global model to results/lstm/lstm_round_50_diff_rev_lr2.pt





In [26]:
for model_name in MODEL_NAMES:
    difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)
    print(f"Starting experiment with model: {model_name}")
    # Initialize cluster-specific models and difficulty trackers
    cluster_models = {}
    cluster_weights = {}
    cluster_difficulty = {}

    for cluster_name in CLUSTERS:
        model = model_fn(model_name).to(DEVICE)
        weights = get_weights(model)
        difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)

        cluster_models[cluster_name] = model
        cluster_weights[cluster_name] = weights
        cluster_difficulty[cluster_name] = difficulty_tracker

    # Training loop
    for rnd in range(NUM_ROUNDS):
        print(f"\nRound {rnd + 1}/{NUM_ROUNDS}")

        for cluster_name, client_ids in CLUSTERS.items():
            print(f"Training {cluster_name} with {len(client_ids)} clients")

            # Sample a subset of clients in this cluster
            sampled_clients = random.sample(client_ids, k=int(CLIENT_FRAC * len(client_ids)))
            local_weights = []

            difficulty_tracker = cluster_difficulty[cluster_name]
            weights_before = cluster_weights[cluster_name]
            model_template = cluster_models[cluster_name]

            for cid in tqdm(sampled_clients, desc=f"Cluster {cluster_name}"):
                local_model = model_fn(model_name).to(DEVICE)
                set_weights(local_model, weights_before)

                train_loader, _ = load_energy_data_feather(cid, filepath=DATA_FILE)
                updated_weights, loss_history = train_model(
                    local_model, train_loader,
                    device=DEVICE, learning_rate=LR,
                    loss_fn=None, optimizer_class=optim.Adam,
                    epochs=LOCAL_EPOCHS
                )
                local_weights.append(updated_weights)

                # Update difficulty tracking
                difficulty_tracker.update(cid, loss_history)

            # Get difficulty-based normalized weights
            normalized_weights = difficulty_tracker.get_normalized_weights(sampled_clients)

            # Weighted aggregation
            new_weights = average_weights(local_weights, client_weights=normalized_weights)

            # Update cluster model
            set_weights(model_template, new_weights)
            cluster_weights[cluster_name] = new_weights

            # Save checkpoint
            ckpt_dir = os.path.join("results", model_name, cluster_name)
            os.makedirs(ckpt_dir, exist_ok=True)
            ckpt_path = os.path.join(ckpt_dir, f"{model_name}_{cluster_name}_round_{rnd + 1}_diff.pt")
            torch.save(model_template.state_dict(), ckpt_path)
            print(f"Saved model to {ckpt_path}")


Starting experiment with model: lstm

Round 1/10
Training cluster_0 with 537 clients


Cluster cluster_0:   0%|          | 0/80 [00:00<?, ?it/s]

Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.14it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_1_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s]


Saved model to results/lstm/other/lstm_other_round_1_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.09it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_1_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.05it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_1_diff.pt

Round 2/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.19it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_2_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s]


Saved model to results/lstm/other/lstm_other_round_2_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.07it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_2_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.21it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_2_diff.pt

Round 3/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.13it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_3_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s]


Saved model to results/lstm/other/lstm_other_round_3_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.17it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_3_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.08it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_3_diff.pt

Round 4/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.10it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_4_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.14it/s]


Saved model to results/lstm/other/lstm_other_round_4_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.12it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_4_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.12it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_4_diff.pt

Round 5/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:26<00:00,  3.00it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_5_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.06it/s]


Saved model to results/lstm/other/lstm_other_round_5_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.17it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_5_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.14it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_5_diff.pt

Round 6/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.18it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_6_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.14it/s]


Saved model to results/lstm/other/lstm_other_round_6_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.05it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_6_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.06it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_6_diff.pt

Round 7/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.13it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_7_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s]


Saved model to results/lstm/other/lstm_other_round_7_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.15it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_7_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.09it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_7_diff.pt

Round 8/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.11it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_8_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s]


Saved model to results/lstm/other/lstm_other_round_8_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.12it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_8_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.10it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_8_diff.pt

Round 9/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.12it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_9_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s]


Saved model to results/lstm/other/lstm_other_round_9_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.21it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_9_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.15it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_9_diff.pt

Round 10/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.18it/s]


Saved model to results/lstm/cluster_0/lstm_cluster_0_round_10_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.17it/s]


Saved model to results/lstm/other/lstm_other_round_10_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.20it/s]


Saved model to results/lstm/cluster_1/lstm_cluster_1_round_10_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.02it/s]


Saved model to results/lstm/cluster_2/lstm_cluster_2_round_10_diff.pt
Starting experiment with model: gru

Round 1/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.11it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_1_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.08it/s]


Saved model to results/gru/other/gru_other_round_1_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.13it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_1_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.10it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_1_diff.pt

Round 2/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:25<00:00,  3.09it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_2_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s]


Saved model to results/gru/other/gru_other_round_2_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.04it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_2_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.02it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_2_diff.pt

Round 3/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:27<00:00,  2.90it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_3_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:53<00:00,  1.21it/s]


Saved model to results/gru/other/gru_other_round_3_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:32<00:00,  1.24it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_3_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:20<00:00,  1.29it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_3_diff.pt

Round 4/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [01:06<00:00,  1.21it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_4_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:51<00:00,  1.24it/s]


Saved model to results/gru/other/gru_other_round_4_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:28<00:00,  1.42it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_4_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:17<00:00,  1.52it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_4_diff.pt

Round 5/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:45<00:00,  1.76it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_5_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:32<00:00,  2.00it/s]


Saved model to results/gru/other/gru_other_round_5_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:20<00:00,  1.96it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_5_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:13<00:00,  1.96it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_5_diff.pt

Round 6/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:36<00:00,  2.17it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_6_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:26<00:00,  2.41it/s]


Saved model to results/gru/other/gru_other_round_6_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.59it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_6_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  2.90it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_6_diff.pt

Round 7/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:26<00:00,  2.98it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_7_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:21<00:00,  2.94it/s]


Saved model to results/gru/other/gru_other_round_7_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.05it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_7_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  2.93it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_7_diff.pt

Round 8/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:26<00:00,  3.05it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_8_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.07it/s]


Saved model to results/gru/other/gru_other_round_8_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.01it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_8_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  2.97it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_8_diff.pt

Round 9/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:27<00:00,  2.96it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_9_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.07it/s]


Saved model to results/gru/other/gru_other_round_9_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:13<00:00,  3.02it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_9_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.38it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_9_diff.pt

Round 10/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:30<00:00,  2.62it/s]


Saved model to results/gru/cluster_0/gru_cluster_0_round_10_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s]


Saved model to results/gru/other/gru_other_round_10_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:12<00:00,  3.22it/s]


Saved model to results/gru/cluster_1/gru_cluster_1_round_10_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:08<00:00,  3.07it/s]


Saved model to results/gru/cluster_2/gru_cluster_2_round_10_diff.pt
Starting experiment with model: moe_lstm

Round 1/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:33<00:00,  2.40it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_1_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.47it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_1_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.52it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_1_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:12<00:00,  2.07it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_1_diff.pt

Round 2/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:43<00:00,  1.86it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_2_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:34<00:00,  1.85it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_2_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.41it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_2_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:11<00:00,  2.30it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_2_diff.pt

Round 3/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:33<00:00,  2.36it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_3_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.50it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_3_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.49it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_3_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.43it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_3_diff.pt

Round 4/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:32<00:00,  2.50it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_4_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.54it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_4_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.55it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_4_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.45it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_4_diff.pt

Round 5/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:32<00:00,  2.49it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_5_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:27<00:00,  2.32it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_5_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.78it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_5_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.79it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_5_diff.pt

Round 6/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.94it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_6_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.54it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_6_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.39it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_6_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.43it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_6_diff.pt

Round 7/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:33<00:00,  2.36it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_7_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:26<00:00,  2.45it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_7_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.46it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_7_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.48it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_7_diff.pt

Round 8/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.54it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_8_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:26<00:00,  2.46it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_8_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:17<00:00,  2.32it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_8_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.43it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_8_diff.pt

Round 9/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:33<00:00,  2.40it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_9_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.56it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_9_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.58it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_9_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.53it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_9_diff.pt

Round 10/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:32<00:00,  2.47it/s]


Saved model to results/moe_lstm/cluster_0/moe_lstm_cluster_0_round_10_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.48it/s]


Saved model to results/moe_lstm/other/moe_lstm_other_round_10_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.43it/s]


Saved model to results/moe_lstm/cluster_1/moe_lstm_cluster_1_round_10_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.50it/s]


Saved model to results/moe_lstm/cluster_2/moe_lstm_cluster_2_round_10_diff.pt
Starting experiment with model: moe_gru

Round 1/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:34<00:00,  2.35it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_1_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:27<00:00,  2.36it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_1_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.37it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_1_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.51it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_1_diff.pt

Round 2/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:32<00:00,  2.49it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_2_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:31<00:00,  2.06it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_2_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:21<00:00,  1.86it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_2_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.75it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_2_diff.pt

Round 3/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:38<00:00,  2.06it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_3_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:30<00:00,  2.12it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_3_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_3_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:14<00:00,  1.81it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_3_diff.pt

Round 4/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:41<00:00,  1.94it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_4_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:25<00:00,  2.53it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_4_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.59it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_4_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.53it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_4_diff.pt

Round 5/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.57it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_5_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:24<00:00,  2.57it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_5_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.60it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_5_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.56it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_5_diff.pt

Round 6/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.56it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_6_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:24<00:00,  2.66it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_6_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.58it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_6_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.59it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_6_diff.pt

Round 7/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.54it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_7_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:24<00:00,  2.58it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_7_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.51it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_7_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.48it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_7_diff.pt

Round 8/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.53it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_8_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:24<00:00,  2.61it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_8_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_8_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:09<00:00,  2.61it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_8_diff.pt

Round 9/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:31<00:00,  2.52it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_9_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:23<00:00,  2.67it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_9_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.49it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_9_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.44it/s]


Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_9_diff.pt

Round 10/10
Training cluster_0 with 537 clients


Cluster cluster_0: 100%|██████████| 80/80 [00:32<00:00,  2.48it/s]


Saved model to results/moe_gru/cluster_0/moe_gru_cluster_0_round_10_diff.pt
Training other with 428 clients


Cluster other: 100%|██████████| 64/64 [00:24<00:00,  2.65it/s]


Saved model to results/moe_gru/other/moe_gru_other_round_10_diff.pt
Training cluster_1 with 269 clients


Cluster cluster_1: 100%|██████████| 40/40 [00:16<00:00,  2.49it/s]


Saved model to results/moe_gru/cluster_1/moe_gru_cluster_1_round_10_diff.pt
Training cluster_2 with 179 clients


Cluster cluster_2: 100%|██████████| 26/26 [00:10<00:00,  2.48it/s]

Saved model to results/moe_gru/cluster_2/moe_gru_cluster_2_round_10_diff.pt





### Diff-Sync FedAvg