In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
import pandas as pd
from collections import defaultdict
import os
import torch
import torch.optim as optim
from tqdm import tqdm


from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from AggregationStrategy import sync_aggregate,average_weights,sync_aggregate_norm,sync_aggregate_softmax, fedavgm_update

In [3]:
# def average_weights(weights_list):
#     """Averages model weights provided as a list of get_weights outputs."""
#     avg_weights = []
#     num_models = len(weights_list)
#     for layer_weights in zip(*weights_list):
#         avg_layer = np.mean(np.array(layer_weights), axis=0)
#         avg_weights.append(avg_layer)
#     return avg_weights

In [4]:
# Load the full training table from the Feather file into memory.
df = pd.read_feather("train_final.feather")

In [5]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,primary_use,air_temperature
7593144,0,0,2016-05-21 01:00:00,72.221012,Education,25.6
7593145,1,0,2016-05-21 01:00:00,39.611586,Education,25.6
7593146,2,0,2016-05-21 01:00:00,1.920567,Education,25.6
7593147,3,0,2016-05-21 01:00:00,111.532464,Education,25.6
7593148,4,0,2016-05-21 01:00:00,456.734799,Education,25.6


In [6]:
# Print the index range, column dtypes and memory footprint of the table.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11712248 entries, 7593144 to 20216099
Data columns (total 6 columns):
 #   Column           Dtype         
---  ------           -----         
 0   building_id      int64         
 1   meter            int64         
 2   timestamp        datetime64[ns]
 3   meter_reading    float64       
 4   primary_use      object        
 5   air_temperature  float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(1)
memory usage: 625.5+ MB


In [7]:

def cluster_buildings_top3_primary_use(df: pd.DataFrame) -> dict:
    """
    Clusters building_ids by the top 3 primary_use categories.
    Every other category (including a missing primary_use) goes to 'other'.

    The ranking is taken from the row counts of df['primary_use'], i.e. a
    category is "top" when it has the most rows in df, not necessarily the
    most buildings. A building that appears with more than one primary_use
    is listed once per distinct (building_id, primary_use) pair.

    Args:
        df (pd.DataFrame): Input DataFrame with 'building_id' and 'primary_use'.

    Returns:
        dict: Maps 'cluster_0', 'cluster_1', 'cluster_2' and 'other' to lists
        of building ids, in order of first appearance in df. Only clusters
        that received at least one building are present, so the result can
        have fewer than four keys (and is empty for an empty df).
    """
    # Step 1: rank the (up to) three most common primary_use values.
    top3_uses = df['primary_use'].value_counts().nlargest(3).index.tolist()

    # A lookup table instead of indexing top3_uses[0..2] directly: with fewer
    # than three categories the positional lookup raised IndexError as soon as
    # a building had a primary_use outside the ranking (e.g. a missing value).
    cluster_of_use = {use: f'cluster_{rank}' for rank, use in enumerate(top3_uses)}

    # Step 2: one entry per distinct building_id/primary_use pair.
    unique_buildings = df[['building_id', 'primary_use']].drop_duplicates()

    # Step 3: bucket the building ids; plain column iteration replaces the
    # much slower row-wise iterrows().
    clusters = defaultdict(list)
    building_ids = unique_buildings['building_id'].tolist()
    uses = unique_buildings['primary_use'].tolist()
    for bldg_id, use in zip(building_ids, uses):
        clusters[cluster_of_use.get(use, 'other')].append(bldg_id)

    return dict(clusters)


In [8]:
# Same ranking that cluster_buildings_top3_primary_use computes internally:
# the three primary_use labels with the most rows in df.
top3_uses = df['primary_use'].value_counts().nlargest(3).index.tolist()

In [9]:
# Show the ranked labels (index 0 is the most frequent one).
top3_uses

['Education', 'Office', 'Entertainment/public assembly']

In [10]:
# Group the buildings, then print one summary line per cluster:
# its size and the first five building ids.
clusters = cluster_buildings_top3_primary_use(df)

for name in clusters:
    ids = clusters[name]
    print(f"{name}: {len(ids)} buildings {ids[:5]}...")


cluster_0: 537 buildings [0, 1, 2, 3, 4]...
other: 428 buildings [6, 12, 27, 33, 34]...
cluster_1: 269 buildings [9, 15, 17, 19, 21]...
cluster_2: 179 buildings [10, 59, 87, 88, 40]...


In [11]:


# Config
# NOTE(review): every name in this cell is assigned again by a later config
# cell of this notebook (there with NUM_ROUNDS = 50 and LOCAL_EPOCHS = 5), so
# the values below only hold until that cell runs.
# List of models to experiment with
MODEL_NAMES = ["lstm", "gru", "moe_lstm", "moe_gru"]

# Config
NUM_CLIENTS = 1410  # client ids are sampled from range(NUM_CLIENTS)
CLIENT_FRAC = 0.15  # fraction of the clients sampled in each round
NUM_ROUNDS = 10  # federated rounds per model
LOCAL_EPOCHS = 10  # passed to train_model as `epochs`
LR = 0.001  # passed to train_model as `learning_rate`
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_FILE ="train_final.feather" # "meter_0_data_cleaned.feather"


### Diff-Sync FedAvg

In [11]:
def sigmoid_rampup(current: int, rampup_length: int, max_value: float = 1.0) -> float:
    """Exponential ramp-up coefficient ``max_value * exp(-5 * (1 - t)^2)``.

    ``t`` is ``current / rampup_length`` with ``current`` clipped to
    ``[0, rampup_length]``. The curve therefore starts at
    ``max_value * exp(-5)`` (not exactly 0) and reaches ``max_value`` once
    ``current >= rampup_length``.

    Args:
        current: Current step (e.g. round index).
        rampup_length: Number of steps over which the value ramps up. A value
            of 0 disables the ramp-up and returns ``max_value`` unchanged.
        max_value: Value reached at the end of the ramp-up.

    Returns:
        The ramp-up coefficient for ``current``.
    """
    if rampup_length != 0:
        progress = np.clip(current, 0, rampup_length) / rampup_length
        remaining = 1.0 - progress
        return max_value * float(np.exp(-5.0 * remaining * remaining))
    return max_value


In [12]:
import torch
import numpy as np
from typing import List, Dict

class DASWeightManager:
    """Keeps per-client difficulty and alignment scores and turns them into
    aggregation weights.

    For every client id in ``range(num_clients)`` the manager stores the last
    reported loss, exponential moving averages (EMA) of a "learn" and an
    "unlearn" score, an EMA difficulty value and the cosine alignment of the
    client's most recent update. ``get_normalized_weights`` blends alignment
    and difficulty with a coefficient that ramps up with the round counter.

    Args:
        num_clients: Number of client slots to allocate state for.
        rampup_rounds: Length of the ramp-up in rounds. It also sets the EMA
            momentum to ``(rampup_rounds - 1) / rampup_rounds``.
        device: Torch device on which the state tensors live.
    """

    def __init__(self, num_clients, rampup_rounds=20, device="cpu"):
        self.num_clients = num_clients
        self.device = device
        # Loss reported by each client the last time it was trained (starts at 1).
        self.last_loss = torch.ones(num_clients, dtype=torch.float32, device=device)
        # EMA of the loss-decrease ("learn") and loss-increase ("unlearn") scores.
        self.learn_score = torch.zeros(num_clients, dtype=torch.float32, device=device)
        self.unlearn_score = torch.zeros(num_clients, dtype=torch.float32, device=device)
        # EMA of unlearn/learn; larger means the client is considered harder.
        self.ema_difficulty = torch.ones(num_clients, dtype=torch.float32, device=device)
        self.alignments = torch.ones(num_clients, dtype=torch.float32, device=device)  # cosine similarities
        self.round = 0
        self.rampup_rounds = rampup_rounds

    def sigmoid_rampup(self) -> float:
        """Ramp-up coefficient for the current round (module-level ``sigmoid_rampup``)."""
        return sigmoid_rampup(self.round, self.rampup_rounds)

    def _ema_momentum(self) -> float:
        """EMA momentum ``(R - 1) / R`` with ``R = rampup_rounds``.

        A non-positive ``rampup_rounds`` (which ``sigmoid_rampup`` accepts as
        "no ramp-up") is treated as ``R = 1``, i.e. no smoothing, instead of
        dividing by zero.
        """
        horizon = self.rampup_rounds if self.rampup_rounds > 0 else 1
        return (horizon - 1) / horizon

    def update_difficulty(self, cid: int, loss_history: List[float]):
        """Update the difficulty of client ``cid`` from its latest training loss.

        Only ``loss_history[-1]`` is used; it is compared with the loss stored
        from the client's previous participation. An empty ``loss_history``
        leaves the client's state untouched.

        NOTE(review): the log-ratio assumes non-negative losses — confirm for
        the loss function in use.
        """
        if len(loss_history) == 0:
            return

        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32, device=self.device)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        # delta and ratio always share their sign, so their product is the
        # non-negative magnitude of the change. The previous `-delta * ratio`
        # for the learn branch made that score negative, which in turn made the
        # difficulty ratio (and the aggregation weights) negative or singular.
        change = delta * ratio
        zero = torch.zeros_like(change)
        learn = torch.where(delta < 0, change, zero)
        unlearn = torch.where(delta >= 0, change, zero)

        momentum = self._ema_momentum()
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty = how much the client "unlearns" relative to how much it learns.
        difficulty = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty
        self.last_loss[cid] = current_loss

    def update_alignment(self, cid: int, local_update: torch.Tensor, global_update: torch.Tensor):
        """Store the cosine similarity between the client's update and a reference update.

        Both tensors are flattened before the comparison.
        """
        cos_sim = torch.nn.functional.cosine_similarity(
            local_update.flatten(), global_update.flatten(), dim=0
        )
        self.alignments[cid] = cos_sim.item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        """Return one aggregation weight per entry of ``client_ids``.

        Alignment scores are min-max scaled over the given clients and
        difficulty scores are divided by their sum. The two are blended as
        ``(1 - gamma) * alignment + gamma * difficulty`` with ``gamma`` from
        ``sigmoid_rampup()``, then divided by the total so the weights sum to
        (approximately) one. An empty ``client_ids`` yields an empty list.
        """
        if len(client_ids) == 0:
            return []

        gamma = self.sigmoid_rampup()

        # Gather the selected clients in one indexing op rather than one
        # .item() call (a device sync on GPU) per client.
        index = torch.as_tensor(list(client_ids), dtype=torch.long, device=self.device)

        # Normalize alignments
        align_vals = self.alignments[index]
        align_vals = (align_vals - align_vals.min()) / (align_vals.max() - align_vals.min() + 1e-8)

        # Normalize difficulty
        diff_vals = self.ema_difficulty[index]
        diff_vals = diff_vals / (diff_vals.sum() + 1e-8)

        combined = (1 - gamma) * align_vals + gamma * diff_vals
        combined = combined / (combined.sum() + 1e-8)
        return combined.cpu().tolist()

    def step_round(self):
        """Advance the round counter that drives the ramp-up."""
        self.round += 1


In [13]:
# NOTE(review): this value is rebound by the `for model_name in MODEL_NAMES`
# loop further down, so it only matters for cells run before that loop.
model_name = 'gru'

In [14]:
def average_weights(weights_list, client_weights):
    """Weighted sum of model weights, layer by layer.

    Note: this definition replaces the ``average_weights`` imported from
    ``AggregationStrategy`` earlier in the notebook and takes an extra
    ``client_weights`` argument.

    Args:
        weights_list: One entry per client; each entry is a sequence of
            per-layer weights (anything supporting ``* float`` and ``+``).
        client_weights: One scalar per client, in the same order as
            ``weights_list``. They are used as given (not re-normalized).

    Returns:
        list: For every layer ``i``, ``sum_j weights_list[j][i] * client_weights[j]``.

    Raises:
        ValueError: If ``weights_list`` is empty, if the number of clients and
            client weights differ, or if the clients disagree on the number
            of layers. A mismatch was previously either an IndexError or a
            silently truncated sum.
    """
    if len(weights_list) == 0:
        raise ValueError("weights_list must contain at least one set of model weights")
    if len(weights_list) != len(client_weights):
        raise ValueError(
            f"got {len(weights_list)} weight sets but {len(client_weights)} client weights"
        )
    num_layers = len(weights_list[0])
    if any(len(w) != num_layers for w in weights_list):
        raise ValueError("all weight sets must have the same number of layers")

    return [
        sum(w[i] * client_weights[j] for j, w in enumerate(weights_list))
        for i in range(num_layers)
    ]


In [15]:


# Experiment configuration used by the federated training loop.

# Model architectures to run; one federated experiment per entry.
MODEL_NAMES = ["lstm", "gru", "moe_lstm", "moe_gru"]

NUM_CLIENTS = 1410  # client ids are sampled from range(NUM_CLIENTS)
CLIENT_FRAC = 0.15  # fraction of the clients sampled in each round
NUM_ROUNDS = 50  # federated rounds per model
LOCAL_EPOCHS = 5  # passed to train_model as `epochs`
LR = 0.001  # passed to train_model as `learning_rate`
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_FILE = "train_final.feather"  # "meter_0_data_cleaned.feather"

# Seed every RNG in use so that client sampling (random) and any numpy/torch
# randomness are repeatable after Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


In [30]:
# MODEL_NAMES = ['gru','lstm']

In [16]:
# Module-level DAS weight manager: one state slot per client id in
# range(NUM_CLIENTS), with the ramp-up length set to NUM_ROUNDS.
# NOTE(review): the object keeps its round counter and per-client scores for as
# long as it is reused; build a new instance to start from a clean state.
das_weight_manager = DASWeightManager(num_clients=NUM_CLIENTS, rampup_rounds=NUM_ROUNDS, device=DEVICE)


In [17]:
# Federated training with DAS-weighted aggregation: one independent
# experiment per entry of MODEL_NAMES, NUM_ROUNDS rounds each.

# Always train at least one client per round; with k == 0 there would be no
# updates to stack and aggregate.
clients_per_round = max(1, int(CLIENT_FRAC * NUM_CLIENTS))

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    # Directory to save checkpoints
    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Init model and weights
    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    # Fresh DAS state for every model. Reusing one manager across models
    # carried the round counter, last losses and difficulty scores of the
    # previous architecture into the next experiment (and pinned the ramp-up
    # coefficient at its maximum from the second model on).
    das_weight_manager = DASWeightManager(
        num_clients=NUM_CLIENTS, rampup_rounds=NUM_ROUNDS, device=DEVICE
    )

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")
        sampled_clients = random.sample(range(NUM_CLIENTS), k=clients_per_round)

        # The global weights are the same for every client of this round, so
        # convert them to tensors once instead of once per client.
        global_tensors = [torch.tensor(gw, device=DEVICE) for gw in global_weights]

        local_weights = []
        local_updates = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            # Every client starts the round from the current global weights.
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            # Only the training loader is used here; the second loader is ignored.
            train_loader, _test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, loss_history = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )

            # Store weights
            local_weights.append(updated_weights)

            # Update difficulty score
            das_weight_manager.update_difficulty(cid, loss_history)

            # Compute local update Δw = w_local - w_global as one flat vector
            local_update = torch.cat([
                (torch.tensor(lw, device=DEVICE) - gt).flatten()
                for gt, lw in zip(global_tensors, updated_weights)
            ])
            local_updates.append((cid, local_update))

        # Compute reference direction (mean update)
        all_updates = torch.stack([update for _, update in local_updates])
        global_update_ref = all_updates.mean(dim=0)

        # Update alignment scores for each client
        for cid, local_update in local_updates:
            das_weight_manager.update_alignment(cid, local_update, global_update_ref)

        # Get DAS weights for aggregation (same order as local_weights)
        das_weights = das_weight_manager.get_normalized_weights(sampled_clients)

        # Aggregate model using DAS weights
        global_weights = average_weights(local_weights, client_weights=das_weights)
        set_weights(global_model, global_weights)

        # Save checkpoint
        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_das2.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")

        # Advance to next round
        das_weight_manager.step_round()


Starting experiment with model: lstm
Round 1/50


Training clients:   0%|          | 0/211 [00:00<?, ?it/s]

Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_1_das2.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [01:46<00:00,  1.98it/s]


Saved global model to results/lstm/lstm_round_2_das2.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_3_das2.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.86it/s]


Saved global model to results/lstm/lstm_round_4_das2.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_5_das2.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_6_das2.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [01:51<00:00,  1.90it/s]


Saved global model to results/lstm/lstm_round_7_das2.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [01:50<00:00,  1.90it/s]


Saved global model to results/lstm/lstm_round_8_das2.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_9_das2.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.86it/s]


Saved global model to results/lstm/lstm_round_10_das2.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_11_das2.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_12_das2.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.86it/s]


Saved global model to results/lstm/lstm_round_13_das2.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_14_das2.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_15_das2.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.86it/s]


Saved global model to results/lstm/lstm_round_16_das2.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_17_das2.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_18_das2.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.88it/s]


Saved global model to results/lstm/lstm_round_19_das2.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_20_das2.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_21_das2.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_22_das2.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [01:51<00:00,  1.89it/s]


Saved global model to results/lstm/lstm_round_23_das2.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_24_das2.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [01:52<00:00,  1.87it/s]


Saved global model to results/lstm/lstm_round_25_das2.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [02:01<00:00,  1.74it/s]


Saved global model to results/lstm/lstm_round_26_das2.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/lstm/lstm_round_27_das2.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [02:37<00:00,  1.34it/s]


Saved global model to results/lstm/lstm_round_28_das2.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/lstm/lstm_round_29_das2.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_30_das2.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_31_das2.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/lstm/lstm_round_32_das2.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.83it/s]


Saved global model to results/lstm/lstm_round_33_das2.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.85it/s]


Saved global model to results/lstm/lstm_round_34_das2.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_35_das2.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.83it/s]


Saved global model to results/lstm/lstm_round_36_das2.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_37_das2.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/lstm/lstm_round_38_das2.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_39_das2.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_40_das2.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/lstm/lstm_round_41_das2.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [01:53<00:00,  1.86it/s]


Saved global model to results/lstm/lstm_round_42_das2.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/lstm/lstm_round_43_das2.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/lstm/lstm_round_44_das2.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.82it/s]


Saved global model to results/lstm/lstm_round_45_das2.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/lstm/lstm_round_46_das2.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.71it/s]


Saved global model to results/lstm/lstm_round_47_das2.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/lstm/lstm_round_48_das2.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_49_das2.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_50_das2.pt
Starting experiment with model: gru
Round 1/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_1_das2.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_2_das2.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_3_das2.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/gru/gru_round_4_das2.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/gru/gru_round_5_das2.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_6_das2.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_7_das2.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_8_das2.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_9_das2.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_10_das2.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_11_das2.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_12_das2.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_13_das2.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/gru/gru_round_14_das2.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/gru/gru_round_15_das2.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/gru/gru_round_16_das2.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_17_das2.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.65it/s]


Saved global model to results/gru/gru_round_18_das2.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.66it/s]


Saved global model to results/gru/gru_round_19_das2.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_20_das2.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_21_das2.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_22_das2.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_23_das2.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_24_das2.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_25_das2.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_26_das2.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_27_das2.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/gru/gru_round_28_das2.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_29_das2.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_30_das2.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_31_das2.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.60it/s]


Saved global model to results/gru/gru_round_32_das2.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_33_das2.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/gru/gru_round_34_das2.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_35_das2.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_36_das2.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/gru/gru_round_37_das2.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/gru/gru_round_38_das2.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.60it/s]


Saved global model to results/gru/gru_round_39_das2.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_40_das2.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_41_das2.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_42_das2.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_43_das2.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_44_das2.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.60it/s]


Saved global model to results/gru/gru_round_45_das2.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_46_das2.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.60it/s]


Saved global model to results/gru/gru_round_47_das2.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/gru/gru_round_48_das2.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.71it/s]


Saved global model to results/gru/gru_round_49_das2.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [02:02<00:00,  1.72it/s]


Saved global model to results/gru/gru_round_50_das2.pt
Starting experiment with model: moe_lstm
Round 1/50


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.56it/s]


Saved global model to results/moe_lstm/moe_lstm_round_1_das2.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [02:18<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_2_das2.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.54it/s]


Saved global model to results/moe_lstm/moe_lstm_round_3_das2.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_4_das2.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.54it/s]


Saved global model to results/moe_lstm/moe_lstm_round_5_das2.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_6_das2.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/moe_lstm/moe_lstm_round_7_das2.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_8_das2.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_9_das2.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_lstm/moe_lstm_round_10_das2.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_11_das2.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_12_das2.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_13_das2.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_lstm/moe_lstm_round_14_das2.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_15_das2.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_16_das2.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/moe_lstm/moe_lstm_round_17_das2.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/moe_lstm/moe_lstm_round_18_das2.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/moe_lstm/moe_lstm_round_19_das2.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_20_das2.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_21_das2.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/moe_lstm/moe_lstm_round_22_das2.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_23_das2.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_24_das2.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_25_das2.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_26_das2.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_27_das2.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_28_das2.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_29_das2.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_30_das2.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_31_das2.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_32_das2.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_33_das2.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_34_das2.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/moe_lstm/moe_lstm_round_35_das2.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_36_das2.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_37_das2.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_38_das2.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/moe_lstm/moe_lstm_round_39_das2.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_40_das2.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_41_das2.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_42_das2.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_43_das2.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_44_das2.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_lstm/moe_lstm_round_45_das2.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_46_das2.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_47_das2.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_48_das2.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_49_das2.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_lstm/moe_lstm_round_50_das2.pt
Starting experiment with model: moe_gru
Round 1/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_1_das2.pt
Round 2/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_2_das2.pt
Round 3/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_3_das2.pt
Round 4/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_4_das2.pt
Round 5/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_5_das2.pt
Round 6/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_6_das2.pt
Round 7/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_7_das2.pt
Round 8/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/moe_gru/moe_gru_round_8_das2.pt
Round 9/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_9_das2.pt
Round 10/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/moe_gru/moe_gru_round_10_das2.pt
Round 11/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_11_das2.pt
Round 12/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_12_das2.pt
Round 13/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_13_das2.pt
Round 14/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_14_das2.pt
Round 15/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_15_das2.pt
Round 16/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_16_das2.pt
Round 17/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_17_das2.pt
Round 18/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/moe_gru/moe_gru_round_18_das2.pt
Round 19/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_19_das2.pt
Round 20/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_20_das2.pt
Round 21/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_21_das2.pt
Round 22/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_22_das2.pt
Round 23/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_23_das2.pt
Round 24/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_24_das2.pt
Round 25/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_25_das2.pt
Round 26/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_26_das2.pt
Round 27/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_27_das2.pt
Round 28/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_28_das2.pt
Round 29/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_29_das2.pt
Round 30/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_30_das2.pt
Round 31/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_31_das2.pt
Round 32/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_32_das2.pt
Round 33/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/moe_gru/moe_gru_round_33_das2.pt
Round 34/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_34_das2.pt
Round 35/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_35_das2.pt
Round 36/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_36_das2.pt
Round 37/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_37_das2.pt
Round 38/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_38_das2.pt
Round 39/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_39_das2.pt
Round 40/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_40_das2.pt
Round 41/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_41_das2.pt
Round 42/50


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.77it/s]


Saved global model to results/moe_gru/moe_gru_round_42_das2.pt
Round 43/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_43_das2.pt
Round 44/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_44_das2.pt
Round 45/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_45_das2.pt
Round 46/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_46_das2.pt
Round 47/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_47_das2.pt
Round 48/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_48_das2.pt
Round 49/50


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_49_das2.pt
Round 50/50


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]

Saved global model to results/moe_gru/moe_gru_round_50_das2.pt



