In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
import pandas as pd
from collections import defaultdict
import os
import torch
import torch.optim as optim
from tqdm import tqdm


from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights


In [2]:
from AggregationStrategy import sync_aggregate,average_weights,sync_aggregate_norm,sync_aggregate_softmax, fedavgm_update

In [3]:
df = pd.read_feather("train_final.feather")

In [4]:
df.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,primary_use,air_temperature
7593144,0,0,2016-05-21 01:00:00,72.221012,Education,25.6
7593145,1,0,2016-05-21 01:00:00,39.611586,Education,25.6
7593146,2,0,2016-05-21 01:00:00,1.920567,Education,25.6
7593147,3,0,2016-05-21 01:00:00,111.532464,Education,25.6
7593148,4,0,2016-05-21 01:00:00,456.734799,Education,25.6


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11712248 entries, 7593144 to 20216099
Data columns (total 6 columns):
 #   Column           Dtype         
---  ------           -----         
 0   building_id      int64         
 1   meter            int64         
 2   timestamp        datetime64[ns]
 3   meter_reading    float64       
 4   primary_use      object        
 5   air_temperature  float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(1)
memory usage: 625.5+ MB


In [6]:


# Config
# List of models to experiment with
MODEL_NAMES = ["cnn-lstm", "cnn-gru"]

# Config
NUM_CLIENTS = 1410
CLIENT_FRAC = 0.15
NUM_ROUNDS = 50
LOCAL_EPOCHS = 5
LR = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_FILE ="train_final.feather" # "meter_0_data_cleaned.feather"


In [7]:
class TimeSeriesDifficultyWeight:
    def __init__(self, num_clients, accumulate_iters=20):
        self.num_clients = num_clients
        self.last_loss = torch.ones(num_clients).float().to(DEVICE)
        self.learn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.unlearn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.ema_difficulty = torch.ones(num_clients).float().to(DEVICE)
        self.accumulate_iters = accumulate_iters

    def update(self, cid: int, loss_history: List[float]) -> float:
        """
        Update difficulty based on loss trend for a client.
        Expects a list of per-epoch losses.
        """
        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32).to(DEVICE)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        learn = torch.where(delta < 0, -delta * ratio, torch.tensor(0.0, device=current_loss.device))
        unlearn = torch.where(delta >= 0, delta * ratio, torch.tensor(0.0, device=current_loss.device))

        # EMA update
        momentum = (self.accumulate_iters - 1) / self.accumulate_iters
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty score
        diff_ratio = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        difficulty = diff_ratio #torch.pow(diff_ratio, 1 / 5)

        # Smooth difficulty over rounds
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty

        self.last_loss[cid] = current_loss
        return self.ema_difficulty[cid].item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        weights = [self.ema_difficulty[cid].item() for cid in client_ids]
        total = sum(weights)
        if total == 0:
            return [1.0 / len(client_ids)] * len(client_ids)
        return [w / total for w in weights]
    
    def get_sampling_probabilities(self, min_prob=0.05):
        difficulty = self.ema_difficulty
        inv_difficulty = 1.0 / (difficulty + 1e-6)
        inv_difficulty = inv_difficulty / inv_difficulty.sum()
        probs = torch.clamp(inv_difficulty, min=min_prob)
        return (probs / probs.sum()).cpu().numpy()



In [8]:
train_loader, test_loader = load_energy_data_feather(10, filepath=DATA_FILE)

In [9]:
for batch in train_loader:
    print(batch[0].size())
    break

torch.Size([1024, 168, 1])


### FedAvg-diff

In [None]:
# difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")
    difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)

    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")

        # === Difficulty-aware sampling ===
        sampling_probs = difficulty_tracker.get_sampling_probabilities(min_prob=0.05)
        sampled_clients = np.random.choice(
            np.arange(NUM_CLIENTS),
            size=int(CLIENT_FRAC * NUM_CLIENTS),
            replace=False,
            p=sampling_probs
        )
        print(f"Sampled {len(sampled_clients)} clients")

        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, loss_history = train_model(
                local_model, train_loader,
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS
            )
            # local_weights.append(updated_weights)

            # === Update difficulty score ===
            difficulty_tracker.update(cid, loss_history)

            local_weights.append(updated_weights)

        # === FedAvg-style aggregation ===
        global_weights = average_weights(local_weights)
        set_weights(global_model, global_weights)

        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedAvg_diff.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: cnn-lstm
Round 1/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:19<00:00,  1.51it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_1_fedAvg_diff0.pt
Round 2/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:22<00:00,  1.48it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_2_fedAvg_diff0.pt
Round 3/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:30<00:00,  1.40it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_3_fedAvg_diff0.pt
Round 4/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:36<00:00,  1.35it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_4_fedAvg_diff0.pt
Round 5/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:37<00:00,  1.34it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_5_fedAvg_diff0.pt
Round 6/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:35<00:00,  1.36it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_6_fedAvg_diff0.pt
Round 7/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:37<00:00,  1.34it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_7_fedAvg_diff0.pt
Round 8/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:38<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_8_fedAvg_diff0.pt
Round 9/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_9_fedAvg_diff0.pt
Round 10/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_10_fedAvg_diff0.pt
Round 11/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_11_fedAvg_diff0.pt
Round 12/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:38<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_12_fedAvg_diff0.pt
Round 13/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_13_fedAvg_diff0.pt
Round 14/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_14_fedAvg_diff0.pt
Round 15/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_15_fedAvg_diff0.pt
Round 16/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_16_fedAvg_diff0.pt
Round 17/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_17_fedAvg_diff0.pt
Round 18/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_18_fedAvg_diff0.pt
Round 19/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_19_fedAvg_diff0.pt
Round 20/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_20_fedAvg_diff0.pt
Round 21/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:38<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_21_fedAvg_diff0.pt
Round 22/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_22_fedAvg_diff0.pt
Round 23/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_23_fedAvg_diff0.pt
Round 24/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_24_fedAvg_diff0.pt
Round 25/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_25_fedAvg_diff0.pt
Round 26/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_26_fedAvg_diff0.pt
Round 27/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_27_fedAvg_diff0.pt
Round 28/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_28_fedAvg_diff0.pt
Round 29/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_29_fedAvg_diff0.pt
Round 30/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_30_fedAvg_diff0.pt
Round 31/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_31_fedAvg_diff0.pt
Round 32/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:43<00:00,  1.29it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_32_fedAvg_diff0.pt
Round 33/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:43<00:00,  1.29it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_33_fedAvg_diff0.pt
Round 34/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:47<00:00,  1.26it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_34_fedAvg_diff0.pt
Round 35/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:43<00:00,  1.29it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_35_fedAvg_diff0.pt
Round 36/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_36_fedAvg_diff0.pt
Round 37/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.30it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_37_fedAvg_diff0.pt
Round 38/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_38_fedAvg_diff0.pt
Round 39/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_39_fedAvg_diff0.pt
Round 40/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:47<00:00,  1.26it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_40_fedAvg_diff0.pt
Round 41/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:46<00:00,  1.27it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_41_fedAvg_diff0.pt
Round 42/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_42_fedAvg_diff0.pt
Round 43/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:50<00:00,  1.24it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_43_fedAvg_diff0.pt
Round 44/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_44_fedAvg_diff0.pt
Round 45/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_45_fedAvg_diff0.pt
Round 46/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:38<00:00,  1.33it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_46_fedAvg_diff0.pt
Round 47/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_47_fedAvg_diff0.pt
Round 48/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_48_fedAvg_diff0.pt
Round 49/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.31it/s]


Saved global model to results/cnn-lstm/cnn-lstm_round_49_fedAvg_diff0.pt
Round 50/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.31it/s]

Saved global model to results/cnn-lstm/cnn-lstm_round_50_fedAvg_diff0.pt
Starting experiment with model: cnn-gru





ValueError: Unknown model name: cnn-gru

In [9]:
class TimeSeriesDifficultyWeight:
    def __init__(self, num_clients, accumulate_iters=20):
        self.num_clients = num_clients
        self.last_loss = torch.ones(num_clients).float().to(DEVICE)
        self.learn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.unlearn_score = torch.zeros(num_clients).float().to(DEVICE)
        self.ema_difficulty = torch.ones(num_clients).float().to(DEVICE)
        self.accumulate_iters = accumulate_iters

    def update(self, cid: int, loss_history: List[float]) -> float:
        """
        Update difficulty based on loss trend for a client.
        Expects a list of per-epoch losses.
        """
        current_loss = torch.tensor(loss_history[-1], dtype=torch.float32).to(DEVICE)
        previous_loss = self.last_loss[cid]
        delta = current_loss - previous_loss
        ratio = torch.log((current_loss + 1e-8) / (previous_loss + 1e-8))

        learn = torch.where(delta < 0, -delta * ratio, torch.tensor(0.0, device=current_loss.device))
        unlearn = torch.where(delta >= 0, delta * ratio, torch.tensor(0.0, device=current_loss.device))

        # EMA update
        momentum = (self.accumulate_iters - 1) / self.accumulate_iters
        self.learn_score[cid] = momentum * self.learn_score[cid] + (1 - momentum) * learn
        self.unlearn_score[cid] = momentum * self.unlearn_score[cid] + (1 - momentum) * unlearn

        # Difficulty score
        diff_ratio = (self.unlearn_score[cid] + 1e-8) / (self.learn_score[cid] + 1e-8)
        difficulty = diff_ratio #torch.pow(diff_ratio, 1 / 5)

        # Smooth difficulty over rounds
        self.ema_difficulty[cid] = momentum * self.ema_difficulty[cid] + (1 - momentum) * difficulty

        self.last_loss[cid] = current_loss
        return self.ema_difficulty[cid].item()

    def get_normalized_weights(self, client_ids: List[int]) -> List[float]:
        weights = [self.ema_difficulty[cid].item() for cid in client_ids]
        total = sum(weights)
        if total == 0:
            return [1.0 / len(client_ids)] * len(client_ids)
        return [w / total for w in weights]
    
    def get_sampling_probabilities(self, min_prob=0.05):
        difficulty = self.ema_difficulty
        inv_difficulty = 1.0 / (difficulty + 1e-6)
        inv_difficulty = inv_difficulty / inv_difficulty.sum()
        probs = torch.clamp(inv_difficulty, min=min_prob)
        return (probs / probs.sum()).cpu().numpy()



In [44]:
# from my_utils import TimeSeriesDifficultyWeight  # make sure it's imported

difficulty_tracker = TimeSeriesDifficultyWeight(num_clients=NUM_CLIENTS)

for model_name in MODEL_NAMES:
    print(f"Starting experiment with model: {model_name}")

    model_dir = os.path.join("results", model_name)
    os.makedirs(model_dir, exist_ok=True)

    global_model = model_fn(model_name).to(DEVICE)
    global_weights = get_weights(global_model)

    for rnd in range(NUM_ROUNDS):
        print(f"Round {rnd+1}/{NUM_ROUNDS}")

        # === Difficulty-aware sampling ===
        sampling_probs = difficulty_tracker.get_sampling_probabilities(min_prob=0.05)
        sampled_clients = np.random.choice(
            np.arange(NUM_CLIENTS),
            size=int(CLIENT_FRAC * NUM_CLIENTS),
            replace=False,
            p=sampling_probs
        )
        print(f"Sampled {len(sampled_clients)} clients")

        local_weights = []

        for cid in tqdm(sampled_clients, desc="Training clients"):
            local_model = model_fn(model_name).to(DEVICE)
            set_weights(local_model, global_weights)
            train_loader, test_loader = load_energy_data_feather(cid, filepath=DATA_FILE)

            updated_weights, loss_history = train_model_fedprox(
                model=local_model,
                train_loader=train_loader,
                global_weights=[torch.tensor(w).to(DEVICE) for w in global_weights],
                device=DEVICE,
                learning_rate=LR,
                loss_fn=None,
                optimizer_class=optim.Adam,
                epochs=LOCAL_EPOCHS,
                mu=0.01
            )

            # === Update difficulty score ===
            difficulty_tracker.update(cid, loss_history)

            local_weights.append(updated_weights)

        # === FedAvg-style aggregation ===
        global_weights = average_weights(local_weights)
        set_weights(global_model, global_weights)

        checkpoint_path = os.path.join(model_dir, f"{model_name}_round_{rnd+1}_fedProx_diff.pt")
        torch.save(global_model.state_dict(), checkpoint_path)
        print(f"Saved global model to {checkpoint_path}")


Starting experiment with model: lstm
Round 1/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:39<00:00,  2.13it/s]


Saved global model to results/lstm/lstm_round_1_fedProx_diff.pt
Round 2/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_2_fedProx_diff.pt
Round 3/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_3_fedProx_diff.pt
Round 4/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_4_fedProx_diff.pt
Round 5/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_5_fedProx_diff.pt
Round 6/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:38<00:00,  2.13it/s]


Saved global model to results/lstm/lstm_round_6_fedProx_diff.pt
Round 7/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:39<00:00,  2.12it/s]


Saved global model to results/lstm/lstm_round_7_fedProx_diff.pt
Round 8/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.09it/s]


Saved global model to results/lstm/lstm_round_8_fedProx_diff.pt
Round 9/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/lstm/lstm_round_9_fedProx_diff.pt
Round 10/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.10it/s]


Saved global model to results/lstm/lstm_round_10_fedProx_diff.pt
Round 11/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.10it/s]


Saved global model to results/lstm/lstm_round_11_fedProx_diff.pt
Round 12/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/lstm/lstm_round_12_fedProx_diff.pt
Round 13/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/lstm/lstm_round_13_fedProx_diff.pt
Round 14/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.11it/s]


Saved global model to results/lstm/lstm_round_14_fedProx_diff.pt
Round 15/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.10it/s]


Saved global model to results/lstm/lstm_round_15_fedProx_diff.pt
Round 16/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.09it/s]


Saved global model to results/lstm/lstm_round_16_fedProx_diff.pt
Round 17/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.09it/s]


Saved global model to results/lstm/lstm_round_17_fedProx_diff.pt
Round 18/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.09it/s]


Saved global model to results/lstm/lstm_round_18_fedProx_diff.pt
Round 19/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:39<00:00,  2.13it/s]


Saved global model to results/lstm/lstm_round_19_fedProx_diff.pt
Round 20/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.10it/s]


Saved global model to results/lstm/lstm_round_20_fedProx_diff.pt
Round 21/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.10it/s]


Saved global model to results/lstm/lstm_round_21_fedProx_diff.pt
Round 22/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/lstm/lstm_round_22_fedProx_diff.pt
Round 23/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/lstm/lstm_round_23_fedProx_diff.pt
Round 24/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/lstm/lstm_round_24_fedProx_diff.pt
Round 25/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:06<00:00,  1.67it/s]


Saved global model to results/lstm/lstm_round_25_fedProx_diff.pt
Round 26/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_26_fedProx_diff.pt
Round 27/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_27_fedProx_diff.pt
Round 28/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_28_fedProx_diff.pt
Round 29/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_29_fedProx_diff.pt
Round 30/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_30_fedProx_diff.pt
Round 31/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_31_fedProx_diff.pt
Round 32/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/lstm/lstm_round_32_fedProx_diff.pt
Round 33/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/lstm/lstm_round_33_fedProx_diff.pt
Round 34/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/lstm/lstm_round_34_fedProx_diff.pt
Round 35/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.64it/s]


Saved global model to results/lstm/lstm_round_35_fedProx_diff.pt
Round 36/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/lstm/lstm_round_36_fedProx_diff.pt
Round 37/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:09<00:00,  1.63it/s]


Saved global model to results/lstm/lstm_round_37_fedProx_diff.pt
Round 38/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/lstm/lstm_round_38_fedProx_diff.pt
Round 39/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.62it/s]


Saved global model to results/lstm/lstm_round_39_fedProx_diff.pt
Round 40/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.61it/s]


Saved global model to results/lstm/lstm_round_40_fedProx_diff.pt
Round 41/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:10<00:00,  1.61it/s]


Saved global model to results/lstm/lstm_round_41_fedProx_diff.pt
Round 42/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/lstm/lstm_round_42_fedProx_diff.pt
Round 43/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:13<00:00,  1.58it/s]


Saved global model to results/lstm/lstm_round_43_fedProx_diff.pt
Round 44/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:11<00:00,  1.60it/s]


Saved global model to results/lstm/lstm_round_44_fedProx_diff.pt
Round 45/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/lstm/lstm_round_45_fedProx_diff.pt
Round 46/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/lstm/lstm_round_46_fedProx_diff.pt
Round 47/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.65it/s]


Saved global model to results/lstm/lstm_round_47_fedProx_diff.pt
Round 48/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/lstm/lstm_round_48_fedProx_diff.pt
Round 49/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:07<00:00,  1.66it/s]


Saved global model to results/lstm/lstm_round_49_fedProx_diff.pt
Round 50/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:03<00:00,  1.71it/s]


Saved global model to results/lstm/lstm_round_50_fedProx_diff.pt
Starting experiment with model: gru
Round 1/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:59<00:00,  1.76it/s]


Saved global model to results/gru/gru_round_1_fedProx_diff.pt
Round 2/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_2_fedProx_diff.pt
Round 3/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_3_fedProx_diff.pt
Round 4/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.83it/s]


Saved global model to results/gru/gru_round_4_fedProx_diff.pt
Round 5/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_5_fedProx_diff.pt
Round 6/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_6_fedProx_diff.pt
Round 7/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_7_fedProx_diff.pt
Round 8/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.77it/s]


Saved global model to results/gru/gru_round_8_fedProx_diff.pt
Round 9/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_9_fedProx_diff.pt
Round 10/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/gru/gru_round_10_fedProx_diff.pt
Round 11/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_11_fedProx_diff.pt
Round 12/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_12_fedProx_diff.pt
Round 13/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_13_fedProx_diff.pt
Round 14/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_14_fedProx_diff.pt
Round 15/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_15_fedProx_diff.pt
Round 16/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_16_fedProx_diff.pt
Round 17/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_17_fedProx_diff.pt
Round 18/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_18_fedProx_diff.pt
Round 19/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_19_fedProx_diff.pt
Round 20/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_20_fedProx_diff.pt
Round 21/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_21_fedProx_diff.pt
Round 22/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/gru/gru_round_22_fedProx_diff.pt
Round 23/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_23_fedProx_diff.pt
Round 24/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_24_fedProx_diff.pt
Round 25/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_25_fedProx_diff.pt
Round 26/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/gru/gru_round_26_fedProx_diff.pt
Round 27/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/gru/gru_round_27_fedProx_diff.pt
Round 28/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


Saved global model to results/gru/gru_round_28_fedProx_diff.pt
Round 29/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_29_fedProx_diff.pt
Round 30/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.07it/s]


Saved global model to results/gru/gru_round_30_fedProx_diff.pt
Round 31/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_31_fedProx_diff.pt
Round 32/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.09it/s]


Saved global model to results/gru/gru_round_32_fedProx_diff.pt
Round 33/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:40<00:00,  2.09it/s]


Saved global model to results/gru/gru_round_33_fedProx_diff.pt
Round 34/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_34_fedProx_diff.pt
Round 35/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.07it/s]


Saved global model to results/gru/gru_round_35_fedProx_diff.pt
Round 36/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/gru/gru_round_36_fedProx_diff.pt
Round 37/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:44<00:00,  2.02it/s]


Saved global model to results/gru/gru_round_37_fedProx_diff.pt
Round 38/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_38_fedProx_diff.pt
Round 39/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:42<00:00,  2.06it/s]


Saved global model to results/gru/gru_round_39_fedProx_diff.pt
Round 40/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.07it/s]


Saved global model to results/gru/gru_round_40_fedProx_diff.pt
Round 41/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_41_fedProx_diff.pt
Round 42/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_42_fedProx_diff.pt
Round 43/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_43_fedProx_diff.pt
Round 44/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:41<00:00,  2.08it/s]


Saved global model to results/gru/gru_round_44_fedProx_diff.pt
Round 45/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:42<00:00,  2.05it/s]


Saved global model to results/gru/gru_round_45_fedProx_diff.pt
Round 46/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/gru/gru_round_46_fedProx_diff.pt
Round 47/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:43<00:00,  2.04it/s]


Saved global model to results/gru/gru_round_47_fedProx_diff.pt
Round 48/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:42<00:00,  2.07it/s]


Saved global model to results/gru/gru_round_48_fedProx_diff.pt
Round 49/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:04<00:00,  1.70it/s]


Saved global model to results/gru/gru_round_49_fedProx_diff.pt
Round 50/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:08<00:00,  1.64it/s]


Saved global model to results/gru/gru_round_50_fedProx_diff.pt
Starting experiment with model: moe_lstm
Round 1/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_1_fedProx_diff.pt
Round 2/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_2_fedProx_diff.pt
Round 3/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_3_fedProx_diff.pt
Round 4/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_4_fedProx_diff.pt
Round 5/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:26<00:00,  1.44it/s]


Saved global model to results/moe_lstm/moe_lstm_round_5_fedProx_diff.pt
Round 6/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:13<00:00,  1.58it/s]


Saved global model to results/moe_lstm/moe_lstm_round_6_fedProx_diff.pt
Round 7/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:29<00:00,  1.41it/s]


Saved global model to results/moe_lstm/moe_lstm_round_7_fedProx_diff.pt
Round 8/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:26<00:00,  1.44it/s]


Saved global model to results/moe_lstm/moe_lstm_round_8_fedProx_diff.pt
Round 9/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_9_fedProx_diff.pt
Round 10/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_10_fedProx_diff.pt
Round 11/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:28<00:00,  1.42it/s]


Saved global model to results/moe_lstm/moe_lstm_round_11_fedProx_diff.pt
Round 12/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_12_fedProx_diff.pt
Round 13/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:28<00:00,  1.42it/s]


Saved global model to results/moe_lstm/moe_lstm_round_13_fedProx_diff.pt
Round 14/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:27<00:00,  1.43it/s]


Saved global model to results/moe_lstm/moe_lstm_round_14_fedProx_diff.pt
Round 15/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:28<00:00,  1.42it/s]


Saved global model to results/moe_lstm/moe_lstm_round_15_fedProx_diff.pt
Round 16/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:29<00:00,  1.41it/s]


Saved global model to results/moe_lstm/moe_lstm_round_16_fedProx_diff.pt
Round 17/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:29<00:00,  1.41it/s]


Saved global model to results/moe_lstm/moe_lstm_round_17_fedProx_diff.pt
Round 18/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:29<00:00,  1.41it/s]


Saved global model to results/moe_lstm/moe_lstm_round_18_fedProx_diff.pt
Round 19/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:31<00:00,  1.40it/s]


Saved global model to results/moe_lstm/moe_lstm_round_19_fedProx_diff.pt
Round 20/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:31<00:00,  1.39it/s]


Saved global model to results/moe_lstm/moe_lstm_round_20_fedProx_diff.pt
Round 21/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:32<00:00,  1.38it/s]


Saved global model to results/moe_lstm/moe_lstm_round_21_fedProx_diff.pt
Round 22/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:32<00:00,  1.38it/s]


Saved global model to results/moe_lstm/moe_lstm_round_22_fedProx_diff.pt
Round 23/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:31<00:00,  1.39it/s]


Saved global model to results/moe_lstm/moe_lstm_round_23_fedProx_diff.pt
Round 24/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:31<00:00,  1.39it/s]


Saved global model to results/moe_lstm/moe_lstm_round_24_fedProx_diff.pt
Round 25/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:34<00:00,  1.37it/s]


Saved global model to results/moe_lstm/moe_lstm_round_25_fedProx_diff.pt
Round 26/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:33<00:00,  1.37it/s]


Saved global model to results/moe_lstm/moe_lstm_round_26_fedProx_diff.pt
Round 27/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:33<00:00,  1.38it/s]


Saved global model to results/moe_lstm/moe_lstm_round_27_fedProx_diff.pt
Round 28/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:28<00:00,  1.42it/s]


Saved global model to results/moe_lstm/moe_lstm_round_28_fedProx_diff.pt
Round 29/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:26<00:00,  1.44it/s]


Saved global model to results/moe_lstm/moe_lstm_round_29_fedProx_diff.pt
Round 30/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:22<00:00,  1.48it/s]


Saved global model to results/moe_lstm/moe_lstm_round_30_fedProx_diff.pt
Round 31/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:18<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_31_fedProx_diff.pt
Round 32/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/moe_lstm/moe_lstm_round_32_fedProx_diff.pt
Round 33/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/moe_lstm/moe_lstm_round_33_fedProx_diff.pt
Round 34/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.57it/s]


Saved global model to results/moe_lstm/moe_lstm_round_34_fedProx_diff.pt
Round 35/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:13<00:00,  1.58it/s]


Saved global model to results/moe_lstm/moe_lstm_round_35_fedProx_diff.pt
Round 36/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.56it/s]


Saved global model to results/moe_lstm/moe_lstm_round_36_fedProx_diff.pt
Round 37/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.57it/s]


Saved global model to results/moe_lstm/moe_lstm_round_37_fedProx_diff.pt
Round 38/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.56it/s]


Saved global model to results/moe_lstm/moe_lstm_round_38_fedProx_diff.pt
Round 39/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.56it/s]


Saved global model to results/moe_lstm/moe_lstm_round_39_fedProx_diff.pt
Round 40/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.57it/s]


Saved global model to results/moe_lstm/moe_lstm_round_40_fedProx_diff.pt
Round 41/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:13<00:00,  1.58it/s]


Saved global model to results/moe_lstm/moe_lstm_round_41_fedProx_diff.pt
Round 42/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:13<00:00,  1.58it/s]


Saved global model to results/moe_lstm/moe_lstm_round_42_fedProx_diff.pt
Round 43/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.57it/s]


Saved global model to results/moe_lstm/moe_lstm_round_43_fedProx_diff.pt
Round 44/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.56it/s]


Saved global model to results/moe_lstm/moe_lstm_round_44_fedProx_diff.pt
Round 45/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:16<00:00,  1.55it/s]


Saved global model to results/moe_lstm/moe_lstm_round_45_fedProx_diff.pt
Round 46/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:16<00:00,  1.54it/s]


Saved global model to results/moe_lstm/moe_lstm_round_46_fedProx_diff.pt
Round 47/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_47_fedProx_diff.pt
Round 48/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_48_fedProx_diff.pt
Round 49/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:19<00:00,  1.51it/s]


Saved global model to results/moe_lstm/moe_lstm_round_49_fedProx_diff.pt
Round 50/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:17<00:00,  1.53it/s]


Saved global model to results/moe_lstm/moe_lstm_round_50_fedProx_diff.pt
Starting experiment with model: moe_gru
Round 1/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.57it/s]


Saved global model to results/moe_gru/moe_gru_round_1_fedProx_diff.pt
Round 2/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:16<00:00,  1.55it/s]


Saved global model to results/moe_gru/moe_gru_round_2_fedProx_diff.pt
Round 3/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:14<00:00,  1.56it/s]


Saved global model to results/moe_gru/moe_gru_round_3_fedProx_diff.pt
Round 4/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:15<00:00,  1.55it/s]


Saved global model to results/moe_gru/moe_gru_round_4_fedProx_diff.pt
Round 5/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/moe_gru/moe_gru_round_5_fedProx_diff.pt
Round 6/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.82it/s]


Saved global model to results/moe_gru/moe_gru_round_6_fedProx_diff.pt
Round 7/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.83it/s]


Saved global model to results/moe_gru/moe_gru_round_7_fedProx_diff.pt
Round 8/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_8_fedProx_diff.pt
Round 9/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/moe_gru/moe_gru_round_9_fedProx_diff.pt
Round 10/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.85it/s]


Saved global model to results/moe_gru/moe_gru_round_10_fedProx_diff.pt
Round 11/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_11_fedProx_diff.pt
Round 12/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_12_fedProx_diff.pt
Round 13/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_13_fedProx_diff.pt
Round 14/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_14_fedProx_diff.pt
Round 15/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_15_fedProx_diff.pt
Round 16/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.82it/s]


Saved global model to results/moe_gru/moe_gru_round_16_fedProx_diff.pt
Round 17/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.82it/s]


Saved global model to results/moe_gru/moe_gru_round_17_fedProx_diff.pt
Round 18/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_18_fedProx_diff.pt
Round 19/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/moe_gru/moe_gru_round_19_fedProx_diff.pt
Round 20/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:55<00:00,  1.83it/s]


Saved global model to results/moe_gru/moe_gru_round_20_fedProx_diff.pt
Round 21/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/moe_gru/moe_gru_round_21_fedProx_diff.pt
Round 22/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:58<00:00,  1.78it/s]


Saved global model to results/moe_gru/moe_gru_round_22_fedProx_diff.pt
Round 23/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_23_fedProx_diff.pt
Round 24/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/moe_gru/moe_gru_round_24_fedProx_diff.pt
Round 25/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.79it/s]


Saved global model to results/moe_gru/moe_gru_round_25_fedProx_diff.pt
Round 26/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_26_fedProx_diff.pt
Round 27/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:56<00:00,  1.81it/s]


Saved global model to results/moe_gru/moe_gru_round_27_fedProx_diff.pt
Round 28/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:57<00:00,  1.80it/s]


Saved global model to results/moe_gru/moe_gru_round_28_fedProx_diff.pt
Round 29/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [01:54<00:00,  1.84it/s]


Saved global model to results/moe_gru/moe_gru_round_29_fedProx_diff.pt
Round 30/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:12<00:00,  1.59it/s]


Saved global model to results/moe_gru/moe_gru_round_30_fedProx_diff.pt
Round 31/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:34<00:00,  1.37it/s]


Saved global model to results/moe_gru/moe_gru_round_31_fedProx_diff.pt
Round 32/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:35<00:00,  1.36it/s]


Saved global model to results/moe_gru/moe_gru_round_32_fedProx_diff.pt
Round 33/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:33<00:00,  1.38it/s]


Saved global model to results/moe_gru/moe_gru_round_33_fedProx_diff.pt
Round 34/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:34<00:00,  1.37it/s]


Saved global model to results/moe_gru/moe_gru_round_34_fedProx_diff.pt
Round 35/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:35<00:00,  1.36it/s]


Saved global model to results/moe_gru/moe_gru_round_35_fedProx_diff.pt
Round 36/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:34<00:00,  1.36it/s]


Saved global model to results/moe_gru/moe_gru_round_36_fedProx_diff.pt
Round 37/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:36<00:00,  1.35it/s]


Saved global model to results/moe_gru/moe_gru_round_37_fedProx_diff.pt
Round 38/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:35<00:00,  1.36it/s]


Saved global model to results/moe_gru/moe_gru_round_38_fedProx_diff.pt
Round 39/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/moe_gru/moe_gru_round_39_fedProx_diff.pt
Round 40/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:36<00:00,  1.35it/s]


Saved global model to results/moe_gru/moe_gru_round_40_fedProx_diff.pt
Round 41/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:38<00:00,  1.33it/s]


Saved global model to results/moe_gru/moe_gru_round_41_fedProx_diff.pt
Round 42/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:39<00:00,  1.32it/s]


Saved global model to results/moe_gru/moe_gru_round_42_fedProx_diff.pt
Round 43/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:40<00:00,  1.32it/s]


Saved global model to results/moe_gru/moe_gru_round_43_fedProx_diff.pt
Round 44/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.31it/s]


Saved global model to results/moe_gru/moe_gru_round_44_fedProx_diff.pt
Round 45/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/moe_gru/moe_gru_round_45_fedProx_diff.pt
Round 46/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.30it/s]


Saved global model to results/moe_gru/moe_gru_round_46_fedProx_diff.pt
Round 47/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.31it/s]


Saved global model to results/moe_gru/moe_gru_round_47_fedProx_diff.pt
Round 48/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:41<00:00,  1.30it/s]


Saved global model to results/moe_gru/moe_gru_round_48_fedProx_diff.pt
Round 49/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:42<00:00,  1.30it/s]


Saved global model to results/moe_gru/moe_gru_round_49_fedProx_diff.pt
Round 50/50
Sampled 211 clients


Training clients: 100%|██████████| 211/211 [02:45<00:00,  1.27it/s]

Saved global model to results/moe_gru/moe_gru_round_50_fedProx_diff.pt



