In [1]:
import copy
import torch
import torch.nn as nn
import numpy as np
import random
import os
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.preprocessing import label_binarize
from sklearn import metrics

from sklearn.decomposition import PCA

from flcore.pflniid_utils.data_utils import read_client_data
from utils.custom_loss_class import CPHSLoss
from utils.emg_dataset_class import *

In [2]:
import time
from flcore.pflniid_utils.privacy import *

In [15]:
from flcore.clients.clientbase import Client
from flcore.clients.clientavg import clientAVG
from flcore.servers.serverbase import Server
from flcore.servers.serveravg import FedAvg
from flcore.servers.serverlocal import Local

In [48]:
# Row offsets marking the boundaries of the streamed updates: elsewhere in this
# notebook update k is sliced as rows update_ix[k]:update_ix[k + 1].
update_ix = [
    0, 1200, 2402, 3604, 4806, 6008, 7210, 8412, 9614, 10816,
    12018, 13220, 14422, 15624, 16826, 18028, 19230, 20432, 20769,
]

In [6]:
import argparse
# Parser that collects the experiment's hyperparameters; the individual
# options are registered on it in the following cell.
parser = argparse.ArgumentParser()

In [7]:
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    argparse's ``type=bool`` simply calls ``bool(token)``, so every non-empty
    string -- including "False" and "0" -- parses as True. This converter
    understands the usual spellings instead.

    Args:
        value: The raw token from the command line (or an actual bool).

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string is kept falsy, matching what bool("") used to give.
    if token in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")


# general
parser.add_argument('-go', "--goal", type=str, default="test",
                    help="The goal for this experiment")
parser.add_argument('-dev', "--device", type=str, default="cpu",  # KAI: Changed the default to cpu
                    choices=["cpu", "cuda"])
parser.add_argument('-did', "--device_id", type=str, default="0")
parser.add_argument('-data', "--dataset", type=str, default="cphs")  # KAI: Changed the default to cphs (from mnist)
#parser.add_argument('-nb', "--num_classes", type=int, default=10)  # Not doing classification...
parser.add_argument('-m', "--model", type=str, default="LinearRegression")  # KAI: Changed the default to Linear Regression
# KAI: the batch size and the update size (~1200-1300 rows) are two separate
# things. Open question: restrict the dataset to one update and iterate in
# batches, or just make the batch size match the update size?
parser.add_argument('-lbs', "--batch_size", type=int, default=1200)
parser.add_argument('-lr', "--local_learning_rate", type=float, default=0.005,
                    help="Local learning rate")
parser.add_argument('-ld', "--learning_rate_decay", type=_str2bool, default=False)
parser.add_argument('-ldg', "--learning_rate_decay_gamma", type=float, default=0.99)
parser.add_argument('-gr', "--global_rounds", type=int, default=250)  # KAI: Switched to 250 down from 2000
# KAI: I think it was 1 originally and I am keeping it there. Open question:
# is the setup capable of running multiple epochs? Implicitly it was doing one
# epoch before, using the full update data.
parser.add_argument('-ls', "--local_epochs", type=int, default=1,
                    help="Multiple update steps in one local epoch.")
parser.add_argument('-algo', "--algorithm", type=str, default="FedAvg")
parser.add_argument('-jr', "--join_ratio", type=float, default=0.2,
                    help="Ratio of clients per round")
parser.add_argument('-rjr', "--random_join_ratio", type=_str2bool, default=False,
                    help="Random ratio of clients per round")
parser.add_argument('-nc', "--num_clients", type=int, default=14,
                    help="Total number of clients")
parser.add_argument('-dp', "--privacy", type=_str2bool, default=False,
                    help="differential privacy")
parser.add_argument('-dps', "--dp_sigma", type=float, default=0.0)
parser.add_argument('-sfn', "--save_folder_name", type=str, default='items')

# SECTION: practical
parser.add_argument('-cdr', "--client_drop_rate", type=float, default=0.0,
                    help="Rate for clients that train but drop out")
parser.add_argument('-tsr', "--train_slow_rate", type=float, default=0.0,
                    help="The rate for slow clients when training locally")
parser.add_argument('-ssr', "--send_slow_rate", type=float, default=0.0,
                    help="The rate for slow clients when sending global model")
parser.add_argument('-ts', "--time_select", type=_str2bool, default=False,
                    help="Whether to group and select clients at each round according to time cost")
# The misspelled dest ("time_threthold") is kept as-is: it is the attribute
# name that ends up on the parsed namespace.
parser.add_argument('-tth', "--time_threthold", type=float, default=10000,
                    help="The threthold for droping slow clients")

# SECTION: Kai's additional args
parser.add_argument('-pca_channels', "--pca_channels", type=int, default=64,
                    help="Number of principal components. 64 means do not use any PCA")
parser.add_argument('-lambdaF', "--lambdaF", type=float, default=0.0,
                    help="Penalty term for user EMG input (user effort)")
parser.add_argument('-lambdaD', "--lambdaD", type=float, default=1e-3,
                    help="Penalty term for the decoder norm (interface effort)")
parser.add_argument('-lambdaE', "--lambdaE", type=float, default=1e-4,
                    help="Penalty term on performance error norm")
parser.add_argument('-starting_update', "--starting_update", type=int, default=0,
                    help="Which update to start on (for CPHS Simulation). Use 0 or 10.")
parser.add_argument('-test_split_fraction', "--test_split_fraction", type=float, default=0.2,
                    help="Fraction of data to use for testing")
parser.add_argument('-device_channels', "--device_channels", type=int, default=64,
                    help="Number of recording channels with the used EMG device")
parser.add_argument('-dt', "--dt", type=float, default=1/60,
                    help="Delta time, amount of time (sec?) between measurements")
parser.add_argument('-normalize_emg', "--normalize_emg", type=_str2bool, default=False,
                    help="Normalize the input EMG signals")
parser.add_argument('-normalize_V', "--normalize_V", type=_str2bool, default=False,
                    help="Normalize the V term in the cost function")
parser.add_argument('-local_round_threshold', "--local_round_threshold", type=int, default=50,
                    help="Number of communication rounds per client until a client will advance to the next batch of streamed data")
parser.add_argument('-debug_mode', "--debug_mode", type=_str2bool, default=False,
                    help="In debug mode, the code is run to minimize overhead time in order to debug as fast as possible.  Namely, the data is held at the server to decrease init time, and communication delays are ignored.")
parser.add_argument('-condition_number', "--condition_number", type=int, default=1,
                    help="Which condition number (trial) to train on")
parser.add_argument('-test_split_each_update', "--test_split_each_update", type=_str2bool, default=False,
                    help="Implement train/test split within each update or on the entire dataset")
parser.add_argument('-verbose', "--verbose", type=_str2bool, default=False,
                    help="Print out a bunch of extra stuff")
parser.add_argument('-slow_clients_bool', "--slow_clients_bool", type=_str2bool, default=False,
                    help="Control whether or not to have ANY slow clients")
parser.add_argument('-return_cost_func_comps', "--return_cost_func_comps", type=_str2bool, default=False, #True
                    help="Return Loss, Error, DTerm, FTerm from loss class")
parser.add_argument('-test_split_users', "--test_split_users", type=_str2bool, default=False,
                    help="Split testing data by holding out some users (fraction held out determined by test_split_fraction)")

parser.add_argument('-t', "--times", type=int, default=1,
                    help="Running times")
parser.add_argument('-ab', "--auto_break", type=_str2bool, default=False)
parser.add_argument('-dlg', "--dlg_eval", type=_str2bool, default=False)
parser.add_argument('-dlgg', "--dlg_gap", type=int, default=100)
parser.add_argument('-bnpc', "--batch_num_per_client", type=int, default=2)  # Only used with DLG
parser.add_argument('-eg', "--eval_gap", type=int, default=1,
                    help="Rounds gap for evaluation")
parser.add_argument('-nnc', "--num_new_clients", type=int, default=0)

# This one for sure breaks it
#parser.add_argument('-fte', "--fine_tuning_epoch", type=int, default=0)

_StoreAction(option_strings=['-nnc', '--num_new_clients'], dest='num_new_clients', nargs=None, const=None, default=0, type=<class 'int'>, choices=None, required=False, help=None, metavar=None)

In [8]:
# parse_known_args() is used in place of parse_args(): it tolerates argv
# entries the parser does not know about and returns a
# (namespace, leftover_argv) tuple, which the next cell unpacks.
#args = parser.parse_args()
args = parser.parse_known_args()

In [9]:
# parse_known_args() returned (namespace, leftover_argv); keep only the
# namespace. The isinstance guard makes this cell safe to re-run: once `args`
# is already a Namespace, `args[0]` would raise TypeError.
if isinstance(args, tuple):
    args = args[0]
# fine_tuning_epoch has no parser option (its add_argument call is commented
# out above), so it is set directly on the namespace.
args.fine_tuning_epoch = 0
args

Namespace(goal='test', device='cpu', device_id='0', dataset='cphs', model='LinearRegression', batch_size=1200, local_learning_rate=0.005, learning_rate_decay=False, learning_rate_decay_gamma=0.99, global_rounds=250, local_epochs=1, algorithm='FedAvg', join_ratio=0.2, random_join_ratio=False, num_clients=14, privacy=False, dp_sigma=0.0, save_folder_name='items', client_drop_rate=0.0, train_slow_rate=0.0, send_slow_rate=0.0, time_select=False, time_threthold=10000, pca_channels=64, lambdaF=0.0, lambdaD=0.001, lambdaE=0.0001, starting_update=0, test_split_fraction=0.2, device_channels=64, dt=0.016666666666666666, normalize_emg=False, normalize_V=False, local_round_threshold=50, debug_mode=False, condition_number=1, test_split_each_update=False, verbose=False, slow_clients_bool=False, return_cost_func_comps=False, test_split_users=False, times=1, auto_break=False, dlg_eval=False, dlg_gap=100, batch_num_per_client=2, eval_gap=1, num_new_clients=0, fine_tuning_epoch=0)

In [10]:
# Dataset identifier; same value as the parser's --dataset default.
dataset = "cphs"

In [16]:
# Single-run version of the original `for i in range(args.times)` driver loop,
# with the run index fixed at 0.
time_list = []
# Remember what args.model held before it is replaced by the module below.
model_str = args.model

print(f"\n============= Running time: {0}th =============")
print("Creating server and clients ...")
start = time.time()

# Linear decoder: pca_channels input features -> 2 outputs.
args.model = nn.Linear(in_features=args.pca_channels, out_features=2)
print(args.model)

# Map the algorithm name to its server class; anything else is unsupported.
server_classes = {"FedAvg": FedAvg, "Local": Local}
if args.algorithm not in server_classes:
    raise NotImplementedError
server = server_classes[args.algorithm](args, 0)

# Training and timing are intentionally not run from this cell:
#server.train()
#time_list.append(time.time()-start)
#print(f"\nAverage time cost: {round(np.average(time_list), 2)}s.")


Creating server and clients ...
Linear(in_features=64, out_features=2, bias=True)
Serveravg init(): set_slow_clients()
Serveravg init(): set_clients()
SBSC: iter 0
SBSC: iter 1
SBSC: iter 2
SBSC: iter 3
SBSC: iter 4
SBSC: iter 5
SBSC: iter 6
SBSC: iter 7
SBSC: iter 8
SBSC: iter 9
SBSC: iter 10
SBSC: iter 11
SBSC: iter 12
SBSC: iter 13

Join ratio / total clients: 0.2 / 14
Finished creating server and clients.


In [22]:
# Manual walk-through of the first iteration (round 0) of the server's
# `for i in range(self.global_rounds+1)` loop.
round_idx = 0

# Begin with every client selected and the global weight matrix zeroed.
server.selected_clients = server.clients
with torch.no_grad():
    # A Sequential model would need global_model[0] here instead of global_model.
    server.global_model.weight.fill_(0)

if round_idx % server.eval_gap == 0:
    print(f"\n-------------Round number: {round_idx}-------------")
    # Evaluation is skipped on round 0, so this branch does not run here.
    if round_idx != 0:
        print("\nEvaluate personalized models")
        server.evaluate()

        latest_train_loss = server.rs_train_loss[-1]
        if type(latest_train_loss) in [int, float]:
            print(f"rs_train_loss: {latest_train_loss}")
        else:
            print(f"len: {len(latest_train_loss)}")
        print()

# Replace the "everyone" selection with this round's selected participants.
server.selected_clients = server.select_clients()
selected_ids = [participant.ID for participant in server.selected_clients]
print(f"Selected client IDs: {selected_ids}")


-------------Round number: 0-------------
Selected client IDs: [10, 2]


In [24]:
# The server's per-client training loop is left commented out; instead, one
# client is picked so its training can be stepped through by hand below.
#print("CLIENT TRAINING")
#for client in server.selected_clients:
#    client.train()
#    print(f"Client{client.ID} loss: {client.loss_log[-1]:0,.3f}")

# First of the clients selected for this round.
my_client = server.selected_clients[0]

In [25]:
my_client.local_epochs

1

In [29]:
# Unrolled opening of the client's train() method, with `self` replaced by
# `my_client`. Parts that are not exercised here are left commented out.
#def train(self):
# Build the training DataLoader through the client's own method.
trainloader = my_client.load_train_data()
# self.model.to(self.device)
# Put the model in training mode.
my_client.model.train()

# differential privacy
#if self.privacy:
#    self.model, self.optimizer, trainloader, privacy_engine = \
#        initialize_dp(self.model, self.optimizer, trainloader, self.dp_sigma)

# Wall-clock start; only referenced by commented-out timing code in this notebook.
start_time = time.time()

# Number of local passes = configured local epochs (the slow-client
# randomisation below is disabled).
max_local_steps = my_client.local_epochs
#if self.train_slow:
#    max_local_steps = np.random.randint(1, max_local_steps // 2)

In [30]:
trainloader

<torch.utils.data.dataloader.DataLoader at 0x2a68c0eba60>

In [35]:
# Build a standalone client for user 0 directly from its on-disk files.
i = 0
# Directory holding the per-user .npy files. The original machine-specific
# location stays the default; set CPHS_CLIENT_DATA_DIR to run this cell on
# another machine without editing the notebook.
base_data_path = os.environ.get("CPHS_CLIENT_DATA_DIR") or \
    'C:\\Users\\kdmen\\Desktop\\Research\\personalization-privacy-risk\\Data\\Client_Specific_Files\\'
# The file names are appended by plain concatenation, so make sure an
# overridden directory ends with a path separator.
if not base_data_path.endswith(("\\", "/")):
    base_data_path += os.sep
# Despite the keyword names, the values passed here are file paths: the EMG
# samples file goes in as train_samples and the labels file as test_samples.
client = clientAVG(server.args,
                   ID=i,
                   train_samples=base_data_path + "UserID" + str(i) + "_TrainData_8by20770by64.npy",
                   test_samples=base_data_path + "UserID" + str(i) + "_Labels_8by20770by2.npy",
                   train_slow=False,
                   send_slow=False)

In [36]:
my_client.test_split_users

False

In [37]:
my_client.test_split_each_update

False

In [39]:
# FROM CLIENTBASE.PY
# Unrolled first half of load_train_data(), with `self` replaced by
# `my_client`: on the client's first local round it loads the client's files,
# selects one condition, derives the train/test split and sets the row bounds
# of the current update.

#def load_train_data(self, batch_size=None):
# Mirrors the method's default argument; the DataLoader cell further down
# falls back to my_client.batch_size when this is None.
batch_size=None
# Load full client datasets (only while local_round is still 0)
if my_client.local_round == 0:

    ###########################################################################################
    #self._load_train_data()   # Returns nothing, sets self variables
    # Load in client's data
    with open(my_client.samples_path, 'rb') as handle:
        samples_npy = np.load(handle)
    with open(my_client.labels_path, 'rb') as handle:
        labels_npy = np.load(handle)
    # Select the given condition: index the first axis with condition_number and
    # keep the remaining two axes. This is the actual data and labels for the trial.
    my_client.cond_samples_npy = samples_npy[my_client.condition_number,:,:]
    my_client.cond_labels_npy = labels_npy[my_client.condition_number,:,:]
    # Split data into train and test sets: row count of the training portion,
    # i.e. (1 - test_split_fraction) of all rows, rounded.
    testsplit_upper_bound = round((1-my_client.test_split_fraction)*(my_client.cond_samples_npy.shape[0]))
    # Set the number of examples (used to be done on init). Note these are row
    # COUNTS for the train/test split, not arrays.
    my_client.train_samples = testsplit_upper_bound
    my_client.test_samples = my_client.cond_samples_npy.shape[0] - testsplit_upper_bound
    # Snap the split to the closest update boundary in update_ix ...
    train_test_update_number_split = min(my_client.update_ix, key=lambda x:abs(x-testsplit_upper_bound))
    # ... and store that boundary's POSITION in update_ix. This is an update
    # number, not a row index (it displays as 14 later in this notebook).
    my_client.max_training_update_upbound = my_client.update_ix.index(train_test_update_number_split)
    ###########################################################################################

    # Open question from the author: why is this inside the local_round == 0 branch?
    #if my_client.current_update < my_client.max_training_update_upbound:
    #    my_client.update_lower_bound = my_client.update_ix[my_client.current_update]
    #    my_client.update_upper_bound = my_client.update_ix[my_client.current_update+1]
    # Added during debugging: set the bounds unconditionally. Despite their
    # names, these hold ROW indices taken from update_ix.
    my_client.update_lower_bound = my_client.update_ix[my_client.current_update]
    my_client.update_upper_bound = my_client.update_ix[my_client.current_update+1]

In [56]:
# Show the row range currently assigned to the client's update window.
print(f"update lower bound: {my_client.update_lower_bound}")
print(f"update upper bound: {my_client.update_upper_bound}")

update lower bound: 0
update upper bound: 1200


In [None]:
# Second half of load_train_data(): count this call as one local round and,
# every `local_round_threshold` rounds, move the client on to the next
# streamed update. The DataLoader is rebuilt from the new bounds afterwards.
my_client.local_round += 1
# NOTE(review): the cap of 16 on current_update is hard-coded in the original
# code; it is left unchanged here because its intended source is not visible.
if (my_client.local_round > 1) and (my_client.current_update < 16) and (my_client.local_round % my_client.local_round_threshold == 0):
    my_client.current_update += 1
    print(f"Client{my_client.ID} advances to update {my_client.current_update}")
    # Slice the full client dataset based on the current update number
    if my_client.current_update < my_client.max_training_update_upbound:
        my_client.update_lower_bound = my_client.update_ix[my_client.current_update]
        my_client.update_upper_bound = my_client.update_ix[my_client.current_update+1]
    else:
        # Past the last training update: stay on the final training window.
        # max_training_update_upbound is an update NUMBER (a position in
        # update_ix), so it has to be translated into row offsets. Storing the
        # number itself, as before, produced a one-row slice such as [13:14].
        last_update_number = my_client.max_training_update_upbound
        my_client.update_lower_bound = my_client.update_ix[last_update_number - 1]
        my_client.update_upper_bound = my_client.update_ix[last_update_number]

In [None]:
# Build the (x, y) training pairs for the client's current update window.
# A fresh dataset object is created on every call, but the arrays it wraps are
# slices of data already held on the client -- nothing is re-read from disk.
#train_data = read_client_data(self.dataset, self.ID, self.current_update, is_train=True)  # Original code
update_rows = slice(my_client.update_lower_bound, my_client.update_upper_bound)
training_dataset_obj = CustomEMGDataset(
    my_client.cond_samples_npy[update_rows, :],
    my_client.cond_labels_npy[update_rows, :],
)
# Float32 tensors of inputs and targets, paired up row by row.
X_data = torch.Tensor(training_dataset_obj['x']).float()
y_data = torch.Tensor(training_dataset_obj['y']).float()
training_data_for_dataloader = list(zip(X_data, y_data))

In [60]:
# Re-create only the dataset object for the current update window (the same
# call as in the dataset-building cell) so its contents can be inspected below.
training_dataset_obj = CustomEMGDataset(my_client.cond_samples_npy[my_client.update_lower_bound:my_client.update_upper_bound,:], my_client.cond_labels_npy[my_client.update_lower_bound:my_client.update_upper_bound,:])

In [58]:
my_client.cond_samples_npy[my_client.update_lower_bound:my_client.update_upper_bound,:].shape

(1200, 64)

In [59]:
my_client.cond_labels_npy[my_client.update_lower_bound:my_client.update_upper_bound,:].shape

(1200, 2)

In [62]:
torch.Tensor(training_dataset_obj['x']).type(torch.float32).shape

torch.Size([1200, 64])

In [63]:
len(training_data_for_dataloader)

1200

In [64]:
len(training_data_for_dataloader[0])

2

That all looks fine...

In [65]:
# Wrap the training pairs in a DataLoader, falling back to the client's
# configured batch size when none was given. Order is preserved (no shuffle)
# and a short final batch is kept rather than dropped.
batch_size = my_client.batch_size if batch_size is None else batch_size
trainloader = DataLoader(
    training_data_for_dataloader,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)
#return dl

In [66]:
my_client.cond_samples_npy.shape

(20770, 64)

In [67]:
my_client.cond_labels_npy.shape

(20770, 2)

In [68]:
# Walk the training loader once to see how many batches it yields and the
# shape of each batch.
for i, (x, y) in enumerate(trainloader):
    print(f"Batch {i}: x has size {x.size()}; y has size {y.size()}")

Batch 0: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])


Lmao so on rerun it works now...

In [46]:
1200*15+370

18370

It shouldn't be going through all the batches at once right... it should only be going through the first updates worth...
- There's 16 batches, but 18 updates... looks like it held the last few batches out for testing? 
- Don't really wanna test on the actual last batch
- Also shouldn't the training updates be broken up perfectly and not with some leftover? Code isn't working as expected...
- Does simulate data streaming have any effect?
- Where was it in the code that I kept auto-remaking trainloaders?

In [49]:
len(update_ix)-1

18

Uhh is this code only used once lol
- It's literally only used in the client init...

In [None]:
# Before this I need to run the INIT update segmentation code...
#init_dl = self.load_train_data()
#self.simulate_data_streaming(init_dl)
# ^ This func sets F, V, etc

update setting code SHOULD NOT be in test data (unless each update has its own separate test data)

In [50]:
# Unrolled ORIGINAL load_test_data(), kept as the "before" version.
# NOTE: the test set here is every row from update_upper_bound onwards. That
# bound is the end of the client's CURRENT training update (1200 at this point
# in the notebook), not the train/test split, so the result overlaps the later
# training updates. The cell under "CORRECTION TO LOAD_TEST_DATA()" fixes this.
#def load_test_data(self, batch_size=None):
batch_size=None
# Make sure this runs AFTER load_train_data so the data is already loaded in
if batch_size == None:
    batch_size = my_client.batch_size

#test_data = read_client_data(self.dataset, self.ID, self.current_update, is_train=False)
testing_dataset_obj = CustomEMGDataset(my_client.cond_samples_npy[my_client.update_upper_bound:,:], my_client.cond_labels_npy[my_client.update_upper_bound:,:])
# Float32 tensors of inputs and targets, paired up row by row.
X_data = torch.Tensor(testing_dataset_obj['x']).type(torch.float32)
y_data = torch.Tensor(testing_dataset_obj['y']).type(torch.float32)
testing_data_for_dataloader = [(x, y) for x, y in zip(X_data, y_data)]

testloader = DataLoader(
    dataset=testing_data_for_dataloader,
    batch_size=batch_size, 
    drop_last=False,  # Open question: unclear whether dropping the short last batch matters
    shuffle=False) 
#return dl

In [51]:
# Walk the (uncorrected) test loader once to see how many batches it yields
# and the shape of each batch.
for i, (x, y) in enumerate(testloader):
    print(f"Batch {i}: x has size {x.size()}; y has size {y.size()}")

Batch 0: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 1: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 2: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 3: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 4: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 5: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 6: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 7: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 8: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 9: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 10: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 11: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 12: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 

Uhhhh why is this exactly the same as the trainloader.....

In [52]:
my_client.update_upper_bound

1200

In [53]:
my_client.max_training_update_upbound

14

Uhhh so I mean that's actually fine, given that that is the UPDATE NUMBER! not the index to split at...

# CORRECTION TO LOAD_TEST_DATA()

In [54]:
# CORRECTED
# Unrolled load_test_data() with the split fixed: the test set now starts at
# the train/test boundary instead of at the end of the current training update.
#def load_test_data(self, batch_size=None):
batch_size = None
# Make sure this runs AFTER load_train_data so the data is already loaded in
# (cond_samples_npy, cond_labels_npy and max_training_update_upbound are set there).
if batch_size is None:
    batch_size = my_client.batch_size

##########################################################################
# max_training_update_upbound is an update NUMBER, i.e. a position in
# update_ix, so it must be translated into a row index before slicing.
# It was computed against my_client.update_ix, so that same list is used here
# rather than the notebook-level `update_ix` copy, which could drift from it.
my_client.test_split_idx = my_client.update_ix[my_client.max_training_update_upbound]
##########################################################################

#test_data = read_client_data(self.dataset, self.ID, self.current_update, is_train=False)
testing_dataset_obj = CustomEMGDataset(
    my_client.cond_samples_npy[my_client.test_split_idx:, :],
    my_client.cond_labels_npy[my_client.test_split_idx:, :],
)
# Float32 tensors of inputs and targets, paired up row by row.
X_data = torch.Tensor(testing_dataset_obj['x']).type(torch.float32)
y_data = torch.Tensor(testing_dataset_obj['y']).type(torch.float32)
testing_data_for_dataloader = [(x, y) for x, y in zip(X_data, y_data)]

correctedtestloader = DataLoader(
    dataset=testing_data_for_dataloader,
    batch_size=batch_size,
    drop_last=False,  # keeps the short final batch (344 rows in the output below)
    shuffle=False)
#return dl

In [55]:
# Walk the corrected test loader once to confirm it now only covers the rows
# after the train/test split.
for i, (x, y) in enumerate(correctedtestloader):
    print(f"Batch {i}: x has size {x.size()}; y has size {y.size()}")

Batch 0: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 1: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 2: x has size torch.Size([1200, 64]); y has size torch.Size([1200, 2])
Batch 3: x has size torch.Size([344, 64]); y has size torch.Size([344, 2])


Seems like it fixed it... 
- Do I wanna just drop the last batch? It might mess things up since it's not the same size
- Idk what would happen in real-time trials though if everything has to be set up into uniform sized buckets...

In [None]:
# Unrolled body of the client's train() method: one pass over `trainloader`,
# logging the loss, its components and the weight-gradient norm per batch.
# Open question from the author: which loop corresponds to "epochs"? The outer
# `for step in range(max_local_steps)` loop is disabled, so `step` stays 0.
running_num_samples = 0
step = 0
#for step in range(max_local_steps):  # I'm assuming this is gradient steps?... are local epochs the same as gd steps?
for i, (x, y) in enumerate(trainloader):
    print(f"Step {step}, pair {i} in traindl: x.size(): {x.size()}")
    # Move the batch to the client's device (x may be a list whose first
    # element is the tensor).
    if isinstance(x, list):
        x[0] = x[0].to(my_client.device)
    else:
        x = x.to(my_client.device)
    y = y.to(my_client.device)
    #if self.train_slow:
    #    time.sleep(0.1 * np.abs(np.random.rand()))
    output = my_client.model(x)
    # Fixed: this line still said `self.model`, which is undefined in the
    # notebook and raised NameError; every other reference uses `my_client`.
    loss = my_client.loss(output, y, my_client.model)
    if my_client.return_cost_func_comps:
        # The loss call returned (loss, *components): log the components and
        # keep the scalar loss.
        my_client.cost_func_comps_log.append(loss[1:])
        loss = loss[0]
    else:
        # Read the three cost terms off the loss object.
        # .item() ONLY WORKS WITH 1D TENSORS!!!
        t1 = my_client.loss.term1_error.item()
        t2 = my_client.loss.term2_ld_decnorm.item()
        t3 = my_client.loss.term3_lf_emgnorm.item()
        # NaN terms are logged as -1 (the messages say "None" but the check is for NaN).
        if np.isnan(t1):
            print("CLIENTAVG: Error term is None...")
            t1 = -1
        if np.isnan(t2):
            print("CLIENTAVG: Decoder Effort term is None...")
            t2 = -1
        if np.isnan(t3):
            print("CLIENTAVG: User Effort term is None...")
            t3 = -1
        my_client.cost_func_comps_log.append((t1, t2, t3))
    # NOTE(review): the gradient is read BEFORE loss.backward() for this batch,
    # so the logged norm belongs to the previous backward pass (or is missing
    # on the first batch). Confirm whether that lag is intended.
    weight_grad = my_client.model.weight.grad
    # `is None` rather than `== None`: the latter goes through Tensor.__eq__.
    if weight_grad is None:
        print("Weight gradient is None...")
        my_client.gradient_norm_log.append(-1)
    else:
        #grad_norm = torch.linalg.norm(self.model.weight.grad, ord='fro')
        # .cpu() makes the numpy conversion work when the model is on a CUDA device.
        grad_norm = np.linalg.norm(weight_grad.detach().cpu().numpy())
        my_client.gradient_norm_log.append(grad_norm)
    my_client.loss_log.append(loss.item())
    #self.running_epoch_loss.append(loss.item() * x.size(0))  # From: running_epoch_loss.append(loss.item() * images.size(0))
    running_num_samples += x.size(0)
    # Standard optimisation step: clear old gradients, backpropagate, update.
    my_client.optimizer.zero_grad()
    loss.backward()
    my_client.optimizer.step()

my_client.train_time_cost['num_rounds'] += 1
#my_client.train_time_cost['total_cost'] += time.time() - start_time

In [None]:
# Unrolled tail of the server's train() method, rewritten against the
# notebook's `server` object. The pasted version referenced `self` (undefined
# here) and contained a bare `break` outside any loop, which is a SyntaxError
# that prevents the cell from running at all.
print()

if server.auto_break and server.check_done(acc_lss=[server.rs_test_acc], top_cnt=server.top_cnt):
    # In the original method this is where the round loop would `break`.
    # There is no loop in this unrolled cell, so only the message is kept.
    print("Breaking")

server.evaluate(train=False, test=True)
print("\nBest Loss.")
print(min(server.rs_test_loss))

# Collect each client's per-batch logs on the server before saving.
for idx, client in enumerate(server.clients):
    #server.cost_func_comps_dict[idx] = client.cost_func_comps_log
    #server.gradient_dict[idx] = client.gradient_norm_log
    server.cost_func_comps_log.append(client.cost_func_comps_log)
    server.gradient_norm_log.append(client.gradient_norm_log)

server.save_results(save_cost_func_comps=True, save_gradient=True)
# Client models are saved under models/<dataset>/Local.
model_path = os.path.join("models", server.dataset)
model_path = os.path.join(model_path, "Local")
for client in server.clients:
    client.save_item(client.model, 'local_client_model', item_path=model_path)
# Open question from the author: where does this global model come from, and
# why is it saved here?
server.save_global_model()