In [1]:
import torch
from flcore.pflniid_utils.data_utils import read_client_data
from flcore.clients.clientavg import clientAVG

In [2]:
import copy
import argparse
import os
import time
import warnings
import numpy as np
import logging
from flcore.servers.serveravg import FedAvg
from flcore.servers.serverlocal import Local

In [3]:
parser = argparse.ArgumentParser()

In [4]:
# general
parser.add_argument('-go', "--goal", type=str, default="test", 
                    help="The goal for this experiment")
parser.add_argument('-dev', "--device", type=str, default="cpu",  # KAI: Changed the default to cpu
                    choices=["cpu", "cuda"])
parser.add_argument('-did', "--device_id", type=str, default="0")
parser.add_argument('-data', "--dataset", type=str, default="cphs")  # KAI: Changed the default to cphs (from mnist)
#parser.add_argument('-nb', "--num_classes", type=int, default=10)  # Not doing classification...
parser.add_argument('-m', "--model", type=str, default="LinearRegression")  # KAI: Changed the default to Linear Regression
parser.add_argument('-lbs', "--batch_size", type=int, default=1200)  # Setting it to a full update would be 1300ish... how many batches does it run? In one epoch? Not even sure where that is set
# The 1300 and the batch size are 2 separate things...
# I want to restrict the given dataset to just the 1300, but then iterate in batches... or do I since we don't have that much data and can probably just use all the data at once? Make batch size match the update size? ...
parser.add_argument('-lr', "--local_learning_rate", type=float, default=0.005,
                    help="Local learning rate")
parser.add_argument('-ld', "--learning_rate_decay", type=bool, default=False)
parser.add_argument('-ldg', "--learning_rate_decay_gamma", type=float, default=0.99)
parser.add_argument('-gr', "--global_rounds", type=int, default=250)  # KAI: Switched to 250 down from 2000
parser.add_argument('-ls', "--local_epochs", type=int, default=1, 
                    help="Multiple update steps in one local epoch.")  # KAI: I think it was 1 originally.  I'm gonna keep it there.  Does this mean I can set batchsize to 1300 and cook?Is my setup capable or running multiple epochs? Implicitly I was doing 1 epoch before, using the full update data I believe...
parser.add_argument('-algo', "--algorithm", type=str, default="FedAvg")
parser.add_argument('-jr', "--join_ratio", type=float, default=0.2,
                    help="Ratio of clients per round")
parser.add_argument('-rjr', "--random_join_ratio", type=bool, default=False,
                    help="Random ratio of clients per round")
parser.add_argument('-nc', "--num_clients", type=int, default=14,
                    help="Total number of clients")
parser.add_argument('-dp', "--privacy", type=bool, default=False,
                    help="differential privacy")
parser.add_argument('-dps', "--dp_sigma", type=float, default=0.0)
parser.add_argument('-sfn', "--save_folder_name", type=str, default='items')

# SECTION: practical
parser.add_argument('-cdr', "--client_drop_rate", type=float, default=0.0,
                    help="Rate for clients that train but drop out")
parser.add_argument('-tsr', "--train_slow_rate", type=float, default=0.0,
                    help="The rate for slow clients when training locally")
parser.add_argument('-ssr', "--send_slow_rate", type=float, default=0.0,
                    help="The rate for slow clients when sending global model")
parser.add_argument('-ts', "--time_select", type=bool, default=False,
                    help="Whether to group and select clients at each round according to time cost")
parser.add_argument('-tth', "--time_threthold", type=float, default=10000,
                    help="The threthold for droping slow clients")

# SECTION: Kai's additional args
parser.add_argument('-pca_channels', "--pca_channels", type=int, default=64,
                    help="Number of principal components. 64 means do not use any PCA")
parser.add_argument('-lambdas', "--lambdas", type=list, default=[0, 1e-3, 1e-4],
                    help="Lamda F, D, E penalty terms ")
parser.add_argument('-starting_update', "--starting_update", type=int, default=0,
                    help="Which update to start on (for CPHS Simulation). Use 0 or 10.")
parser.add_argument('-test_split', "--test_split", type=float, default=0.2,
                    help="Percent of data to use for testing")
parser.add_argument('-device_channels', "--device_channels", type=int, default=64,
                    help="Number of recording channels with the used EMG device")
parser.add_argument('-dt', "--dt", type=float, default=1/60,
                    help="Delta time, amount of time (sec?) between measurements")
parser.add_argument('-normalize_emg', "--normalize_emg", type=bool, default=False,
                    help="Normalize the input EMG signals")
parser.add_argument('-normalize_V', "--normalize_V", type=bool, default=False,
                    help="Normalize the V term in the cost function")
parser.add_argument('-local_round_threshold', "--local_round_threshold", type=int, default=50,
                    help="Number of communication rounds per client until a client will advance to the next batch of streamed data")
parser.add_argument('-debug_mode', "--debug_mode", type=bool, default=False,
                    help="In debug mode, the code is run to minimize overhead time in order to debug as fast as possible.  Namely, the data is held at the server to decrease init time, and communication delays are ignored.")
parser.add_argument('-condition_number', "--condition_number", type=int, default=1,
                    help="Which condition number (trial) to train on")



parser.add_argument('-t', "--times", type=int, default=1,
                    help="Running times")
parser.add_argument('-ab', "--auto_break", type=bool, default=False)
parser.add_argument('-dlg', "--dlg_eval", type=bool, default=False)
parser.add_argument('-dlgg', "--dlg_gap", type=int, default=100)
parser.add_argument('-bnpc', "--batch_num_per_client", type=int, default=2)  # Only used with DLG
parser.add_argument('-eg', "--eval_gap", type=int, default=1,
                    help="Rounds gap for evaluation")
parser.add_argument('-nnc', "--num_new_clients", type=int, default=0)


# This one for sure breaks it
#parser.add_argument('-fte', "--fine_tuning_epoch", type=int, default=0)

_StoreAction(option_strings=['-nnc', '--num_new_clients'], dest='num_new_clients', nargs=None, const=None, default=0, type=<class 'int'>, choices=None, required=False, help=None, metavar=None)

In [7]:
#args = parser.parse_args()
args = parser.parse_known_args()

In [8]:
args

(Namespace(goal='test', device='cpu', device_id='0', dataset='cphs', model='LinearRegression', batch_size=1200, local_learning_rate=0.005, learning_rate_decay=False, learning_rate_decay_gamma=0.99, global_rounds=250, local_epochs=1, algorithm='FedAvg', join_ratio=0.2, random_join_ratio=False, num_clients=14, privacy=False, dp_sigma=0.0, save_folder_name='items', client_drop_rate=0.0, train_slow_rate=0.0, send_slow_rate=0.0, time_select=False, time_threthold=10000, pca_channels=64, lambdas=[0, 0.001, 0.0001], starting_update=0, test_split=0.2, device_channels=64, dt=0.016666666666666666, normalize_emg=False, normalize_V=False, local_round_threshold=50, debug_mode=False, condition_number=1, times=1, auto_break=False, dlg_eval=False, dlg_gap=100, batch_num_per_client=2, eval_gap=1, num_new_clients=0),
 ['-f',
  'C:\\Users\\kdmen\\AppData\\Roaming\\jupyter\\runtime\\kernel-91f155c2-6045-4889-bdcf-cf25e0647c4e.json'])

In [9]:
args = args[0]
args.fine_tuning_epoch=0
args

Namespace(goal='test', device='cpu', device_id='0', dataset='cphs', model='LinearRegression', batch_size=1200, local_learning_rate=0.005, learning_rate_decay=False, learning_rate_decay_gamma=0.99, global_rounds=250, local_epochs=1, algorithm='FedAvg', join_ratio=0.2, random_join_ratio=False, num_clients=14, privacy=False, dp_sigma=0.0, save_folder_name='items', client_drop_rate=0.0, train_slow_rate=0.0, send_slow_rate=0.0, time_select=False, time_threthold=10000, pca_channels=64, lambdas=[0, 0.001, 0.0001], starting_update=0, test_split=0.2, device_channels=64, dt=0.016666666666666666, normalize_emg=False, normalize_V=False, local_round_threshold=50, debug_mode=False, condition_number=1, times=1, auto_break=False, dlg_eval=False, dlg_gap=100, batch_num_per_client=2, eval_gap=1, num_new_clients=0, fine_tuning_epoch=0)

In [11]:
dataset = 'cphs'

Runs and crashes in the same way that the command line version does! Yay!

In [None]:
time_list = []
#reporter = MemReporter()
model_str = args.model

# Switched args.prev to 0 since it wasn't working
for i in range(0, args.times):
    print(f"\n============= Running time: {i}th =============")
    print("Creating server and clients ...")
    start = time.time()

    # Generate args.model
    if model_str == "LinearRegression":
        args.model = torch.nn.Linear(args.pca_channels, 2)  #input_size, output_size
    else:
        raise NotImplementedError

    print(args.model)

    # select algorithm
    if args.algorithm == "FedAvg":
        server = FedAvg(args, i)
    elif args.algorithm == "Local":
        server = Local(args, i)
    else:
        raise NotImplementedError

    server.train()

    time_list.append(time.time()-start)

print(f"\nAverage time cost: {round(np.average(time_list), 2)}s.")

In [None]:
# All the selfs aren't defined since this is outside the class

def _train(self):
    trainloader = self.load_train_data()
    self.model.train()

    start_time = time.time()

    max_local_steps = self.local_epochs

    for step in range(max_local_steps):
        for i, (x, y) in enumerate(trainloader):
            print(f"Step {step}, pair {i} in traindl")
            print(f"x.size(): {x.size()}")
            if type(x) == type([]):
                x[0] = x[0].to(self.device)
            else:
                x = x.to(self.device)
            y = y.to(self.device)
            output = self.model(x)
            loss = self.loss(output, y, self.model)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

In [None]:
my_clients = []
for i in range(14):
    train_data = read_client_data(dataset, i, is_train=True)
    test_data = read_client_data(dataset, i, is_train=False)
    client = clientAVG(args, 
                    ID=i, 
                    train_samples=len(train_data), 
                    test_samples=len(test_data), 
                    train_slow=False, 
                    send_slow=False)
    my_clients.append(client)

In [None]:
my_clients.train()