In [1]:
import os
import itertools
from attrs import define
# from code.train import train
# from codes.train_dist import train
from codes.train import train
from codes.optimizers.base import Optimizer
# from code.problems import Problem
from codes import Loss
from codes.datasets import Dataset
from codes.models import Model

# %matplotlib widget
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def zip_dict(**kwargs):
    """Walk the keyword arguments in lockstep, yielding one dict per position.

    Each keyword maps a name to an iterable of candidate values. The i-th
    yielded dict maps every name to the i-th value of its iterable. As with
    ``zip``, iteration stops as soon as the shortest iterable is exhausted,
    and calling with no keywords yields nothing.
    """
    names = kwargs.keys()
    yield from (dict(zip(names, row)) for row in zip(*kwargs.values()))


def product_dict(**kwargs):
    """Yield one dict for every combination of the keyword arguments' values.

    Each keyword maps a name to an iterable of candidate values. The dicts
    follow ``itertools.product`` order, so the last keyword varies fastest.
    Calling with no keywords yields a single empty dict.
    """
    names = list(kwargs)
    for combo in itertools.product(*kwargs.values()):
        yield {name: value for name, value in zip(names, combo)}

In [3]:
# Process-level runtime settings, exported through the environment.
# NOTE(review): "AMD" does not appear among the threading layers documented for
# MKL_THREADING_LAYER (INTEL, GNU, TBB, SEQUENTIAL) — confirm this value is intended.
# NOTE(review): TORCH_DEVICE is presumably read by the project's own code — confirm.
os.environ.update(
    MKL_THREADING_LAYER="AMD",
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    TORCH_DEVICE="cpu",
)

In [4]:
# MLflow-related settings, exported through the environment.
# The experiment is named after the directory the notebook is launched from.
# NOTE(review): MLFLOW_VERBOSE / MLFLOW_CHECK_EXIST look project-specific — confirm where they are read.
os.environ.update({
    'MLFLOW_VERBOSE': 'True',
    'MLFLOW_CHECK_EXIST': 'True',
    'MLFLOW_EXPERIMENT_NAME': os.path.basename(os.getcwd()),
})

In [5]:
@define
class BaseConfig:
    """Settings bundle handed to ``train`` for a single run.

    Construct it with keyword overrides (``BaseConfig(hratio=0.01, mdlr_=3.5)``)
    and/or assign attributes afterwards. A default of ``None`` means "not set";
    the annotation gives the type of the value when one is provided.

    NOTE(review): several field names end in an underscore; this looks like a
    project convention whose meaning is not visible in this notebook — confirm
    in the code that consumes the config.
    """

    # --- run length, population size and seed ---
    nepochs:         int = 500
    npeers:          int = 150
    seed:            int = 0

    # --- problem definition (enum-like members from the `codes` package) ---
    loss:           Loss = Loss.MSELoss
    model:         Model = Model.Mean
    dataset:     Dataset = Dataset.Normal

    # --- data-related options, unset by default ---
    # nsamples:        int = 1000
    valenabled_:    bool = None
    valnsamples_:    int = None
    nclasses:        int = None
    hratio:        float = None

    # --- optimizer selection and its basic hyperparameters ---
    optimizer: Optimizer = None
    batchsize:       int = 100
    lr:            float = 1e-2

    trueweights:    bool = None

    # --- `md*` options, unset by default ---
    mdbatchsize_:    int = None
    mdniters_:       int = None
    # Annotated as float: the experiment grid assigns 3.5, 4.5 and 12.5 here.
    mdlr_:         float = None
    mdfull_:        bool = None

In [6]:
os.environ['MLFLOW_RUN_TAGS'] = str(dict(about=f'final'))
# os.environ['MLFLOW_RUN_TAGS'] = str(dict(about=f'dev'))

args_grid = dict(
    hratio=[0.001, 0.01, 0.1],
    mdlr_=[3.5, 4.5, 12.5],
)

for d in zip_dict(**args_grid):

    os.environ['MLFLOW_RUN_NAME'] = 'SGD Full'
    config = BaseConfig(**d)
    config.mdlr_ = None
    config.optimizer = Optimizer.SGD
    config.trueweights = False
    %time train(config)

    os.environ['MLFLOW_RUN_NAME'] = 'SGD Ideal'
    config = BaseConfig(**d)
    config.mdlr_ = None
    config.hratio = None
    config.optimizer = Optimizer.SGD
    config.trueweights = True
    %time train(config)

    os.environ['MLFLOW_RUN_NAME'] = 'MeritFed MD'
    config = BaseConfig(**d)
    config.optimizer = Optimizer.MeritFed
    config.mdfull_ = True
    config.mdniters_ = 10
    config.mdbatchsize_ = 100
    config.valenabled_ = True
    config.valnsamples_ = 1000
    %time train(config)

    os.environ['MLFLOW_RUN_NAME'] = 'MeritFed SMD'
    config = BaseConfig(**d)
    config.optimizer = Optimizer.MeritFed
    config.mdfull_ = False
    config.mdniters_ = 10
    config.mdbatchsize_ = 100
    config.valenabled_ = True
    config.valnsamples_ = 1000
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.TAWT
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    config.mdlr_ = None
    %time train(config)

    config = BaseConfig(**d)
    config.optimizer = Optimizer.FedAdp
    os.environ['MLFLOW_RUN_NAME'] = str(config.optimizer)
    config.mdlr_ = None
    %time train(config)

Step 5000: train-loss: 1.01554 expected-loss: 0.10936
CPU times: user 9min 5s, sys: 9.25 s, total: 9min 14s
Wall time: 9min 6s
Step 5000: train-loss: 1.00350 expected-loss: 0.00260
CPU times: user 7min 22s, sys: 12.6 s, total: 7min 34s
Wall time: 7min 26s
Step 5000: train-loss: 1.00376 expected-loss: 0.00011
CPU times: user 13min 37s, sys: 18.4 s, total: 13min 56s
Wall time: 13min 47s
Step 5000: train-loss: 1.00376 expected-loss: 0.00011
CPU times: user 10min 59s, sys: 18.4 s, total: 11min 17s
Wall time: 11min 9s
Step 5000: train-loss: 1.00281 expected-loss: 0.01070
CPU times: user 7min 46s, sys: 17.4 s, total: 8min 4s
Wall time: 7min 55s
Step 5000: train-loss: 1.00394 expected-loss: 0.00874
CPU times: user 8min 11s, sys: 17.8 s, total: 8min 28s
Wall time: 8min 18s
Step 5000: train-loss: 1.01585 expected-loss: 0.11205
CPU times: user 7min 20s, sys: 11.6 s, total: 7min 32s
Wall time: 7min 24s
CPU times: user 1.32 s, sys: 479 ms, total: 1.8 s
Wall time: 1.8 s
Step 5000: train-loss: 1.003