In [1]:
import os
# import json
import math
import torch
from itertools import product
from dataclasses import dataclass, field

from gamesopt.attacks import Attack
from gamesopt.optimizer import Optimizer
from gamesopt.aggregator import Aggregator
from gamesopt.games import Game, create_matrix, create_bias
from gamesopt.train_distributed import train

# %matplotlib widget
# Static inline figures are active; the interactive `widget` backend above is
# kept as a commented-out alternative.
%matplotlib inline
%load_ext autoreload
# NOTE(review): autoreload mode 1 reloads only modules registered through
# `%aimport`, and no `%aimport` appears in this cell, so edits to the gamesopt
# modules are presumably NOT picked up automatically. Confirm whether
# `%autoreload 2` (reload all modules) was intended.
%autoreload 1

In [2]:
# Export the notebook-wide MLFLOW_* settings in one go: the verbose flag is
# switched on and the experiment is named after the current working directory.
os.environ.update(
    MLFLOW_VERBOSE='True',
    MLFLOW_EXPERIMENT_NAME=os.path.basename(os.getcwd()),
)

In [5]:
# class Config(BaseConfig):
@dataclass
class Config():
    """Settings for one distributed training run passed to ``train``.

    Every field has a default, so ``Config()`` is valid and callers override
    only what differs between runs (by keyword or by attribute assignment
    before calling ``train``).

    ``lr`` is derived from ``ell`` when it is not given explicitly:
    ``lr = 1/2/ell/15``.  The derivation happens per instance in
    ``__post_init__`` so that ``Config(ell=...)`` gets a matching step size
    instead of the one computed for the default ``ell``.
    """
    # Iteration budget of a run.
    n_iter: int = 6001

    # Presumably the total number of peers and how many of them are
    # Byzantine -- the exact semantics live in gamesopt; confirm there.
    n_peers: int = 100
    n_byzan: int = 20

    # Problem definition. The game default is supplied through
    # default_factory rather than as a plain class-level default.
    game: Game = field(default_factory=lambda: Game.Quadratic)
    # dim, num_samples, mu, ell and with_bias are forwarded to
    # create_matrix / create_bias when the problem data is generated.
    num_samples: int = 1000
    dim: int = 25
    with_bias: bool = True
    mu: float = 1e-1
    ell: float = 1e3

    # Optimizer settings; None means "not set for this run".
    optimizer: Optimizer = None
    batch_size: int = None
    lr: float = None  # None -> 1/2/ell/15, filled in by __post_init__
    alpha: float = None
    sigmaC: float = None

    # Attack settings.
    attack: Attack = None
    n_attacking: int = None
    attack_param: float = None

    use_bucketing: bool = None
    bucketing_s: int = 2

    aggregator: Aggregator = None
    # Generic aggregator hyper-parameters. Author's notes on per-aggregator
    # values: trimmed_mean_b=10; krum_m=2; clipping_n_iter=3; clipping_tau=10;
    # rfa_T=10; rfa_nu=0.1.
    # NOTE(review): which aggregator reads which slot is decided inside
    # gamesopt -- confirm before changing these.
    aggregator_param_a: int = 10
    aggregator_param_b: float = 0.1

    def __post_init__(self):
        """Fill in the step size from ``ell`` unless the caller supplied one."""
        if self.lr is None:
            # Same expression (and evaluation order) as the former class-level
            # default, so Config().lr is bit-for-bit unchanged.
            self.lr = 1/2/self.ell/15

In [6]:
class Data():
    """Problem data generated once from a config and shared by training runs.

    This is deliberately a plain class.  It declares no annotated fields and
    has its own ``__init__``, so ``@dataclass`` contributed nothing except a
    field-less ``__eq__`` (any two instances compared equal), a disabled
    ``__hash__`` and an empty ``Data()`` repr.

    Attributes:
        matrix: result of ``create_matrix(dim, num_samples, mu, ell, with_bias=...)``.
        bias: result of ``create_bias(dim, num_samples, with_bias=...)``.
        true: solution ``x`` of ``matrix.mean(dim=0) @ x = -bias.mean(dim=0)``.
        players: ``true`` shifted by the scalar ``0.1 / sqrt(2 * dim)`` in
            every coordinate (presumably the starting point handed to the
            optimizers -- confirm in gamesopt).
    """
    def __init__(self, config):
        """Build the data from ``config``.

        Args:
            config: object exposing ``dim``, ``num_samples``, ``mu``, ``ell``
                and ``with_bias`` (e.g. a ``Config`` instance).
        """
        self.matrix = create_matrix(config.dim, config.num_samples,
                                    config.mu, config.ell,
                                    with_bias=config.with_bias)
        self.bias = create_bias(config.dim, config.num_samples,
                                with_bias=config.with_bias)
        # Reference solution of the sample-averaged linear system.
        self.true = torch.linalg.solve(self.matrix.mean(dim=0),
                                       -self.bias.mean(dim=0))
        # Constant offset away from the reference solution.
        self.players = self.true + .1/math.sqrt(2 * config.dim)

In [7]:
# All-defaults configuration; in the cells visible here it is used only to
# generate the problem data on the next line.
base_config = Config()
# Problem data built once and passed unchanged to every train() call below.
# NOTE(review): no RNG seed is set before this call -- if create_matrix /
# create_bias sample randomly, seed torch first so reruns are reproducible.
data = Data(base_config)
# base_config_items = set(base_config.__dict__.items())

In [9]:
# Benchmark sweep: for every (attack, batch size) pair, train each of the
# seven methods on the shared `data`, each starting from a fresh Config.
#
# One row per method:
#   optimizer, run name, run tag,
#   n_iter rescaling (None keeps the Config default),
#   number of attacking peers (None means all `n_byzan` peers),
#   remaining Config overrides.
rfa_bucketing = dict(aggregator=Aggregator.RFA, use_bucketing=True)
mean_sigma = dict(aggregator=Aggregator.Mean, sigmaC=100.)
methods = (
    (Optimizer.SGDA, 'SGDA', 'RA', None, None,
     dict(aggregator=Aggregator.Mean)),
    (Optimizer.SGDARA, 'SGDARA', 'RA', None, None,
     rfa_bucketing),
    (Optimizer.MSGDARA, 'MSGDARA', 'RA M', None, None,
     dict(alpha=0.1, **rfa_bucketing)),
    (Optimizer.SEGRA, 'SEGRA', 'RA', lambda n: int(n / 2) + 1, None,
     rfa_bucketing),
    (Optimizer.RDEG, 'RDEG', 'RA', lambda n: int(n / 2) + 1, None,
     dict(aggregator=Aggregator.UnivariateTM)),
    (Optimizer.SGDACC, 'SGDACC', 'CC', lambda n: n * 2 - 1, 1,
     mean_sigma),
    (Optimizer.SEGCC, 'SEGCC', 'CC', lambda n: n + 1, 1,
     mean_sigma),
)

for at, bs in product([Attack.BF], [1, 10, 100]):
    for optimizer, run_name, run_tag, rescale, n_attackers, overrides in methods:
        config = Config(attack=at, batch_size=bs)
        if rescale is not None:
            config.n_iter = rescale(config.n_iter)
        config.optimizer = optimizer
        for attr, value in overrides.items():
            setattr(config, attr, value)
        config.n_attacking = config.n_byzan if n_attackers is None else n_attackers

        # train() receives only config and data; the run's tag, name and
        # title are exported as MLFLOW_RUN_* environment variables first.
        os.environ.update(
            MLFLOW_RUN_TAG=run_tag,
            MLFLOW_RUN_NAME=run_name,
            MLFLOW_RUN_TITLE='%s, bs=%i' % (at, bs),
        )
        train(config, data)


Trying port 37555
Trying port 40809
Trying port 10837
Trying port 26135
2325         

KeyboardInterrupt: 

Process SpawnProcess-402:
Traceback (most recent call last):
  File "/home/nazya/miniconda3/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
    fn(i, *args)
  File "/home/nazya/sgda-ra/gamesopt/train_distributed.py", line 72, in _train
    optimizer.step()
  File "/home/nazya/sgda-ra/gamesopt/optimizer/distributed.py", line 136, in step
    dist.broadcast(self.game.players, src=self.game.master_node)
  File "/home/nazya/miniconda3/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py", line 1084, in broadcast
    work.wait()
RuntimeError: [/pytorch/third_party/gloo/gloo/transport/tcp/pair.cc:598] Connection closed by peer [127.0.1.1]:35724

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/nazya/miniconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/nazya/miniconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
