In [1]:
import os
import itertools
from attrs import define
# from code.train import train
from code.train_dist import train
from code.optimizers.base import Optimizer
# from code.problems import Problem
from code.problem import Loss
from code.datasets import Dataset
from code.models import Model

# %matplotlib widget
%load_ext autoreload
%autoreload 2

In [2]:
def zip_dict(**kwargs):
    """Yield one dict per position, pairing every keyword with its i-th value.

    The keyword values are walked in lockstep with ``zip``, so iteration ends
    with the shortest one. For example ``zip_dict(a=[1, 2], b=[3, 4])`` yields
    ``{'a': 1, 'b': 3}`` followed by ``{'a': 2, 'b': 4}``.
    """
    names = tuple(kwargs)
    for row in zip(*kwargs.values()):
        yield {name: value for name, value in zip(names, row)}


def product_dict(**kwargs):
    """Yield one dict per element of the Cartesian product of the keyword values.

    Each yielded dict maps every keyword to one choice from its iterable; the
    last keyword varies fastest (``itertools.product`` ordering). For example
    ``product_dict(a=[1, 2], b=[3])`` yields ``{'a': 1, 'b': 3}`` and then
    ``{'a': 2, 'b': 3}``.
    """
    names = tuple(kwargs)
    for combo in itertools.product(*kwargs.values()):
        yield {name: value for name, value in zip(names, combo)}

In [3]:
# Runtime settings exported through the process environment.
# Thread caps are currently disabled:
# os.environ["OMP_NUM_THREADS"] = "1"
# os.environ["MKL_NUM_THREADS"] = "1"
os.environ.update({
    # TODO: move to train dist
    # NOTE(review): "AMD" does not look like one of MKL's documented threading
    # layers (INTEL / GNU / TBB / SEQUENTIAL) — confirm this value is intended.
    "MKL_THREADING_LAYER": "AMD",
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    # Read back later in this notebook to decide whether DataLoaders pin memory.
    "TORCH_DEVICE": "cuda:3",
})

In [4]:
# MLFLOW_* switches, presumably consumed by the project's training code
# (confirm against train). The experiment name is the basename of the
# directory the kernel is currently running in.
os.environ.update(
    MLFLOW_VERBOSE='True',
    MLFLOW_CHECK_EXIST='True',
    MLFLOW_EXPERIMENT_NAME=os.path.basename(os.getcwd()),
)

# 4 classes CIFAR10

In [5]:
@define
class BaseConfig():
    """Settings bundle for one training run; instances are passed to ``train``.

    Every field has a default, so a run is described by overriding only the
    fields that differ, either as keyword arguments or by attribute
    assignment after construction. ``optimizer`` and ``trueweights`` default
    to ``None`` and are assigned per run by the sweep cell.
    """
    # nepochs: int = 3
    niters:          int = 2500
    npeers:          int = 20
    seed:            int = 0

    # Task definition, expressed with the project's Loss / Model / Dataset types.
    loss:           Loss = Loss.CrossEntropyLoss
    model:         Model = Model.ResNet18
    dataset:     Dataset = Dataset.CIFAR10

    nsamples:        int = 1000
    # In this notebook's split() helper, hratio is the fraction of a
    # near-target peer's samples drawn from the target labels; presumably it
    # means the same inside train — TODO confirm.
    hratio:        float = None

    optimizer: Optimizer = None
    batchsize:       int = 200
    lr:            float = 1e-2

    trueweights:    bool = None

    # The md*_ fields are cleared or left unset for the SGD runs; only the
    # commented-out MeritFed runs assign mdniters_ / mdfull_. Presumably
    # MeritFed-specific settings — TODO confirm.
    mdniters_:       int = None
    mdlr_:         float = None  # float: the sweep grid passes 0.05
    mdfull_:        bool = None


In [None]:
# Sweep definition: product_dict expands this into one dict of BaseConfig
# overrides per combination of the listed values (here 4 hratio values x 1 mdlr_).
args_grid = dict(
    # hratio=[None],
    hratio=[0.99, 0.9, 0.7, 0.5],
    mdlr_=[0.05],
    # 'seed': [0],
)

for d in product_dict(**args_grid):

    # Run 1 of 2 per grid point: SGD with trueweights=False at the grid's hratio.
    # MLFLOW_RUN_NAME is exported right before each train() call — presumably
    # train reads it to label the run; confirm.
    os.environ['MLFLOW_RUN_NAME'] = 'SGD Full'
    config = BaseConfig(**d)
    config.mdlr_ = None  # mdlr_ comes from the grid but is cleared for the SGD runs
    config.optimizer = Optimizer.SGD
    config.trueweights = False
    # %time train(config)
    %time train(config)

    # Run 2 of 2: SGD with trueweights=True and hratio cleared.
    # NOTE(review): both grid fields are overridden here, so this config is the
    # same for every grid point — presumably repeated runs are skipped through
    # MLFLOW_CHECK_EXIST; confirm, or hoist this run out of the loop.
    os.environ['MLFLOW_RUN_NAME'] = 'SGD Ideal'
    config = BaseConfig(**d)
    config.mdlr_ = None
    config.hratio = None
    config.optimizer = Optimizer.SGD
    config.trueweights = True
    %time train(config)

#     os.environ['MLFLOW_RUN_NAME'] = 'MeritFed MD'
#     config = BaseConfig(**d)
#     config.optimizer = Optimizer.MeritFed
#     config.mdfull_ = True
#     config.mdniters_ = 20
#     # config.md_lr_ = 0.05
#     %time train(config)

#     os.environ['MLFLOW_RUN_NAME'] = 'MeritFed SMD'
#     config = BaseConfig(**d)
#     config.optimizer = Optimizer.MeritFed
#     config.mdfull_ = False
#     config.mdniters_ = 20
#     # config.md_lr_ = 0.05
#     %time train(config)


CPU times: user 43.6 ms, sys: 0 ns, total: 43.6 ms
Wall time: 42.7 ms
CPU times: user 22.6 ms, sys: 15.6 ms, total: 38.2 ms
Wall time: 38.2 ms
Iterations left: 2448         

In [None]:
Epoch: 3/3.. Training Loss: 1.50394,

In [None]:
import torch
import random as random
import code.datasets
from collections import defaultdict
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as transforms
import torchvision.datasets as datasets


# Scratch data for the split experiments in the following cells: the MNIST
# training set, converted to tensors, downloaded into `root` if missing.
# Note that BaseConfig's default dataset is CIFAR10 while MNIST is loaded here.
root = '/tmp'
transform = transforms.ToTensor()
# train_data = datasets.CIFAR10(root=root, train=True, download=True, transform=transform)
train_data = datasets.MNIST(root=root, train=True, download=True, transform=transform)

# Default configuration with only hratio overridden.
cfg = BaseConfig(hratio=0.5)

In [None]:
# def split(dset, cfg):

#     nclasses = 3
#     d = defaultdict(list)
#     m = len(dset)
#     for i, c in enumerate(dset.classes):
#         indices = [j for j, x in enumerate(dset.targets) if x == i]
#         indices = np.array(indices)
#         np.random.shuffle(indices)
#         # d[dset.classes[i]] = indices
#         d[i] = indices
#         m = min(m, len(indices))

#     for i, _ in enumerate(dset.targets):
#         dset.targets[i] %= nclasses

    
#     target_rank_below, near_target_rank_below = 1, 11

#     trueweights = torch.zeros(cfg.npeers)
#     trueweights[:target_rank_below] = 1 / target_rank_below
    
#     indices_split = [list() for _ in range(cfg.npeers)]

#     hratio = 1.
#     m //= 2
#     for rank, _ in enumerate(indices_split):
#         if rank < target_rank_below:
#             for i in range(nclasses):
#                 indices_split[rank] += d[i][:m].tolist()
#         elif target_rank_below <= rank and rank < near_target_rank_below:
#             for i in range(nclasses):
#                 n = int(m * cfg.hratio)
#                 indices_split[rank] += d[i][m:m+n].tolist()
#                 indices_split[rank] += d[i+nclasses][:m-n].tolist()
#         else:
#             for i in range(nclasses):
#                 indices_split[rank] += d[i+2*nclasses][:m].tolist()
    
#     return indices_split

# indices_split = split(train_data, cfg)

In [None]:
import random as random
def split(dset, npeers, hratio, nclasses=3, target_rank_below=1,
          near_target_rank_below=11):
    """Split ``dset`` into ``npeers`` ``Subset`` objects with controlled label overlap.

    Sample indices are grouped by their label on entry and each group is
    shuffled with the global ``random`` module (seed it beforehand for a
    reproducible split). With ``m`` equal to half the size of the smallest
    class, peers are filled according to their rank:

    * ``rank < target_rank_below``: the first ``m`` samples of each label in
      ``0 .. nclasses - 1``.
    * ``target_rank_below <= rank < near_target_rank_below``: for every
      ``i < nclasses``, ``n = int(m * hratio)`` samples of label ``i`` (taken
      right after the slice used above) plus the first ``m - n`` samples of
      label ``i + nclasses``. Every peer in this group gets the same indices.
    * remaining ranks: the first ``m`` samples of each label in
      ``2 * nclasses .. 3 * nclasses - 1``.

    Labels that do not exist in ``dset.classes`` contribute no samples.

    Side effect: every entry of ``dset.targets`` is reduced modulo
    ``nclasses`` in place, so the three label groups share one label space.
    Grouping uses the labels as they are on entry, so call this once per
    freshly loaded dataset; a second call on the same object would group by
    the already-reduced labels.

    Args:
        dset: dataset exposing ``classes``, an indexable and assignable
            ``targets`` sequence of integer labels, and ``__len__``.
        npeers: number of subsets to produce.
        hratio: fraction in the second bullet above; must be a number whenever
            a peer falls in that rank range.
        nclasses: size of each label group and modulus applied to the targets.
        target_rank_below: ranks below this value form the first group.
        near_target_rank_below: ranks below this value (and not in the first
            group) form the second group.

    Returns:
        A list of ``npeers`` ``torch.utils.data.Subset`` objects over ``dset``.
    """
    nlabels = len(dset.classes)

    # Bucket sample indices by label in a single pass over the targets.
    by_label = {label: [] for label in range(nlabels)}
    for idx, label in enumerate(dset.targets):
        bucket = by_label.get(int(label))
        if bucket is not None:
            bucket.append(idx)

    # Shuffle class by class (in label order) and track the smallest class size.
    m = len(dset)
    for label in range(nlabels):
        random.shuffle(by_label[label])
        m = min(m, len(by_label[label]))

    # Fold the labels into the shared 0 .. nclasses-1 label space (in place).
    for idx, _ in enumerate(dset.targets):
        dset.targets[idx] %= nclasses

    def take(label, start, stop):
        # Labels outside dset.classes simply yield no samples.
        return by_label.get(label, [])[start:stop]

    # Use half of the smallest class so the first two groups can draw
    # disjoint slices from the same labels.
    m //= 2
    indices_split = []
    for rank in range(npeers):
        inds = []
        if rank < target_rank_below:
            for i in range(nclasses):
                inds += take(i, 0, m)
        elif rank < near_target_rank_below:
            n = int(m * hratio)
            for i in range(nclasses):
                inds += take(i, m, m + n)
                inds += take(i + nclasses, 0, m - n)
        else:
            for i in range(nclasses):
                inds += take(i + 2 * nclasses, 0, m)
        indices_split.append(inds)

    return [Subset(dset, inds) for inds in indices_split]


In [None]:
# Build one shuffled DataLoader per peer over the subsets produced by split().
# Settings come from `cfg` (defined with the scratch data) so this cell does
# not depend on the `config` left behind by the training loop.
dataloader_kwargs = {'batch_size': cfg.batchsize, 'shuffle': True}
use_cuda = os.environ["TORCH_DEVICE"] != 'cpu'
if use_cuda:
    dataloader_kwargs.update({'num_workers': 0, 'pin_memory': True})

# split() already returns Subset objects, so they are fed to DataLoader as-is.
train_split = split(train_data, cfg.npeers, cfg.hratio)

train_loaders = dict()
for i, subset in enumerate(train_split):
    print(f"{len(subset)=}")
    train_loaders[i] = DataLoader(subset, **dataloader_kwargs)
    
    

In [None]:
# Check that shuffling reorders a list of single-element lists in place.
# Only the `random` module is imported in this notebook, so the function has
# to be called through it.
x = [[i] for i in range(10)]
random.shuffle(x)
print(x)

# print(x)  gives  [[9], [2], [7], [0], [4], [5], [3], [1], [8], [6]]

In [None]:
# Ten single-element lists, shuffled in place and printed to eyeball the order.
x = list(map(lambda value: [value], range(10)))
random.shuffle(x)
print(x)

# print(x)  gives  [[9], [2], [7], [0], [4], [5], [3], [1], [8], [6]]

In [None]:
# NOTE(review): scratch cell. It reads `indices`, `mdnsamples`, `n_workers`,
# `n_val`, `batch_size` and `np`, none of which are assigned or imported in
# the visible cells of this notebook, so it cannot run on a fresh kernel as
# shown. TODO: define them or remove the cell.
test_data = datasets.MNIST(root=root, train=False, download=True, transform=transform)

# Drop the first `mdnsamples` indices, then cut the rest into `n_workers`
# chunks of `a` items each; the remainder past `top_ind` is discarded
# (assuming `np` is NumPy).
indices = indices[mdnsamples:]
n = len(indices)
a = int(np.floor(n / n_workers))
top_ind = a * n_workers
seq = range(a, top_ind, a)
# NOTE(review): this rebinds `split`, replacing the split() function defined
# in an earlier cell for the rest of the kernel session.
split = np.split(indices[:top_ind], seq)


# The validation subset takes the first `n_val` of the same `indices`, so it
# overlaps the chunks computed above.
val_data = Subset(train_data, indices=indices[:n_val])

test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

In [None]:
# Iterates once over a loader built from the indices in `d[0]`, discarding
# every batch.
# NOTE(review): `dset` and `d` only exist as locals of split() in the visible
# cells; at notebook level they are undefined unless set in a cell that is not
# shown. TODO confirm or remove.
loader = DataLoader(Subset(dset, d[0]), **dataloader_kwargs)
for i in loader:
    # print(f"{i[1]=}")
    pass

In [None]:

def load_data(dataset_name):
    """Return the ``(train_data, test_data)`` torchvision datasets for a name.

    Supported names are ``'mnist'``, ``'cifar10'`` and ``'cifar100'``. The data
    is stored under ``'data'`` and downloaded if missing. MNIST and CIFAR-100
    samples are only converted to tensors; CIFAR-10 samples are additionally
    normalized with the fixed per-channel mean/std given below. For CIFAR-10
    the number of classes is printed after the training set is created.

    Raises:
        ValueError: if ``dataset_name`` is none of the supported names.
    """
    known_names = ('mnist', 'cifar10', 'cifar100')
    if dataset_name not in known_names:
        raise ValueError(dataset_name + ' is not known.')

    is_cifar10 = dataset_name == 'cifar10'

    # Pick the dataset class and the per-sample transform.
    if is_cifar10:
        dataset_cls = datasets.CIFAR10
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform = transforms.Compose([transforms.ToTensor(), normalize])
    elif dataset_name == 'cifar100':
        dataset_cls = datasets.CIFAR100
        transform = transforms.ToTensor()  # add extra transforms
    else:
        dataset_cls = datasets.MNIST
        transform = transforms.ToTensor()

    # Train and test sets share the root directory and the transform.
    shared_kwargs = dict(root='data', download=True, transform=transform)
    train_data = dataset_cls(train=True, **shared_kwargs)
    if is_cifar10:
        print(f"{len(train_data.classes)=}")
    test_data = dataset_cls(train=False, **shared_kwargs)

    return train_data, test_data


# NOTE(review): `create_loaders` is neither defined nor imported in the visible
# cells of this notebook (only load_data is defined above), so this call
# raises NameError on a fresh kernel unless it is provided elsewhere.
# TODO: restore the helper or drop the call.
train_loader_workers, val_loader, test_loader = create_loaders('cifar10', 3, 2)


In [None]:
train_loader_workers