In [None]:
## Imports
# import os
from pathlib import Path
# import shutil
import warnings
import opendatasets as od
from typing import Optional, Callable, Tuple, Dict
import numpy as np
import matplotlib.pyplot as plt
import torch
# import torchvision
from torch.utils.data import Subset, random_split
from torchvision.datasets.vision import VisionDataset
from torchvision import datasets, transforms

In [None]:
## Simulate the args like in the `main_*.py` files
class ARGS:
    """Notebook stand-in for the argument namespace used by the `main_*.py` scripts.

    Every option is a class attribute holding its default value. Later cells
    read these from the ``args`` instance and assign overrides (and extra
    attributes such as ``device``) directly onto that instance.
    """

    # --- federated arguments ---
    epochs: int = 10             # rounds of training (1000 was used before; kept small here)
    num_users: int = 100         # number of users: K
    shard_per_user: int = 2      # classes per user
    frac: float = 0.1            # the fraction of clients: C
    local_ep: int = 1            # the number of local epochs: E
    local_bs: int = 10           # local batch size: B
    bs: int = 128                # test batch size
    lr: float = 0.01             # learning rate
    momentum: float = 0.5        # SGD momentum (default: 0.5)
    split: str = "user"          # train-test split type, user or sample
    # grad_norm: str             # use_gradnorm_avging (not defined in this notebook)
    local_ep_pretrain: int = 0   # the number of pretrain local ep
    lr_decay: float = 1.0        # learning rate decay per round

    # --- model arguments ---
    model: str = "cnn"           # model name
    kernel_num: int = 9          # number of each kind of kernel
    kernel_sizes: str = "3,4,5"  # comma-separated kernel size to use for convolution
    norm: str = "batch_norm"     # batch_norm, layer_norm, or None
    num_filters: int = 32        # number of filters for conv nets
    max_pool: bool = True        # whether use max pooling rather than strided convolutions
    num_layers_keep: int = 1     # number layers to keep

    # --- other arguments ---
    dataset: str = "coba"        # name of dataset
    log_level: str = "info"      # level of logger
    iid: bool = True             # "store_true": whether iid or not
    num_classes: int = 14        # number of classes
    num_channels: int = 3        # number of channels of images RGB
    gpu: int = 0                 # GPU ID, -1 for CPU
    stopping_rounds: int = 10    # rounds of early stopping
    verbose: bool = True         # "store_true"
    print_freq: int = 100        # print loss frequency during training
    seed: int = 1                # random seed (default: 1)
    test_freq: int = 1           # how often to test on val set
    load_fed: str = ""           # define pretrained federated model path
    results_save: str = "run1"   # define fed results save folder
    start_saving: int = 0        # when to start saving models


# Shared configuration object used by the cells below.
args = ARGS()
args.num_users

In [None]:
from utils.coba_dataset import COBA

## Initialize CobaDataset
# Build the project's COBA dataset rooted at data/coba. download=True is forwarded
# to the constructor (presumably fetches the data when missing — confirm in utils.coba_dataset).
coba_dataset = COBA(root="data/coba", download=True)

In [None]:
## Create training and testing data -- method 1
# 80 % of the samples go to training; the remainder forms the test split.
train_size = int(0.8 * len(coba_dataset))
test_size = len(coba_dataset) - train_size

# Seed the permutation from args.seed so that Restart & Run All reproduces the
# same split instead of drawing a new one on every execution.
train_dataset, test_dataset = random_split(
    dataset=coba_dataset,
    lengths=[train_size, test_size],
    generator=torch.Generator().manual_seed(args.seed),
)

In [None]:
# Number of dataset indices assigned to the training split.
len(train_dataset.indices)
# Scratch lines left over from inspecting a single sample (see the plotting cell below):
# label.argmax()
# label_encodings[label.argmax().item()]
# print(f"Label: {label_encodings[label.argmax().item()]}")
# plt.imshow(img)

In [None]:
## Plot random train sample
# Class lookup table exposed by the dataset that backs the train split.
label_encodings = train_dataset.dataset.class_to_idx

# Draw one index belonging to the train split and fetch that sample from the full dataset.
example_image = coba_dataset[
    np.random.choice(train_dataset.indices, 1).item()
]
img, label = example_image

# NOTE(review): the table is indexed with the argmax position of the label vector;
# this assumes class_to_idx can be looked up by integer index — confirm in utils.coba_dataset.
print(
    f"Label: {label_encodings[label.argmax().item()]}"
)
plt.imshow(img)

In [None]:
## The random split works!
# Count the test indices that also occur in the train split (0 means the splits are disjoint).
# A set gives constant-time membership tests; scanning the index list for every
# test index was quadratic in the dataset size.
train_index_set = set(train_dataset.indices)
dups = sum(1 for index in test_dataset.indices if index in train_index_set)
print(dups)

In [None]:
from utils.sampling import iid

In [None]:
# Partition the dataset behind the train split across users with the project's iid sampler.
# NOTE(review): train_dataset.dataset is the full dataset that was handed to random_split,
# so the held-out test samples are part of this partition too — confirm this is intended.
dict_users_train = iid(dataset=train_dataset.dataset, args=args)

# Print the size of every user's entry.
for user in dict_users_train:
    d = dict_users_train[user]
    print(f"user:{user}\t\t len:{len(d)}")

In [None]:
## Try noniid example
import random
from utils.sampling import noniid

In [None]:
# Partition the dataset behind the train split with the project's non-iid sampler.
# It returns the per-user mapping plus a second value (rand_set_all) that later
# cells pass back in when partitioning a matching test set.
# NOTE(review): train_dataset.dataset is the full dataset, not only the train split.
dict_users_train, rand_set_all = noniid(
    dataset=train_dataset.dataset,
    args=args,
)

# Print the size of every user's entry.
for user in dict_users_train:
    d = dict_users_train[user]
    print(f"user:{user}\t\t len:{len(d)}")

In [None]:
# Display the number of users currently configured on args.
args.num_users

In [None]:
# Display the class lookup table exposed by the COBA dataset.
coba_dataset.class_to_idx

In [None]:
import copy
import pickle
import numpy as np
import pandas as pd
import torch

from utils.options import args_parser
from utils.train_utils import get_data, get_model
from models.Update import LocalUpdate
from models.test import test_img
import os

In [None]:
# Pick the compute device: the CUDA device numbered args.gpu when CUDA is available
# and a GPU was requested (args.gpu != -1), otherwise the CPU.
args.device = torch.device(
    "cpu"
    if not torch.cuda.is_available() or args.gpu == -1
    else f"cuda:{args.gpu}"
)
args.device

In [None]:
from utils.options import get_logger
from logging import Logger
def main_loop():
    """Run the federated-averaging training loop configured by the global ``args``.

    Steps, in order:
      1. Create a logger and (re)select ``args.device``.
      2. Load the datasets and per-user index dictionaries via ``get_data(args)``.
      3. Derive a run directory below ``save/<dataset>/...`` and pickle the
         user dictionaries into it.
      4. Build the global model and run the training rounds: sample ``m``
         users, train a copy of the global model for each of them, average the
         returned weights and load the average back into the global model.
      5. Every ``args.test_freq`` rounds evaluate with ``test_img`` and rewrite
         ``fed/results.csv``; every 50 rounds write model checkpoints.

    Side effects:
        Mutates ``args.device`` and ``args.results_save``; creates directories
        and files below ``save/``.

    Returns:
        None
    """
    filename: str = "iobt_coba_data_viewer"
    logger: Logger = get_logger(args=args, filename=filename)

    logger.log(level=logger.level, msg=f"Log level: {args.log_level.upper()}")

    args.device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() and args.gpu != -1 else "cpu"
    )

    # Data is always loaded here from args; same-named notebook globals are not used.
    dataset_train, dataset_test, dict_users_train, dict_users_test = get_data(args)

    logger.debug("%s dataset loaded", args.dataset.upper())

    base_dir: Path = Path(
        "save",
        args.dataset,
        f"{args.model}_iid{args.iid}_num{args.num_users}_C{args.frac}_le{args.local_ep}",
        f"shard{args.shard_per_user}",
    )

    # Pick a run number: start from the last character of results_save and add
    # one for every existing entry whose path contains the prefix.
    # NOTE(review): only a single trailing digit is parsed, the scan stops at the
    # first non-matching entry, and glob order is not guaranteed — confirm that
    # this numbering scheme is what is wanted.
    run_num: int = int(args.results_save[-1])

    for file in base_dir.glob(pattern="*"):
        if args.results_save[:-1] in file.as_posix():
            run_num += 1
        else:
            break

    args.results_save = f"{args.results_save[:-1]}{run_num}"

    base_dir = Path(base_dir, args.results_save)

    logger.info("Base save directory: %s", base_dir)

    # exist_ok makes a separate existence check unnecessary.
    Path(base_dir, "fed").mkdir(exist_ok=True, parents=True)

    dict_save_path: Path = Path(base_dir, "dict_users.pkl")
    with open(dict_save_path, "wb") as handle:
        pickle.dump((dict_users_train, dict_users_test), handle)

    # build model
    logger.debug("Building Model")
    net_glob = get_model(args)
    logger.debug("Model built\n%s", net_glob)

    logger.debug("Setting model to training mode")
    net_glob.train()

    # training
    results_save_path: Path = Path(base_dir, "fed/results.csv")

    loss_train = []
    net_best = None
    best_acc = None
    best_epoch = None

    # Number of users sampled per round (at least one).
    m = max(int(args.frac * args.num_users), 1)

    # lr is decayed and logged below; it is not passed to LocalUpdate in this function.
    lr: float = args.lr
    results: list = []

    logger.debug("Starting training loop")
    # Only a single round is run in this notebook; use range(args.epochs) for a full run.
    # for _iter in range(args.epochs):
    for _iter in range(1):
        loss_locals = []
        # Start every round with an empty accumulator. Without this reset the
        # previous round's average would be summed into the new round's
        # weights and then divided by m as if it were not there.
        w_glob = None
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)
        logger.info("Round %3d, lr: %.3f, %s", _iter, lr, idxs_users)

        for idx in idxs_users:
            logger.debug("User %i local training", idx)
            local = LocalUpdate(
                args=args, dataset=dataset_train, idxs=dict_users_train[idx]
            )
            logger.debug("\tcreating net_local")
            net_local = copy.deepcopy(net_glob)
            logger.debug("\tnet_local created")

            logger.debug("\ttraining to get w_local and loss")
            w_local, loss = local.train(net=net_local.to(args.device))
            logger.debug("\ttraining completed")

            logger.debug("\tadding loss to loss_locals")
            loss_locals.append(copy.deepcopy(loss))

            if w_glob is None:
                # The first user's weights seed the running sum.
                logger.debug("\tcreated w_glob (during User %i)", idx)
                w_glob = copy.deepcopy(w_local)
            else:
                logger.debug("\tadding w_local[k] to each key k in w_glob[k]")
                for k in w_glob.keys():
                    w_glob[k] += w_local[k]

        logger.debug("Modifying lr")
        lr *= args.lr_decay

        # update global weights: turn the running sum into the mean over the m users
        logger.debug("Updating global weights")
        for k in w_glob.keys():
            w_glob[k] = torch.div(w_glob[k], m)

        # copy weight to net_glob
        logger.debug("Copying weights")
        net_glob.load_state_dict(w_glob)

        # print loss
        logger.debug("Calculating Loss")
        loss_avg = sum(loss_locals) / len(loss_locals)
        loss_train.append(loss_avg)

        if (_iter + 1) % args.test_freq == 0:
            logger.debug("Evaluating net_glob")
            net_glob.eval()

            # pylint: disable=unbalanced-tuple-unpacking
            logger.debug("Calculating acc_test and loss_test")
            acc_test, loss_test, f1_test, precision_test, recall_test = test_img(
                net_glob, dataset_test, args
            )
            logger.info(
                "\tAvg loss %.3f, Test loss %.6f, Test accuracy: %.2f",
                loss_avg,
                loss_test,
                acc_test,
            )

            # Keep a private copy of the best-scoring global model so far.
            if best_acc is None or acc_test > best_acc:
                net_best = copy.deepcopy(net_glob)
                best_acc = acc_test
                best_epoch = _iter

            # The CSV is rewritten in full after every evaluation.
            results.append(np.array([_iter, loss_avg, loss_test, acc_test, best_acc]))
            final_results = pd.DataFrame(
                np.array(results),
                columns=["epoch", "loss_avg", "loss_test", "acc_test", "best_acc"],
            )
            final_results.to_csv(results_save_path, index=False)

        if (_iter + 1) % 50 == 0:
            best_save_path: Path = Path(base_dir, f"fed/best_{_iter+1}.pt")
            model_save_path: Path = Path(base_dir, f"fed/model_{_iter+1}.pt")
            cpu = torch.device("cpu")

            # nn.Module.to() moves a module in place. Checkpoints are therefore
            # taken from a copy of net_glob so the live global model stays on
            # args.device for the following rounds.
            if args.device.type != "cpu":
                glob_for_save = copy.deepcopy(net_glob).to(cpu)
            else:
                glob_for_save = net_glob

            if net_best is not None:
                # net_best is already a private copy, so moving it is harmless.
                torch.save(net_best.to(cpu).state_dict(), best_save_path)
            else:
                logger.warning(
                    "No evaluated model yet; skipping best-model checkpoint at round %i",
                    _iter + 1,
                )
            torch.save(glob_for_save.state_dict(), model_save_path)

    if best_acc is None:
        # Happens when no round hit the test_freq schedule.
        logger.warning("No evaluation was run; no best model recorded")
    else:
        logger.info("Best model, iter: %i, acc: %f", best_epoch, best_acc)

In [None]:
# Run the training loop with the current contents of the global args.
main_loop()

In [None]:
## Troubleshooting with GPU errors
# print("Device before")
# for k in w_glob.keys():
#     print(f"\t {w_glob[k].device}")
    
# w_glob = copy.deepcopy(w_local)

# for k,value in w_glob.items():
#     # w_glob[k] = value.detach().cpu()
#     w_glob[k] = value.cpu()

# print("Device after")
# for k in w_glob.keys():
#     print(f"\t {w_glob[k].device}")

In [None]:
## Troubleshooting with Shape errors
# image,label = coba_dataset[0]
# print(image.shape)
# image = image.permute(2,0,1)
# print(image.shape)

In [None]:
# Slice the dataset that backs the train split (the object passed to random_split),
# not the split itself, and unpack the result into images and labels.
# NOTE(review): relies on the dataset's __getitem__ accepting a slice — confirm in utils.coba_dataset.
imgs, labels = train_dataset.dataset[:]

In [None]:
# Display the labels unpacked from the dataset slice.
labels

In [None]:
# Reduce every label to the position of its largest entry and collect the positions in one tensor.
torch.tensor([torch.argmax(label_vector) for label_vector in labels])

In [None]:
# Display the torch.device currently stored on args.
args.device

In [None]:
# Display only the device type string of args.device.
args.device.type

In [None]:
# List the keys of the class lookup table on the dataset that backs the test split.
list(test_dataset.dataset.class_to_idx.keys())

In [None]:
# Switch the shared args over to the MNIST / MLP configuration.
args.num_users = 100
args.num_classes = 10
args.dataset = "mnist"
args.model = "mlp"

# Tensor conversion followed by single-channel normalisation.
trans_mnist = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Train and test portions of MNIST, downloaded into data/mnist/ when needed.
dataset_train = datasets.MNIST(
    root="data/mnist/", train=True, transform=trans_mnist, download=True
)
dataset_test = datasets.MNIST(
    root="data/mnist/", train=False, transform=trans_mnist, download=True
)

# Non-iid user partitions; the value returned for the train set is passed back in
# for the test set.
dict_users_train, rand_set_all = noniid(dataset=dataset_train, args=args)
dict_users_test, rand_set_all = noniid(
    dataset=dataset_test, args=args, rand_set_all=rand_set_all
)

# NOTE(review): main_loop() takes no arguments and assigns its own datasets and user
# dictionaries from get_data(args), so the objects built in this cell are not what it trains on.
main_loop()

In [None]:
# Switch the shared args over to the CIFAR-10 / CNN configuration.
args.num_users = 100
args.num_classes = 10
args.dataset = "cifar10"
args.model = "cnn"

# Evaluation pipeline: tensor conversion plus per-channel normalisation.
trans_cifar10_val = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
# Training pipeline: the same steps preceded by random crop and horizontal-flip augmentation.
trans_cifar10_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Train and test portions of CIFAR-10, downloaded into data/cifar10/ when needed.
dataset_train = datasets.CIFAR10(
    root="data/cifar10/", train=True, transform=trans_cifar10_train, download=True
)
dataset_test = datasets.CIFAR10(
    root="data/cifar10/", train=False, transform=trans_cifar10_val, download=True
)

# Non-iid user partitions; the value returned for the train set is passed back in
# for the test set.
dict_users_train, rand_set_all = noniid(dataset=dataset_train, args=args)
dict_users_test, rand_set_all = noniid(
    dataset=dataset_test, args=args, rand_set_all=rand_set_all
)

# NOTE(review): main_loop() takes no arguments and assigns its own datasets and user
# dictionaries from get_data(args), so the objects built in this cell are not what it trains on.
main_loop()

In [None]:
## Simulate the args like in the `main_*.py` files
class ARGS:
    """Notebook stand-in for the argument namespace used by the `main_*.py` scripts.

    Declared again here so that ``args`` can be rebuilt with the default
    values after earlier cells assigned dataset/model overrides onto the
    previous instance.

    NOTE(review): this repeats the class from the earlier configuration cell;
    re-instantiating that class would presumably reset the overrides as well,
    since they were assigned on the instance — confirm before de-duplicating.
    """

    # --- federated arguments ---
    epochs: int = 10             # rounds of training (1000 was used before; kept small here)
    num_users: int = 100         # number of users: K
    shard_per_user: int = 2      # classes per user
    frac: float = 0.1            # the fraction of clients: C
    local_ep: int = 1            # the number of local epochs: E
    local_bs: int = 10           # local batch size: B
    bs: int = 128                # test batch size
    lr: float = 0.01             # learning rate
    momentum: float = 0.5        # SGD momentum (default: 0.5)
    split: str = "user"          # train-test split type, user or sample
    # grad_norm: str             # use_gradnorm_avging (not defined in this notebook)
    local_ep_pretrain: int = 0   # the number of pretrain local ep
    lr_decay: float = 1.0        # learning rate decay per round

    # --- model arguments ---
    model: str = "cnn"           # model name
    kernel_num: int = 9          # number of each kind of kernel
    kernel_sizes: str = "3,4,5"  # comma-separated kernel size to use for convolution
    norm: str = "batch_norm"     # batch_norm, layer_norm, or None
    num_filters: int = 32        # number of filters for conv nets
    max_pool: bool = True        # whether use max pooling rather than strided convolutions
    num_layers_keep: int = 1     # number layers to keep

    # --- other arguments ---
    dataset: str = "coba"        # name of dataset
    log_level: str = "info"      # level of logger
    iid: bool = True             # "store_true": whether iid or not
    num_classes: int = 14        # number of classes
    num_channels: int = 3        # number of channels of images RGB
    gpu: int = 0                 # GPU ID, -1 for CPU
    stopping_rounds: int = 10    # rounds of early stopping
    verbose: bool = True         # "store_true"
    print_freq: int = 100        # print loss frequency during training
    seed: int = 1                # random seed (default: 1)
    test_freq: int = 1           # how often to test on val set
    load_fed: str = ""           # define pretrained federated model path
    results_save: str = "run1"   # define fed results save folder
    start_saving: int = 0        # when to start saving models


# Fresh configuration object holding only the defaults declared above.
args = ARGS()

# Use the CUDA device numbered args.gpu when CUDA is available and a GPU was
# requested (args.gpu != -1); otherwise fall back to the CPU.
args.device = torch.device(
    f"cuda:{args.gpu}" if torch.cuda.is_available() and args.gpu != -1 else "cpu"
)

args.num_users, args.device

## Analyze Results

In [None]:
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# from torch.utils.data import DataLoader

# args.device = torch.device(
#         "cuda:{}".format(args.gpu)
#         if torch.cuda.is_available() and args.gpu != -1
#         else "cpu"
# )

# dataset_train, dataset_test, dict_users_train, dict_users_test = get_data(args)
# if args.dataset == "coba":
#     dataset_train, dataset_test = dataset_train.dataset, dataset_test.dataset

# model = get_model(args)

# model_state_dict_path:str = Path("save","coba","cnn_iidFalse_num98_C0.3_le1","shard2","seed10_coba_fedavg_bestcase_run12","fed","model_1000.pt")

# model.load_state_dict(torch.load(model_state_dict_path))

In [None]:
# data_loader: DataLoader = DataLoader(dataset_test, batch_size=args.bs)
# IS_USING_GPU: bool = args.gpu != -1 and args.device.type != "cpu"

# probs: np.array = np.array([])
# y_preds: np.array = np.array([])
# y_trues: np.array = np.array([])
# coba_cms = []

# for _, (data, target) in enumerate(data_loader):
#     if args.gpu != -1 and args.device.type != "cpu":
#         data, target = data.to(args.device), target.to(args.device)
#     if args.dataset == "coba":
#         data = data.permute(0, 3, 1, 2)

#     log_probs: torch.Tensor = model(data)

#     probs: np.array = (
#             np.append(probs, log_probs.cpu().data.numpy())
#             if IS_USING_GPU
#             else np.append(probs, log_probs.data.numpy())
#     )

#     y_pred: torch.Tensor = (
#             log_probs.cpu().data.max(1, keepdim=True)[1]
#             if args.device.type != "cpu"
#             else log_probs.data.max(1, keepdim=True)[1]
#     )

#     y_true: torch.Tensor = (
#             torch.tensor(
#                 list(map(torch.argmax, target.data)), device="cpu"
#             ).data.view_as(y_pred)
#             if args.dataset == "coba"
#             else target.to("cpu").data.view_as(y_pred)
#     )

#     coba_cms.append(confusion_matrix(y_pred=y_pred,y_true=y_true))

#     y_preds = np.append(y_preds,y_pred)
#     y_trues = np.append(y_trues,y_true)

# y_preds.shape == y_trues.shape

In [None]:
# coba_cm = confusion_matrix(y_pred=y_preds,y_true=y_trues)
# disp = ConfusionMatrixDisplay(confusion_matrix=coba_cm)
# disp.plot()

In [None]:
# coba_cm_gen = (cm for cm in coba_cms)

In [None]:
# disp = ConfusionMatrixDisplay(confusion_matrix=next(coba_cm_gen))
# disp.plot()

In [None]:
# len(coba_cms)

In [None]:
# for data, target in data_loader:
#     print(data.shape)
#     print(target.shape)
#     break

In [None]:
# dataset_test == dataset_train

In [None]:
# coba_dataset: COBA = COBA(root="data/coba", download=True)

# ## Create training and testing data
# train_size: int = int(
#     0.8 * len(coba_dataset)
# )  # maybe TODO: make the percentage customizable (part of `args`)
# test_size: int = len(coba_dataset) - train_size
# dataset_train, dataset_test = random_split(
#     dataset=coba_dataset, lengths=[train_size, test_size]
# )

## Verify Train/Test data differences

In [None]:
# Build the project's COBA dataset again for the train/test verification below.
# download=True is forwarded to the constructor (presumably fetches the data when missing — confirm).
coba_dataset: COBA = COBA(root="data/coba", download=True)

In [None]:
# 80 % of the samples go to training; the remainder forms the test split.
train_size: int = int(
    0.8 * len(coba_dataset)
)  # maybe TODO: make the percentage customizable (part of `args`)
test_size: int = len(coba_dataset) - train_size

# Seed the permutation from args.seed so that Restart & Run All reproduces the
# same split instead of drawing a new one on every execution.
dataset_train, dataset_test = random_split(
    dataset=coba_dataset,
    lengths=[train_size, test_size],
    generator=torch.Generator().manual_seed(args.seed),
)

In [None]:
# Display the two split objects returned by random_split.
dataset_train, dataset_test

In [None]:
# Walk the train split once, collecting every (image, label) pair into a plain list.
new_dataset_train = [
    (image, target)
    for image, target in dataset_train
]
# The list must contain exactly as many samples as were requested for the split.
train_size == len(new_dataset_train)

In [None]:
from utils.coba_dataset import COBA_Split
# Wrap each random_split result in the project's COBA_Split class
# (presumably so the samplers can treat a split like a full dataset — confirm in utils.coba_dataset).
d_train = COBA_Split(dataset=dataset_train)
d_test = COBA_Split(dataset=dataset_test)

# Sizes of the wrapped train and test splits.
len(d_train), len(d_test)

In [None]:
# iid
# Partition the wrapped train and test splits across users independently.
dict_users_train: Dict[int, set] = iid(dataset=d_train, args=args)
dict_users_test: Dict[int, set] = iid(dataset=d_test, args=args)

# Print the size of every user's training entry.
for user in dict_users_train:
    d = dict_users_train[user]
    print(f"user:{user}\t\t len:{len(d)}")

In [None]:
# noniid
# Partition the wrapped train split first; the second return value is handed to the
# test-split call.
dict_users_train, rand_set_all = noniid(dataset=d_train, args=args)
dict_users_test, rand_set_all = noniid(
    dataset=d_test, args=args, rand_set_all=rand_set_all
)

In [None]:
# Print the size of every user's training entry.
for user in dict_users_train:
    d = dict_users_train[user]
    print(f"user:{user}\t\t len:{len(d)}")