In [None]:
## Imports
import sys
from pathlib import Path

# Make the repository root importable so that `utils` resolves from this notebook folder.
sys.path.append("..")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from torch.utils.data import DataLoader

from utils.train_utils import get_data, get_model
from utils.train_utils import save_metrics_graphs

In [None]:
## Simulate the args like in the `main_*.py` files
class ARGS:
    """Plain attribute container standing in for the CLI arguments of the `main_*.py` scripts.

    Every option is a class attribute with a default value, so ``ARGS()`` yields an
    object that can be handed to helpers expecting the parsed-arguments namespace.
    Further attributes (for example ``device``) may be attached to an instance later.
    """

    # federated arguments
    epochs: int = 10  # rounds of training (a value of 1000 was used here previously)
    train_test_same: int = 0  # use same testing for
    num_users: int = 100  # number of users: K
    shard_per_user: int = 2  # classes per user
    frac: float = 0.1  # the fraction of clients: C
    local_ep: int = 1  # the number of local epochs: E
    local_bs: int = 10  # local batch size: B
    bs: int = 128  # test batch size
    lr: float = 0.01  # learning rate
    momentum: float = 0.5  # SGD momentum (default: 0.5)
    split: str = "user"  # train-test split type, user or sample
    # grad_norm:str           # use_gradnorm_avging
    local_ep_pretrain: int = 0  # the number of pretrain local ep
    lr_decay: float = 1.0  # learning rate decay per round

    # model arguments
    model: str = "cnn"  # model name
    kernel_num: int = 9  # number of each kind of kernel
    kernel_sizes: str = "3,4,5"  # comma-separated kernel size to use for convolution
    norm: str = "batch_norm"  # batch_norm, layer_norm, or None
    num_filters: int = 32  # number of filters for conv nets
    # Annotated as bool (was `str`): the value is the boolean True.
    max_pool: bool = True  # whether use max pooling rather than strided convolutions
    num_layers_keep: int = 1  # number layers to keep

    # other arguments
    dataset: str = "coba"  # name of dataset
    log_level: str = "warning"  # level of logger
    iid: bool = True  # "store_true" #whether iid or not
    num_classes: int = 14  # number of classes
    num_channels: int = 3  # number of channels of images RGB
    gpu: int = 0  # GPU ID, -1 for CPU
    stopping_rounds: int = 10  # rounds of early stopping
    verbose: bool = True  # "store_true"
    print_freq: int = 100  # print loss frequency during training
    seed: int = 1  # random seed (default:1)
    test_freq: int = 1  # how often to test on val set
    load_fed: str = ""  # define pretrained federated model path
    results_save: str = "run1"  # define fed results save folder
    start_saving: int = 0  # when to start saving models


args = ARGS()

# Pick the compute device: the configured CUDA GPU when CUDA is available and the
# GPU has not been disabled with -1, otherwise the CPU.
if torch.cuda.is_available() and args.gpu != -1:
    args.device = torch.device("cuda:{}".format(args.gpu))
else:
    args.device = torch.device("cpu")

# Echo the two settings as the cell output.
args.num_users, args.device

In [None]:
# Build the datasets / per-user index mappings and the model from the simulated arguments.
dataset_train, dataset_test, dict_users_train, dict_users_test = get_data(args)

model = get_model(args)

# Checkpoint whose weights are evaluated in the following cells.
model_state_dict_path: Path = Path(
    "../save",
    "coba_legacy",
    "cnn_iidFalse_num98_C0.3_le1",
    "shard2",
    "seed10_coba_fedavg_bestcase_run12",
    "fed",
    "model_1000.pt",
)

# `map_location` remaps the stored tensors onto the active device, so one call covers
# both the CPU-only case and a checkpoint saved from a different CUDA index.
model.load_state_dict(torch.load(model_state_dict_path, map_location=args.device))

In [None]:
# Run the model over the whole test set and collect predicted and true class ids.
data_loader: DataLoader = DataLoader(dataset_test, batch_size=args.bs)
IS_USING_GPU: bool = args.gpu != -1 and args.device.type != "cpu"

# Per-batch results are gathered in lists and joined once after the loop;
# np.append inside the loop would re-copy the whole array on every batch.
y_pred_batches: list = []
y_true_batches: list = []

# Evaluation mode: the configuration requests `batch_norm`, so normalisation layers
# (if the model has them) must use their stored statistics instead of the statistics
# of each test batch, and must not update them while evaluating.
model.eval()

# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for data, target in data_loader:
        if IS_USING_GPU:
            data, target = data.to(args.device), target.to(args.device)
        if args.dataset == "coba":
            # presumably channels-last (N, H, W, C) -> channels-first (N, C, H, W); TODO confirm
            data = data.permute(0, 3, 1, 2)

        log_probs: torch.Tensor = model(data)

        # Predicted class = index of the largest score along dim 1.
        y_pred_batches.append(log_probs.argmax(dim=1).cpu())

        if args.dataset == "coba":
            # Targets are reduced to one index per sample via argmax over each
            # sample's flattened entries (presumably one-hot vectors; TODO confirm).
            y_true = target.reshape(target.shape[0], -1).argmax(dim=1)
        else:
            y_true = target.reshape(-1)
        y_true_batches.append(y_true.cpu())

# Flat 1-D arrays over all test samples (empty arrays when the loader yields nothing).
y_preds: np.ndarray = (
    torch.cat(y_pred_batches).numpy()
    if y_pred_batches
    else np.empty(0, dtype=np.int64)
)
y_trues: np.ndarray = (
    torch.cat(y_true_batches).numpy()
    if y_true_batches
    else np.empty(0, dtype=np.int64)
)

# Sanity check shown as the cell output: one prediction per target.
y_preds.shape == y_trues.shape

In [None]:
# Class ids that occur in the targets or the predictions, in sorted order. This is the
# same label set confusion_matrix would pick by default; passing it explicitly lets the
# plot show the real class ids instead of positions 0..n-1, which differ as soon as
# some class id is missing from both arrays.
class_ids: np.ndarray = np.unique(np.concatenate([y_trues, y_preds]))

coba_cm = confusion_matrix(y_pred=y_preds, y_true=y_trues, labels=class_ids)
disp = ConfusionMatrixDisplay(
    confusion_matrix=coba_cm,
    display_labels=class_ids.astype(int),  # show 3 rather than 3.0 when ids are stored as floats
)
disp.plot()
disp.ax_.set_title("COBA Confusion Matrix")

# Printed next to the plot as a legend (presumably class name -> index; verify against the dataset class).
print(dataset_train.class_to_idx)
# plt.savefig("confusion_matrix_example.png")

### Visualize Results

In [None]:
# Results table stored in the same run folder as the checkpoint evaluated earlier in this notebook.
coba_results_path: Path = Path(
    "../save",
    "coba_legacy",
    "cnn_iidFalse_num98_C0.3_le1",
    "shard2",
    "seed10_coba_fedavg_bestcase_run12",
    "fed",
    "results.csv",
)

# The comma is already the default separator of read_csv.
coba_results_df: pd.DataFrame = pd.read_csv(coba_results_path)
coba_results_df

In [None]:
# Summary statistics of the loaded results table (pandas `describe` with its defaults).
coba_results_df.describe()

In [None]:
# Point `args` at the run whose folder is used by the plotting cells below.
# NOTE: this mutates the shared `args` object, so earlier cells re-run afterwards
# will see these values instead of the class defaults.
args.num_users = 98
args.frac = 0.3  # kept as a float like the class default; formats as "0.3" in the path below
args.iid = False
args.log_level = "info"  # "debug" might crash the notebook
args.seed = 0
args.results_save = "coba_fedavg_bestcase_run2"

# NOTE(review): this resolves to the seed0 / run2 folder, while the results table in
# this notebook was read from ".../seed10_coba_fedavg_bestcase_run12" - confirm that
# using a different run folder here is intended.
base_dir: Path = Path(
    Path.cwd().parent,
    "save",
    "coba_legacy",
    f"{args.model}_iid{args.iid}_num{args.num_users}_C{args.frac}_le{args.local_ep}",
    f"shard{args.shard_per_user}",
    f"seed{args.seed}_{args.results_save}",
)
base_dir

In [None]:
# `save_metrics_graphs` is imported in the notebook's import cell together with the
# other `utils.train_utils` helpers; it receives the run folder and the results table.
save_metrics_graphs(base_dir=base_dir, df=coba_results_df)

In [None]:
def graph_adjusted(
    base_dir: Path,
    col_name: str,
    df: pd.DataFrame,
    window: int = 10,
    save: bool = False,
) -> None:
    """Plot one column of ``df`` averaged over consecutive blocks of ``window`` rows.

    Rows are grouped by position (rows ``0..window-1``, ``window..2*window-1``, ...)
    and each group is reduced to its mean. A trailing, shorter group is kept, so the
    curve also covers tables whose length is not a multiple of ``window``.

    Args:
        base_dir: Run folder; ``<base_dir>/fed/metrics_graphs`` is created if missing.
        col_name: Column of ``df`` to plot; its title-cased form is the plot title
            and the file name used when saving.
        df: Results table (presumably one row per epoch - verify against the writer
            of the CSV).
        window: Number of consecutive rows averaged into one plotted point.
        save: When true, additionally write the figure to
            ``<base_dir>/fed/metrics_graphs/<Title-Cased col_name>.png``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    # parents=True: do not fail when the intermediate "fed" folder does not exist yet.
    graphs_dir: Path = Path(base_dir, "fed", "metrics_graphs")
    graphs_dir.mkdir(parents=True, exist_ok=True)
    graph_save_file: Path = Path(graphs_dir, f"{col_name.title()}.png")

    # Select the column first so unrelated (possibly non-numeric) columns are never averaged.
    block_ids = np.arange(len(df)) // window
    adjusted_col = df[col_name].groupby(block_ids).mean().to_numpy()

    # One x position per averaged block. Deriving it from the number of blocks keeps
    # x and y the same length even when the last block is incomplete.
    epochs = np.arange(len(adjusted_col)) * window

    ax = sns.lineplot(x=epochs, y=adjusted_col)
    ax.set(
        title=col_name.title(),
        xlabel="Epochs",
        ylabel="Value",
    )

    if save:
        ax.figure.savefig(graph_save_file)

In [None]:
# Block-averaged curve of the `best_acc` column of the results table.
graph_adjusted(base_dir, "best_acc", coba_results_df)

In [None]:
# Block-averaged curve of the `acc_test` column of the results table.
graph_adjusted(base_dir, "acc_test", coba_results_df)

In [None]:
# Block-averaged curve of the `loss_test` column of the results table.
graph_adjusted(base_dir, "loss_test", coba_results_df)

In [None]:
# Block-averaged curve of the `f1_test` column of the results table.
graph_adjusted(base_dir, "f1_test", coba_results_df)

In [None]:
# Block-averaged curve of the `precision_test` column of the results table.
graph_adjusted(base_dir, "precision_test", coba_results_df)

In [None]:
# Block-averaged curve of the `recall_test` column of the results table.
graph_adjusted(base_dir, "recall_test", coba_results_df)