# Imports

In [12]:
import sys

sys.path.append("..")
from pathlib import Path
from typing import Optional, Dict, List

import os
import torch

from utils.train_utils import get_model

# Steps

1. Get paths of models (given a directory)
1. Store paths of models
1. For each model...
   1. Load into memory
   2. Test model
   3. Compare to the current best models for each performance metric, replacing if necessary
1. Save best models to `best_models.csv`

# Dynamically Selecting the Best Models

## 1. Get paths of models (given a directory)

In [10]:
## Simulate the args like in the `main_*.py` files
class ARGS:
    """Hard-coded stand-in for the arguments used by the `main_*.py` files.

    Every option is a plain class attribute with a default value, so an
    instance can be handed to code that reads options as `args.<name>`.
    Further attributes (e.g. `device`) may be attached to an instance after
    construction.
    """

    # --- federated arguments ---
    epochs: int = 10  # rounds of training (a value of 1000 was previously used here)
    # NOTE(review): the original comment below is cut off; confirm the intended meaning.
    train_test_same: int = 0  # use same testing for
    num_users: int = 100  # number of users: K
    shard_per_user: int = 2  # classes per user
    frac: float = 0.1  # the fraction of clients: C
    local_ep: int = 1  # the number of local epochs: E
    local_bs: int = 10  # local batch size: B
    bs: int = 128  # test batch size
    lr: float = 0.01  # learning rate
    momentum: float = 0.5  # SGD momentum (default: 0.5)
    split: str = "user"  # train-test split type, user or sample
    # grad_norm: str  # use_gradnorm_avging (disabled, no default provided)
    local_ep_pretrain: int = 0  # the number of pretrain local ep
    lr_decay: float = 1.0  # learning rate decay per round

    # --- model arguments ---
    model: str = "cnn"  # model name
    kernel_num: int = 9  # number of each kind of kernel
    kernel_sizes: str = "3,4,5"  # comma-separated kernel size to use for convolution
    norm: str = "batch_norm"  # batch_norm, layer_norm, or None
    num_filters: int = 32  # number of filters for conv nets
    # Annotated as bool (was `str`): the default is the boolean True.
    max_pool: bool = True  # whether use max pooling rather than strided convolutions
    num_layers_keep: int = 1  # number layers to keep

    # --- other arguments ---
    dataset: str = "coba"  # name of dataset
    log_level: str = "info"  # level of logger
    iid: bool = True  # "store_true" #whether iid or not
    num_classes: int = 14  # number of classes
    num_channels: int = 3  # number of channels of images RGB
    gpu: int = 0  # GPU ID, -1 for CPU
    stopping_rounds: int = 10  # rounds of early stopping
    verbose: bool = True  # "store_true"
    print_freq: int = 100  # print loss frequency during training
    seed: int = 1  # random seed (default:1)
    test_freq: int = 1  # how often to test on val set
    load_fed: str = ""  # define pretrained federated model path
    results_save: str = "run1"  # define fed results save folder
    start_saving: int = 0  # when to start saving models


args = ARGS()

# Select the compute device: the configured GPU when CUDA is available and
# a GPU was requested (gpu != -1), otherwise the CPU.
if torch.cuda.is_available() and args.gpu != -1:
    args.device = torch.device(f"cuda:{args.gpu}")
else:
    args.device = torch.device("cpu")

# Show the user count and the resolved device as the cell output.
args.num_users, args.device

(100, device(type='cuda', index=0))

In [2]:
# Which experiment to analyze.
SCENARIO: str = "best"  # scenario of interest
SEED: int = 0  # seed of the experiment run of interest

# Filled in by the scenario lookup further down.
chosen_scenario_dir: Optional[Path] = None

# Root folder of the saved runs, located next to the current working directory.
# The last component could also be: "coba", "mnist", "cifar10"
experiment_run_dir: Path = Path.cwd().parent / "save" / "coba_legacy"

In [3]:
# Get models according to the chosen scenario.
# Maps the fraction encoded in a run directory name ("..._C<fraction>_...")
# to the scenario label it stands for.
async_fl_scenarios: Dict[str, str] = {"0.3": "best", "0.5": "average", "1.0": "worst"}

# Reset so that re-running this cell never reuses a stale match.
chosen_scenario_dir = None

print("Async FL Scenarios:")
for scenario_dir in experiment_run_dir.glob("*"):
    if not scenario_dir.is_dir():
        continue  # stray files cannot be scenario directories

    # Directory names look like "cnn_iidFalse_num98_C0.3_le1": the
    # second-to-last "_"-separated field carries the fraction.
    name_fields: List[str] = scenario_dir.name.split("_")
    if len(name_fields) < 2:
        continue  # name does not follow the expected pattern

    scenario_percent: str = name_fields[-2].replace("C", "")
    scenario_label: Optional[str] = async_fl_scenarios.get(scenario_percent)
    if scenario_label is None:
        continue  # fraction that is not mapped to any scenario

    print(f"\t{scenario_percent} -> {scenario_label}")

    if scenario_label == SCENARIO:
        chosen_scenario_dir = scenario_dir

# Fail with a clear message instead of an AttributeError on None below.
if chosen_scenario_dir is None:
    raise FileNotFoundError(
        f"No directory for scenario '{SCENARIO}' found in '{experiment_run_dir.as_posix()}'."
    )

print(f"{SCENARIO.title()} case scenario directory: '{chosen_scenario_dir.as_posix()}'")

Async FL Scenarios:
	0.3 -> best
	0.5 -> average
	1.0 -> worst
Best case scenario directory: '/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1'


In [4]:
# Get paths to models in the chosen scenario
all_chosen_scenario_runs_dir: Path = Path(chosen_scenario_dir, "shard2")
all_models_dir: Optional[Path] = None

# Sorting makes the result reproducible: glob order is filesystem-dependent,
# and when several runs share the seed the last match wins.
for run_dir in sorted(all_chosen_scenario_runs_dir.glob("*")):
    # Run directory names look like "seed0_coba_fedavg_bestcase_run2": the
    # seed number follows the "d" of the first "_"-separated field.
    seed_text: str = run_dir.name.split("_")[0].split("d")[-1]
    try:
        seed: int = int(seed_text)
    except ValueError:
        continue  # entry does not follow the "seed<N>_..." naming pattern

    if SEED == seed:
        all_models_dir = Path(run_dir, "fed")

# Raise error if no directory is found with the given SEED
if all_models_dir is None:
    raise FileNotFoundError(
        f"Directory with the provided seed '{SEED}' does not exist. Please choose a different one and try again."
    )


# List the file names of the saved model checkpoints.
print([model_file.name for model_file in all_models_dir.glob("*.pt")])

['best_450.pt', 'best_650.pt', 'model_350.pt', 'best_800.pt', 'model_300.pt', 'model_500.pt', 'best_350.pt', 'best_550.pt', 'best_900.pt', 'best_50.pt', 'best_150.pt', 'model_850.pt', 'model_750.pt', 'best_700.pt', 'best_100.pt', 'model_650.pt', 'model_200.pt', 'best_1000.pt', 'model_450.pt', 'model_50.pt', 'best_600.pt', 'best_400.pt', 'best_250.pt', 'model_900.pt', 'best_300.pt', 'model_800.pt', 'model_550.pt', 'model_600.pt', 'best_850.pt', 'best_500.pt', 'model_150.pt', 'best_750.pt', 'model_100.pt', 'model_250.pt', 'model_700.pt', 'model_1000.pt', 'model_400.pt', 'best_200.pt', 'best_950.pt', 'model_950.pt']


## 2. Store paths of models

In [5]:
# Collect the path of every "*.pt" file in the models directory, in glob order
# (unsorted). To order by round number instead, sort on the integer that
# follows the last "_" in each file name.
model_paths: List[Path] = list(all_models_dir.glob("*.pt"))
model_paths

[PosixPath('/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run2/fed/best_450.pt'),
 PosixPath('/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run2/fed/best_650.pt'),
 PosixPath('/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run2/fed/model_350.pt'),
 PosixPath('/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run2/fed/best_800.pt'),
 PosixPath('/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run

## 3. For each model...

1. Load into memory
2. Test model
3. Compare to the current best models for each performance metric, replacing if necessary

In [8]:
best_models: Dict[str, Path] = {}

# On a CPU-only device the saved tensors are remapped to the CPU; otherwise
# None keeps torch.load's default placement, as before.
state_map_location: Optional[torch.device] = (
    torch.device("cpu") if args.device.type == "cpu" else None
)

for model_path in model_paths:
    # Load into memory: build a fresh model and restore the saved weights.
    # The CPU path previously referenced an undefined name and raised NameError.
    model = get_model(args)
    model.load_state_dict(torch.load(model_path, map_location=state_map_location))

    # TODO: Test model
    # TODO: Compare to the current best models for each performance metric, replacing if necessary

    # Only the first model is processed while the steps above are unimplemented.
    break

/home/tank/Coding-Practice/objective3-fl-experiment/dynamic-model-selection-for-async-fl/save/coba_legacy/cnn_iidFalse_num98_C0.3_le1/shard2/seed0_coba_fedavg_bestcase_run2/fed/best_450.pt


## 4. Save best models

In [7]:
# Name of the CSV file the best models are to be saved to (step 4 of the plan).
best_models_filename: str = "best_models.csv"