# TODO:
1. Нужно прокинуть random seed в саму генерацию масок, иначе для каждого слоя будет генерироваться одна и та же маска. Или это нормально?

In [1]:
import sys
from pathlib import Path

def find_project_root(start_path: "Path | None" = None, marker: str = 'pyproject.toml') -> Path:
    """Return the nearest directory at or above ``start_path`` containing ``marker``.

    Args:
        start_path: Where the upward search begins. When omitted, the current
            working directory is used, looked up at call time.
        marker: Name of the file or directory whose presence identifies the
            project root.

    Returns:
        The resolved path of the first directory (``start_path`` itself is
        checked first, then each parent) that contains ``marker``.

    Raises:
        FileNotFoundError: If no directory on the way up to the filesystem
            root contains ``marker``.
    """
    # Resolve the default here instead of in the signature: a default of
    # ``Path.cwd()`` is evaluated once, when the function is defined, and
    # would not follow a later change of working directory.
    if start_path is None:
        start_path = Path.cwd()
    current_path = Path(start_path).resolve()
    for candidate in (current_path, *current_path.parents):
        if (candidate / marker).exists():
            return candidate
    # Falling off the end would hand callers ``None`` despite the ``-> Path``
    # annotation; fail with an actionable message instead.
    raise FileNotFoundError(
        f"Could not find {marker!r} in {current_path} or any of its parent directories"
    )
        
def add_project_root_to_sys_path(marker: str = 'pyproject.toml'):
    """Put the project root at the front of ``sys.path`` unless already listed.

    The root is located with ``find_project_root`` using ``marker``.

    Args:
        marker: File or directory name that identifies the project root.

    Raises:
        FileNotFoundError: If no project root could be located.
    """
    project_root = find_project_root(marker=marker)
    if project_root is None:
        # Without this guard ``str(None)`` would put the literal entry
        # "None" on sys.path and the failure would only surface later as an
        # unrelated-looking import error.
        raise FileNotFoundError(
            f"Could not locate a project root containing {marker!r}"
        )
    root_entry = str(project_root)
    if root_entry not in sys.path:
        sys.path.insert(0, root_entry)

# Extend sys.path before the `src.*` imports that follow are executed.
add_project_root_to_sys_path()


# Imports

In [2]:
import torch
import torch.nn as nn
import wandb
from tqdm import tqdm
from torch.utils.data import DataLoader
from src.asym_ensembles.data_loaders import load_california_housing, load_wine_quality
from src.asym_ensembles.modeling.training import (
    set_global_seed,
    train_one_model,
    evaluate_model,
    evaluate_ensemble
)
from src.asym_ensembles.modeling.models import MLP, WMLP

# Config

In [28]:

# Experiment settings shared by the cells below.
config = {
    "task_type": "regression",  # "regression" or "classification"
    "batch_size": 64,
    "epochs": 10,
    "lr": 1e-3,
    "hidden_dim": 128,
    "num_layers": 4,
    "ensemble_sizes": [2, 4, 8, 16, 32, 64],
    "total_models": 64,  # max(ensemble_sizes)
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    # One mask specification per layer index; only layer 0 uses a different
    # `num_fixed` value (4 instead of 64).
    "mask_params": {
        layer: {
            'mask_constant': 1,
            'mask_type': 'random_subsets',
            'do_normal_mask': True,
            'num_fixed': 4 if layer == 0 else 64,
        }
        for layer in range(4)
    },
}

In [29]:
# Start the Weights & Biases run; the whole `config` dict is passed as the run config.
wandb.init(project="DeepEnsembleProject", config=config, name="DeepEnsembles")

# Loading Data

In [30]:
# Choose dataset, loss and output width together, based on the configured task.
# Any task_type other than "regression" takes the classification branch.
if config["task_type"] != "regression":
    train_ds, val_ds, test_ds = load_wine_quality()
    criterion = nn.CrossEntropyLoss()
    # One output per distinct value found in the training targets.
    out_dim = len(torch.unique(train_ds.tensors[1]))
else:
    train_ds, val_ds, test_ds = load_california_housing()
    criterion = nn.MSELoss()
    out_dim = 1

# Input width is the second dimension of the first tensor in the training set
# (assumes tensors[0] holds the features as (samples, features) — TODO confirm).
in_dim = train_ds.tensors[0].shape[1]

# Only the training split is shuffled; validation and test keep their order.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=config["batch_size"], shuffle=reshuffle)
    for split, reshuffle in ((train_ds, True), (val_ds, False), (test_ds, False))
)

# Train base estimators (MLP and WMLP)

In [31]:
# Accumulators for the two model families: the trained models and the
# training time recorded for each of them.
mlp_models, wmlp_models = [], []
mlp_times, wmlp_times = [], []

In [35]:
def _train_and_log(model, name, index):
    """Train ``model``, log its train time and test metric to wandb, return it.

    Args:
        model: Freshly constructed network to train.
        name: Prefix used in the wandb keys ("MLP" or "WMLP").
        index: Position of the model within its family; part of the wandb keys.

    Returns:
        ``(trained_model, train_time)`` as returned by ``train_one_model``.
    """
    optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"])
    # train_one_model also returns the train/validation losses; they are not
    # used in this notebook.
    model, train_time, _train_losses, _val_losses = train_one_model(
        model, train_loader, val_loader, criterion, optimizer,
        device=config["device"], epochs=config["epochs"]
    )
    wandb.log({f"{name}_{index}_train_time": train_time})

    test_metric = evaluate_model(
        model, test_loader, criterion, config["device"], task_type=config["task_type"]
    )
    wandb.log({f"{name}_{index}_test_metric": test_metric})
    return model, train_time


# Start from empty accumulators: re-running this cell (for example after an
# interrupted run) would otherwise append identically seeded duplicates to the
# models of the previous run and misalign the slices taken when ensembles are
# built.
mlp_models.clear()
wmlp_models.clear()
mlp_times.clear()
wmlp_times.clear()

for i in tqdm(range(config["total_models"])):
    # Seed before constructing each model so that initialisation and training
    # are reproducible per index; MLPs use 1000 + i, WMLPs use 2000 + i.
    set_global_seed(1000 + i)
    mlp = MLP(in_dim, config["hidden_dim"], out_dim, config["num_layers"], norm=None)
    mlp, train_time = _train_and_log(mlp, "MLP", i)
    mlp_models.append(mlp)
    mlp_times.append(train_time)

    set_global_seed(2000 + i)
    wmlp = WMLP(in_dim, config["hidden_dim"], out_dim, config["num_layers"], config["mask_params"], norm=None)
    wmlp, wmlp_train_time = _train_and_log(wmlp, "WMLP", i)
    wmlp_models.append(wmlp)
    wmlp_times.append(wmlp_train_time)

100%|██████████| 10/10 [00:01<00:00,  5.11it/s]
100%|██████████| 10/10 [00:02<00:00,  4.27it/s]
  0%|          | 0/64 [00:04<?, ?it/s]


# Building ensembles

In [None]:
# The two model families, evaluated in this order for every ensemble size.
model_families = (("MLP", mlp_models), ("WMLP", wmlp_models))

# Slicing a list that is too short silently returns fewer models, which would
# log a metric under an ensemble size that was never actually evaluated.
# Check the counts before logging anything.
largest_ensemble = max(config["ensemble_sizes"], default=0)
for family_name, family_models in model_families:
    if len(family_models) < largest_ensemble:
        raise ValueError(
            f"{family_name}: {len(family_models)} trained models available, "
            f"but the largest requested ensemble needs {largest_ensemble}"
        )

for ensemble_size in config["ensemble_sizes"]:
    for family_name, family_models in model_families:
        # An ensemble of size k is the first k trained models of the family.
        members = family_models[:ensemble_size]
        ensemble_metric = evaluate_ensemble(
            members, test_loader, config["device"], config["task_type"]
        )
        wandb.log({f"{family_name}_ensemble_{ensemble_size}": ensemble_metric})


In [None]:
# Mark the Weights & Biases run as finished.
wandb.finish()