# Set up an ensemble model using a weighted average of predictions
## Imports

In [1]:
import helpers.set_path

from src.data.load_data import get_val_loader, get_test_loader, classes
from src.data.format_submissions import format_submissions

from pytorch_lightning import Trainer, seed_everything
import torch

import pandas as pd
import numpy as np
import wandb
from sklearn.metrics import log_loss, f1_score
from typing import List
from pytorch_lightning.loggers import WandbLogger

# Models
from src.models import Resnet50Model
from src.models import EfficientnetV2Model
from src.models import DensenetModel
from src.models import ViTModel
from src.models import VGG19Model
from src.models import InceptionV3Model

# Ensemble
from src.models import EnsembleModel

  warn(
  from .autonotebook import tqdm as notebook_tqdm
2023-06-13 17:21:31.740678: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-13 17:21:31.785754: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## Load the models to be averaged

In [2]:
def load_models_avg(model_paths: List[str]):
    """Load every checkpoint with the model class selected by its path prefix.

    Args:
        model_paths: Checkpoint paths. Each one must start with one of the
            prefixes listed in ``loaders`` below (e.g. ``"../models/Resnet"``).

    Returns:
        A list with one loaded model per entry of ``model_paths``, in the same
        order as the input.

    Raises:
        ValueError: If a path matches none of the known prefixes. Skipping such
            a path silently would return fewer models than paths, which no
            longer lines up with a per-model weight list.
    """
    # The loaders are lambdas so that a model class is only looked up when one
    # of the given paths actually selects it.
    loaders = (
        ("../models/Resnet", lambda ckpt: Resnet50Model.load_from_checkpoint(ckpt)),
        ("../models/Efficient", lambda ckpt: EfficientnetV2Model.load_from_checkpoint(ckpt)),
        ("../models/Dense", lambda ckpt: DensenetModel.load_from_checkpoint(ckpt)),
        ("../models/ViT", lambda ckpt: ViTModel.load_from_checkpoint(ckpt)),
        ("../models/VGG", lambda ckpt: VGG19Model.load_from_checkpoint(ckpt)),
        ("../models/InceptionV3Model", lambda ckpt: InceptionV3Model.load_from_checkpoint(ckpt)),
    )

    models = []
    for path in model_paths:
        for prefix, load in loaders:
            if path.startswith(prefix):
                models.append(load(path))
                break
        else:
            known = ", ".join(prefix for prefix, _ in loaders)
            raise ValueError(
                f"Unrecognised checkpoint path {path!r}; expected a path starting with one of: {known}"
            )

    return models

In [3]:
def get_sweep_name(model_paths: List[str]):
    """Build a run name by joining one short tag per checkpoint path with "_".

    The tag of a path is its last "/"-separated component, cut at the first
    "_" and then shortened by its final five characters. For a plain
    "<Name>.ckpt" file name (no underscore) this removes the ".ckpt" suffix.

    Returns an empty string when ``model_paths`` is empty.
    """
    tags = [
        path.rsplit("/", 1)[-1].partition("_")[0][:-5]
        for path in model_paths
    ]
    return "_".join(tags)

In [4]:
# Checkpoints that make up the ensemble.
# NOTE(review): presumably the i-th sweep weight applies to the i-th entry here;
# confirm against EnsembleModel before reordering.
model_paths = [
    "../models/DensenetModel.ckpt",
    "../models/EfficientnetV2Model.ckpt",
    "../models/InceptionV3Model.ckpt",
]

# Both globals are read later by train_sweep().
sweep_name = get_sweep_name(model_paths)
models = load_models_avg(model_paths)



## Sweep

In [5]:
# Grid sweep over a fixed set of weight triples; each triple sums to 1.
# NOTE(review): presumably one weight per loaded model, in model order;
# confirm against EnsembleModel.
sweep_config = dict(
    name="ensemble_average",
    method="grid",
    parameters=dict(
        weights=dict(
            values=[
                # one model dominates
                [0.1, 0.1, 0.8],
                [0.1, 0.8, 0.1],
                [0.8, 0.1, 0.1],
                # one model clearly favoured
                [0.2, 0.2, 0.6],
                [0.2, 0.6, 0.2],
                [0.6, 0.2, 0.2],
                # one model slightly favoured
                [0.3, 0.3, 0.4],
                [0.3, 0.4, 0.3],
                [0.4, 0.3, 0.3],
                # (almost) uniform
                [0.33, 0.33, 0.34],
            ],
        ),
    ),
)

In [6]:
# Register the sweep with Weights & Biases; the returned id is handed to the agent.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    entity="safari_squad",
    project="ccv1",
)

def _stack_batch_field(batches, index):
    """Concatenate field ``index`` of every batch in ``batches`` into one NumPy array.

    Each selected field is moved to the CPU and detached before conversion, so
    it has to be a torch tensor.
    """
    return np.concatenate([batch[index].cpu().detach().numpy() for batch in batches])


def train_sweep():
    """Score the weighted-average ensemble for one sweep run and log the results.

    Reads the module-level ``models`` and ``sweep_name`` and takes the ensemble
    weights from the run config (``config.weights``). On the validation loader
    it logs the log-loss and the weighted/macro/micro F1 scores; for the test
    loader it logs the predictions as a W&B table under the key "submission".

    The W&B run is used as a context manager so it is always closed, and marked
    as failed, if any step raises.
    """
    seed_everything(42)
    torch.set_float32_matmul_precision('high')

    # The sweep agent supplies the selected `weights` for this run. The sweep
    # definition itself is not a set of run hyperparameters, so it is not
    # passed as `config`.
    with wandb.init(project="ccv1", entity="safari_squad", name=sweep_name) as run:
        config = run.config

        val_dataloader = get_val_loader(64, 224, 0.05)
        test_dataloader = get_test_loader(64, 224, 0.05)

        ensemble_model = EnsembleModel(models, config.weights)
        trainer = Trainer(accelerator="gpu")

        # --- validation metrics -------------------------------------------
        # Field 1 of each predict batch is used as the predictions and field 2
        # as the labels.
        val_outputs = trainer.predict(ensemble_model, val_dataloader)
        val_probs = _stack_batch_field(val_outputs, 1)
        val_targets = _stack_batch_field(val_outputs, 2)

        log_loss_value = log_loss(val_targets, val_probs)

        # NOTE(review): labels are reduced with argmax over axis 1, so they are
        # presumably one-hot encoded; confirm against the data loader.
        val_pred_classes = np.argmax(val_probs, axis=1)
        val_true_classes = np.argmax(val_targets, axis=1)

        f1_by_average = {
            f"f1_{average}": f1_score(val_true_classes, val_pred_classes, average=average)
            for average in ("weighted", "macro", "micro")
        }
        wandb.log({"log_loss": log_loss_value, **f1_by_average})

        # --- test predictions ---------------------------------------------
        test_outputs = trainer.predict(ensemble_model, test_dataloader)
        test_probs = _stack_batch_field(test_outputs, 1)

        # The ids come from a second pass over the test loader.
        # NOTE(review): this assumes the loader yields batches in the same
        # order as during predict (i.e. it is not shuffled); confirm.
        test_ids = np.concatenate([list(batch[0]) for batch in test_dataloader])

        test_preds_df = pd.DataFrame(test_probs, columns=classes)
        test_preds_df.insert(0, "id", test_ids)

        wandb.log({"submission": wandb.Table(dataframe=test_preds_df)})

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 8vjbgkku
Sweep URL: https://wandb.ai/safari_squad/ccv1/sweeps/8vjbgkku


In [7]:
# Run the sweep: the agent calls train_sweep() once per configuration it receives.
wandb.agent(
    sweep_id,
    function=train_sweep,
)

[34m[1mwandb[0m: Agent Starting Run: 58uzw799 with config:
[34m[1mwandb[0m: 	weights: [0.1, 0.1, 0.8]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mstudentluca[0m ([33msafari_squad[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:10<00:00,  5.05it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.83it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.59543
f1_micro,0.63382
f1_weighted,0.63382
log_loss,1.06379


[34m[1mwandb[0m: Agent Starting Run: 215evw24 with config:
[34m[1mwandb[0m: 	weights: [0.1, 0.8, 0.1]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.92it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.63605
f1_micro,0.675
f1_weighted,0.67592
log_loss,0.95366


[34m[1mwandb[0m: Agent Starting Run: 40uasq4j with config:
[34m[1mwandb[0m: 	weights: [0.8, 0.1, 0.1]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.94it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.85it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.60605
f1_micro,0.64324
f1_weighted,0.64272
log_loss,1.03133


[34m[1mwandb[0m: Agent Starting Run: 7bgtku8i with config:
[34m[1mwandb[0m: 	weights: [0.2, 0.2, 0.6]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.95it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.60854
f1_micro,0.64735
f1_weighted,0.64762
log_loss,1.00506


[34m[1mwandb[0m: Agent Starting Run: hf1z339c with config:
[34m[1mwandb[0m: 	weights: [0.2, 0.6, 0.2]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.92it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.84it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.64163
f1_micro,0.67941
f1_weighted,0.68081
log_loss,0.94891


[34m[1mwandb[0m: Agent Starting Run: th5i9q8b with config:
[34m[1mwandb[0m: 	weights: [0.6, 0.2, 0.2]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.96it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.61873
f1_micro,0.65824
f1_weighted,0.65836
log_loss,0.991


[34m[1mwandb[0m: Agent Starting Run: 3800mnrw with config:
[34m[1mwandb[0m: 	weights: [0.3, 0.3, 0.4]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.83it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.62185
f1_micro,0.66324
f1_weighted,0.66338
log_loss,0.97331


[34m[1mwandb[0m: Agent Starting Run: xunlu5vs with config:
[34m[1mwandb[0m: 	weights: [0.3, 0.4, 0.3]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.77it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.63116
f1_micro,0.67029
f1_weighted,0.67102
log_loss,0.95977


[34m[1mwandb[0m: Agent Starting Run: 7y1gc48f with config:
[34m[1mwandb[0m: 	weights: [0.4, 0.3, 0.3]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.91it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.84it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.62665
f1_micro,0.66618
f1_weighted,0.66651
log_loss,0.9704


[34m[1mwandb[0m: Agent Starting Run: vts40o7b with config:
[34m[1mwandb[0m: 	weights: [0.33, 0.33, 0.34]
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:07<00:00,  6.96it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 70/70 [00:10<00:00,  6.87it/s]


0,1
f1_macro,▁
f1_micro,▁
f1_weighted,▁
log_loss,▁

0,1
f1_macro,0.62987
f1_micro,0.66824
f1_weighted,0.66883
log_loss,0.96746


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
