# Run Model


## Imports


In [1]:
%load_ext autoreload

%autoreload 2

In [2]:
import helpers.set_path  # needs to be there to set the correct project path

import pandas as pd

from src.data.load_data import (
    get_train_loader,
    get_val_loader,
    get_test_loader,
    classes,
)
from src.data.format_submissions import format_submissions

from pathlib import Path
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

import wandb
import torch
import os

In [3]:
from src.models import DensenetModel as Model

# Upper bound on training epochs per trial (handed to the Lightning Trainer).
MAX_EPOCHS = 100
# When True, the best checkpoint of a trial is removed from disk once the
# trial has been evaluated.
DELETE_MODEL_CHECKPOINTS = True

# Weights & Biases sweep definition: a grid search that minimises "val_loss".
# Every key under `parameters` is read back from `run.config` inside the
# training function; `values` lists the candidates to try, `value` pins a
# single fixed setting.
sweep_config = dict(
    name=f"{Model.__name__}_sweep",
    method="grid",
    metric=dict(name="val_loss", goal="minimize"),
    parameters=dict(
        batch_size=dict(values=[32]),
        image_size=dict(values=[224, 264]),
        lr=dict(value=5e-6),
        dropout=dict(values=[0, 0.05, 0.1, 0.15]),
        crop_threshold=dict(values=[0.05, 0.1]),
    ),
)

## Run Training


In [4]:
# Register the sweep with Weights & Biases; the returned id is what the agent
# needs in order to pull trial configurations.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    entity="safari_squad",
    project="ccv1",
)


def train_sweep():
    """Train, evaluate and log one trial of the W&B sweep.

    Intended to be passed as ``function`` to ``wandb.agent``: it takes no
    arguments and reads its hyperparameters (``batch_size``, ``image_size``,
    ``crop_threshold``, ``lr``, ``dropout``) from the config of the run it
    starts.

    Steps:
        1. Seed the RNGs and start a W&B run.
        2. Build the model and the train/val/test data loaders.
        3. Fit with early stopping on ``val_loss``, keeping only the best
           checkpoint.
        4. Reload the best checkpoint, predict on the test loader, validate
           on the validation loader and log the formatted submission table.
        5. Close the W&B run and, if ``DELETE_MODEL_CHECKPOINTS`` is set,
           remove the checkpoint file.

    Returns:
        None.

    Raises:
        RuntimeError: If training finished without writing any checkpoint.
        Any other exception raised during the trial is re-raised after the
        W&B run has been closed with a non-zero exit code.
    """
    seed_everything(42)
    torch.set_float32_matmul_precision("high")

    run = wandb.init(name=Model.__name__)
    # Stays empty until a checkpoint has actually been written, so the
    # cleanup below never tries to delete a non-existent path.
    best_model_path = ""
    try:
        config = run.config

        model = Model(
            batch_size=config.batch_size,
            image_size=config.image_size,
            crop_threshold=config.crop_threshold,
            lr=config.lr,
            dropout=config.dropout,
        )

        # All three loaders share the same preprocessing settings.
        loader_args = (config.batch_size, config.image_size, config.crop_threshold)
        train_dataloader = get_train_loader(*loader_args)
        val_dataloader = get_val_loader(*loader_args)
        test_dataloader = get_test_loader(*loader_args)

        wandb_logger = WandbLogger(
            project="ccv1", entity="safari_squad", name=Model.__name__
        )
        trainer = Trainer(
            max_epochs=MAX_EPOCHS,
            accelerator="gpu",
            logger=wandb_logger,
            callbacks=[
                EarlyStopping(monitor="val_loss", mode="min", patience=3),
                ModelCheckpoint(
                    dirpath=Path("../models/checkpoints"),
                    # The logger version makes the file name unique per run.
                    filename=f"{model.__class__.__name__}_{wandb_logger.version}",
                    monitor="val_loss",
                    mode="min",
                    save_top_k=1,
                ),
            ],
        )

        trainer.fit(
            model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader
        )

        best_model_path = trainer.checkpoint_callback.best_model_path
        if not best_model_path:
            raise RuntimeError(
                "Training finished without saving a checkpoint; "
                "cannot evaluate the best model."
            )

        # `load_from_checkpoint` is a classmethod that builds a new model, so
        # it is called on the class; recent Lightning releases refuse to run
        # it on an instance.
        best_model = Model.load_from_checkpoint(best_model_path)

        submissions = format_submissions(
            trainer.predict(best_model, dataloaders=test_dataloader), classes
        )

        trainer.validate(best_model, dataloaders=val_dataloader)

        wandb_logger.log_text("submission", dataframe=submissions)
    except BaseException:
        # Close the run as failed so it is not reported as a successful trial,
        # then let the caller (e.g. the sweep agent) see the original error.
        wandb.finish(exit_code=1)
        raise
    else:
        wandb.finish()
    finally:
        # Runs on success and on failure so aborted trials do not leave
        # checkpoint files behind.
        if (
            DELETE_MODEL_CHECKPOINTS
            and best_model_path
            and os.path.isfile(best_model_path)
        ):
            os.remove(best_model_path)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 3cdjj5si
Sweep URL: https://wandb.ai/safari_squad/ccv1/sweeps/3cdjj5si


In [5]:
# Start an in-process sweep agent: it repeatedly fetches the next
# hyperparameter combination of the sweep and calls `train_sweep` for it.
wandb.agent(
    sweep_id,
    function=train_sweep,
)

[34m[1mwandb[0m: Agent Starting Run: v95g7ndv with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	crop_threshold: 0.05
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	image_size: 264
[34m[1mwandb[0m: 	lr: 5e-06
Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmarvinvr[0m ([33msafari_squad[0m). Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type              | Params
--------------------------------------------------
0 | loss        | CrossEntropyLoss  | 0     
1 | f1_weighted | MulticlassF1Score | 0     
2 | f1_micro    | MulticlassF1Score | 0     
3 | f1_macro    | MulticlassF1Score | 0     
4 | model       | DenseNet          | 26.5 M
--------------------------------------------------
26.5 M    Trainable params
0         Non-trainable params
26.5 M    Total params
105.959   Total estimated model params size (MB)


Epoch 7: 100%|██████████| 372/372 [00:57<00:00,  6.52it/s, v_num=7ndv]     

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Epoch 7: 100%|██████████| 372/372 [01:04<00:00,  5.80it/s, v_num=7ndv]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]