In [1]:
# Notebook-wide run configuration; later cells read these names as globals.
# PLANE is the MRI plane handed to the data module and to the study.
PLANE = "sagittal"
# DIAGNOSIS selects the target label; "meniscus" is special-cased when the
# study threshold is chosen further down.
DIAGNOSIS = "acl"

In [2]:
import os

# Detect the runtime from environment variables (presumably set by the
# respective hosting platform — TODO confirm the variable names are still current).
KAGGLE =  os.getenv("KAGGLE_URL_BASE") is not None
COLAB = os.getenv("COLAB_GPU") is not None
TPU = os.getenv("XRT_TPU_CONFIG") is not None
# Anything that is neither Kaggle nor Colab is treated as a local run.
LOCAL = not KAGGLE and not COLAB

# On hosted runtimes, clone the project repository into the current directory.
# NOTE(review): re-running this cell repeats the clone; git will refuse if the
# target directory already exists.
if not LOCAL:
    !git clone https://github.com/nclibz/MRKnee/

# Colab: work from the cloned repo on branch v3 and read data from Google Drive.
if COLAB:
    os.chdir('/content/MRKnee/')
    !git checkout v3
    from google.colab import drive
    drive.mount('/content/drive')
    DATADIR = "/content/drive/MyDrive/MRKnee/data"
    # The wheel below is pinned to torch_xla 1.9 for CPython 3.7.
    if TPU:
        !pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl

# Kaggle: work from the cloned repo on branch v3 and read data from the attached dataset.
if KAGGLE:
    os.chdir('/kaggle/working/MRKnee/')
    !git checkout v3
    # NOTE(review): os.listdir returns entries in arbitrary order, so taking [0]
    # assumes exactly one dataset is attached under /kaggle/input.
    dataset_name = os.listdir('/kaggle/input')[0]
    
    DATADIR = f"/kaggle/input/{dataset_name}/MRNet"
    
    # NOTE(review): the setup script is fetched from the master branch (unpinned)
    # and then asked for XLA version 1.8.
    if TPU:
        !pip install torchtext==0.9
        !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
        !python pytorch-xla-env-setup.py --version 1.8

# Hosted runtimes: install the Python dependencies and select the hosted backbone.
# NOTE(review): most packages are unpinned or only lower-bounded, so a fresh
# run may resolve to different versions than the ones used for earlier trials.
if not LOCAL:
    !pip install --quiet "pytorch-lightning>=1.4.9" "torchmetrics>=0.5" "timm" "neptune-client" "optuna" "PyMySql"
    !pip install albumentations --upgrade --quiet
    BACKBONE = "tf_efficientnetv2_s_in21k"

# Local runs: relative data directory, a different backbone, and autoreload so
# edits to imported modules are picked up without restarting the kernel.
if LOCAL:
    DATADIR = "data"
    BACKBONE = 'tf_mobilenetv3_small_minimal_100'
    %load_ext autoreload
    %autoreload 2

In [3]:
from src.study import Study
from src.model import MRKnee
from src.data import MRKneeDataModule
from src.augmentations import Augmentations
from src.callbacks import Callbacks
from src.cfg import Cfg
import pytorch_lightning as pl

# Set the global seed through Lightning before any model or data objects are built.
# NOTE(review): the Trainer in `objective` is created with deterministic=False,
# so runs may still not be exactly reproducible — confirm that is acceptable.
pl.seed_everything(123)

Global seed set to 123


123

In [4]:

def objective(trial, diagnosis=DIAGNOSIS, plane=PLANE, backbone=BACKBONE, datadir=DATADIR):
    """Train one MRKnee model with trial-sampled hyperparameters and score it.

    Builds the model, augmentations and data module from values sampled via
    ``trial``, fits them with a Lightning ``Trainer``, and reports the best
    score tracked by the checkpoint callback.

    Args:
        trial: Trial object used to sample hyperparameters
            (``suggest_int`` / ``suggest_loguniform``); it is also handed to
            ``Callbacks``.
        diagnosis: Diagnosis forwarded to ``MRKneeDataModule``.
        plane: Imaging plane forwarded to ``MRKneeDataModule``.
        backbone: Backbone name forwarded to ``MRKnee``.
        datadir: Data directory forwarded to ``MRKneeDataModule``.

    Returns:
        The value of ``callbacks.model_checkpoint.best_model_score.item()``.
        NOTE(review): which metric this is depends on how ``Callbacks``
        configures the checkpoint — confirm in ``src.callbacks``.
    """
    model = MRKnee(
        backbone=backbone,
        # Sampled as an integer percentage (30..90, step 10) and scaled to 0.3..0.9.
        drop_rate=trial.suggest_int("drop_rate", 30, 90, step=10) / 100,
        learning_rate=trial.suggest_loguniform('lr', 1e-6, 1e-3),
        adam_wd=trial.suggest_loguniform('adam_wd', 0.001, 0.3),
        max_epochs=20,
        precision=32,
        log_auc=True,
        log_ind_loss=False,
    )

    # Augmentation strengths and probabilities are sampled as integer
    # percentages and divided by 100.
    # NOTE(review): rotate_limit is scaled the same way (max 0.25); if
    # Augmentations interprets it in degrees this is almost no rotation — confirm.
    augs = Augmentations(
        model,
        max_res_train=256,
        shift_limit=trial.suggest_int("shift_limit", 0, 25, step=5) / 100,
        scale_limit=trial.suggest_int("scale_limit", 0, 25, step=5) / 100,
        rotate_limit=trial.suggest_int("rotate_limit", 0, 25, step=5) / 100,
        ssr_p=trial.suggest_int("ShiftScaleRotate_p", 20, 80, step=10) / 100,
        clahe_p=trial.suggest_int("clahe_p", 20, 80, step=10) / 100,
        reverse_p=0.0,
        indp_normalz=True,
    )

    dm = MRKneeDataModule(
        datadir=datadir,
        diagnosis=diagnosis,
        plane=plane,
        transforms=augs,
        clean=True,
        num_workers=2,
        pin_memory=True,
        trim_train=True,
    )

    # Collect the settings of model, data module and augmentations into one
    # config; the Trainer reads precision and max_epochs from it.
    configs = Cfg(model=model, dm=dm, augs=augs)
    cfg = configs.get_cfg()

    callbacks = Callbacks(cfg, trial, neptune_name="mrkneev3")
    neptune_logger = callbacks.get_neptune_logger()
    list_of_cbs = callbacks.get_callbacks()

    trainer = pl.Trainer(
        gpus=1,
        precision=cfg["precision"],
        max_epochs=cfg["max_epochs"],
        logger=neptune_logger,
        log_every_n_steps=100,
        num_sanity_val_steps=0,
        callbacks=list_of_cbs,
        progress_bar_refresh_rate=20,
        deterministic=False,
        # Deliberately always off. A local-only `fast_dev_run = 50` value used to
        # be computed here but was never passed on. Per the Lightning docs,
        # fast_dev_run disables checkpoint callbacks, and the return value
        # below is read from the checkpoint callback.
        fast_dev_run=False,
    )

    trainer.fit(model, dm)

    ## UPLOAD BEST CHECKPOINTS TO LOG
    # if not LOCAL:
    #     callbacks.upload_best_checkpoints()

    return callbacks.model_checkpoint.best_model_score.item()


In [5]:
# Threshold handed to Study: 1.2 for the meniscus diagnosis, 1 for any other.
# NOTE(review): what the threshold controls is defined in src.study — confirm there.
threshold = 1.2 if DIAGNOSIS == "meniscus" else 1

# Build the study for this diagnosis / plane / backbone combination.
study = Study(
    diagnosis=DIAGNOSIS,
    plane=PLANE,
    backbone=BACKBONE,
    n_warmup_steps=5,
    threshold=threshold,
)

[I 2021-11-21 10:45:19,646] Using an existing study with name 'acl_sagittal_tf_mobilenetv3_small_minimal_100' instead of creating a new one.


In [6]:
# Run the hyperparameter search with `objective`, limited to two trials here.
# NOTE(review): Study is the project wrapper from src.study; presumably this
# delegates to Optuna's optimize loop — confirm there.
study.optimize(objective, n_trials=2)



https://ui.neptune.ai/nclibz/mrkneev3/e/MRKNEEV-161
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type        | Params
-----------------------------------------
0 | backbone | MobileNetV3 | 1.0 M 
1 | clf      | Linear      | 1.0 K 
2 | val_auc  | AUROC       | 0     
-----------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.082     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:   0%|          | 0/1241 [00:00<?, ?it/s] 



Epoch 0: 100%|██████████| 1241/1241 [01:18<00:00, 15.82it/s, loss=1.23, v_num=-161, val_auc=0.500, val_loss=0.771]

Epoch 0, global step 1120: val_loss reached 0.77070 (best 0.77070), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=00-val_loss=0.77-val_auc=0.50.ckpt" as top 2


Epoch 1: 100%|██████████| 1241/1241 [01:16<00:00, 16.29it/s, loss=0.902, v_num=-161, val_auc=0.590, val_loss=0.752, train_loss=1.180]

Epoch 1, global step 2241: val_loss reached 0.75207 (best 0.75207), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=01-val_loss=0.75-val_auc=0.59.ckpt" as top 2


Epoch 2: 100%|██████████| 1241/1241 [01:14<00:00, 16.76it/s, loss=0.674, v_num=-161, val_auc=0.702, val_loss=0.731, train_loss=1.020]

Epoch 2, global step 3362: val_loss reached 0.73060 (best 0.73060), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=02-val_loss=0.73-val_auc=0.70.ckpt" as top 2


Epoch 3: 100%|██████████| 1241/1241 [00:59<00:00, 20.86it/s, loss=0.98, v_num=-161, val_auc=0.738, val_loss=0.727, train_loss=0.889] 

Epoch 3, global step 4483: val_loss reached 0.72662 (best 0.72662), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=03-val_loss=0.73-val_auc=0.74.ckpt" as top 2


Epoch 4: 100%|██████████| 1241/1241 [00:59<00:00, 20.85it/s, loss=0.54, v_num=-161, val_auc=0.794, val_loss=0.703, train_loss=0.801] 

Epoch 4, global step 5604: val_loss reached 0.70256 (best 0.70256), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=04-val_loss=0.70-val_auc=0.79.ckpt" as top 2


Epoch 5: 100%|██████████| 1241/1241 [01:00<00:00, 20.52it/s, loss=0.557, v_num=-161, val_auc=0.797, val_loss=0.697, train_loss=0.703]

Epoch 5, global step 6725: val_loss reached 0.69683 (best 0.69683), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=05-val_loss=0.70-val_auc=0.80.ckpt" as top 2


Epoch 6: 100%|██████████| 1241/1241 [01:01<00:00, 20.02it/s, loss=0.393, v_num=-161, val_auc=0.815, val_loss=0.695, train_loss=0.623]

Epoch 6, global step 7846: val_loss reached 0.69452 (best 0.69452), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=06-val_loss=0.69-val_auc=0.81.ckpt" as top 2


Epoch 7: 100%|██████████| 1241/1241 [01:00<00:00, 20.59it/s, loss=0.425, v_num=-161, val_auc=0.831, val_loss=0.674, train_loss=0.540]

Epoch 7, global step 8967: val_loss reached 0.67398 (best 0.67398), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=07-val_loss=0.67-val_auc=0.83.ckpt" as top 2


Epoch 8: 100%|██████████| 1241/1241 [00:59<00:00, 20.84it/s, loss=0.287, v_num=-161, val_auc=0.839, val_loss=0.666, train_loss=0.486]

Epoch 8, global step 10088: val_loss reached 0.66583 (best 0.66583), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=08-val_loss=0.67-val_auc=0.84.ckpt" as top 2


Epoch 9: 100%|██████████| 1241/1241 [00:59<00:00, 20.99it/s, loss=0.511, v_num=-161, val_auc=0.847, val_loss=0.627, train_loss=0.408]

Epoch 9, global step 11209: val_loss reached 0.62747 (best 0.62747), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=09-val_loss=0.63-val_auc=0.85.ckpt" as top 2


Epoch 10: 100%|██████████| 1241/1241 [00:59<00:00, 20.89it/s, loss=0.424, v_num=-161, val_auc=0.854, val_loss=0.609, train_loss=0.347]

Epoch 10, global step 12330: val_loss reached 0.60927 (best 0.60927), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial10/epoch=10-val_loss=0.61-val_auc=0.85.ckpt" as top 2


Epoch 11: 100%|██████████| 1241/1241 [00:59<00:00, 20.82it/s, loss=0.162, v_num=-161, val_auc=0.859, val_loss=0.669, train_loss=0.317]

Epoch 11, global step 13451: val_loss was not in top 2


Epoch 12: 100%|██████████| 1241/1241 [00:59<00:00, 20.81it/s, loss=0.156, v_num=-161, val_auc=0.816, val_loss=0.659, train_loss=0.263]

Epoch 12, global step 14572: val_loss was not in top 2


Epoch 13: 100%|██████████| 1241/1241 [00:59<00:00, 20.86it/s, loss=0.118, v_num=-161, val_auc=0.792, val_loss=0.780, train_loss=0.220]

Epoch 13, global step 15693: val_loss was not in top 2


Epoch 14:  27%|██▋       | 340/1241 [00:16<00:44, 20.04it/s, loss=0.0722, v_num=-161, val_auc=0.792, val_loss=0.780, train_loss=0.172]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
