In [1]:
# Run configuration: which diagnosis label and which MRI plane this notebook
# tunes. Both values are forwarded to MRKneeDataModule and Study further down.
DIAGNOSIS, PLANE = "acl", "sagittal"

In [2]:
import os

KAGGLE =  os.getenv("KAGGLE_URL_BASE") is not None
COLAB = os.getenv("COLAB_GPU") is not None
TPU = os.getenv("XRT_TPU_CONFIG") is not None
LOCAL = not KAGGLE and not COLAB

if not LOCAL:
    !git clone https://github.com/nclibz/MRKnee/

if COLAB:
    os.chdir('/content/MRKnee/')
    !git checkout v3
    from google.colab import drive
    drive.mount('/content/drive')
    DATADIR = "/content/drive/MyDrive/MRKnee/data"
    if TPU:
        !pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl

if KAGGLE:
    os.chdir('/kaggle/working/MRKnee/')
    !git checkout v3
    dataset_name = os.listdir('/kaggle/input')[0]
    
    DATADIR = f"/kaggle/input/{dataset_name}/MRNet"
    
    if TPU:
        !pip install torchtext==0.9
        !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
        !python pytorch-xla-env-setup.py --version 1.8

if not LOCAL:
    !pip install --quiet "pytorch-lightning>=1.4.9" "torchmetrics>=0.5" "timm" "neptune-client" "optuna" "PyMySql"
    !pip install albumentations --upgrade --quiet
    BACKBONE = "tf_efficientnetv2_s_in21k"

if LOCAL:
    DATADIR = "data"
    BACKBONE = 'tf_mobilenetv3_small_minimal_100'
    %load_ext autoreload
    %autoreload 2

In [3]:
from src.study import Study
from src.model import MRKnee
from src.data import MRKneeDataModule
from src.augmentations import Augmentations
from src.callbacks import Callbacks
from src.cfg import Cfg
import pytorch_lightning as pl

# Fixed global seed for the run; the call's return value (the seed) is the
# cell's displayed output.
SEED = 123
pl.seed_everything(SEED)

Global seed set to 123


123

In [4]:

def objective(trial, diagnosis=DIAGNOSIS, plane=PLANE, backbone=BACKBONE, datadir=DATADIR):
    """Train one MRKnee model with trial-sampled hyperparameters and score it.

    Args:
        trial: Trial object used to sample hyperparameters via its
            ``suggest_*`` methods; also handed to ``Callbacks``.
        diagnosis: Diagnosis label forwarded to ``MRKneeDataModule``.
        plane: MRI plane forwarded to ``MRKneeDataModule``.
        backbone: Backbone name forwarded to ``MRKnee``.
        datadir: Data directory forwarded to ``MRKneeDataModule``.

    Returns:
        ``best_model_score`` of the checkpoint callback held by ``Callbacks``,
        converted to a Python number with ``.item()``.
    """

    model = MRKnee(
        backbone=backbone,
        # Sampled as a whole percentage (30..95 in steps of 5), then scaled
        # to a fraction.
        drop_rate=trial.suggest_int("drop_rate", 30, 95, step = 5) / 100,
        learning_rate=trial.suggest_loguniform('lr', 1e-6, 1e-3),
        adam_wd=trial.suggest_loguniform('adam_wd', 0.001, 0.3),
        max_epochs=20,
        precision=32,
        log_auc=True,
        log_ind_loss=False,
    )

    # "clahe_valid" is only sampled when CLAHE is enabled at all; otherwise it
    # is forced off.
    clahe = trial.suggest_categorical("clahe", [True, False])
    clahe_valid = trial.suggest_categorical("clahe_valid", [True, False]) if clahe else False

    augs = Augmentations(
        model,
        max_res_train = 256,
        shift_limit=trial.suggest_int("shift_limit", 0, 15) / 100,
        scale_limit=trial.suggest_int("scale_limit", 0, 15) / 100,
        # NOTE(review): rotate_limit is divided by 100 like the shift/scale
        # fractions, giving 0..0.15. If Augmentations treats it as degrees
        # this is almost no rotation -- confirm the intended unit.
        rotate_limit=trial.suggest_int("rotate_limit", 0, 15) / 100,
        ssr_p=trial.suggest_int("ShiftScaleRotate_p", 20, 80, step = 5) / 100,
        clahe=clahe,
        # Pass the sampled value. Previously `clahe` was passed here, so the
        # "clahe_valid" suggestion was recorded but never applied.
        clahe_valid=clahe_valid,
        reverse_p=0.0,
        indp_normalz=True,
    )

    dm = MRKneeDataModule(
        datadir=datadir,
        diagnosis=diagnosis,
        plane=plane,
        transforms=augs,
        clean=True,
        num_workers=2,
        pin_memory=True,
        trim_train=True,
    )

    configs = Cfg(model = model, dm = dm, augs = augs)
    cfg = configs.get_cfg()

    callbacks = Callbacks(cfg, trial, neptune_name="mrkneev3")
    neptune_logger = callbacks.get_neptune_logger()
    list_of_cbs = callbacks.get_callbacks()

    trainer = pl.Trainer(
        gpus=1,
        precision=cfg["precision"],
        max_epochs=cfg["max_epochs"],
        logger=neptune_logger,
        log_every_n_steps=100,
        num_sanity_val_steps=0,
        callbacks=list_of_cbs,
        progress_bar_refresh_rate=20,
        deterministic=False,
        # Kept off on purpose. An unused local used to compute
        # `fast_dev_run = 50` for local runs but was never passed here; it has
        # been removed. NOTE(review): per the Lightning docs, fast_dev_run
        # disables checkpoint callbacks, which would leave best_model_score
        # unset for the return statement below -- handle that case before
        # enabling it.
        fast_dev_run=False,
    )

    trainer.fit(model, dm)

    ## UPLOAD BEST CHECKPOINTS TO LOG
    # if not LOCAL:
    #     callbacks.upload_best_checkpoints()

    return callbacks.model_checkpoint.best_model_score.item()


In [5]:
# The meniscus diagnosis gets a threshold of 1.2; every other diagnosis uses 1.
# NOTE(review): what `threshold` and `n_warmup_steps` control is defined inside
# Study and is not visible here -- confirm against src/study.py.
threshold = 1.2 if DIAGNOSIS == "meniscus" else 1

study = Study(
    diagnosis=DIAGNOSIS,
    plane=PLANE,
    backbone=BACKBONE,
    n_warmup_steps=5,
    threshold=threshold,
)

[32m[I 2021-11-21 10:22:45,754][0m Using an existing study with name 'acl_sagittal_tf_mobilenetv3_small_minimal_100' instead of creating a new one.[0m


In [6]:
# Number of trials to run in this session, each one a call to `objective`.
N_TRIALS = 2
study.optimize(objective, n_trials=N_TRIALS)



https://ui.neptune.ai/nclibz/mrkneev3/e/MRKNEEV-160
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type        | Params
-----------------------------------------
0 | backbone | MobileNetV3 | 1.0 M 
1 | clf      | Linear      | 1.0 K 
2 | val_auc  | AUROC       | 0     
-----------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.082     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:   0%|          | 0/1241 [00:00<?, ?it/s] 



Epoch 0: 100%|██████████| 1241/1241 [01:00<00:00, 20.55it/s, loss=1.83, v_num=-160, val_auc=0.522, val_loss=0.778]

Epoch 0, global step 1120: val_loss reached 0.77782 (best 0.77782), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial8/epoch=00-val_loss=0.78-val_auc=0.52.ckpt" as top 2


Epoch 1: 100%|██████████| 1241/1241 [00:59<00:00, 20.93it/s, loss=0.851, v_num=-160, val_auc=0.618, val_loss=0.764, train_loss=1.480]

Epoch 1, global step 2241: val_loss reached 0.76379 (best 0.76379), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial8/epoch=01-val_loss=0.76-val_auc=0.62.ckpt" as top 2


Epoch 2: 100%|██████████| 1241/1241 [00:59<00:00, 20.91it/s, loss=0.869, v_num=-160, val_auc=0.645, val_loss=0.752, train_loss=1.140]

Epoch 2, global step 3362: val_loss reached 0.75246 (best 0.75246), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial8/epoch=02-val_loss=0.75-val_auc=0.65.ckpt" as top 2


Epoch 3: 100%|██████████| 1241/1241 [00:58<00:00, 21.06it/s, loss=1.16, v_num=-160, val_auc=0.549, val_loss=0.764, train_loss=0.987]

Epoch 3, global step 4483: val_loss was not in top 2


Epoch 4: 100%|██████████| 1241/1241 [00:57<00:00, 21.65it/s, loss=0.574, v_num=-160, val_auc=0.580, val_loss=0.782, train_loss=0.914]

Epoch 4, global step 5604: val_loss was not in top 2


Epoch 5: 100%|██████████| 1241/1241 [00:57<00:00, 21.60it/s, loss=0.535, v_num=-160, val_auc=0.644, val_loss=0.759, train_loss=0.830]

Epoch 5, global step 6725: val_loss reached 0.75877 (best 0.75246), saving model to "/home/nicolai/OneDrive/Forskning/Projekter/MRKnee/checkpoints/trial8/epoch=05-val_loss=0.76-val_auc=0.64.ckpt" as top 2


Epoch 6:   0%|          | 0/1241 [00:00<?, ?it/s, loss=0.535, v_num=-160, val_auc=0.644, val_loss=0.759, train_loss=0.830]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
[32m[I 2021-11-21 10:29:11,832][0m Trial 8 finished with value: 0.7524612545967102 and parameters: {'drop_rate': 85, 'lr': 6.927471199789772e-05, 'adam_wd': 0.2964080283925666, 'shift_limit': 14, 'scale_limit': 10, 'rotate_limit': 13, 'clahe': False}. Best is trial 8 with value: 0.752461.[0m


Epoch 6:   0%|          | 0/1241 [00:16<?, ?it/s, loss=0.535, v_num=-160, val_auc=0.644, val_loss=0.759, train_loss=0.830]

