In [1]:
!nvidia-smi

Thu Jan 21 08:07:44 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# SETUP

In [2]:
#!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
#!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

In [3]:
!git clone https://github.com/nclibz/MRKnee/

Cloning into 'MRKnee'...
remote: Enumerating objects: 8, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 336 (delta 1), reused 1 (delta 0), pack-reused 328[K
Receiving objects: 100% (336/336), 126.39 MiB | 38.88 MiB/s, done.
Resolving deltas: 100% (217/217), done.


In [4]:
!pip install pytorch-lightning
!pip install timm
!pip install neptune-client
!pip install albumentations -U
!pip install neptune-contrib



Collecting pytorch-lightning
[?25l  Downloading https://files.pythonhosted.org/packages/12/98/86a89dcd54f84582bbf24cb29cd104b966fcf934d92d5dfc626f225015d2/pytorch_lightning-1.1.4-py3-none-any.whl (684kB)
[K     |████████████████████████████████| 686kB 9.3MB/s 
[?25hCollecting future>=0.17.1
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 17.8MB/s 
Collecting PyYAML>=5.1
[?25l  Downloading https://files.pythonhosted.org/packages/7a/5b/bc0b5ab38247bba158504a410112b6c03f153c652734ece1849749e5f518/PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (640kB)
[K     |████████████████████████████████| 645kB 37.7MB/s 
[?25hCollecting fsspec[http]>=0.8.1
[?25l  Downloading https://files.pythonhosted.org/packages/ec/80/72ac0982cc833945fada4b76c52f0f65435ba4d53bc9317d1c70b5f7e7d5/fsspec-0.8.5-py3-none-any.whl (98kB)
[K     |█████████████████

# PATHS

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# setup wd and datadir
import os
os.chdir('/content/MRKnee/')
os.getcwd()



'/content/MRKnee'

## GIT

In [7]:
! git pull https://github.com/nclibz/MRKnee/
!git checkout v2

From https://github.com/nclibz/MRKnee
 * branch            HEAD       -> FETCH_HEAD
Already up to date.
Branch 'v2' set up to track remote branch 'v2' from 'origin'.
Switched to a new branch 'v2'


In [8]:
!git pull origin v2

From https://github.com/nclibz/MRKnee
 * branch            v2         -> FETCH_HEAD
Already up to date.


# MODEL

In [9]:
import os

import albumentations as A
import optuna
import pytorch_lightning as pl
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

from data import MRKneeDataModule
from model import MRKnee
from utils import print_top_losses


# Set the global random seed through Lightning's helper so runs are repeatable.
pl.seed_everything(123)

# Load the autoreload extension; mode 0 leaves automatic module reloading switched off.
%load_ext autoreload
%autoreload 0




Global seed set to 123


In [None]:

class MetricsCallback(pl.callbacks.Callback):
    """Lightning callback that records the trainer's callback metrics after every validation run.

    The base class is referenced through the already-imported ``pl`` package,
    because a bare ``Callback`` name is never imported in this notebook.
    """

    def __init__(self):
        super().__init__()
        # One entry per finished validation run, oldest first.
        self.metrics = []

    def on_validation_end(self, trainer, pl_module):
        """Append a snapshot of ``trainer.callback_metrics`` to ``self.metrics``."""
        # Store a copy so that later updates to the trainer's metrics dict
        # cannot silently rewrite the entries recorded for earlier epochs.
        self.metrics.append(dict(trainer.callback_metrics))

In [None]:

def objective(trial):
    """Train one model for an Optuna trial and return its last validation loss.

    The trial samples the learning rate (``lr``) and the dropout rate
    (``dropout``); every other setting is fixed in ``cfg``. The same ``cfg``
    dict is passed to the data module, to the model and, as logged
    parameters, to the Neptune logger.

    Args:
        trial: The Optuna trial being evaluated. It is also handed to the
            pruning callback, which monitors ``val_loss``.

    Returns:
        float: ``val_loss`` taken from the metrics recorded at the end of the
        last validation run.

    Raises:
        KeyError: If the ``NEPTUNE_API_TOKEN`` environment variable is not set.
    """

    IMG_SZ = 224  # b0 = 224, b1 = 240,

    cfg = {
        # DATA
        'datadir': 'data',
        'diagnosis': 'meniscus',
        'planes': ['axial'],  # , 'sagittal', 'coronal', 'axial',
        'n_chans': 1,
        'num_workers': 4,
        'pin_memory': True,
        'upsample': False,
        'w_loss': True,
        'indp_normalz': False,
        'transf': {
            'train': [A.Rotate(limit=25, p=1),
                      A.HorizontalFlip(p=0.5),
                      A.RandomCrop(IMG_SZ, IMG_SZ)],
            'valid': [A.CenterCrop(IMG_SZ, IMG_SZ)]
        },
        # MODEL
        'backbone': 'efficientnet_b0',
        'pretrained': True,
        # Sampled on a log scale; suggest_float(log=True) replaces the
        # deprecated suggest_loguniform and matches the dropout call below.
        'learning_rate': trial.suggest_float('lr', 1e-6, 1e-2, log=True),
        'drop_rate': trial.suggest_float('dropout', 0., 0.8),
        'freeze_from': -1,
        'unfreeze_epoch': 0,
        'log_auc': True,
        'log_ind_loss': True,
        'final_pool': 'max',
        # Trainer
        'precision': 16,
        'max_epochs': 5,
    }

    # LOGGER
    # The Neptune token is read from the environment instead of being embedded
    # in the notebook. The token that used to be hard-coded here has been
    # exposed and should be revoked.
    neptune_logger = pl_loggers.NeptuneLogger(
        api_key=os.environ["NEPTUNE_API_TOKEN"],
        params=cfg,
        project_name='nclibz/optuna-test',
        tags=[cfg['diagnosis']] + cfg['planes']
    )

    # Callbacks
    # Checkpoints are written to a per-trial directory so trials do not
    # overwrite each other's files.
    model_checkpoint = ModelCheckpoint(dirpath=f'checkpoints/trial{trial.number}/',
                                       filename='{epoch:02d}-{val_loss:.2f}-{val_auc:.2f}',
                                       verbose=True,
                                       save_top_k=2,
                                       monitor='val_loss',
                                       mode='min',
                                       period=1)

    lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval="epoch")

    # Collects the callback metrics after each validation run; the last entry
    # supplies the value returned to Optuna.
    metrics_callback = MetricsCallback()

    prune_callback = PyTorchLightningPruningCallback(trial, monitor="val_loss")

    # DM AND MODEL
    dm = MRKneeDataModule(**cfg)
    model = MRKnee(**cfg)
    trainer = pl.Trainer(gpus=1,
                         precision=cfg['precision'],
                         max_epochs=cfg['max_epochs'],
                         logger=neptune_logger,
                         log_every_n_steps=100,
                         num_sanity_val_steps=0,
                         callbacks=[lr_monitor,
                                    model_checkpoint,
                                    metrics_callback,
                                    prune_callback],
                         progress_bar_refresh_rate=20,
                         limit_train_batches=0.10,  # TODO: remember to remove this limit
                         deterministic=True)

    trainer.fit(model, dm)

    return metrics_callback.metrics[-1]["val_loss"].item()

https://ui.neptune.ai/nclibz/mrknee/e/MRKNEE-38


NeptuneLogger will work in online mode


In [None]:


# Median-rule pruner for unpromising trials (fed by the pruning callback in objective()).
pruner = optuna.pruners.MedianPruner()
# TPE is already Optuna's default sampler; it is created explicitly only so it
# can be seeded, which makes the sequence of suggested parameters repeatable.
sampler = optuna.samplers.TPESampler(seed=123)
study = optuna.create_study(direction="minimize", sampler=sampler, pruner=pruner)

# Stop after 10 trials or 600 seconds, whichever comes first.
study.optimize(objective, n_trials=10, timeout=600)

print(f"Number of finished trials: {len(study.trials)}")

# study.best_trial raises when no trial has completed (e.g. every trial was
# pruned or failed), so check before reporting.
completed_trials = [t for t in study.trials
                    if t.state == optuna.trial.TrialState.COMPLETE]

if not completed_trials:
    print("No trial completed; there is no best trial to report.")
else:
    print("Best trial:")
    trial = study.best_trial

    print(f"  Value: {trial.value}")

    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

In [None]:
# Record the best validation score on the Neptune experiment and upload every
# checkpoint tracked in `best_k_models` under a "checkpoints/" prefix.
# NOTE(review): `neptune_logger` and `model_checkpoint` are only created as locals
# inside objective() in the visible notebook - confirm they exist at notebook scope.
experiment = neptune_logger.experiment
experiment.set_property('best_val_loss', model_checkpoint.best_model_score.tolist())
for ckpt_path in model_checkpoint.best_k_models:
    # Keep only the file name of the checkpoint for the artifact destination.
    destination = 'checkpoints/' + ckpt_path.rsplit('/', 1)[-1]
    experiment.log_artifact(ckpt_path, destination)




ChunkedEncodingError: ignored

In [None]:
### Upload the sample losses as pickled artifacts.
# NOTE: this takes the losses from the last epoch, not from the epoch with the
# best validation score; that has to be implemented inside the model.
from neptunecontrib.api import log_pickle

sample_loss_files = (
    ('v_sample_loss.pkl', model.v_sample_loss),
    ('t_sample_loss.pkl', model.t_sample_loss),
)
for pkl_name, sample_losses in sample_loss_files:
    log_pickle(pkl_name, sample_losses, neptune_logger)

In [None]:
# Save the sample losses to pickle files, upload the files through the Neptune
# logger, then print the five top training losses.
import pickle

sample_loss_dumps = (
    ("t_sample_loss.p", model.t_sample_loss),
    ("v_sample_loss.p", model.v_sample_loss),
)

# Write inside a `with` block so each file is flushed and closed before it is
# uploaded; the original left the file objects open.
for dump_path, sample_losses in sample_loss_dumps:
    with open(dump_path, "wb") as dump_file:
        pickle.dump(sample_losses, dump_file)

for dump_path, _ in sample_loss_dumps:
    neptune_logger.log_artifact(dump_path)


print_top_losses(model.t_sample_loss, 5)

Sample : Loss
('0943',)  :  tensor(0.9017, device='cuda:0')
('0408',)  :  tensor(0.7901, device='cuda:0')
('1009',)  :  tensor(0.6418, device='cuda:0')
('0087',)  :  tensor(0.4506, device='cuda:0')
('0768',)  :  tensor(0.2992, device='cuda:0')


In [None]:


# NOTE(review): `export_pickle` is neither defined nor imported anywhere in the
# visible notebook, so this cell presumably fails with NameError on a fresh
# kernel - confirm where it is supposed to come from.
neptune_logger.log_artifact(export_pickle(model.t_sample_loss), "t_sample_loss.pkl")
  



In [None]:
# Show the callbacks attached to the trainer. The original expression ended in a
# truncated `.model_ch` attribute, which the callbacks list shown in the cell
# output does not have.
model.trainer.callbacks

[<pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor at 0x7f1075b71828>,
 <pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint at 0x7f10713afd30>,
 <pytorch_lightning.callbacks.progress.ProgressBar at 0x7f10704dfa58>]

In [None]:
model.trainer.logger

TypeError: ignored

In [None]:
model.best_val_loss

tensor(0.3541, device='cuda:0')