In [1]:
!nvidia-smi

Fri Jan 22 13:31:16 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# SETUP

In [2]:
#!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
#!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

In [3]:
!git clone https://github.com/nclibz/MRKnee/

Cloning into 'MRKnee'...
remote: Enumerating objects: 44, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 372 (delta 25), reused 27 (delta 11), pack-reused 328[K
Receiving objects: 100% (372/372), 126.40 MiB | 24.61 MiB/s, done.
Resolving deltas: 100% (241/241), done.
Checking out files: 100% (12/12), done.


In [8]:
!pip install pytorch-lightning
!pip install timm
!pip install neptune-client
!pip install albumentations -U
!pip install neptune-contrib
!pip install optuna



Requirement already up-to-date: albumentations in /usr/local/lib/python3.6/dist-packages (0.5.2)
Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/59/b4/a1a80252cef3d8f5a0acdf6e678d6dc07e2e6964ee46d0453a2ae1af1ecb/optuna-2.4.0-py3-none-any.whl (282kB)
[K     |████████████████████████████████| 286kB 11.5MB/s 
Collecting cmaes>=0.6.0
  Downloading https://files.pythonhosted.org/packages/8d/3c/06c76ec8b54b9b1fad7f35e903fd25010fe3e0d41bd94cea5e6f12e0d651/cmaes-0.7.0-py3-none-any.whl
Collecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/ea/c3/b9147464dba5e5f1279fb46ddda3bef28c8d3594599e07cf0ae67d8c7c01/alembic-1.5.2-py2.py3-none-any.whl (155kB)
[K     |████████████████████████████████| 163kB 21.1MB/s 
[?25hCollecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/0f/8f/3c74fa4b6c3db1051b495385f5302fc5d5aa0f180d40ce3e9a13c82f8c82/cliff-3.6.0-py3-none-any.whl (79kB)
[K     |████████████████████████████████| 81kB 9.2MB

# PATHS

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# setup wd and datadir
import os
os.chdir('/content/MRKnee/')
os.getcwd()



'/content/MRKnee'

## GIT

In [7]:
! git pull https://github.com/nclibz/MRKnee/
!git checkout optuna

From https://github.com/nclibz/MRKnee
 * branch            HEAD       -> FETCH_HEAD
Already up to date.
Branch 'optuna' set up to track remote branch 'optuna' from 'origin'.
Switched to a new branch 'optuna'


In [9]:
!git pull origin optuna

From https://github.com/nclibz/MRKnee
 * branch            optuna     -> FETCH_HEAD
Already up to date.


# MODEL

In [11]:
# %%
from optuna.integration import PyTorchLightningPruningCallback
import optuna
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from model import MRKnee
from data import MRKneeDataModule
import albumentations as A
from pytorch_lightning import Callback
from utils import print_top_losses


pl.seed_everything(123)

%load_ext autoreload
%autoreload 0




Global seed set to 123


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:

class MetricsCallback(Callback):
    """PyTorch Lightning metric callback."""

    def __init__(self):
        super().__init__()
        self.metrics = []

    def on_validation_end(self, trainer, pl_module):
        self.metrics.append(trainer.callback_metrics)

In [19]:

def objective(trial):
    BACKBONE = 'efficientnet_b1'
    IMG_SZ = 240  # b0 = 224, b1 = 240,


    cfg = {
        # DATA
        'datadir': '/content/drive/MyDrive/MRKnee/data',
        'diagnosis': 'meniscus',
        'planes': ['axial'],  # , 'sagittal', 'coronal', 'axial',
        'n_chans': 1,
        'num_workers': 4,
        'pin_memory': True,
        'upsample': False,
        'w_loss': True,
        'indp_normalz': False,
        'transf': {
            'train': [A.ShiftScaleRotate(shift_limit=(-0.05, 0.05),
                                         scale_limit=(-0.05,0.05),
                                         rotate_limit=(25, 25),
                                         p = 1.),
                      A.HorizontalFlip(p=0.5),
                      A.RandomCrop(IMG_SZ, IMG_SZ)],
            'valid': [A.CenterCrop(IMG_SZ, IMG_SZ)]
        },
        # MODEL
        'backbone': BACKBONE,
        'pretrained': True,
        'learning_rate': trial.suggest_loguniform('lr', 1e-6, 1e-2),
        'drop_rate': trial.suggest_float('dropout', 0., 0.8),
        'freeze_from': -1,
        'unfreeze_epoch': 0,
        'log_auc': True,
        'log_ind_loss': False,
        'final_pool': 'max',
        # Trainer
        #'precision': 16,
        'max_epochs': 8,
    }

    # LOGGER
    neptune_logger = pl_loggers.NeptuneLogger(
        api_key="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiNDI5ODUwMzQtOTM0Mi00YTY2LWExYWQtMDNlZDZhY2NlYjUzIn0=",
        params=cfg,
        project_name='nclibz/optuna-test',
        tags=[cfg['diagnosis']] + cfg['planes']
    )

    # Callbacks
    model_checkpoint = ModelCheckpoint(dirpath=f'checkpoints/trial{trial.number}/',
                                       filename='{epoch:02d}-{val_loss:.2f}-{val_auc:.2f}',
                                       verbose=True,
                                       save_top_k=2,
                                       monitor='val_loss',
                                       mode='min',
                                       period=1)

    lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval="epoch")

    metrics_callback = MetricsCallback()

    prune_callback = PyTorchLightningPruningCallback(trial, monitor="val_loss")

    # DM AND MODEL
    dm = MRKneeDataModule(**cfg)
    model = MRKnee(**cfg)
    trainer = pl.Trainer(gpus=1,
                         precision=cfg['precision'],
                         max_epochs=cfg['max_epochs'],
                         logger=neptune_logger,
                         log_every_n_steps=100,
                         num_sanity_val_steps=0,
                         callbacks=[lr_monitor,
                                    model_checkpoint,
                                    metrics_callback,
                                    prune_callback],
                         progress_bar_refresh_rate=20,
                         deterministic=True)

    trainer.fit(model, dm)

    return metrics_callback.metrics[-1]["val_loss"].item()

In [24]:
pruner = optuna.pruners.HyperbandPruner()
sampler = optuna.samplers.TPESampler()
# skal vel også bruge en TPE sampler?
study = optuna.create_study(direction="minimize", pruner=pruner, sampler = sampler)

study.optimize(objective, n_trials=6, timeout=None)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2021-01-22 14:08:20,669][0m A new study created in memory with name: no-name-3adf738c-bd3a-48c1-aaea-d35a8181b994[0m


https://ui.neptune.ai/nclibz/optuna-test/e/OP-33


NeptuneLogger will work in online mode

Checkpoint directory checkpoints/trial0/ exists and is not empty.

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name      | Type       | Params
-----------------------------------------
0 | backbones | ModuleList | 4.0 M 
1 | clf       | Linear     | 1.3 K 
-----------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0, global step 9: val_loss reached 0.85982 (best 0.85982), saving model to "/content/MRKnee/checkpoints/trial0/epoch=00-val_loss=0.86-val_auc=0.47.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors se

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1, global step 19: val_loss reached 0.86824 (best 0.85982), saving model to "/content/MRKnee/checkpoints/trial0/epoch=01-val_loss=0.87-val_auc=0.44.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors s

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2, step 29: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-33. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3, step 39: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-33. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4, step 49: val_loss was not in top 2





Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-33. Cause: Error(code=400, message='X-coordinates must be strictly increasing for channel: 91828d45-2617-466c-b5bd-f09a6f432c5f.

https://ui.neptune.ai/nclibz/optuna-test/e/OP-34


NeptuneLogger will work in online mode

Checkpoint directory checkpoints/trial1/ exists and is not empty.

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name      | Type       | Params
-----------------------------------------
0 | backbones | ModuleList | 4.0 M 
1 | clf       | Linear     | 1.3 K 
-----------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0, global step 9: val_loss reached 0.79119 (best 0.79119), saving model to "/content/MRKnee/checkpoints/trial1/epoch=00-val_loss=0.79-val_auc=0.49.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors se

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1, global step 19: val_loss reached 0.78533 (best 0.78533), saving model to "/content/MRKnee/checkpoints/trial1/epoch=01-val_loss=0.79-val_auc=0.52.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors s

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2, step 29: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-34. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3, step 39: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-34. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4, step 49: val_loss was not in top 2





Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-34. Cause: Error(code=400, message='X-coordinates must be strictly increasing for channel: 43a1fc4a-4990-40e6-85dc-604595c9aa97.

https://ui.neptune.ai/nclibz/optuna-test/e/OP-35


NeptuneLogger will work in online mode

Checkpoint directory checkpoints/trial2/ exists and is not empty.

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name      | Type       | Params
-----------------------------------------
0 | backbones | ModuleList | 4.0 M 
1 | clf       | Linear     | 1.3 K 
-----------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0, global step 9: val_loss reached 7.85187 (best 7.85187), saving model to "/content/MRKnee/checkpoints/trial2/epoch=00-val_loss=7.85-val_auc=0.48.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors se

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1, global step 19: val_loss reached 11.22223 (best 7.85187), saving model to "/content/MRKnee/checkpoints/trial2/epoch=01-val_loss=11.22-val_auc=0.39.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2, step 29: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-35. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3, global step 39: val_loss reached 0.85931 (best 0.85931), saving model to "/content/MRKnee/checkpoints/trial2/epoch=03-val_loss=0.86-val_auc=0.55.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors s

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4, global step 49: val_loss reached 0.78329 (best 0.78329), saving model to "/content/MRKnee/checkpoints/trial2/epoch=04-val_loss=0.78-val_auc=0.58.ckpt" as top 2





Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-35. Cause: Error(code=400, message='X-coordinates must be strictly increasing for channel: e7085701-4f69-4533-a9fb-4108d8eb3b55.

https://ui.neptune.ai/nclibz/optuna-test/e/OP-36


NeptuneLogger will work in online mode

Checkpoint directory checkpoints/trial3/ exists and is not empty.

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name      | Type       | Params
-----------------------------------------
0 | backbones | ModuleList | 4.0 M 
1 | clf       | Linear     | 1.3 K 
-----------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0, global step 9: val_loss reached 46.31615 (best 46.31615), saving model to "/content/MRKnee/checkpoints/trial3/epoch=00-val_loss=46.32-val_auc=0.50.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1, global step 19: val_loss reached 39.03572 (best 39.03572), saving model to "/content/MRKnee/checkpoints/trial3/epoch=01-val_loss=39.04-val_auc=0.51.ckpt" as top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch error

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2, step 29: val_loss was not in top 2
Failed to send channel value.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/channels/channels_values_sender.py", line 156, in _send_values
    self._experiment._send_channels_values(channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/experiments.py", line 1167, in _send_channels_values
    self._backend.send_channels_values(self, channels_with_values)
  File "/usr/local/lib/python3.6/dist-packages/neptune/utils.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/neptune/internal/backends/hosted_neptune_backend.py", line 574, in send_channels_values
    raise ChannelsValuesSendBatchError(experiment.id, batch_errors)
neptune.api_exceptions.ChannelsValuesSendBatchError: Received batch errors sending channels' values to experiment OP-36. Cause: Error(code=400, message='X-coordinates must be strictly increasing for ch

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3, global step 39: val_loss reached 15.34609 (best 15.34609), saving model to "/content/MRKnee/checkpoints/trial3/epoch=03-val_loss=15.35-val_auc=0.50.ckpt" as top 2
Epoch 3, global step 38: val_loss reached 15.34609 (best 15.34609), saving model to "/content/MRKnee/checkpoints/trial3/epoch=03-val_loss=15.35-val_auc=0.50-v0.ckpt" as top 2





[32m[I 2021-01-22 14:20:00,077][0m Trial 3 pruned. Trial was pruned at epoch 3.[0m


Number of finished trials: 4
Best trial:
  Value: 0.7832931876182556
  Params: 
    lr: 0.004716782826929406
    dropout: 0.5643196194168602


In [None]:
## UPLOAD BEST CHECKPOINTS TO LOG
neptune_logger.experiment.set_property('best_val_loss', model_checkpoint.best_model_score.tolist())
for k in model_checkpoint.best_k_models.keys():
    model_name = 'checkpoints/' + k.split('/')[-1]
    neptune_logger.experiment.log_artifact(k, model_name)




ChunkedEncodingError: ignored

In [None]:
### UPLOAD  SAMPLE LOSSES  - tager dog den sidste og ik ved bedste val. Skal implementere inde i modellen
from neptunecontrib.api import log_pickle
log_pickle('v_sample_loss.pkl', model.v_sample_loss, neptune_logger)
log_pickle('t_sample_loss.pkl', model.t_sample_loss, neptune_logger)

In [None]:
# save top losses to pkl files 
# uploade til neptune??
import pickle 

pickle.dump( model.t_sample_loss, open( "t_sample_loss.p", "wb" ) )
pickle.dump( model.v_sample_loss, open( "v_sample_loss.p", "wb" ) )
neptune_logger.log_artifact("t_sample_loss.p")
neptune_logger.log_artifact("v_sample_loss.p")


print_top_losses(model.t_sample_loss,5)

Sample : Loss
('0943',)  :  tensor(0.9017, device='cuda:0')
('0408',)  :  tensor(0.7901, device='cuda:0')
('1009',)  :  tensor(0.6418, device='cuda:0')
('0087',)  :  tensor(0.4506, device='cuda:0')
('0768',)  :  tensor(0.2992, device='cuda:0')


In [None]:


neptune_logger.log_artifact(export_pickle(model.t_sample_loss), "t_sample_loss.pkl")
  



In [None]:
model.trainer.callbacks.model_ch

[<pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor at 0x7f1075b71828>,
 <pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint at 0x7f10713afd30>,
 <pytorch_lightning.callbacks.progress.ProgressBar at 0x7f10704dfa58>]

In [23]:
study.trials

[FrozenTrial(number=0, values=[0.8238645195960999], datetime_start=datetime.datetime(2021, 1, 22, 13, 45, 18, 753354), datetime_complete=datetime.datetime(2021, 1, 22, 13, 48, 27, 888335), params={'lr': 1.24951107527096e-05, 'dropout': 0.135944145465938}, distributions={'lr': LogUniformDistribution(high=0.01, low=1e-06), 'dropout': UniformDistribution(high=0.8, low=0.0)}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.8168485164642334, 1: 0.7946151494979858, 2: 0.8118051886558533, 3: 0.8125366568565369, 4: 0.8238645195960999}, trial_id=0, state=TrialState.COMPLETE, value=None),
 FrozenTrial(number=1, values=[0.777872622013092], datetime_start=datetime.datetime(2021, 1, 22, 13, 48, 27, 889905), datetime_complete=datetime.datetime(2021, 1, 22, 13, 51, 26, 671737), params={'lr': 8.26930067688086e-06, 'dropout': 0.7773077707486252}, distributions={'lr': LogUniformDistribution(high=0.01, low=1e-06), 'dropout': UniformDistribution(high=0.8, low=0.0)}, user_attrs={}, system_attrs={

In [None]:
model.best_val_loss

tensor(0.3541, device='cuda:0')