In [2]:
"""
Optuna example that optimizes multi-layer perceptrons using PyTorch Lightning.
In this example, we optimize the validation accuracy of fashion product recognition using
PyTorch Lightning, and FashionMNIST. We optimize the neural network architecture. As it is too time
consuming to use the whole FashionMNIST dataset, we here use a small subset of it.
You can run this example as follows, pruning can be turned on and off with the `--pruning`
argument.
    $ python pytorch_lightning_simple.py [--pruning]
"""
import argparse
import os
from typing import List
from typing import Optional

import optuna
from optuna.integration import PyTorchLightningPruningCallback
from packaging import version
import pytorch_lightning as pl
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision import datasets
from torchvision import transforms



In [10]:
if version.parse(pl.__version__) < version.parse("1.0.2"):
    raise RuntimeError("PyTorch Lightning>=1.0.2 is required for this example.")

PERCENT_VALID_EXAMPLES = 0.1
BATCHSIZE = 128
CLASSES = 10
EPOCHS = 10
DIR = os.getcwd()

In [11]:

class Net(nn.Module):
    def __init__(self, dropout: float, output_dims: List[int]):
        super().__init__()
        layers: List[nn.Module] = []

        input_dim: int = 28 * 28
        for output_dim in output_dims:
            layers.append(nn.Linear(input_dim, output_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            input_dim = output_dim

        layers.append(nn.Linear(input_dim, CLASSES))

        self.layers: nn.Module = nn.Sequential(*layers)

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        logits = self.layers(data)
        return F.log_softmax(logits, dim=1)


class LightningNet(pl.LightningModule):
    def __init__(self, dropout: float, output_dims: List[int]):
        super().__init__()
        self.model = Net(dropout, output_dims)

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        return self.model(data.view(-1, 28 * 28))

    def training_step(self, batch, batch_idx: int) -> torch.Tensor:
        data, target = batch
        output = self(data)
        return F.nll_loss(output, target)

    def validation_step(self, batch, batch_idx: int) -> None:
        data, target = batch
        output = self(data)
        pred = output.argmax(dim=1, keepdim=True)
        accuracy = pred.eq(target.view_as(pred)).float().mean()
        self.log("val_acc", accuracy)
        self.log("hp_metric", accuracy, on_step=False, on_epoch=True)

    def configure_optimizers(self) -> optim.Optimizer:
        return optim.Adam(self.model.parameters())


class FashionMNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str, batch_size: int):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size

    def setup(self, stage: Optional[str] = None) -> None:
        self.mnist_test = datasets.FashionMNIST(
            self.data_dir, train=False, download=True, transform=transforms.ToTensor()
        )
        mnist_full = datasets.FashionMNIST(
            self.data_dir, train=True, download=True, transform=transforms.ToTensor()
        )
        self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

    def train_dataloader(self) -> DataLoader:
        return DataLoader(
            self.mnist_train, batch_size=self.batch_size, shuffle=True, pin_memory=True
        )

    def val_dataloader(self) -> DataLoader:
        return DataLoader(
            self.mnist_val, batch_size=self.batch_size, shuffle=False, pin_memory=True
        )

    def test_dataloader(self) -> DataLoader:
        return DataLoader(
            self.mnist_test, batch_size=self.batch_size, shuffle=False, pin_memory=True
        )


In [12]:




def objective(trial: optuna.trial.Trial) -> float:

    # We optimize the number of layers, hidden units in each layer and dropouts.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)
    output_dims = [
        trial.suggest_int("n_units_l{}".format(i), 4, 128, log=True) for i in range(n_layers)
    ]

    model = LightningNet(dropout, output_dims)
    datamodule = FashionMNISTDataModule(data_dir=DIR, batch_size=BATCHSIZE)

    trainer = pl.Trainer(
        logger=True,
        limit_val_batches=PERCENT_VALID_EXAMPLES,
        enable_checkpointing=False,
        max_epochs=EPOCHS,
        gpus=1 if torch.cuda.is_available() else None,
        callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_acc")],
    )
    hyperparameters = dict(n_layers=n_layers, dropout=dropout, output_dims=output_dims)
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(model, datamodule=datamodule)

    return trainer.callback_metrics["val_acc"].item()



In [5]:

parser = argparse.ArgumentParser(description="PyTorch Lightning example.")
parser.add_argument(
    "--pruning",
    "-p",
    action="store_true",
    help="Activate the pruning feature. `MedianPruner` stops unpromising "
    "trials at the early stages of training.",
)
args = parser.parse_args()

pruner: optuna.pruners.BasePruner = (
    optuna.pruners.MedianPruner() if args.pruning else optuna.pruners.NopPruner()
)


usage: ipykernel_launcher.py [-h] [--pruning]
ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"d4faee66-c9c8-4f69-9bc1-13cd00067a62" --shell=9002 --transport="tcp" --iopub=9004 --f=/home/vagrant/.local/share/jupyter/runtime/kernel-v2-362ecxhNiC0lK8i.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [5]:

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-09-18 16:34:34,178][0m A new study created in memory with name: no-name-3caebf71-5683-4878-ac44-94b788234e9b[0m
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/train-images-idx3-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /mnt/c/Users/rwmas/GitHub/xai/xai_api/FashionMNIST/raw




  | Name  | Type | Params
-------------------------------
0 | model | Net  | 22.5 K
-------------------------------
22.5 K    Trainable params
0         Non-trainable params
22.5 K    Total params
0.090     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
[32m[I 2022-09-18 16:35:18,021][0m Trial 0 finished with value: 0.837890625 and parameters: {'n_layers': 2, 'dropout': 0.2509245006357079, 'n_units_l0': 27, 'n_units_l1': 33}. Best is trial 0 with value: 0.837890625.[0m
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type | Params
-------------------------------
0 | model | Net  | 12.4 K
-------------------------------
12.4 K    Trainable params
0         Non-trainable params
12.4 K    Total params
0.050     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [7]:
import sys; sys.path.insert(0, '..') # add parent folder path where lib folder is
from utils import dasker


In [13]:
from dask.distributed import Client
import joblib
import optuna
import dask_optuna



# with Client() as client:
client = dasker.get_dask_client()
print(f"Dask dashboard is available at {client.dashboard_link}")

storage = dask_optuna.DaskStorage()
study = optuna.create_study(storage=storage, direction="minimize")

with joblib.parallel_backend("dask"):
    study.optimize(objective, n_trials=10, n_jobs=-1)



# study = optuna.create_study(direction="maximize")
# study.optimize(objective, n_trials=10, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-09-18 16:40:31,965][0m A new study created in memory with name: no-name-09928987-158e-4dd0-8332-f2e53ab40872[0m


Dask dashboard is available at http://127.0.0.1:8787/status


[33m[W 2022-09-18 16:40:32,184][0m Trial 2 failed because of the following error: AssertionError('daemonic processes are not allowed to have children')[0m
Traceback (most recent call last):
  File "/home/vagrant/miniconda3/envs/test/lib/python3.8/site-packages/optuna/_optimize.py", line 189, in _run_trial
    value = func(trial)
  File "/tmp/ipykernel_7848/3099996763.py", line 13, in objective
  File "/home/vagrant/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/utilities/argparse.py", line 345, in insert_env_defaults
    return fn(self, **kwargs)
  File "/home/vagrant/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 534, in __init__
    self._setup_on_init()
  File "/home/vagrant/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 619, in _setup_on_init
    self._log_device_info()
  File "/home/vagrant/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/tra

AssertionError: daemonic processes are not allowed to have children