In [1]:
!pip install "ray[tune]" torch torchvision pytorch-lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray[tune]
  Downloading ray-2.3.0-cp38-cp38-manylinux2014_x86_64.whl (58.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.6/58.6 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning
  Downloading pytorch_lightning-1.9.4-py3-none-any.whl (827 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m827.8/827.8 KB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
Collecting virtualenv>=20.0.24
  Downloading virtualenv-20.20.0-py3-none-any.whl (8.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m75.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorboardX>=1.9
  Downloading tensorboardX-2.6-py2.py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 KB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.6.0.post0
  Down

In [2]:
import math

import torch
import pytorch_lightning as pl
from filelock import FileLock
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torchvision.datasets import MNIST
from torchvision import transforms
import os

In [7]:
class LightningMNISTClassifier(pl.LightningModule):
    """Fully connected MNIST classifier with two configurable hidden layers.

    Args:
        config: Mapping that provides ``"layer_1_size"``, ``"layer_2_size"``,
            ``"lr"`` and ``"batch_size"``.
        data_dir: Directory the MNIST files are downloaded to and read from.
            Falls back to the current working directory when falsy.
    """

    # Seed for the train/validation split. A fixed seed gives every process
    # (and every hyperparameter trial) the same validation subset, so reported
    # validation metrics are comparable.
    SPLIT_SEED = 42

    def __init__(self, config, data_dir=None):
        super().__init__()

        self.data_dir = data_dir or os.getcwd()

        self.layer_1_size = config["layer_1_size"]
        self.layer_2_size = config["layer_2_size"]
        self.lr = config["lr"]
        self.batch_size = config["batch_size"]

        # MNIST images are (1, 28, 28): (channels, height, width).
        self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_size)
        self.layer_2 = torch.nn.Linear(self.layer_1_size, self.layer_2_size)
        self.layer_3 = torch.nn.Linear(self.layer_2_size, 10)

        # Dataset splits, populated by setup().
        self.mnist_train = None
        self.mnist_val = None
        # Per-batch validation results, drained in on_validation_epoch_end().
        self._val_outputs = []

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Everything after the first (batch) dimension is flattened before the
        first linear layer, which expects 28 * 28 input features.
        """
        x = x.view(x.size(0), -1)

        x = torch.relu(self.layer_1(x))
        x = torch.relu(self.layer_2(x))

        return torch.log_softmax(self.layer_3(x), dim=1)

    def cross_entropy_loss(self, logits, labels):
        """Negative log-likelihood of ``labels``.

        ``logits`` are the log-probabilities produced by ``forward`` (which
        already applies ``log_softmax``), so NLL here equals cross-entropy.
        """
        return F.nll_loss(logits, labels)

    def accuracy(self, logits, labels):
        """Fraction of rows whose highest-scoring class equals the label.

        Returns a 0-dim float tensor on the same device as the inputs; no
        host round-trip is performed.
        """
        predicted = logits.detach().argmax(dim=1)
        return (predicted == labels).float().mean()

    def training_step(self, train_batch, batch_idx):
        """Compute and log the loss/accuracy of one training batch; return the loss."""
        x, y = train_batch
        logits = self(x)
        loss = self.cross_entropy_loss(logits, y)
        accuracy = self.accuracy(logits, y)

        self.log("ptl/train_loss", loss)
        self.log("ptl/train_accuracy", accuracy)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Compute loss/accuracy of one validation batch and remember them."""
        x, y = val_batch
        logits = self(x)
        output = {
            "val_loss": self.cross_entropy_loss(logits, y),
            "val_accuracy": self.accuracy(logits, y),
        }
        # Collected here because the hook used for aggregation below does not
        # receive the step outputs as an argument.
        self._val_outputs.append(output)
        return output

    def on_validation_epoch_end(self):
        """Log the mean of the per-batch validation loss and accuracy.

        This is the argument-free epoch-end hook; it is used instead of
        ``validation_epoch_end(outputs)`` so the module does not depend on a
        hook that newer Lightning releases no longer support.
        """
        outputs = self._val_outputs
        if not outputs:
            return
        avg_loss = torch.stack([o["val_loss"] for o in outputs]).mean()
        avg_acc = torch.stack([o["val_accuracy"] for o in outputs]).mean()
        self.log("ptl/val_loss", avg_loss)
        self.log("ptl/val_accuracy", avg_acc)
        # Start the next validation run from an empty buffer.
        outputs.clear()

    @staticmethod
    def download_data(data_dir):
        """Return the normalized MNIST training set, downloading it if needed.

        The call is wrapped in a file lock at ``~/.data.lock`` so that
        concurrent processes do not download into ``data_dir`` at the same time.
        """
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        with FileLock(os.path.expanduser("~/.data.lock")):
            return MNIST(data_dir, train=True, download=True, transform=transform)

    def prepare_data(self):
        """Make sure the MNIST files exist on disk.

        No state is assigned here: Lightning does not run this hook on every
        process, so attributes set here could be missing elsewhere.
        """
        self.download_data(self.data_dir)

    def setup(self, stage=None):
        """Create the 55000/5000 train/validation split (once per instance)."""
        if self.mnist_train is not None:
            return
        mnist_full = self.download_data(self.data_dir)
        self.mnist_train, self.mnist_val = random_split(
            mnist_full, [55000, 5000],
            generator=torch.Generator().manual_seed(self.SPLIT_SEED))

    def train_dataloader(self):
        """DataLoader over the training split, batched by ``config["batch_size"]``."""
        return DataLoader(self.mnist_train, batch_size=int(self.batch_size))

    def val_dataloader(self):
        """DataLoader over the validation split, batched by ``config["batch_size"]``."""
        return DataLoader(self.mnist_val, batch_size=int(self.batch_size))

    def configure_optimizers(self):
        """Adam over all parameters with learning rate ``config["lr"]``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)


def train_mnist(config):
    """Fit a ``LightningMNISTClassifier`` built from ``config`` for up to 10 epochs.

    The classifier is created without a ``data_dir`` argument and the progress
    bar is left enabled.
    """
    classifier = LightningMNISTClassifier(config)
    pl.Trainer(max_epochs=10, enable_progress_bar=True).fit(classifier)

In [8]:
def train_mnist_no_tune():
    """Train once with a fixed, hand-picked configuration (no hyperparameter search)."""
    fixed_config = dict(
        layer_1_size=128,
        layer_2_size=256,
        lr=1e-3,
        batch_size=64,
    )
    train_mnist(fixed_config)

In [9]:
# Baseline: a single training run with the fixed configuration, without Ray Tune.
train_mnist_no_tune()

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type   | Params
-----------------------------------
0 | layer_1 | Linear | 100 K 
1 | layer_2 | Linear | 33.0 K
2 | layer_3 | Linear | 2.6 K 
-----------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


In [10]:
from pytorch_lightning.loggers import TensorBoardLogger
from ray import air, tune
from ray.air import session
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.tune.integration.pytorch_lightning import TuneReportCallback, \
    TuneReportCheckpointCallback

In [None]:
# Stand-alone example of the reporting callback used in the trainable below.
# The dict keys ("loss", "mean_accuracy") are the names that show up in the
# Tune results; the values are the metric names logged by the LightningModule.
# `on="validation_end"` names the Lightning hook at which the report is sent.
TuneReportCallback(
    {
        "loss": "ptl/val_loss",
        "mean_accuracy": "ptl/val_accuracy"
    },
    on="validation_end")

<ray.tune.integration.pytorch_lightning.TuneReportCallback at 0x17b305710>

In [11]:
def train_mnist_tune(config, num_epochs=10, num_gpus=0, data_dir="~/data"):
    """Trainable for Ray Tune: fit one classifier and report validation metrics.

    Args:
        config: Hyperparameters forwarded to ``LightningMNISTClassifier``.
        num_epochs: Maximum number of epochs to train for.
        num_gpus: GPUs assigned to this trial; may be fractional.
        data_dir: MNIST directory; a leading ``~`` is expanded.
    """
    data_dir = os.path.expanduser(data_dir)
    model = LightningMNISTClassifier(config, data_dir)

    # A trial may be granted a fraction of a GPU, but the Trainer needs a whole
    # device count, so round up. `accelerator`/`devices` replace the deprecated
    # `gpus=` argument; zero GPUs maps to a single CPU device.
    gpu_count = math.ceil(num_gpus)
    if gpu_count > 0:
        accelerator, devices = "gpu", gpu_count
    else:
        accelerator, devices = "cpu", 1

    trainer = pl.Trainer(
        max_epochs=num_epochs,
        accelerator=accelerator,
        devices=devices,
        # Write TensorBoard files straight into the current working directory
        # (empty name, version ".") instead of a nested run/version folder.
        logger=TensorBoardLogger(
            save_dir=os.getcwd(), name="", version="."),
        enable_progress_bar=False,
        callbacks=[
            TuneReportCallback(
                {
                    "loss": "ptl/val_loss",
                    "mean_accuracy": "ptl/val_accuracy"
                },
                on="validation_end")
        ])
    trainer.fit(model)

In [12]:
# Search space: layer widths and batch size are drawn from fixed choices,
# the learning rate log-uniformly from [1e-4, 1e-1].
config = dict(
    layer_1_size=tune.choice([32, 64, 128]),
    layer_2_size=tune.choice([64, 128, 256]),
    lr=tune.loguniform(1e-4, 1e-1),
    batch_size=tune.choice([32, 64, 128]),
)

In [13]:
# Epoch budget per trial; also passed to the scheduler as its `max_t`.
num_epochs = 10

# ASHA scheduler configured with a grace period of 1 and a reduction factor of 2.
scheduler = ASHAScheduler(max_t=num_epochs, grace_period=1, reduction_factor=2)

In [14]:
# Console progress table: one column per tuned hyperparameter, plus the
# reported metrics and the iteration counter.
reporter = CLIReporter(
    metric_columns=["loss", "mean_accuracy", "training_iteration"],
    parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
)

In [15]:
gpus_per_trial = 0
data_dir = "~/data"

# Bind the arguments that are not part of the search space, so that only
# `config` is left for Tune to supply to each trial.
train_fn_with_parameters = tune.with_parameters(
    train_mnist_tune,
    num_epochs=num_epochs,
    num_gpus=gpus_per_trial,
    data_dir=data_dir,
)

In [16]:
# Resources requested for each trial: one CPU plus the configured GPU share.
resources_per_trial = dict(cpu=1, gpu=gpus_per_trial)

In [17]:
def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0, data_dir="~/data"):
    """Run a hyperparameter search for the MNIST classifier with an ASHA scheduler.

    Args:
        num_samples: Number of configurations sampled from the search space.
        num_epochs: Maximum epochs per trial; also the scheduler's ``max_t``.
        gpus_per_trial: GPUs requested per trial (each trial also requests 1 CPU).
        data_dir: MNIST directory handed to every trial.

    Returns:
        The object returned by ``Tuner.fit()``, so callers can inspect all
        trials after the best configuration has been printed.
    """
    config = {
        "layer_1_size": tune.choice([32, 64, 128]),
        "layer_2_size": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    scheduler = ASHAScheduler(
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
        metric_columns=["loss", "mean_accuracy", "training_iteration"])

    # Bind everything except `config`, which Tune supplies per trial.
    train_fn_with_parameters = tune.with_parameters(train_mnist_tune,
                                                    num_epochs=num_epochs,
                                                    num_gpus=gpus_per_trial,
                                                    data_dir=data_dir)
    resources_per_trial = {"cpu": 1, "gpu": gpus_per_trial}

    tuner = tune.Tuner(
        tune.with_resources(
            train_fn_with_parameters,
            resources=resources_per_trial
        ),
        tune_config=tune.TuneConfig(
            # The search minimizes the "loss" key reported by the trainable.
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=num_samples,
        ),
        run_config=air.RunConfig(
            name="tune_mnist_asha",
            progress_reporter=reporter,
        ),
        param_space=config,
    )
    results = tuner.fit()

    best_result = results.get_best_result()
    print("Best hyperparameters found were: ", best_result.config)

    # Hand the full result set back instead of discarding it.
    return results

In [19]:
# Launch the search with the defaults: 10 samples, up to 10 epochs each, no GPUs.
tune_mnist_asha()

2023-03-07 18:04:37,419	INFO worker.py:1553 -- Started a local Ray instance.


== Status ==
Current time: 2023-03-07 18:04:39 (running for 00:00:00.21)
Memory usage on this node: 1.9/12.7 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 1.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------------+----------+------------------+----------------+----------------+-------------+--------------+
| Trial name                   | status   | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |
|------------------------------+----------+------------------+----------------+----------------+-------------+--------------|
| train_mnist_tune_86f06_00000 | RUNNING  | 172.28.0.12:6139 |             32 |             64 | 0.000141977 |          128 |
| train_mnist_tune_86f06_00001 | PENDING  |                  |             64 |     

[2m[36m(train_mnist_tune pid=6139)[0m   rank_zero_deprecation(
[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs


[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/data/MNIST/raw/train-images-idx3-ubyte.gz


[2m[36m(train_mnist_tune pid=6139)[0m   0%|          | 0/9912422 [00:00<?, ?it/s]
 25%|██▌       | 2490368/9912422 [00:00<00:00, 15328251.68it/s]
100%|██████████| 9912422/9912422 [00:00<00:00, 46072783.50it/s]


[2m[36m(train_mnist_tune pid=6139)[0m Extracting /root/data/MNIST/raw/train-images-idx3-ubyte.gz to /root/data/MNIST/raw
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/data/MNIST/raw/train-labels-idx1-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Extracting /root/data/MNIST/raw/train-labels-idx1-ubyte.gz to /root/data/MNIST/raw
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/data/MNIST/raw/t10k-images-idx3-ubyte.gz


[2m[36m(train_mnist_tune pid=6139)[0m   0%|          | 0/28881 [00:00<?, ?it/s]100%|██████████| 28881/28881 [00:00<00:00, 132824225.68it/s]
[2m[36m(train_mnist_tune pid=6139)[0m   0%|          | 0/1648877 [00:00<?, ?it/s]100%|██████████| 1648877/1648877 [00:00<00:00, 108796881.98it/s]


[2m[36m(train_mnist_tune pid=6139)[0m Extracting /root/data/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/MNIST/raw
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
[2m[36m(train_mnist_tune pid=6139)[0m Extracting /root/data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/MNIST/raw
[2m[36m(train_mnist_tune pid=6139)[0m 


[2m[36m(train_mnist_tune pid=6139)[0m   0%|          | 0/4542 [00:00<?, ?it/s]100%|██████████| 4542/4542 [00:00<00:00, 22359775.55it/s]
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 2.1 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 650   
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Total params
[2m[36m(train_mnist_tune pid=6139)[0m 0.112     Total estimated model params size (MB)
[2m[36m(train_mnist_tune pid=6139)[0m 2023-03-07 18:04:48.475301: W tensorflow/com

== Status ==
Current time: 2023-03-07 18:04:48 (running for 00:00:09.42)
Memory usage on this node: 2.3/12.7 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (8 PENDING, 2 RUNNING)
+------------------------------+----------+------------------+----------------+----------------+-------------+--------------+
| Trial name                   | status   | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |
|------------------------------+----------+------------------+----------------+----------------+-------------+--------------|
| train_mnist_tune_86f06_00000 | RUNNING  | 172.28.0.12:6139 |             32 |             64 | 0.000141977 |          128 |
| train_mnist_tune_86f06_00001 | RUNNING  | 172.28.0.12:6199 |             64 |     

[2m[36m(train_mnist_tune pid=6199)[0m   rank_zero_deprecation(
[2m[36m(train_mnist_tune pid=6199)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6199)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6199)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6199)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6199)[0m 
[2m[36m(train_mnist_tune pid=6199)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 0 | layer_1 | Linear | 50.2 K
[2m[36m(train_mnist_tune pid=6199)[0m 1 | layer_2 | Linear | 4.2 K 
[2m[36m(train_mnist_tune pid=6199)[0m 2 | layer_3 | Linear | 650   
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 55.1 K    Trainable params
[2m[36m(train_mnist_tune pid=6199)[0m 0         Non-trainable params
[2

== Status ==
Current time: 2023-03-07 18:04:54 (running for 00:00:15.71)
Memory usage on this node: 2.6/12.7 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (8 PENDING, 2 RUNNING)
+------------------------------+----------+------------------+----------------+----------------+-------------+--------------+
| Trial name                   | status   | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |
|------------------------------+----------+------------------+----------------+----------------+-------------+--------------|
| train_mnist_tune_86f06_00000 | RUNNING  | 172.28.0.12:6139 |             32 |             64 | 0.000141977 |          128 |
| train_mnist_tune_86f06_00001 | RUNNING  | 172.28.0.12:6199 |             64 |     

Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,mean_accuracy,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_mnist_tune_86f06_00000,2023-03-07_18-09-11,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,10,0.16854,0.950586,172.28.0.12,6139,267.745,23.3821,267.745,1678212551,0,,10,86f06_00000,0.0074842
train_mnist_tune_86f06_00001,2023-03-07_18-07-21,True,,a50be76441e4490c900ce4f188b89767,4e057c5aa3eb,4,0.241117,0.937301,172.28.0.12,6199,151.522,38.3774,151.522,1678212441,0,,4,86f06_00001,0.00561714
train_mnist_tune_86f06_00002,2023-03-07_18-11-27,True,,a50be76441e4490c900ce4f188b89767,4e057c5aa3eb,10,0.140586,0.960352,172.28.0.12,6199,246.456,24.3108,246.456,1678212687,0,,10,86f06_00002,0.00561714
train_mnist_tune_86f06_00003,2023-03-07_18-09-36,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,1,0.375269,0.896289,172.28.0.12,6139,25.3858,25.3858,25.3858,1678212576,0,,1,86f06_00003,0.0074842
train_mnist_tune_86f06_00004,2023-03-07_18-10-03,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,1,0.636535,0.815467,172.28.0.12,6139,27.0855,27.0855,27.0855,1678212603,0,,1,86f06_00004,0.0074842
train_mnist_tune_86f06_00005,2023-03-07_18-10-51,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,2,0.263317,0.925781,172.28.0.12,6139,48.1902,23.7929,48.1902,1678212651,0,,2,86f06_00005,0.0074842
train_mnist_tune_86f06_00006,2023-03-07_18-11-50,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,2,0.338883,0.909612,172.28.0.12,6139,58.8461,29.129,58.8461,1678212710,0,,2,86f06_00006,0.0074842
train_mnist_tune_86f06_00007,2023-03-07_18-15-56,True,,a50be76441e4490c900ce4f188b89767,4e057c5aa3eb,10,0.134324,0.961432,172.28.0.12,6199,268.392,22.3336,268.392,1678212956,0,,10,86f06_00007,0.00561714
train_mnist_tune_86f06_00008,2023-03-07_18-15-16,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,8,0.175291,0.966992,172.28.0.12,6139,205.836,25.7812,205.836,1678212916,0,,8,86f06_00008,0.0074842
train_mnist_tune_86f06_00009,2023-03-07_18-15-49,True,,d73a4469f4974daeb8fdbb98edbfb9da,4e057c5aa3eb,1,2.31637,0.093551,172.28.0.12,6139,32.9956,32.9956,32.9956,1678212949,0,,1,86f06_00009,0.0074842


== Status ==
Current time: 2023-03-07 18:05:20 (running for 00:00:41.91)
Memory usage on this node: 2.6/12.7 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -0.4114856719970703
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00000 with loss=0.4114856719970703 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.00014197686913640606, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (8 PENDING, 2 RUNNING)
+------------------------------+----------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status   | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training_iteration |
|------------------------------+----------+--------

[2m[36m(train_mnist_tune pid=6199)[0m   rank_zero_deprecation(
[2m[36m(train_mnist_tune pid=6199)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6199)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6199)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6199)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6199)[0m 
[2m[36m(train_mnist_tune pid=6199)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6199)[0m 1 | layer_2 | Linear | 2.1 K 
[2m[36m(train_mnist_tune pid=6199)[0m 2 | layer_3 | Linear | 650   
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 27.9 K    Trainable params
[2m[36m(train_mnist_tune pid=6199)[0m 0         Non-trainable params
[2

== Status ==
Current time: 2023-03-07 18:07:26 (running for 00:02:47.28)
Memory usage on this node: 2.6/12.7 GiB 
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: None | Iter 4.000: -0.23614606261253357 | Iter 2.000: -0.2641848251223564 | Iter 1.000: -0.33576400578022003
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00000 with loss=0.21481367945671082 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.00014197686913640606, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (7 PENDING, 2 RUNNING, 1 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training_iteration |


[2m[36m(train_mnist_tune pid=6139)[0m   rank_zero_deprecation(
[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 8.4 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 2.6 K 
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 36.1 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2

== Status ==
Current time: 2023-03-07 18:09:16 (running for 00:04:37.16)
Memory usage on this node: 2.7/12.7 GiB 
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: -0.18267501890659332 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2366371899843216 | Iter 1.000: -0.26004233956336975
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00000 with loss=0.16854038834571838 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.00014197686913640606, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (6 PENDING, 2 RUNNING, 2 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   train

[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 2.1 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 650   
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Total params
[2m

== Status ==
Current time: 2023-03-07 18:09:41 (running for 00:05:02.58)
Memory usage on this node: 2.7/12.7 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: -0.18267501890659332 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2366371899843216 | Iter 1.000: -0.31765562295913696
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00002 with loss=0.15379753708839417 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.0009660718077979762, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (5 PENDING, 2 RUNNING, 3 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   traini

[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 2.1 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 650   
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 27.9 K    Total params
[2m

== Status ==
Current time: 2023-03-07 18:10:08 (running for 00:05:29.70)
Memory usage on this node: 2.8/12.7 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 8.000: -0.18267501890659332 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2366371899843216 | Iter 1.000: -0.3752689063549042
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00002 with loss=0.14832481741905212 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.0009660718077979762, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (4 PENDING, 2 RUNNING, 4 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   trainin

[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 100 K 
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 16.5 K
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 1.3 K 
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 118 K     Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 118 K     Total params
[2m

== Status ==
Current time: 2023-03-07 18:10:56 (running for 00:06:17.94)
Memory usage on this node: 2.8/12.7 GiB 
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: -0.1616043671965599 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2499769702553749 | Iter 1.000: -0.352556437253952
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00002 with loss=0.1405337154865265 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.0009660718077979762, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (3 PENDING, 2 RUNNING, 5 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training_i

[2m[36m(train_mnist_tune pid=6199)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6199)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6199)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6199)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6199)[0m 
[2m[36m(train_mnist_tune pid=6199)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6199)[0m 1 | layer_2 | Linear | 4.2 K 
[2m[36m(train_mnist_tune pid=6199)[0m 2 | layer_3 | Linear | 1.3 K 
[2m[36m(train_mnist_tune pid=6199)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6199)[0m 30.6 K    Trainable params
[2m[36m(train_mnist_tune pid=6199)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6199)[0m 30.6 K    Total params
[2m

== Status ==
Current time: 2023-03-07 18:11:32 (running for 00:06:53.76)
Memory usage on this node: 2.8/12.7 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -0.1616043671965599 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2499769702553749 | Iter 1.000: -0.3298439681529999
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00002 with loss=0.14058569073677063 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.0009660718077979762, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (2 PENDING, 2 RUNNING, 6 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training

[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 50.2 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 8.3 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 1.3 K 
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 59.9 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 59.9 K    Total params
[2m

== Status ==
Current time: 2023-03-07 18:11:53 (running for 00:07:14.22)
Memory usage on this node: 2.9/12.7 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -0.1616043671965599 | Iter 4.000: -0.23117558658123016 | Iter 2.000: -0.2633167505264282 | Iter 1.000: -0.31370092928409576
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00002 with loss=0.14058569073677063 and parameters={'layer_1_size': 32, 'layer_2_size': 64, 'lr': 0.0009660718077979762, 'batch_size': 128}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (1 PENDING, 2 RUNNING, 7 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   trainin

[2m[36m(train_mnist_tune pid=6139)[0m GPU available: False, used: False
[2m[36m(train_mnist_tune pid=6139)[0m TPU available: False, using: 0 TPU cores
[2m[36m(train_mnist_tune pid=6139)[0m IPU available: False, using: 0 IPUs
[2m[36m(train_mnist_tune pid=6139)[0m HPU available: False, using: 0 HPUs
[2m[36m(train_mnist_tune pid=6139)[0m 
[2m[36m(train_mnist_tune pid=6139)[0m   | Name    | Type   | Params
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 0 | layer_1 | Linear | 25.1 K
[2m[36m(train_mnist_tune pid=6139)[0m 1 | layer_2 | Linear | 8.4 K 
[2m[36m(train_mnist_tune pid=6139)[0m 2 | layer_3 | Linear | 2.6 K 
[2m[36m(train_mnist_tune pid=6139)[0m -----------------------------------
[2m[36m(train_mnist_tune pid=6139)[0m 36.1 K    Trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 0         Non-trainable params
[2m[36m(train_mnist_tune pid=6139)[0m 36.1 K    Total params
[2m

== Status ==
Current time: 2023-03-07 18:15:21 (running for 00:10:42.70)
Memory usage on this node: 2.7/12.7 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 8.000: -0.15791214257478714 | Iter 4.000: -0.17471830546855927 | Iter 2.000: -0.2366371899843216 | Iter 1.000: -0.29755789041519165
Resources requested: 2.0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00007 with loss=0.13429000973701477 and parameters={'layer_1_size': 32, 'layer_2_size': 128, 'lr': 0.0006869937718490195, 'batch_size': 64}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (2 RUNNING, 8 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training_iteratio

2023-03-07 18:15:56,276	INFO tune.py:798 -- Total run time: 677.37 seconds (677.19 seconds for the tuning loop).


== Status ==
Current time: 2023-03-07 18:15:56 (running for 00:11:17.21)
Memory usage on this node: 2.0/12.7 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -0.15791214257478714 | Iter 4.000: -0.17471830546855927 | Iter 2.000: -0.2366371899843216 | Iter 1.000: -0.31370092928409576
Resources requested: 0/2 CPUs, 0/0 GPUs, 0.0/7.37 GiB heap, 0.0/3.68 GiB objects
Current best trial: 86f06_00007 with loss=0.13432417809963226 and parameters={'layer_1_size': 32, 'layer_2_size': 128, 'lr': 0.0006869937718490195, 'batch_size': 64}
Result logdir: /root/ray_results/tune_mnist_asha
Number of trials: 10/10 (10 TERMINATED)
+------------------------------+------------+------------------+----------------+----------------+-------------+--------------+----------+-----------------+----------------------+
| Trial name                   | status     | loc              |   layer_1_size |   layer_2_size |          lr |   batch_size |     loss |   mean_accuracy |   training_iteration |
|------