In [2]:
!pip install ray[tune]
!pip install ipywidgets

Collecting ray[tune]
  Downloading ray-2.49.1-cp312-cp312-manylinux2014_x86_64.whl.metadata (21 kB)
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Downloading tensorboardx-2.6.4-py3-none-any.whl (87 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ray-2.49.1-cp312-cp312-manylinux2014_x86_64.whl (70.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardX, ray
Successfully installed ray-2.49.1 tensorboardX-2.6.4
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m75.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling c

In [3]:
from functools import partial
import os
import tempfile
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray import train
from ray.train import Checkpoint, get_checkpoint
from ray.tune.schedulers import ASHAScheduler
import ray.cloudpickle as pickle

In [4]:
def load_data(data_dir="./data"):
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform
    )

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform
    )

    return trainset, testset

In [5]:
class Net(nn.Module):
    def __init__(self, l1=120, l2=84):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
def train_cifar(config, data_dir=None):
    net = Net(config["l1"], config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)

    checkpoint = get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "rb") as fp:
                checkpoint_state = pickle.load(fp)
            start_epoch = checkpoint_state["epoch"]
            net.load_state_dict(checkpoint_state["net_state_dict"])
            optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    trainset, testset = load_data(data_dir)

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs]
    )

    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8
    )

    for epoch in range(start_epoch, 10):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print(
                    "[%d, %5d] loss: %.3f"
                    % (epoch + 1, i + 1, running_loss / epoch_steps)
                )
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": net.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "wb") as fp:
                pickle.dump(checkpoint_data, fp)

            checkpoint = Checkpoint.from_directory(checkpoint_dir)
            train.report(
                {"loss": val_loss / val_steps, "accuracy": correct / total},
                checkpoint=checkpoint,
            )

    print("Finished Training")

In [7]:
def test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [None]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    data_dir = os.path.abspath("./data")
    load_data(data_dir)
    config = {
        "l1": tune.choice([2**i for i in range(9)]),
        "l2": tune.choice([2**i for i in range(9)]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16]),
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2,
    )
    result = tune.run(
        partial(train_cifar, data_dir=data_dir),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
    print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}")

    best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint = result.get_best_checkpoint(trial=best_trial, metric="accuracy", mode="max")
    with best_checkpoint.as_directory() as checkpoint_dir:
        data_path = Path(checkpoint_dir) / "data.pkl"
        with open(data_path, "rb") as fp:
            best_checkpoint_data = pickle.load(fp)

        best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"])
        test_acc = test_accuracy(best_trained_model, device)
        print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=10, max_num_epochs=10, gpus_per_trial=0)

100%|██████████| 170M/170M [00:14<00:00, 11.6MB/s]
2025-09-08 19:45:42,815	INFO worker.py:1951 -- Started a local Ray instance.
2025-09-08 19:45:50,543	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.


+--------------------------------------------------------------------+
| Configuration for experiment     train_cifar_2025-09-08_19-45-50   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        AsyncHyperBandScheduler           |
| Number of trials                 10                                |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_cifar_2025-09-08_19-45-50
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-09-08_19-45-38_674511_315/artifacts/2025-09-08_19-45-50/train_cifar_2025-09-08_19-45-50/driver_artifacts`

Trial status: 10 PENDING
Current time: 2025-09-08 19:45:51. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------



[36m(func pid=3189)[0m [1,  2000] loss: 2.321

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:46:21. Total running time: 30s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size |
+-------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2 |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4 |
| train_cifar_6c5b1_00003   PENDING       2     32   0.0105631               16 |
| train_cifar_6c5b1_00004   PENDING      32     64   0.0223401                2 |
| train_cifar_6c5b1_00005   PENDING      64     64   0.0851156                8 |
| train_cifar_6c5b1_00006   PENDING     128

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000000)



Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:46:51. Total running time: 1min 0s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        1            40.4249   2.31564       0.0993 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                                    |
| train_cifar_6c5b1_00003  

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000001)



Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:47:21. Total running time: 1min 30s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        2            77.7998   2.32988       0.0993 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                                    |
| train_cifar_6c5b1_00003 

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000002)


[36m(func pid=3189)[0m [4,  2000] loss: 2.320
[36m(func pid=3189)[0m [4,  4000] loss: 1.160

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:48:21. Total running time: 2min 30s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        3            113.725   2.32133       0.0969 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.00012361

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000003)


[36m(func pid=3189)[0m [5,  2000] loss: 2.319

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:48:51. Total running time: 3min 0s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)     loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        4            151.046   2.3111       0.0969 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                   |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                      

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000004)


[36m(func pid=3189)[0m [6,  2000] loss: 2.321

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:49:21. Total running time: 3min 30s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        5             188.22   2.32448        0.104 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000005)



Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:49:51. Total running time: 4min 1s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        6            223.869   2.32333       0.0969 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                                    |
| train_cifar_6c5b1_00003  

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000006)



Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:50:21. Total running time: 4min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        7            260.808   2.33993       0.0993 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                                    |
| train_cifar_6c5b1_00003 

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000007)


[36m(func pid=3189)[0m [9,  2000] loss: 2.320

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:51:21. Total running time: 5min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        8            298.367   2.30805       0.0993 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000008)


[36m(func pid=3189)[0m [10,  2000] loss: 2.320

Trial status: 1 RUNNING | 9 PENDING
Current time: 2025-09-08 19:51:51. Total running time: 6min 1s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status       l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   RUNNING       1      8   0.0622516                8        9            334.489   2.32559       0.0995 |
| train_cifar_6c5b1_00001   PENDING     256     64   0.0042474                2                                                    |
| train_cifar_6c5b1_00002   PENDING      64     64   0.000123616              4                                

[36m(func pid=3189)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00000_0_batch_size=8,l1=1,l2=8,lr=0.0623_2025-09-08_19-45-50/checkpoint_000009)



Trial status: 1 TERMINATED | 9 PENDING
Current time: 2025-09-08 19:52:22. Total running time: 6min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)     loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244       0.1025 |
| train_cifar_6c5b1_00001   PENDING       256     64   0.0042474                2                                                   |
| train_cifar_6c5b1_00002   PENDING        64     64   0.000123616              4                                                   |
| train_cifar_6c5



[36m(func pid=5506)[0m [1,  2000] loss: 2.125
[36m(func pid=5506)[0m [1,  4000] loss: 0.975
[36m(func pid=5506)[0m [1,  6000] loss: 0.621

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 19:52:52. Total running time: 7min 1s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)     loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2                                                   |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244       0.102

[36m(func pid=5506)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00001_1_batch_size=2,l1=256,l2=64,lr=0.0042_2025-09-08_19-45-51/checkpoint_000000)


[36m(func pid=5506)[0m [2,  2000] loss: 1.815

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 19:54:22. Total running time: 8min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2        1            107.292   1.88592       0.3054 |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244        0.1025 |
| train_cifar_6c5b1_00002   PENDING        64     64   0.000123616              4     

[36m(func pid=5506)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00001_1_batch_size=2,l1=256,l2=64,lr=0.0042_2025-09-08_19-45-51/checkpoint_000001)


[36m(func pid=5506)[0m [3,  2000] loss: 1.822
[36m(func pid=5506)[0m [3,  4000] loss: 0.898

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 19:56:22. Total running time: 10min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2        2            219.668   1.82882       0.3311 |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244        0.1025 |
| train_cifar_6c5b1_00002   PENDING  

[36m(func pid=5506)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00001_1_batch_size=2,l1=256,l2=64,lr=0.0042_2025-09-08_19-45-51/checkpoint_000002)


[36m(func pid=5506)[0m [4,  2000] loss: 1.871
[36m(func pid=5506)[0m [4,  4000] loss: 0.930

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 19:58:22. Total running time: 12min 32s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2        3            337.124   1.88809       0.3086 |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244        0.1025 |
| train_cifar_6c5b1_00002   PENDING  

[36m(func pid=5506)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00001_1_batch_size=2,l1=256,l2=64,lr=0.0042_2025-09-08_19-45-51/checkpoint_000003)


[36m(func pid=5506)[0m [5,  2000] loss: 1.938
[36m(func pid=5506)[0m [5,  4000] loss: 0.936

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 20:00:23. Total running time: 14min 32s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)      loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2        4            456.707   1.89398       0.3193 |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244        0.1025 |
| train_cifar_6c5b1_00002   PENDING  

[36m(func pid=5506)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/train_cifar_2025-09-08_19-45-50/train_cifar_6c5b1_00001_1_batch_size=2,l1=256,l2=64,lr=0.0042_2025-09-08_19-45-51/checkpoint_000004)


[36m(func pid=5506)[0m [6,  2000] loss: 1.920
[36m(func pid=5506)[0m [6,  4000] loss: 0.951

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2025-09-08 20:02:23. Total running time: 16min 32s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-----------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         l1     l2            lr     batch_size     iter     total time (s)     loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------+
| train_cifar_6c5b1_00001   RUNNING       256     64   0.0042474                2        5            579.099   1.9109       0.3039 |
| train_cifar_6c5b1_00000   TERMINATED      1      8   0.0622516                8       10            371.456   2.3244       0.1025 |
| train_cifar_6c5b1_00002   PENDING       