## HYPERPARAMETER TUNING WITH RAY TUNE

In [1]:
from functools import partial
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [2]:
def load_data(data_dir="./"):
    """Return the CIFAR-10 ``(trainset, testset)`` pair rooted at ``data_dir``.

    Both splits are requested with ``download=True`` and share one transform:
    images are converted to tensors and then normalized with 0.5 for every
    channel's mean and standard deviation.
    """
    half = (0.5, 0.5, 0.5)
    to_normalized_tensor = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(half, half)])

    def fetch(is_train):
        # Same root and transform for both splits; only the split flag differs.
        return torchvision.datasets.CIFAR10(
            root=data_dir,
            train=is_train,
            download=True,
            transform=to_normalized_tensor)

    # Training split is built first, then the test split.
    return fetch(True), fetch(False)

In [3]:
class Net(nn.Module):
    """Small convolutional classifier with tunable hidden layer widths.

    Two convolution + ReLU + max-pool stages feed three fully connected
    layers. ``l1`` and ``l2`` are the output sizes of the first and second
    fully connected layers; the last layer always produces 10 scores.
    """

    def __init__(self, l1=120, l2=84):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=l1)
        self.fc2 = nn.Linear(in_features=l1, out_features=l2)
        self.fc3 = nn.Linear(in_features=l2, out_features=10)

    def forward(self, x):
        """Return the 10 raw class scores for each row of the flattened input."""
        # Convolutional feature extractor: conv -> ReLU -> shared pooling layer.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten to rows of 16 * 5 * 5 features for the linear layers.
        x = x.view(-1, 16 * 5 * 5)

        # Hidden fully connected layers use ReLU; the output layer does not.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

In [4]:
def train_cifar(config, checkpoint_dir=None, data_dir=None):
    """Train and validate ``Net`` on CIFAR-10 as a single Ray Tune trial.

    Every epoch trains on 80% of the training set, evaluates on the remaining
    20%, writes a checkpoint through ``tune.checkpoint_dir`` and reports the
    mean validation ``loss`` and the ``accuracy`` through ``tune.report``.

    Args:
        config: Trial hyperparameters. Reads ``"l1"``, ``"l2"``, ``"lr"`` and
            ``"batch_size"``. An optional ``"num_workers"`` entry sets the
            number of DataLoader worker processes (default 8).
        checkpoint_dir: Directory holding a ``"checkpoint"`` file with a
            ``(model_state, optimizer_state)`` tuple to resume from, or a
            falsy value to start from scratch.
        data_dir: Dataset directory forwarded to ``load_data``. When ``None``
            the default directory of ``load_data`` is used.
    """
    net = Net(config["l1"], config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # Do not forward ``None`` as the dataset root; fall back to the
    # load_data default instead.
    if data_dir is None:
        trainset, _ = load_data()
    else:
        trainset, _ = load_data(data_dir)

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs])

    batch_size = int(config["batch_size"])
    # Every concurrent trial starts this many worker processes per loader,
    # so allow the search config to lower it on memory-constrained machines.
    num_workers = int(config.get("num_workers", 8))

    trainloader = torch.utils.data.DataLoader(
        train_subset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    valloader = torch.utils.data.DataLoader(
        val_subset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)

    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                # Reset the sum AND the step count so the next printed value
                # is the mean over the next window only.
                running_loss = 0.0
                epoch_steps = 0

        # Validation loss and accuracy over the held-out 20%.
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # .item() keeps the accumulator a plain Python float.
                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        # Use a separate name so the ``checkpoint_dir`` argument is not
        # overwritten by the directory handed out for this epoch.
        with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")

In [5]:
def test_accuracy(net, device="cpu"):
    """Return the fraction of CIFAR-10 test images that ``net`` classifies correctly.

    The test split comes from ``load_data()`` called with its default
    directory. Batches are moved to ``device`` before being passed to ``net``,
    and the whole evaluation runs with gradient tracking disabled.
    """
    _, held_out = load_data()

    loader = torch.utils.data.DataLoader(
        held_out, batch_size=4, shuffle=False, num_workers=2)

    n_seen = 0
    n_right = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = net(images)
            # Index of the highest score in each row is the predicted class.
            predicted = torch.max(scores.data, 1)[1]
            n_seen += labels.size(0)
            n_right += (predicted == labels).sum().item()

    return n_right / n_seen

In [6]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    """Run the Ray Tune search and score the best trial on the test set.

    The search samples ``l1`` and ``l2`` as powers of two from 4 to 256,
    ``lr`` log-uniformly in [1e-4, 1e-1] and ``batch_size`` from
    {2, 4, 8, 16}. Trials are scheduled with ``ASHAScheduler`` minimizing the
    reported ``loss``. The trial with the lowest last reported loss is
    restored from its checkpoint and evaluated with ``test_accuracy``.

    Args:
        num_samples: Number of hyperparameter configurations to sample.
        max_num_epochs: ``max_t`` passed to the ASHA scheduler.
        gpus_per_trial: GPUs requested per trial. Values above 1 also wrap the
            restored model in ``nn.DataParallel`` when CUDA is available.

    Raises:
        RuntimeError: If no trial reported a ``loss`` to select from.
    """
    data_dir = os.path.abspath("./data")
    # Fetch the dataset once up front, before any trial starts.
    load_data(data_dir)
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16])
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_cifar, data_dir=data_dir),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            "No trial reported a 'loss' metric; cannot select a best trial.")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    # map_location lets a checkpoint written on a GPU worker be restored on a
    # driver without CUDA. Only the model weights are needed for evaluation.
    model_state, _ = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"),
        map_location=device)
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # Entry point. gpus_per_trial sets the GPUs requested for each trial
    # (0 requests none); change it here to run trials on GPUs.
    main(gpus_per_trial=0, max_num_epochs=10, num_samples=10)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to C:\my_code\Computer_Vision\Torch_Tutorial\Optimization\data\cifar-10-python.tar.gz


26.0%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

37.2%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

55.2%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

72.1%IOPub

Extracting C:\my_code\Computer_Vision\Torch_Tutorial\Optimization\data\cifar-10-python.tar.gz to C:\my_code\Computer_Vision\Torch_Tutorial\Optimization\data
Files already downloaded and verified


2021-07-28 00:39:33,934	INFO services.py:1274 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-07-28 00:39:46,673	INFO registry.py:65 -- Detected unknown callable for trainable. Converting to class.
2021-07-28 00:39:46,988	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


== Status ==
Memory usage on this node: 6.3/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (10 PENDING)
+---------------------+----------+-------+--------------+------+------+-------------+
| Trial name          | status   | loc   |   batch_size |   l1 |   l2 |          lr |
|---------------------+----------+-------+--------------+------+------+-------------|
| DEFAULT_df0ef_00000 | PENDING  |       |            8 |  128 |   16 | 0.00832281  |
| DEFAULT_df0ef_00001 | PENDING  |       |           16 |   64 |   64 | 0.038013    |
| DEFAULT_df0ef_00002 | PENDING  |       |            4 |   16 |    4 | 0.0278657   |
| DEFAULT_df0ef_00003 | PENDING  |       |            4 |    8 |    4 | 0.000381076 |
| DEFAULT_df0ef_00004 | PENDING  

[2m[36m(pid=12976)[0m Traceback (most recent call last):
[2m[36m(pid=12976)[0m   File "<string>", line 1, in <module>
[2m[36m(pid=12976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 105, in spawn_main
[2m[36m(pid=12976)[0m     exitcode = _main(fd)
[2m[36m(pid=12976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 115, in _main
[2m[36m(pid=12976)[0m     self = reduction.pickle.load(from_parent)
[2m[36m(pid=12976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\__init__.py", line 117, in <module>
[2m[36m(pid=12976)[0m     raise err
[2m[36m(pid=12976)[0m OSError: [WinError 1455] �� �۾��� �Ϸ��ϱ� ���� ����¡ ������ �ʹ� �۽��ϴ�. Error loading "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\lib\caffe2_detectron_ops_gpu.dll" or one of its dependencies.
[2m[36m(pid=13480)[0m Traceback (most recent call last):
[2m[36m(pid=13480)[0m 

2021-07-28 00:40:10,768	ERROR trial_runner.py:748 -- Trial DEFAULT_df0ef_00000: Error processing event.
Traceback (most recent call last):
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\trial_runner.py", line 718, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\ray_trial_executor.py", line 688, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\_private\client_mode_hook.py", line 62, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\worker.py", line 1494, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TuneError): [36mray::ImplicitFunc.train_buffered()[39m (pid=12976, ip=192.168.219.101)
  File "python\ray\_raylet.pyx", line 501, in ray._raylet.execute_task
  File "python

Result for DEFAULT_df0ef_00002:
  {}
  
== Status ==
Memory usage on this node: 13.4/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 10.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (1 ERROR, 4 PENDING, 5 RUNNING)
+---------------------+----------+-------+--------------+------+------+-------------+
| Trial name          | status   | loc   |   batch_size |   l1 |   l2 |          lr |
|---------------------+----------+-------+--------------+------+------+-------------|
| DEFAULT_df0ef_00000 | RUNNING  |       |            8 |  128 |   16 | 0.00832281  |
| DEFAULT_df0ef_00001 | RUNNING  |       |           16 |   64 |   64 | 0.038013    |
| DEFAULT_df0ef_00003 | RUNNING  |       |            4 |    8 |    4 | 0.000381076 |
| DEFAULT_df0ef_00004 | RUNNING  |       |            2 

[2m[36m(pid=12976)[0m Windows fatal exception: access violation
[2m[36m(pid=12976)[0m 
[2m[36m(pid=13480)[0m Windows fatal exception: access violation
[2m[36m(pid=13480)[0m 
[2m[36m(pid=6428)[0m Windows fatal exception: access violation
[2m[36m(pid=6428)[0m 


[2m[36m(pid=17160)[0m Files already downloaded and verified
[2m[36m(pid=8976)[0m Files already downloaded and verified
[2m[36m(pid=23764)[0m Files already downloaded and verified
[2m[36m(pid=17160)[0m Files already downloaded and verified
[2m[36m(pid=8976)[0m Files already downloaded and verified
[2m[36m(pid=23764)[0m Files already downloaded and verified


[2m[36m(pid=17160)[0m 2021-07-28 00:40:19,559	ERROR function_runner.py:254 -- Runner Thread raised error.
[2m[36m(pid=17160)[0m Traceback (most recent call last):
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 248, in run
[2m[36m(pid=17160)[0m     self._entrypoint()
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 316, in entrypoint
[2m[36m(pid=17160)[0m     self._status_reporter.get_checkpoint())
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 580, in _trainable_func
[2m[36m(pid=17160)[0m     output = fn()
[2m[36m(pid=17160)[0m   File "<ipython-input-4-1f7decf27e71>", line 40, in train_cifar
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\utils\data\dataloader.py", line 352, in __

[2m[36m(pid=8976)[0m 2021-07-28 00:40:19,723	ERROR function_runner.py:254 -- Runner Thread raised error.
[2m[36m(pid=8976)[0m Traceback (most recent call last):
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 248, in run
[2m[36m(pid=8976)[0m     self._entrypoint()
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 316, in entrypoint
[2m[36m(pid=8976)[0m     self._status_reporter.get_checkpoint())
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 580, in _trainable_func
[2m[36m(pid=8976)[0m     output = fn()
[2m[36m(pid=8976)[0m   File "<ipython-input-4-1f7decf27e71>", line 40, in train_cifar
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\utils\data\dataloader.py", line 352, in __iter__
[2m[36m(pi

Result for DEFAULT_df0ef_00006:
  {}
  
== Status ==
Memory usage on this node: 13.7/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 10.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (4 ERROR, 1 PENDING, 5 RUNNING)
+---------------------+----------+-------+--------------+------+------+-------------+
| Trial name          | status   | loc   |   batch_size |   l1 |   l2 |          lr |
|---------------------+----------+-------+--------------+------+------+-------------|
| DEFAULT_df0ef_00001 | RUNNING  |       |           16 |   64 |   64 | 0.038013    |
| DEFAULT_df0ef_00003 | RUNNING  |       |            4 |    8 |    4 | 0.000381076 |
| DEFAULT_df0ef_00004 | RUNNING  |       |            2 |   16 |    4 | 0.000174042 |
| DEFAULT_df0ef_00007 | RUNNING  |       |            4 

[2m[36m(pid=17160)[0m Windows fatal exception: access violation
[2m[36m(pid=17160)[0m 
[2m[36m(pid=23764)[0m 2021-07-28 00:40:19,856	ERROR function_runner.py:254 -- Runner Thread raised error.
[2m[36m(pid=23764)[0m Traceback (most recent call last):
[2m[36m(pid=23764)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 248, in run
[2m[36m(pid=23764)[0m     self._entrypoint()
[2m[36m(pid=23764)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 316, in entrypoint
[2m[36m(pid=23764)[0m     self._status_reporter.get_checkpoint())
[2m[36m(pid=23764)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\function_runner.py", line 580, in _trainable_func
[2m[36m(pid=23764)[0m     output = fn()
[2m[36m(pid=23764)[0m   File "<ipython-input-4-1f7decf27e71>", line 40, in train_cifar
[2m[36m(pid=23764)[0m   File "C:\Users\PARK JIHO\ana

[2m[36m(pid=8976)[0m Windows fatal exception: access violation
[2m[36m(pid=8976)[0m 
[2m[36m(pid=23764)[0m Windows fatal exception: access violation
[2m[36m(pid=23764)[0m 


Result for DEFAULT_df0ef_00007:
  {}
  


[2m[36m(pid=17160)[0m Traceback (most recent call last):
[2m[36m(pid=17160)[0m   File "<string>", line 1, in <module>
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 105, in spawn_main
[2m[36m(pid=17160)[0m     exitcode = _main(fd)
[2m[36m(pid=17160)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 115, in _main
[2m[36m(pid=17160)[0m     self = reduction.pickle.load(from_parent)
[2m[36m(pid=17160)[0m EOFError: Ran out of input
[2m[36m(pid=8976)[0m Traceback (most recent call last):
[2m[36m(pid=8976)[0m   File "<string>", line 1, in <module>
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 105, in spawn_main
[2m[36m(pid=8976)[0m     exitcode = _main(fd)
[2m[36m(pid=8976)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 115, in _main
[2m[36m(pid=8976)[0m     self

[2m[36m(pid=22428)[0m Files already downloaded and verified
[2m[36m(pid=22428)[0m Files already downloaded and verified


[2m[36m(pid=24740)[0m Traceback (most recent call last):
[2m[36m(pid=24740)[0m   File "<string>", line 1, in <module>
[2m[36m(pid=24740)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 105, in spawn_main
[2m[36m(pid=24740)[0m     exitcode = _main(fd)
[2m[36m(pid=24740)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\multiprocessing\spawn.py", line 115, in _main
[2m[36m(pid=24740)[0m     self = reduction.pickle.load(from_parent)
[2m[36m(pid=24740)[0m   File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\__init__.py", line 117, in <module>
[2m[36m(pid=24740)[0m     raise err
[2m[36m(pid=24740)[0m OSError: [WinError 1455] �� �۾��� �Ϸ��ϱ� ���� ����¡ ������ �ʹ� �۽��ϴ�. Error loading "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\torch\lib\caffe2_detectron_ops_gpu.dll" or one of its dependencies.
[2m[36m(pid=13244)[0m Traceback (most recent call last):
[2m[36m(pid=13244)[0m 

2021-07-28 00:40:25,194	ERROR trial_runner.py:748 -- Trial DEFAULT_df0ef_00004: Error processing event.
Traceback (most recent call last):
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\trial_runner.py", line 718, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\tune\ray_trial_executor.py", line 688, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\_private\client_mode_hook.py", line 62, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\PARK JIHO\anaconda3\envs\torch\lib\site-packages\ray\worker.py", line 1494, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TuneError): [36mray::ImplicitFunc.train_buffered()[39m (pid=13244, ip=192.168.219.101)
  File "python\ray\_raylet.pyx", line 501, in ray._raylet.execute_task
  File "python

Result for DEFAULT_df0ef_00003:
  {}
  
== Status ==
Memory usage on this node: 10.6/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 6.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (7 ERROR, 3 RUNNING)
+---------------------+----------+-------+--------------+------+------+-------------+
| Trial name          | status   | loc   |   batch_size |   l1 |   l2 |          lr |
|---------------------+----------+-------+--------------+------+------+-------------|
| DEFAULT_df0ef_00001 | RUNNING  |       |           16 |   64 |   64 | 0.038013    |
| DEFAULT_df0ef_00004 | RUNNING  |       |            2 |   16 |    4 | 0.000174042 |
| DEFAULT_df0ef_00009 | RUNNING  |       |           16 |  128 |    4 | 0.000411013 |
| DEFAULT_df0ef_00000 | ERROR    |       |            8 |  128 |   1

[2m[36m(pid=24740)[0m Windows fatal exception: access violation
[2m[36m(pid=24740)[0m 
[2m[36m(pid=13244)[0m Windows fatal exception: access violation
[2m[36m(pid=13244)[0m 
[2m[36m(pid=9092)[0m Windows fatal exception: access violation
[2m[36m(pid=9092)[0m 


[2m[36m(pid=22428)[0m [1,  2000] loss: 2.317
Result for DEFAULT_df0ef_00009:
  accuracy: 0.1243
  date: 2021-07-28_00-41-02
  done: false
  experiment_id: 1c6056f30c4843f092492f6cb727925e
  hostname: DESKTOP-G26MRLK
  iterations_since_restore: 1
  loss: 2.302574312210083
  node_ip: 192.168.219.101
  pid: 22428
  should_checkpoint: true
  time_since_restore: 41.067832946777344
  time_this_iter_s: 41.067832946777344
  time_total_s: 41.067832946777344
  timestamp: 1627400462
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: df0ef_00009
  
== Status ==
Memory usage on this node: 5.1/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -2.302574312210083
Resources requested: 2.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (9 ERROR, 1 RUNNING)
+---------------------+----------+-----------------

[2m[36m(pid=22428)[0m [3,  2000] loss: 2.215
Result for DEFAULT_df0ef_00009:
  accuracy: 0.201
  date: 2021-07-28_00-42-14
  done: false
  experiment_id: 1c6056f30c4843f092492f6cb727925e
  hostname: DESKTOP-G26MRLK
  iterations_since_restore: 3
  loss: 2.125642303466797
  node_ip: 192.168.219.101
  pid: 22428
  should_checkpoint: true
  time_since_restore: 112.96740436553955
  time_this_iter_s: 36.170666456222534
  time_total_s: 112.96740436553955
  timestamp: 1627400534
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: df0ef_00009
  
== Status ==
Memory usage on this node: 5.3/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -2.2590433113098145 | Iter 1.000: -2.302574312210083
Resources requested: 2.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (9 ERROR, 1 RUNNING)
+---------------------+----------+---

[2m[36m(pid=22428)[0m [5,  2000] loss: 1.951
Result for DEFAULT_df0ef_00009:
  accuracy: 0.2848
  date: 2021-07-28_00-43-28
  done: false
  experiment_id: 1c6056f30c4843f092492f6cb727925e
  hostname: DESKTOP-G26MRLK
  iterations_since_restore: 5
  loss: 1.8587878770828248
  node_ip: 192.168.219.101
  pid: 22428
  should_checkpoint: true
  time_since_restore: 186.14989757537842
  time_this_iter_s: 36.549498558044434
  time_total_s: 186.14989757537842
  timestamp: 1627400608
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: df0ef_00009
  
== Status ==
Memory usage on this node: 5.8/15.9 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: -2.0200763750076294 | Iter 2.000: -2.2590433113098145 | Iter 1.000: -2.302574312210083
Resources requested: 2.0/12 CPUs, 0/1 GPUs, 0.0/6.24 GiB heap, 0.0/3.12 GiB objects
Result logdir: C:\Users\PARK JIHO\ray_results\DEFAULT_2021-07-28_00-39-46
Number of trials: 10/10 (9 ERROR, 1 RUNNING)
+-------------------



KeyboardInterrupt: 