<a href="https://colab.research.google.com/github/qkrwoghd04/Image-text_fusion_for_binary_classification_using_BERT-ViT/blob/master/practice/vit_classification_using_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
!pip install torch
!pip install torchinfo
!pip install ray
!pip install -U tensorboardx



In [25]:
import os
import sys
import tempfile
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.io import read_image
# Image Classification
import torch
from torchvision.transforms import v2
from torch import nn
import torch.nn.functional as F
from torchvision import datasets
import argparse
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
from torchinfo import summary
from functools import partial
import os
from pathlib import Path
import torchvision.transforms as transforms
from ray import tune
from ray import train
from ray.train import Checkpoint, get_checkpoint, Result
from ray.tune.schedulers import ASHAScheduler
from ray.tune.examples.mnist_pytorch import train_mnist
from ray.tune import ResultGrid,CLIReporter
import ray.cloudpickle as pickle

In [26]:
# Training device:
device = 'cuda'

# hyperparameter
learning_rate = 1e-4
batch_size = 64
epochs = 20
# based on paper's "attention is all you need"
BATCH_SIZE = 64
PATCH_SIZE = 16
IMAGE_WIDTH = 224
IMAGE_HEIGHT = IMAGE_WIDTH
IMAGE_CHANNELS = 3
EMBEDDING_DIMS = IMAGE_CHANNELS * PATCH_SIZE**2 #(3 * 16 * 16) = 768
NUM_OF_PATCHES = int((IMAGE_WIDTH * IMAGE_HEIGHT) / PATCH_SIZE**2) #((224*224)/

In [27]:
class CustomImageDataset():
  def __init__(self, csv_file, img_dir, transform=None, target_transform=None):
        self.img_file = pd.read_csv(os.path.join(img_dir, csv_file))
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

  def __len__(self):
    return len(self.img_file)

  def __getitem__(self, index):
    # 레이블을 기반으로 서브폴더 경로 결정
    label = self.img_file.iloc[index, 2]
    subfolder = 'Sleep' if label == 'Sleep' else 'Fall'
    img_path = os.path.join(self.img_dir, 'train', subfolder, self.img_file.iloc[index, 0])
    if(label == "Sleep"):
       label = 0
    else: label = 1
    image = read_image(img_path)
    if self.transform:
        image = self.transform(image)
    if self.target_transform:
        label = self.target_transform(label)

    return image, label

In [28]:
def image_transform():

  H, W = 224, 224
  img = torch.randint(0, 256, size=(3, H, W), dtype=torch.uint8)

  transforms = v2.Compose([
      v2.RandomResizedCrop(size=(224, 224), antialias=True),
      v2.RandomPhotometricDistort(p=1),
      v2.RandomChannelPermutation() ,# 채널 무작위 변경
      v2.RandomHorizontalFlip(p=0.2),
      v2.ToDtype(torch.float32, scale=True),
      v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])


  return transforms


def visualize_data(train_dataloader):
    # 이미지와 정답(label)을 표시합니다.
    train_features, train_labels = next(iter(train_dataloader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")
    img = train_features[0].squeeze()
    label = train_labels[0]

    if img.dim() == 3 and img.size(0) == 3:
        img = img.permute(1, 2, 0)

    plt.imshow(img, cmap="gray")
    plt.show()
    print(f"Label: {label}")

In [29]:
class PatchEmbeddingLayer(nn.Module):
    def __init__(self, in_channels, patch_size, embedding_dim):
        super().__init__()
        self.patch_size = patch_size
        self.embedding_dim = embedding_dim
        self.in_channels = in_channels
        self.conv_layer = nn.Conv2d(in_channels=in_channels,
                                    out_channels=embedding_dim, kernel_size=patch_size, stride=patch_size)
        self.flatten_layer = nn.Flatten(start_dim=1, end_dim=2)
        self.class_token_embeddings = nn.Parameter(torch.randn(1, 1, embedding_dim), requires_grad=True)
        self.position_embeddings = nn.Parameter(torch.randn(1, NUM_OF_PATCHES + 1, embedding_dim), requires_grad=True)

    def forward(self, x):
        batch_size = x.size(0)
        class_token_embeddings = self.class_token_embeddings.repeat(batch_size, 1, 1)
        x = self.conv_layer(x)
        x = x.permute(0, 2, 3, 1)
        x = self.flatten_layer(x)
        x = torch.cat((class_token_embeddings, x), dim=1)
        x = x + self.position_embeddings
        return x

In [30]:
class MultiHeadSelfAttentionBlock(nn.Module):
    def __init__(self, embedding_dims = 768, num_heads = 12, attn_dropout=0.0):
        super().__init__()
        self.embedding_dims = embedding_dims
        self.num_head = num_heads
        self.attn_dropout = attn_dropout
        self.layernorm = nn.LayerNorm(normalized_shape = embedding_dims)
        self.multiheadattention = nn.MultiheadAttention(num_heads = num_heads,
                                                        embed_dim = embedding_dims,
                                                        dropout = attn_dropout,
                                                        batch_first = True,
                                                        )
    def forward(self, x):
        x = self.layernorm(x)
        output,_ = self.multiheadattention(query=x, key=x, value=x, need_weights=False)
        return output

In [31]:
class MachineLearningPerceptronBlock(nn.Module):
    def __init__(self, embedding_dims, mlp_size, mlp_dropout):
        super().__init__()
        self.embedding_dims = embedding_dims
        self.mlp_size = mlp_size
        self.mlp_dropout = mlp_dropout

        self.layernorm = nn.LayerNorm(normalized_shape = embedding_dims)
        self.mlp = nn.Sequential(
            nn.Linear(in_features = embedding_dims, out_features = mlp_size),
            nn.GELU(),
            nn.Dropout(p = mlp_dropout),
            nn.Linear(in_features = mlp_size, out_features = embedding_dims),
            nn.Dropout(p = mlp_dropout)
        )
    def forward(self,x):
        return self.mlp(self.layernorm(x))

In [32]:
class TransformerBlock(nn.Module):
    def __init__(self, embedding_dims = 768,
                 mlp_dropout=0.1,
                 attn_dropout = 0.0,
                 mlp_size = 3072,
                 num_heads = 12,
                 ):
        super().__init__()

        self.mas_block = MultiHeadSelfAttentionBlock(embedding_dims = embedding_dims, num_heads = num_heads, attn_dropout=attn_dropout,)

        self.mlp_block = MachineLearningPerceptronBlock(embedding_dims=embedding_dims, mlp_size=mlp_size, mlp_dropout=mlp_dropout,)
    def forward(self, x):
        x = self.mas_block(x) + x
        x = self.mlp_block(x) + x
        return x

In [33]:
class vit_model(nn.Module):
    def __init__(self, img_size = 224, in_channels = 3, patch_size = 16, embedding_dims = 768, num_transformer_layers = 12, mlp_dropout = 0.1, attn_dropout = 0.0, mlp_size = 2048, num_heads = 8, num_classes = 2):
        super().__init__()

        self.patch_embedding_layer = PatchEmbeddingLayer(in_channels=in_channels, patch_size=patch_size, embedding_dim=embedding_dims)

        self.transformer_encoder = nn.Sequential(*[TransformerBlock(embedding_dims = embedding_dims, mlp_dropout=mlp_dropout, attn_dropout=attn_dropout, mlp_size=mlp_size, num_heads=num_heads) for _ in range(num_transformer_layers)])
        self.classifier = nn.Sequential(nn.LayerNorm(normalized_shape = embedding_dims), nn.Linear(in_features = embedding_dims, out_features = num_classes))
    def forward(self, x):
        return self.classifier(self.transformer_encoder(self.patch_embedding_layer(x))[:, 0])


In [34]:
def train_loop(dataloader, model, loss_fn, optimizer):
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_accuracy = 0
        for data, label in tqdm(dataloader):
            data = data.to(device)
            label = label.to(device)

            output = model(data)
            loss = loss_fn(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = (output.argmax(dim=1) == label).float().mean()
            epoch_accuracy += acc / len(dataloader)
            epoch_loss += loss / len(dataloader)

        print(
            f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f}\n"
        )

In [35]:
file_path = r"/content/drive/MyDrive/image_dataset/processed/"
train_data_csv = "train_captions.csv"
test_data_csv = "test_captions.csv"
transform = image_transform()
train_dataset = CustomImageDataset(csv_file=train_data_csv, img_dir=file_path, transform=transform,)
# 트레이닝과 검증 데이터셋 크기 계산
total_train = len(train_dataset)
val_size = int(0.20 * total_train)
train_size = total_train - val_size

# 데이터셋 분할
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])
test_dataset = CustomImageDataset(csv_file=test_data_csv, img_dir=file_path, transform=transform,)
train_dataloader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=1)
val_dataloader = DataLoader(val_subset, batch_size=batch_size, shuffle=True, num_workers=1)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

In [36]:
# visualize_data(train_dataloader)

In [37]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = vit_model().to(device)
summary(model, input_size=(batch_size, IMAGE_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT))

Layer (type:depth-idx)                                  Output Shape              Param #
vit_model                                               [64, 2]                   --
├─PatchEmbeddingLayer: 1-1                              [64, 197, 768]            152,064
│    └─Conv2d: 2-1                                      [64, 768, 14, 14]         590,592
│    └─Flatten: 2-2                                     [64, 196, 768]            --
├─Sequential: 1-2                                       [64, 197, 768]            --
│    └─TransformerBlock: 2-3                            [64, 197, 768]            --
│    │    └─MultiHeadSelfAttentionBlock: 3-1            [64, 197, 768]            2,363,904
│    │    └─MachineLearningPerceptronBlock: 3-2         [64, 197, 768]            3,150,080
│    └─TransformerBlock: 2-4                            [64, 197, 768]            --
│    │    └─MultiHeadSelfAttentionBlock: 3-3            [64, 197, 768]            2,363,904
│    │    └─MachineLearningPe

In [38]:
# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# train_loop(train_dataloader, model, loss_fn, optimizer)

In [58]:
CHECKPOINT_FREQ = 3
def train_func(config):
  start = 1
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = vit_model().to(device)
  loss_fn = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

  #Define checkpoint
  checkpoint = train.get_checkpoint()
  if checkpoint:
      with checkpoint.as_directory() as checkpoint_dir:
          checkpoint_dict = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt"))
          start = checkpoint_dict["epoch"] + 1
          model.load_state_dict(checkpoint_dict["model_state"])

  train_dataloader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=1)
  val_dataloader = DataLoader(val_subset, batch_size=batch_size, shuffle=True, num_workers=1)

  #model training
  for epoch in range(start, config["epochs"] + 1):  # loop over the dataset multiple times
      running_loss = 0.0
      epoch_steps = 0
      for i, data in enumerate(train_dataloader, 0):
          # get the inputs; data is a list of [inputs, labels]
          inputs, labels = data
          inputs, labels = inputs.to(device), labels.to(device)

          # zero the parameter gradients
          optimizer.zero_grad()

          # forward + backward + optimize
          outputs = model(inputs)
          loss = loss_fn(outputs, labels)
          loss.backward()
          optimizer.step()

          # print statistics
          running_loss += loss.item()
          epoch_steps += 1
          if i % 2000 == 1999:  # print every 2000 mini-batches
              print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
              running_loss = 0.0

      # Validation loss
      val_loss = 0.0
      val_steps = 0
      total = 0
      correct = 0
      for i, data in enumerate(val_dataloader, 0):
          with torch.no_grad():
              inputs, labels = data
              inputs, labels = inputs.to(device), labels.to(device)

              outputs = model(inputs)
              _, predicted = torch.max(outputs.data, 1)
              total += labels.size(0)
              correct += (predicted == labels).sum().item()

              loss = loss_fn(outputs, labels)
              val_loss += loss.cpu().numpy()
              val_steps += 1
      metrics = {
            "loss": running_loss / len(train_dataloader),
            "val_loss": val_loss / len(val_dataloader),
            "mean_accuracy": correct / total
      }
      if epoch % CHECKPOINT_FREQ == 0:
            checkpoint_dict = {
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "epoch": epoch
            }
            checkpoint_path = os.path.join(train.get_checkpoint_dir(), "checkpoint.pt")
            torch.save(checkpoint_dict, checkpoint_path)
            train.report(metrics, checkpoint=checkpoint_path)
      else:
          train.report(metrics)

In [40]:
def test_accuracy(model, device="cpu"):
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_dataloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [46]:
storage_path = "/content/drive/MyDrive/ray_results"
exp_name = "tune_analyzing_results"
trainable_with_resources = tune.with_resources(train_func, {"cpu":4 , "gpu":1, "accelerator_type:T4":1})
tuner = tune.Tuner(
    trainable_with_resources,
    param_space={
        "lr": tune.loguniform(1e-4, 1e-5),
        "batch_size": tune.choice([32, 64]),
        "epochs": 30,
        "should_checkpoint":True,

    },
    run_config=train.RunConfig(
        name=exp_name,
        stop={"training_iteration": 30},
        checkpoint_config=train.CheckpointConfig(
            checkpoint_score_attribute="mean_accuracy",
            num_to_keep=3
        ),
        storage_path=storage_path,
    ),
    tune_config=tune.TuneConfig(mode="max", metric="mean_accuracy", num_samples=8, max_concurrent_trials=1),
)
result_grid: ResultGrid = tuner.fit()


+-----------------------------------------------------------+
| Configuration for experiment     tune_analyzing_results   |
+-----------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator    |
| Scheduler                        FIFOScheduler            |
| Number of trials                 8                        |
+-----------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/ray_results/tune_analyzing_results
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-05-06_13-14-12_283656_9898/artifacts/2024-05-06_13-27-58/tune_analyzing_results/driver_artifacts`

Trial status: 1 PENDING
Current time: 2024-05-06 13:27:58. Total running time: 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------+
| Trial name               status              lr     batch_s

[36m(train_func pid=15174)[0m   self.pid = os.fork()



Trial status: 1 RUNNING
Current time: 2024-05-06 13:28:28. Total running time: 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size     acc     iter     total time (s)       loss     val_loss |
+----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32   0.625        2            25.4513   0.642763     0.623237 |
+----------------------------------------------------------------------------------------------------------------------------+


[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000000)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:28:59. Total running time: 1min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.5875 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32   0.5875        4            49.9956   0.648501     0.695762 |
+-----------------------------------------------------------------------------------------------------------------------------+


[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000001)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:29:29. Total running time: 1min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.575 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size     acc     iter     total time (s)       loss     val_loss |
+----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32   0.575        6            74.2769   0.622768     0.678835 |
+----------------------------------------------------------------------------------------------------------------------------+


[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000002)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:29:59. Total running time: 2min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.6 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size     acc     iter     total time (s)       loss     val_loss |
+----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32     0.6        9            110.755   0.596602     0.616686 |
+----------------------------------------------------------------------------------------------------------------------------+
Trial status: 1 RUNNING
Current time: 2024-05-06 13:30:29. Total running

[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000003)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:30:59. Total running time: 3min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size     acc     iter     total time (s)       loss     val_loss |
+----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32     0.7       14            172.489   0.576366     0.665532 |
+----------------------------------------------------------------------------------------------------------------------------+


[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000004)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:31:29. Total running time: 3min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+---------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size     acc     iter     total time (s)      loss     val_loss |
+---------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32     0.7       16            197.111   0.57065      0.62788 |
+---------------------------------------------------------------------------------------------------------------------------+


[36m(train_func pid=15174)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000005)


Trial status: 1 RUNNING
Current time: 2024-05-06 13:31:59. Total running time: 4min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.6875 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-----------------------------------------------------------------------------------------------------------------------------+
| Trial name               status              lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-----------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   RUNNING    1.00821e-05             32   0.6875       19            234.146   0.562506     0.584344 |
+-----------------------------------------------------------------------------------------------------------------------------+

Trial train_func_74691_00000 completed after 20 iterations at 2

[36m(train_func pid=16767)[0m   self.pid = os.fork()



Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:32:29. Total running time: 4min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.4625        1            13.1955   1.18724      0.788089 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
+------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000000)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:32:59. Total running time: 5min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.6           3            37.7182   0.676314     0.655295 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
+--------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000001)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:33:29. Total running time: 5min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.625         6            74.1285   0.626008     0.6604   |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
+-------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000002)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:34:29. Total running time: 6min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.65         10             122.94   0.54756      0.540926 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20             246.39   0.540257     0.641729 |
+-------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000003)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:34:59. Total running time: 7min 0s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.725        13            159.991   0.504101     0.575502 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
+--------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000004)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:35:29. Total running time: 7min 30s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.7375       15            184.453   0.503036     0.497861 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
+-------------------------------------

[36m(train_func pid=16767)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00001_1_batch_size=32,lr=0.0001_2024-05-06_13-32-09/checkpoint_000005)


Trial status: 1 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:35:59. Total running time: 8min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00001 with mean_accuracy=0.7875 and params={'lr': 7.431039494458575e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00001   RUNNING      7.43104e-05             32   0.7875       18            220.835   0.522037     0.460415 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
+---------------------------------------

[36m(train_func pid=18348)[0m   self.pid = os.fork()



Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:36:29. Total running time: 8min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64                                                              |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000000)


Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:37:29. Total running time: 9min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64   0.55          5            62.2008   0.651615     0.681285 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000001)


Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:38:00. Total running time: 10min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64   0.525         7            86.6169   0.623529     0.687574 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000002)


Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:38:30. Total running time: 10min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64   0.525        10            123.378   0.625832     0.647986 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000003)


Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:39:00. Total running time: 11min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64   0.6625       12            147.785   0.579124     0.632731 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000004)


Trial status: 2 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:39:30. Total running time: 11min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00002   RUNNING      5.87148e-05             64   0.6125       15            184.493   0.571382     0.632625 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=18348)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00002_2_batch_size=64,lr=0.0001_2024-05-06_13-36-19/checkpoint_000005)



Trial train_func_74691_00002 completed after 20 iterations at 2024-05-06 13:40:30. Total running time: 12min 31s
+-------------------------------------------------+
| Trial train_func_74691_00002 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        12.1237 |
| time_total_s                            245.474 |
| training_iteration                           20 |
| loss                                     0.5139 |
| mean_accuracy                               0.7 |
| val_loss                                0.58619 |
+-------------------------------------------------+

Trial status: 3 TERMINATED | 1 PENDING
Current time: 2024-05-06 13:40:30. Total running time: 12min 31s
Logical resource usage: 0/8 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+---

[36m(train_func pid=19931)[0m   self.pid = os.fork()



Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:41:00. Total running time: 13min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.4875        2            25.3523   0.786051     0.764761 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000000)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:41:30. Total running time: 13min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.5125        4            50.1129   0.691663     0.66214  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000001)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:42:00. Total running time: 14min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.5875        6            74.7253   0.608856     0.67321  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000002)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:42:30. Total running time: 14min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.6125        9            111.706   0.626129     0.661931 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000003)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:43:30. Total running time: 15min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.6375       14            173.497   0.579888     0.561397 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000004)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:44:00. Total running time: 16min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.7          16            198.217   0.556312     0.573201 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=19931)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00003_3_batch_size=32,lr=0.0001_2024-05-06_13-40-30/checkpoint_000005)


Trial status: 3 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:44:30. Total running time: 16min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00003   RUNNING      7.60115e-05             32   0.75         19            235.37    0.528926     0.528495 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=21525)[0m   self.pid = os.fork()



Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:45:00. Total running time: 17min 1s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32                                                              |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000000)


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:45:30. Total running time: 17min 31s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32   0.625         3            38.1587   0.731405     0.715849 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000001)


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:46:30. Total running time: 18min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32   0.575         8             99.805   0.628481     0.694976 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000002)


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:47:00. Total running time: 19min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32   0.55         10            124.581   0.588475     0.644847 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000003)


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:47:30. Total running time: 19min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32   0.6875       13            161.774   0.566096     0.681486 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000004)


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:48:01. Total running time: 20min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00004   RUNNING      6.61608e-05             32   0.7125       15            186.515   0.561827     0.57864  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=21525)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00004_4_batch_size=32,lr=0.0001_2024-05-06_13-44-42/checkpoint_000005)



Trial train_func_74691_00004 completed after 20 iterations at 2024-05-06 13:48:56. Total running time: 20min 57s
+-------------------------------------------------+
| Trial train_func_74691_00004 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        12.2441 |
| time_total_s                            248.356 |
| training_iteration                           20 |
| loss                                    0.53754 |
| mean_accuracy                              0.65 |
| val_loss                                0.58561 |
+-------------------------------------------------+

Trial train_func_74691_00005 started with configuration:
+-----------------------------------------------+
| Trial train_func_74691_00005 config           |
+-----------------------------------------------+
| batch_size                                 32 |
| epochs                                     20 |
| lr  

[36m(train_func pid=23118)[0m   self.pid = os.fork()


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:49:31. Total running time: 21min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.45          2            25.5143   0.776268     0.743548 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000000)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:50:01. Total running time: 22min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.525         4             49.938   0.632498     0.667241 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000001)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:50:31. Total running time: 22min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.5625        7            86.8855   0.588542     0.70768  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000002)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:51:01. Total running time: 23min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.5875        9            111.563   0.55628      0.660261 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000003)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:51:31. Total running time: 23min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.675        12            148.404   0.575805     0.55166  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000004)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:52:31. Total running time: 24min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.6875       17            209.225   0.556131     0.618344 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=23118)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00005_5_batch_size=32,lr=0.0000_2024-05-06_13-48-56/checkpoint_000005)


Trial status: 5 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:53:01. Total running time: 25min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00005   RUNNING      2.03388e-05             32   0.625        19            233.663   0.505988     0.505148 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=24705)[0m   self.pid = os.fork()



Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:53:31. Total running time: 25min 32s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.55          1            13.4089   2.14495      1.01465  |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATE

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000000)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:54:01. Total running time: 26min 2s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.575         3            37.6989   0.775483     0.798029 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000001)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:54:31. Total running time: 26min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.6375        6            74.1569   0.641875     0.660134 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000002)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:55:31. Total running time: 27min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.7          11            135.777   0.594821     0.638135 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000003)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:56:01. Total running time: 28min 3s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.6875       13            160.859   0.578776     0.510431 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000004)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:56:31. Total running time: 28min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.675        16            198.156   0.552823     0.528935 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=24705)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00006_6_batch_size=16,lr=0.0001_2024-05-06_13-53-06/checkpoint_000005)


Trial status: 6 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:57:02. Total running time: 29min 3s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00006   RUNNING      8.11878e-05             16   0.725        18            222.993   0.513286     0.477152 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=26302)[0m   self.pid = os.fork()



Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:57:32. Total running time: 29min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64                                                              |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATE

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000000)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:58:32. Total running time: 30min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.55          5            62.8455   0.645782     0.691765 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000001)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:59:02. Total running time: 31min 3s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.5875        7            87.7236   0.618031     0.683475 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20           246.39     0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000002)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 13:59:32. Total running time: 31min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.6125       10            124.612   0.587159     0.656678 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000003)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 14:00:02. Total running time: 32min 3s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.65         12            149.229   0.576714     0.584979 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000004)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 14:00:32. Total running time: 32min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00007 with mean_accuracy=0.775 and params={'lr': 6.531575756954835e-05, 'batch_size': 64, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.775        15            186.481   0.566425     0.537768 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED  

[36m(train_func pid=26302)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00007_7_batch_size=64,lr=0.0001_2024-05-06_13-57-20/checkpoint_000005)


Trial status: 7 TERMINATED | 1 RUNNING
Current time: 2024-05-06 14:01:32. Total running time: 33min 33s
Logical resource usage: 4.0/8 CPUs, 1.0/1 GPUs (1.0/1.0 accelerator_type:T4)
Current best trial: 74691_00007 with mean_accuracy=0.8125 and params={'lr': 6.531575756954835e-05, 'batch_size': 64, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00007   RUNNING      6.53158e-05             64   0.8125       19            236.043   0.519113     0.569293 |
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED 

You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).



Trial train_func_74691_00007 completed after 20 iterations at 2024-05-06 14:01:33. Total running time: 33min 34s
+-------------------------------------------------+
| Trial train_func_74691_00007 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        12.3231 |
| time_total_s                            248.366 |
| training_iteration                           20 |
| loss                                    0.50425 |
| mean_accuracy                             0.675 |
| val_loss                                0.67223 |
+-------------------------------------------------+


2024-05-06 14:01:33,466	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/ray_results/tune_analyzing_results' in 0.2265s.



Trial status: 8 TERMINATED
Current time: 2024-05-06 14:01:33. Total running time: 33min 34s
Logical resource usage: 0/8 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
Current best trial: 74691_00000 with mean_accuracy=0.7625 and params={'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}
+-------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status                lr     batch_size      acc     iter     total time (s)       loss     val_loss |
+-------------------------------------------------------------------------------------------------------------------------------+
| train_func_74691_00000   TERMINATED   1.00821e-05             32   0.7625       20            246.39    0.540257     0.641729 |
| train_func_74691_00001   TERMINATED   7.43104e-05             32   0.7          20            245.199   0.49289      0.588283 |
| train_func_74691_00002   TERMINATED   5.87148e-05 

In [47]:
num_results = len(result_grid)
print("Number of results:", num_results)

Number of results: 8


In [48]:
# Iterate over results
for i, result in enumerate(result_grid):
    if result.error:
        print(f"Trial #{i} had an error:", result.error)
        continue

    print(
        f"Trial #{i} finished successfully with a mean accuracy metric of:",
        result.metrics["mean_accuracy"]
    )

Trial #0 finished successfully with a mean accuracy metric of: 0.7625
Trial #1 finished successfully with a mean accuracy metric of: 0.7
Trial #2 finished successfully with a mean accuracy metric of: 0.7
Trial #3 finished successfully with a mean accuracy metric of: 0.7375
Trial #4 finished successfully with a mean accuracy metric of: 0.65
Trial #5 finished successfully with a mean accuracy metric of: 0.7125
Trial #6 finished successfully with a mean accuracy metric of: 0.675
Trial #7 finished successfully with a mean accuracy metric of: 0.675


In [49]:
results_df = result_grid.get_dataframe()
results_df[["training_iteration", "mean_accuracy"]]

Unnamed: 0,training_iteration,mean_accuracy
0,20,0.7625
1,20,0.7
2,20,0.7
3,20,0.7375
4,20,0.65
5,20,0.7125
6,20,0.675
7,20,0.675


In [51]:
# Get the result with the maximum test set `mean_accuracy`
best_result: Result = result_grid.get_best_result()

In [52]:
best_result.config

{'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20}

In [53]:
best_result.metrics

{'loss': 0.540256667137146,
 'val_loss': 0.6417292952537537,
 'mean_accuracy': 0.7625,
 'timestamp': 1715002329,
 'checkpoint_dir_name': None,
 'done': True,
 'training_iteration': 20,
 'trial_id': '74691_00000',
 'date': '2024-05-06_13-32-09',
 'time_this_iter_s': 12.243691682815552,
 'time_total_s': 246.38994336128235,
 'pid': 15174,
 'hostname': '58b00f5f90d3',
 'node_ip': '172.28.0.12',
 'config': {'lr': 1.0082107151605334e-05, 'batch_size': 32, 'epochs': 20},
 'time_since_restore': 246.38994336128235,
 'iterations_since_restore': 20,
 'experiment_tag': '0_batch_size=32,lr=0.0000'}

In [54]:
result_df = best_result.metrics_dataframe
result_df[["training_iteration", "mean_accuracy", "time_total_s"]]

Unnamed: 0,training_iteration,mean_accuracy,time_total_s
0,1,0.5125,13.162681
1,2,0.625,25.451342
2,3,0.5125,37.772498
3,4,0.5875,49.995648
4,5,0.6625,62.165181
5,6,0.575,74.27685
6,7,0.6625,86.365983
7,8,0.65,98.484185
8,9,0.6,110.755054
9,10,0.65,123.024614


In [57]:
from ray.tune.examples.mnist_pytorch import get_data_loaders

model = vit_model()

with best_result.checkpoint.as_directory() as checkpoint_dir:
    checkpoint_dict = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt"))
    model.load_state_dict(checkpoint_dict["model_state"])

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/ray_results/tune_analyzing_results/train_func_74691_00000_0_batch_size=32,lr=0.0000_2024-05-06_13-27-58/checkpoint_000005/checkpoint.pt'