In [1]:
import os 
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [2]:
import datetime
import math
import random
import subprocess
import time
from collections import OrderedDict
from pathlib import Path

import optuna
optuna.logging.set_verbosity(optuna.logging.INFO)

import mlflow
import numpy as np
import matplotlib.pyplot as plt
import psutil

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data as data
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler, SequentialSampler

import torchvision
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms

from thop import profile
from tqdm.auto import tqdm
from torchsummary import summary

import warnings
warnings.filterwarnings("ignore")

In [3]:
# torch.cuda.memory._record_memory_history()

In [4]:
def increment_path(path, exist_ok=False, sep='', mkdir=True):
    path = Path(path)
    if path.exists() and not exist_ok:
        path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
        for n in range(2, 9999):
            p = f'{path}{sep}{n}{suffix}'
            if not os.path.exists(p):
                break
        path = Path(p)
    if mkdir:
        path.mkdir(parents=True, exist_ok=True)
    return path

In [5]:
EXPERIMENT_NAME = "exp"
FILE = Path(os.getcwd()).resolve()
SAVE_DIR = increment_path(FILE / "artifacts" / f"{EXPERIMENT_NAME}", exist_ok=False)
(SAVE_DIR / 'checkpoints').mkdir(parents=True, exist_ok=True)

In [6]:
local_tracking_uri = f"sqlite:///mlflow/mlflow.db"
mlflow.set_tracking_uri(local_tracking_uri)
mlflow.set_experiment(SAVE_DIR.stem)

2024/03/10 22:30:04 INFO mlflow.tracking.fluent: Experiment with name 'exp11' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/Hp/Desktop/mldev/2024/Git/mldevworld/Deep-Learning-Essentials/mlruns/11', creation_time=1710090004074, experiment_id='11', last_update_time=1710090004074, lifecycle_stage='active', name='exp11', tags={}>

In [7]:
def set_experiment_seed(seed=42):
    """
    Set random seeds and CUDA-related flags for experiment reproducibility.
    """
    # Set random seeds for reproducibility
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # If using GPU, set random seed for CUDA operations
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False  # Set to False for reproducibility

    # Optionally log seed information
    print(f"Random seed set to {seed}")

set_experiment_seed()

Random seed set to 42


In [8]:
# Download MNIST dataset
data_train = MNIST(
    "./data/mnist", download=True, 
    transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomRotation(degrees=15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.GaussianBlur(kernel_size=1),
        transforms.ToTensor()
    ])
)
        
data_test = MNIST(
    "./data/mnist", download=True, train=False,
    transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor()
    ])
)

In [9]:
# Define hyperparameters
N_CLASSES = 10
# EPOCHS = 5
# BATCH_SIZE = 512
# DROPOUT_PROB = 0.2
# USE_BATCHNORM = True
# LABEL_SMOOTHING = 0.1
# LR = 1.34E-03
# WARMUP_PROPORTION = 0.1
# WEIGHT_DECAY = 1e-4
# ACCUMULATION_STEPS = 2

In [10]:
# plt.figure(figsize=(10,10))
# for i in range(25):
#     plt.subplot(5,5,i+1)
#     plt.xticks([])
#     plt.yticks([])
#     plt.grid(False)
#     plt.imshow(data_train[i][0].permute(1,2,0).numpy(), cmap=plt.cm.binary)
#     plt.xlabel(data_train[i][1])
# plt.show()

`torch.utils.data.DataLoader` supports asynchronous data loading and data augmentation in separate worker subprocesses. The default setting for DataLoader is `num_workers=0`, which means that the data loading is synchronous and done in the main process. As a result the main training process has to wait for the data to be available to continue the execution.

Settin`g num_workers >` 0 enables asynchronous data loading and overlap between the training and data loading. num_workers should be tuned depending on the workload, CPU, GPU, and location of training data.`

DataLo`ader acce`pts pin_me`mory argument, which defaults` to F`alse. When using a GPU it’s better to `set pin_memory=`True, this instru`cts DataLo`ader to use pinned memory and enables faster and asynchronous memory copy from the host to the GPU.

In [11]:
def calculate_num_workers():
    # Calculate the number of CPU cores
    num_cpu_cores = os.cpu_count()

    # Set a safe maximum multiplier value (e.g., 0.5) to avoid using all available resources
    max_multiplier = 0.5

    # Calculate the number of workers based on the available resources
    multiplier = min(sum(psutil.cpu_percent(interval=1, percpu=True)) / 100.0, max_multiplier)
    
    # Ensure that num_workers is at least 1
    num_workers = max(1, int(num_cpu_cores * multiplier))

    return num_workers

In [12]:
def get_dataloaders(batch_size):
    # Define train dataloader
    train_dataloader = DataLoader(
        data_train, batch_size=batch_size, 
        sampler=RandomSampler(data_train), 
        pin_memory=True, num_workers=calculate_num_workers()
    )
    
    # Define test dataloader
    test_dataloader = DataLoader(
        data_test, batch_size=batch_size, 
        sampler=SequentialSampler(data_test),
        pin_memory=True, num_workers=calculate_num_workers()
    )

    return train_dataloader, test_dataloader

# train_dataloader, test_dataloader = get_dataloaders(BATCH_SIZE)

`torch.nn.Conv2d()` has `bias` parameter which defaults to `True` (the same is true for `Conv1d` and `Conv3d` ).

If a `nn.Conv2d` layer is directly followed by a `nn.BatchNorm2d` layer, then the bias in the convolution is not needed, instead use `nn.Conv2d(..., bias=False, ....)`. Bias is not needed because in the first step `BatchNorm` subtracts the mean, which effectively cancels out the effect of bias.

This is also applicable to 1d and 3d convolutions as long as `BatchNorm`(or other normalization layer) normalizes on the same dimension as convolution’s bias.

In [13]:
class LeNet(nn.Module):
    def __init__(self, use_batchnorm, dropout_prob):
        super(LeNet, self).__init__()

        self.use_batchnorm = use_batchnorm
                
        self.features1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=(5, 5), bias=not use_batchnorm),
            self._get_norm_layer(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Dropout2d(p=dropout_prob)
        )
        
        self.features2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=(5, 5), bias=not use_batchnorm),
            self._get_norm_layer(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Dropout2d(p=dropout_prob)
        )

        self.features3 = nn.Sequential(
            nn.Conv2d(16, 120, kernel_size=(5, 5), bias=not use_batchnorm),
            self._get_norm_layer(120),
            nn.ReLU(),
            nn.Dropout2d(p=dropout_prob)
        )

        self.classifier = nn.Sequential(
            nn.Linear(120, 84, bias=True),
            nn.ReLU(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(84, 10, bias=True)
        )

        # Initialize layers
        self.apply(self._initialize_weights)

    def _get_norm_layer(self, channels):
        if self.use_batchnorm:
            return nn.BatchNorm2d(channels)
        else:
            return nn.Identity()

    def _initialize_weights(self, m):
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        
    def forward(self, x):
        x = self.features1(x)
        x = self.features2(x)
        x = self.features3(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_model(use_batchnorm, dropout_prob):
    # Instantiate the model
    model = LeNet(use_batchnorm, dropout_prob)
    model = model.to(device)
    
    # Test with a random input
    # summary(model, input_size=(1, 32, 32), batch_size=BATCH_SIZE)
    return model

# model = get_model(USE_BATCHNORM, DROPOUT_PROB)

#### 1. Cross Entropy Loss
The standard cross-entropy loss for classification tasks is given by:

$$ \text{Traditional Cross Entropy Loss: } H(y, \hat{y}) = - \sum_i y_i \log(\hat{y}_i) $$

 - $y_i$ is a binary indicator of whether class $i$ is the correct classification.  
 - $p_i$ is the predicted probability of class $i$.

#### 2. Label Smoothed Cross Entropy
Label Smoothing Cross Entropy Loss introduces a modification to the target distribution:

 $$ \text{Label Smoothed Cross Entropy Loss} = - \sum_i \left( (1 - \text{smoothing}) \cdot 1_{\{y_i\}} + \frac{\text{smoothing}}{C-1} \cdot 1_{\{1 - y_i\}} \right) \cdot \log(p_i) $$

Where:
- $1_{\{y_i\}}$ is a binary indicator of whether class $i$ is the correct classification.
- $p_i$ is the predicted probability of class $i$.
- $C$ is the number of classes.
- $\text{smoothing}$  is the smoothing factor.



In [16]:
class LabelSmoothedCrossEntropy(nn.Module):
    def __init__(self, num_classes, smoothing):
        super(LabelSmoothedCrossEntropy, self).__init__()
        self.num_classes = num_classes
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, input_logits, target):
        log_probs = F.log_softmax(input_logits, dim=-1)
        true_dist = torch.zeros_like(log_probs)
        true_dist.fill_(self.smoothing / (self.num_classes - 1))
        true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        loss = -torch.sum(true_dist * log_probs) / input_logits.size(0)
        return loss

In [17]:
def get_loss(label_smoothing):
    # Initialize loss
    # criterion = nn.CrossEntropyLoss()
    return LabelSmoothedCrossEntropy(num_classes=N_CLASSES, smoothing=label_smoothing)

# criterion = get_loss(label_smoothing)

#### Adam w/ Weight Decay

In [18]:
def get_optimizer(model, lr, weight_decay):
    # Initialize optimizer
    # optimizer = optim.Adam(model.parameters(), lr=LR)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    return optimizer

# optimizer =  get_optimizer(LR, WEIGHT_DECAY)

#### Cosine Annealing With Warmup

In [19]:
def get_lr_lambda(initial_lr, warmup_steps, total_steps):
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return initial_lr + (1.0 - initial_lr) * float(current_step) / float(max(1, warmup_steps))
        else:
            return max(0.0, 0.5 * (1.0 + math.cos(math.pi * (current_step - warmup_steps) / float(total_steps - warmup_steps))))
    return lr_lambda

In [20]:
def get_cosine_with_warmup_scheduler(**kwargs):
    train_dataloader = kwargs['train_dataloader']
    optimizer = kwargs['optimizer']
    
    len_dataloader = len(train_dataloader)
    num_update_steps_per_epoch = max(len_dataloader // kwargs['accumulation_steps'], 1) 
    num_examples = len(train_dataloader.dataset)
    max_steps = math.ceil(kwargs['epochs'] * num_update_steps_per_epoch)
    num_warmup_steps = math.ceil(max_steps * kwargs['warmup_proportion'])
    lr_lambda = get_lr_lambda(kwargs['lr'], num_warmup_steps, max_steps)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)
    return scheduler

# scheduler = get_cosine_with_warmup_scheduler()

#### Adaptive Gradient Clipping

In [21]:
def unitwise_norm(x, norm_type=2.0):
    if x.ndim <= 1:
        return x.norm(norm_type)
    else:
        return x.norm(norm_type, dim=tuple(range(1, x.ndim)), keepdim=True)


def adaptive_clip_grad(parameters, clip_factor=0.01, eps=1e-3, norm_type=2.0):
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    for p in parameters:
        if p.grad is None:
            continue
        p_data = p.detach()
        g_data = p.grad.detach()
        max_norm = unitwise_norm(p_data, norm_type=norm_type).clamp_(min=eps).mul_(clip_factor)
        grad_norm = unitwise_norm(g_data, norm_type=norm_type)
        clipped_grad = g_data * (max_norm / grad_norm.clamp(min=1e-6))
        new_grads = torch.where(grad_norm < max_norm, g_data, clipped_grad)
        p.grad.detach().copy_(new_grads)

#### Logging Model Parameters

In [22]:
def calculate_flops(model, input_size):
    input_tensor = torch.randn(*input_size).to(device)
    flops, params = profile(model, inputs=(input_tensor,))
    return flops

def calculate_disk_size(model):
    # Calculate model disk size in bytes
    disk_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    # Convert bytes to megabytes
    disk_size_mb = disk_size_bytes / (1024 * 1024)
    return disk_size_mb

def get_model_metrics(model, train_dataloader):
    num_params = sum(p.numel() for p in model.parameters())
    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Set an example input size (adjust based on your actual input size)
    input_size = tuple(next(iter(train_dataloader))[0][0].unsqueeze(0).shape)

    flops = calculate_flops(model, input_size)
    disk_size = calculate_disk_size(model)

    return {
        "num_params": num_params,
        "num_trainable_params": num_trainable_params,
        "flops": flops,
        "disk_size": disk_size
    }

#### Logging GPU Usage

In [23]:
def get_gpu_usage():
    try:
        # Run nvidia-smi command to get GPU utilization information
        result = subprocess.run(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv,nounits,noheader"], stdout=subprocess.PIPE, text=True)
        gpu_usage = int(result.stdout.strip())
        return gpu_usage
    except Exception as e:
        print(f"Error getting GPU usage: {e}")
        return None

#### Train

##### Gradient Penalty
Its primary purpose is to encourage smoothness in the learned representations of the model by penalizing sharp changes or oscillations in the model's gradients.

##### Gradient Accumulation
Gradient accumulation adds gradients over an effective batch of size `batch_per_iter * iters_to_accumulate` (`* num_procs` if distributed). The scale should be calibrated for the effective batch, which means inf/NaN checking, step skipping if inf/NaN grads are found, and scale updates should occur at effective-batch granularity. Also, grads should remain scaled, and the scale factor should remain constant, while grads for a given effective batch are accumulated. If grads are unscaled (or the scale factor changes) before accumulation is complete, the next backward pass will add scaled grads to unscaled grads (or grads scaled by a different factor) after which it’s impossible to recover the accumulated unscaled grads `step` must apply.

##### Automatic Mixed Precision Training
Automatic Mixed Precision (AMP) training is a technique used in deep learning to accelerate training by using a combination of lower-precision and higher-precision numerical representations. The primary idea behind AMP is to use lower-precision data types (such as float16) for some parts of the model computation, while maintaining higher precision (such as float32) for critical numerical stability aspects.

In [24]:
def train(epoch, train_dataloader, model, criterion, optimizer, scheduler, accumulation_steps):
    model.train()
    total_loss = 0.0
    
    # Create a GradScaler once at the beginning of training.
    scaler = torch.cuda.amp.GradScaler()
    
    optimizer.zero_grad()
    with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch}/Training", unit="batch") as pbar:
        for i, (images, labels) in enumerate(train_dataloader):
            images, labels = images.to(device), labels.to(device)

            # Runs the forward pass with autocasting.
            with torch.autocast(device_type=device.type, dtype=torch.float16, enabled=True):
                output = model(images)
                loss = criterion(output, labels)

            # # Scales the loss for autograd.grad's backward pass, producing scaled_grad_params
            # scaled_grad_params = torch.autograd.grad(outputs=scaler.scale(loss), inputs=model.parameters(), create_graph=True)
    
            # # Creates unscaled grad_params before computing the penalty. scaled_grad_params are
            # # not owned by any optimizer, so ordinary division is used instead of scaler.unscale_:
            # inv_scale = 1. / scaler.get_scale()
            # grad_params = [p * inv_scale for p in scaled_grad_params]
    
            # # Computes the penalty term and adds it to the loss
            # with torch.autocast(device_type=device.type, dtype=torch.float16, enabled=True):
            #     grad_norm = 0
            #     for grad in grad_params:
            #         grad_norm += grad.pow(2).sum()
            #     grad_norm = grad_norm.sqrt()
            #     loss = loss + grad_norm

            # Scale the loss by accumulation steps
            if accumulation_steps > 1:
                with torch.autocast(device_type=device.type, dtype=torch.float16, enabled=True):
                    loss = loss / accumulation_steps
                
            # Applies scaling to the backward call as usual.
            # Accumulates leaf gradients that are correctly scaled.
            scaler.scale(loss).backward()

            if (i + 1) % accumulation_steps == 0:
                # Unscales the gradients of optimizer's assigned params in-place
                scaler.unscale_(optimizer)
                # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
                adaptive_clip_grad(model.parameters())

                # optimizer's gradients are already unscaled, so scaler.step does not unscale them,
                # although it still skips optimizer.step() if the gradients contain infs or NaNs.
                scaler.step(optimizer)

                mlflow.log_metric("lr", optimizer.param_groups[0]["lr"])
                scheduler.step()

                # Updates the scale for next iteration.
                scaler.update()
                
                for param in model.parameters():
                    param.grad = None
            
            total_loss += loss.item()
            pbar.update(1)
            pbar.set_postfix(loss=total_loss / (i+1), lr=f"{optimizer.param_groups[0]['lr']:.5f}")

    avg_loss = total_loss / len(train_dataloader.dataset)
    mlflow.log_metric("train_loss", avg_loss)
        
    print(f"[Train][Epoch {epoch}] Average Loss: {avg_loss:.5f}, Updated Learning Rate: {optimizer.param_groups[0]['lr']}")

#### Eval

In [25]:
class BestLossTracker:
    def __init__(self):
        self.best_loss = float('inf')
    
    def update_best_loss(self, avg_loss):
        if avg_loss < self.best_loss:
            self.best_loss = avg_loss
            return True
        return False

In [26]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0.0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def update_best_loss(self, val_loss):
        if val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

        return not self.early_stop

In [27]:
def test(epoch, best_loss_tracker, test_dataloader, model, criterion):
    model.eval()
    total_correct = 0
    total_loss = 0.0

    with tqdm(total=len(test_dataloader), desc=f"Epoch {epoch}/Testing", unit="batch") as pbar: 
        with torch.no_grad():
            for i, (images, labels) in enumerate(test_dataloader):
                images, labels = images.to(device), labels.to(device)
                
                output = model(images)                
                loss = criterion(output, labels)

                total_loss += loss.item()
                pbar.update(1)
                pbar.set_postfix(loss=total_loss / (i+1))
                
                pred = output.detach().max(1)[1]
                total_correct += pred.eq(labels.view_as(pred)).sum()
            
    avg_loss = total_loss / len(test_dataloader.dataset)
    accuracy = total_correct / len(test_dataloader.dataset)

    mlflow.log_metric("val_loss", avg_loss)
    mlflow.log_metric("val_accuracy", accuracy)
    
    if best_loss_tracker.update_best_loss(avg_loss):
        checkpoint_name = f"checkpoint_best_epoch_{epoch}.pth"
        checkpoint_path = str(SAVE_DIR / "checkpoints" / checkpoint_name)
        torch.save(model.state_dict(), checkpoint_path)
        print(f"[Test][Epoch {epoch}] New best model found! Saving checkpoint. Loss: {avg_loss:.5f}, Accuracy: {accuracy:.3f}")
    else:
        print(f"[Test][Epoch {epoch}] Loss: {avg_loss:.5f}, Accuracy: {accuracy:.3f}")
    
    return avg_loss

#### Run

In [28]:
def run(run_name, **kwargs):
    print(kwargs)
    with mlflow.start_run(run_name=run_name):
        mlflow.log_param("torch_version", torch.__version__)
        mlflow.log_param("mlflow_version", mlflow.__version__)

        hyper_params = {
            "accumulation_steps": kwargs['accumulation_steps'], 
            "batch_size": kwargs['batch_size'], 
            "batchnorm": kwargs['batchnorm'],
            "dropout":kwargs['dropout_prob'],
            "epochs": kwargs['epochs'], 
            "label_smoothing": kwargs['label_smoothing'],
            "start_lr": kwargs['lr'],
            "warmup_proportion": kwargs['warmup_proportion'],
            "weight_decay": kwargs['weight_decay']
        }

        mlflow.log_params(hyper_params)
        mlflow.set_tags({"model": "lenet", "dataset": "mnist"})

        train_dataloader, test_dataloader = get_dataloaders(kwargs['batch_size'])
        model = get_model(kwargs['dropout_prob'], kwargs['batchnorm'])
        criterion = get_loss(kwargs['label_smoothing'])
        optimizer = get_optimizer(model, kwargs['lr'], kwargs['weight_decay'])
        
        scheduler_kwargs = {
            'epochs': kwargs['epochs'],
            'train_dataloader':train_dataloader,
            'optimizer': optimizer,
            'lr':kwargs['lr'],
            'accumulation_steps': kwargs['accumulation_steps'],
            'warmup_proportion': kwargs['warmup_proportion']
        }
        scheduler = get_cosine_with_warmup_scheduler(**scheduler_kwargs)
        
        mlflow.pytorch.log_model(model, "model")
        mlflow.log_param("start_time", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        model_metrics = get_model_metrics(model, train_dataloader)
        mlflow.log_param("num_params", model_metrics['num_params'])
        mlflow.log_param("num_trainable_params", model_metrics['num_trainable_params'])
        mlflow.log_param("flops", model_metrics['flops'])
        mlflow.log_param("disk_size", model_metrics['disk_size'])

        best_loss_tracker = BestLossTracker()
        early_stopping = EarlyStopping(patience=1, delta=1e-6)

        start_time = time.time()
        for epoch in range(kwargs['epochs']):
            train(epoch, train_dataloader, model, criterion, optimizer, scheduler, kwargs['accumulation_steps'])
            val_loss = test(epoch, best_loss_tracker, test_dataloader, model, criterion)
            
            if get_gpu_usage() is not None:
                mlflow.log_metric("gpu", get_gpu_usage(), step=epoch)

            if not early_stopping.update_best_loss(val_loss):
                print("Early stopping triggered.")
                break


        total_time = time.time() - start_time
        avg_epoch_time = total_time / kwargs['epochs']
        avg_batch_time = avg_epoch_time / len(train_dataloader)

        mlflow.log_metric("total_training_time", total_time)
        mlflow.log_metric("average_epoch_time", avg_epoch_time)
        mlflow.log_metric("average_batch_time", avg_batch_time)
        mlflow.log_param("end_time", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    # torch.cuda.memory._dump_snapshot("snapshot.pickle")
    return val_loss

In [29]:
def objective(trial):
    EPOCHS = 5
    accumulation_steps = trial.suggest_int('accumulation_steps', 1, 3)  # Accumulation steps
    batchnorm = trial.suggest_categorical('batchnorm', [True, False])  # Use batch normalization
    batch_size = trial.suggest_int('batch_size', 256, 2048, log=True)  # Batch size
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.4)  # Dropout probability
    label_smoothing = trial.suggest_float('label_smoothing', 0.0, 0.25)  # Label smoothing
    lr = trial.suggest_float('lr', 1e-3, 1e-1, log=True)  # Learning rate
    warmup_proportion = trial.suggest_float('warmup_proportion', 0.0, 0.1)  # Warmup proportion
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)  # Weight decay

    run_name = f"run_{trial.number}"  # Create a unique run_name based on the trial number
    
    # Update the model and optimizer with the sampled hyperparameters
    kwargs = {
        'epochs': EPOCHS,
        'accumulation_steps': accumulation_steps,
        'batchnorm': batchnorm,
        'batch_size': batch_size,
        'dropout_prob': dropout_prob,
        'label_smoothing': label_smoothing,
        'lr': lr,
        'warmup_proportion': warmup_proportion,
        'weight_decay': weight_decay
    }
    
    # Train and evaluate the model
    val_loss = run(run_name=run_name, **kwargs)

    # Return the validation loss for optimization
    return val_loss

In [30]:
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler())  # Change 'minimize' to 'maximize' if you're maximizing a metric
study.optimize(objective, n_trials=100)  # Adjust the number of trials as needed

[I 2024-03-10 22:30:04,648] A new study created in memory with name: no-name-105c6438-58f0-4288-beb6-8d0e3d0c0428


{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1221, 'dropout_prob': 0.1451223008122237, 'label_smoothing': 0.2069263707464151, 'lr': 0.0014513999543911257, 'warmup_proportion': 0.04213229508497701, 'weight_decay': 0.0006887755240006057}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00192, Updated Learning Rate: 0.001358111449214277


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.51094, Accuracy: 0.104


Epoch 1/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00192, Updated Learning Rate: 0.0010089120347935119


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-10 22:31:02,752] Trial 0 finished with value: 74.44430390625 and parameters: {'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1221, 'dropout_prob': 0.1451223008122237, 'label_smoothing': 0.2069263707464151, 'lr': 0.0014513999543911257, 'warmup_proportion': 0.04213229508497701, 'weight_decay': 0.0006887755240006057}. Best is trial 0 with value: 74.44430390625.


[Test][Epoch 1] Loss: 74.44430, Accuracy: 0.103
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 306, 'dropout_prob': 0.3243738591345986, 'label_smoothing': 0.09768230864805996, 'lr': 0.009730465780858325, 'warmup_proportion': 0.039214878621064514, 'weight_decay': 6.365078881414862e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/197 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00378, Updated Learning Rate: 0.009084057769771836


Epoch 0/Testing:   0%|          | 0/33 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 609.40280, Accuracy: 0.155


Epoch 1/Training:   0%|          | 0/197 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00378, Updated Learning Rate: 0.00673458553816387


Epoch 1/Testing:   0%|          | 0/33 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 608.46390, Accuracy: 0.155


Epoch 2/Training:   0%|          | 0/197 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00378, Updated Learning Rate: 0.0036112541194416154


Epoch 2/Testing:   0%|          | 0/33 [00:00<?, ?batch/s]

[I 2024-03-10 22:34:23,417] Trial 1 finished with value: 609.92189375 and parameters: {'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 306, 'dropout_prob': 0.3243738591345986, 'label_smoothing': 0.09768230864805996, 'lr': 0.009730465780858325, 'warmup_proportion': 0.039214878621064514, 'weight_decay': 6.365078881414862e-05}. Best is trial 0 with value: 74.44430390625.


[Test][Epoch 2] Loss: 609.92189, Accuracy: 0.156
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1455, 'dropout_prob': 0.3003211659471703, 'label_smoothing': 0.16495847481123055, 'lr': 0.0021027838170086114, 'warmup_proportion': 0.07511850493740019, 'weight_decay': 2.9647313463561168e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00048, Updated Learning Rate: 0.002022751373419413


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00095, Accuracy: 0.746


Epoch 1/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00034, Updated Learning Rate: 0.0015470146232984935


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00080, Accuracy: 0.887


Epoch 2/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00029, Updated Learning Rate: 0.0008462755225090491


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00075, Accuracy: 0.921


Epoch 3/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00027, Updated Learning Rate: 0.00023865497264947906


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00073, Accuracy: 0.932


Epoch 4/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00027, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 22:37:44,600] Trial 2 finished with value: 0.0007258852958679199 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1455, 'dropout_prob': 0.3003211659471703, 'label_smoothing': 0.16495847481123055, 'lr': 0.0021027838170086114, 'warmup_proportion': 0.07511850493740019, 'weight_decay': 2.9647313463561168e-05}. Best is trial 2 with value: 0.0007258852958679199.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00073, Accuracy: 0.934
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 439, 'dropout_prob': 0.030600731469720577, 'label_smoothing': 0.055868582104746434, 'lr': 0.026147846541169317, 'warmup_proportion': 0.07288919886552776, 'weight_decay': 8.069520934441288e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/137 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00103, Updated Learning Rate: 0.024963918865667283


Epoch 0/Testing:   0%|          | 0/23 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00125, Accuracy: 0.940


Epoch 1/Training:   0%|          | 0/137 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00059, Updated Learning Rate: 0.018922061049586894


Epoch 1/Testing:   0%|          | 0/23 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00100, Accuracy: 0.977


Epoch 2/Training:   0%|          | 0/137 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00054, Updated Learning Rate: 0.01029196536405235


Epoch 2/Testing:   0%|          | 0/23 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00094, Accuracy: 0.984


Epoch 3/Training:   0%|          | 0/137 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00051, Updated Learning Rate: 0.0028930938864643566


Epoch 3/Testing:   0%|          | 0/23 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00094, Accuracy: 0.981


Epoch 4/Training:   0%|          | 0/137 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00050, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/23 [00:00<?, ?batch/s]

[I 2024-03-10 22:41:08,743] Trial 3 finished with value: 0.0009276029706001282 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 439, 'dropout_prob': 0.030600731469720577, 'label_smoothing': 0.055868582104746434, 'lr': 0.026147846541169317, 'warmup_proportion': 0.07288919886552776, 'weight_decay': 8.069520934441288e-06}. Best is trial 2 with value: 0.0007258852958679199.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00093, Accuracy: 0.983
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 545, 'dropout_prob': 0.19918007956039283, 'label_smoothing': 0.11565733474080886, 'lr': 0.05043128858476219, 'warmup_proportion': 0.07199384103602492, 'weight_decay': 3.1704728283556938e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/111 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00100, Updated Learning Rate: 0.048123181438563506


Epoch 0/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00150, Accuracy: 0.956


Epoch 1/Training:   0%|          | 0/111 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00072, Updated Learning Rate: 0.036455224119113096


Epoch 1/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00135, Accuracy: 0.971


Epoch 2/Training:   0%|          | 0/111 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00068, Updated Learning Rate: 0.01982118376387369


Epoch 2/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00131, Accuracy: 0.976


Epoch 3/Training:   0%|          | 0/111 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00066, Updated Learning Rate: 0.005570625650037887


Epoch 3/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00128, Accuracy: 0.983


Epoch 4/Training:   0%|          | 0/111 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00065, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[I 2024-03-10 22:43:16,743] Trial 4 finished with value: 0.0012740315675735473 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 545, 'dropout_prob': 0.19918007956039283, 'label_smoothing': 0.11565733474080886, 'lr': 0.05043128858476219, 'warmup_proportion': 0.07199384103602492, 'weight_decay': 3.1704728283556938e-06}. Best is trial 2 with value: 0.0007258852958679199.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00127, Accuracy: 0.981
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 529, 'dropout_prob': 0.07592134919382998, 'label_smoothing': 0.08181435903512166, 'lr': 0.03126306236108547, 'warmup_proportion': 0.08372642835652444, 'weight_decay': 4.000280229405524e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/114 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00437, Updated Learning Rate: 0.030046034750972717


Epoch 0/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 721.78963, Accuracy: 0.094


Epoch 1/Training:   0%|          | 0/114 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00437, Updated Learning Rate: 0.022953472326570905


Epoch 1/Testing:   0%|          | 0/19 [00:00<?, ?batch/s]

[I 2024-03-10 22:44:28,254] Trial 5 finished with value: 1165.336025 and parameters: {'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 529, 'dropout_prob': 0.07592134919382998, 'label_smoothing': 0.08181435903512166, 'lr': 0.03126306236108547, 'warmup_proportion': 0.08372642835652444, 'weight_decay': 4.000280229405524e-05}. Best is trial 2 with value: 0.0007258852958679199.


[Test][Epoch 1] Loss: 1165.33603, Accuracy: 0.094
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1030, 'dropout_prob': 0.3754555768400405, 'label_smoothing': 0.13575912327829695, 'lr': 0.01610731483882752, 'warmup_proportion': 0.06889986899514466, 'weight_decay': 0.0006701132602394994}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00121, Updated Learning Rate: 0.01535491641644112


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00088, Accuracy: 0.946


Epoch 1/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00085, Updated Learning Rate: 0.011619149342767022


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00079, Accuracy: 0.975


Epoch 2/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00081, Updated Learning Rate: 0.006313031219334113


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.983


Epoch 3/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00079, Updated Learning Rate: 0.0017735377971463725


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.980


Epoch 4/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00078, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-10 22:47:12,219] Trial 6 finished with value: 0.0007622253298759461 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1030, 'dropout_prob': 0.3754555768400405, 'label_smoothing': 0.13575912327829695, 'lr': 0.01610731483882752, 'warmup_proportion': 0.06889986899514466, 'weight_decay': 0.0006701132602394994}. Best is trial 2 with value: 0.0007258852958679199.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00076, Accuracy: 0.980
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1933, 'dropout_prob': 0.01661403758630624, 'label_smoothing': 0.04202568800819467, 'lr': 0.007319596288072455, 'warmup_proportion': 0.07692142464279984, 'weight_decay': 2.683958287613536e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.007016606362974937


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00047, Accuracy: 0.850


Epoch 1/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.005343543315260655


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00035, Accuracy: 0.906


Epoch 2/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.002915190205073188


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.940


Epoch 3/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.0008208514020082457


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.949


Epoch 4/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 22:49:58,341] Trial 7 finished with value: 0.00027190839052200315 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1933, 'dropout_prob': 0.01661403758630624, 'label_smoothing': 0.04202568800819467, 'lr': 0.007319596288072455, 'warmup_proportion': 0.07692142464279984, 'weight_decay': 2.683958287613536e-06}. Best is trial 7 with value: 0.00027190839052200315.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.950
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 383, 'dropout_prob': 0.24245313725793186, 'label_smoothing': 0.15437927876340252, 'lr': 0.012435366248203749, 'warmup_proportion': 0.06288194564868863, 'weight_decay': 0.0001984218589233328}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/157 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00301, Updated Learning Rate: 0.011799567071853628


Epoch 0/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 1198.77133, Accuracy: 0.089


Epoch 1/Training:   0%|          | 0/157 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00301, Updated Learning Rate: 0.008884397288523672


Epoch 1/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[I 2024-03-10 22:51:10,388] Trial 8 finished with value: 1205.289178125 and parameters: {'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 383, 'dropout_prob': 0.24245313725793186, 'label_smoothing': 0.15437927876340252, 'lr': 0.012435366248203749, 'warmup_proportion': 0.06288194564868863, 'weight_decay': 0.0001984218589233328}. Best is trial 7 with value: 0.00027190839052200315.


[Test][Epoch 1] Loss: 1205.28918, Accuracy: 0.088
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 514, 'dropout_prob': 0.09840609444828657, 'label_smoothing': 0.15437088084394157, 'lr': 0.001582783755459807, 'warmup_proportion': 0.0026520366753583604, 'weight_decay': 1.4807182991689228e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/117 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00147, Updated Learning Rate: 0.0014356624092111024


Epoch 0/Testing:   0%|          | 0/20 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00208, Accuracy: 0.908


Epoch 1/Training:   0%|          | 0/117 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00105, Updated Learning Rate: 0.001040849272872664


Epoch 1/Testing:   0%|          | 0/20 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00190, Accuracy: 0.949


Epoch 2/Training:   0%|          | 0/117 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00099, Updated Learning Rate: 0.0005501133680549705


Epoch 2/Testing:   0%|          | 0/20 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00186, Accuracy: 0.955


Epoch 3/Training:   0%|          | 0/117 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00096, Updated Learning Rate: 0.0001521552404177181


Epoch 3/Testing:   0%|          | 0/20 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00183, Accuracy: 0.960


Epoch 4/Training:   0%|          | 0/117 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00095, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/20 [00:00<?, ?batch/s]

[I 2024-03-10 22:53:18,356] Trial 9 finished with value: 0.0018298095524311067 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 514, 'dropout_prob': 0.09840609444828657, 'label_smoothing': 0.15437088084394157, 'lr': 0.001582783755459807, 'warmup_proportion': 0.0026520366753583604, 'weight_decay': 1.4807182991689228e-06}. Best is trial 7 with value: 0.00027190839052200315.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00183, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1981, 'dropout_prob': 0.010637250040331109, 'label_smoothing': 0.0011370928764577112, 'lr': 0.004429305809869871, 'warmup_proportion': 0.09301320716693588, 'weight_decay': 7.299487103407476e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.004295745897304371


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00039, Accuracy: 0.791


Epoch 1/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.0033219793574024038


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.859


Epoch 2/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.0018300824638282107


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.926


Epoch 3/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0005181303536722262


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.945


Epoch 4/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00006, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 22:56:38,730] Trial 10 finished with value: 0.00010674896910786629 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1981, 'dropout_prob': 0.010637250040331109, 'label_smoothing': 0.0011370928764577112, 'lr': 0.004429305809869871, 'warmup_proportion': 0.09301320716693588, 'weight_decay': 7.299487103407476e-06}. Best is trial 10 with value: 0.00010674896910786629.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.946
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2004, 'dropout_prob': 0.010782012196208663, 'label_smoothing': 0.0024582737850606, 'lr': 0.004736243327261055, 'warmup_proportion': 0.09874362627349086, 'weight_decay': 8.001240986920908e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.004593428116067383


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00046, Accuracy: 0.718


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.0035521824954457912


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.891


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.0019569016522475073


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.930


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0005540352225766926


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.936


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-10 22:58:57,002] Trial 11 finished with value: 0.00010795635730028152 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2004, 'dropout_prob': 0.010782012196208663, 'label_smoothing': 0.0024582737850606, 'lr': 0.004736243327261055, 'warmup_proportion': 0.09874362627349086, 'weight_decay': 8.001240986920908e-06}. Best is trial 10 with value: 0.00010674896910786629.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.939
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2030, 'dropout_prob': 0.098927511782571, 'label_smoothing': 0.0009805456348906938, 'lr': 0.004033572769646186, 'warmup_proportion': 0.0999264569274631, 'weight_decay': 1.0877697810098024e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.003911945668342843


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.741


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.0030251795772346397


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.886


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.0016665751043550866


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.924


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0004718383817713501


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.940


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00006, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-10 23:01:09,441] Trial 12 finished with value: 9.675593301653863e-05 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2030, 'dropout_prob': 0.098927511782571, 'label_smoothing': 0.0009805456348906938, 'lr': 0.004033572769646186, 'warmup_proportion': 0.0999264569274631, 'weight_decay': 1.0877697810098024e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.943
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 868, 'dropout_prob': 0.11599471913971118, 'label_smoothing': 0.010739719762420634, 'lr': 0.004361838616412535, 'warmup_proportion': 0.09800669471991089, 'weight_decay': 1.0436728592947257e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/70 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00059, Updated Learning Rate: 0.004240236115167705


Epoch 0/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00051, Accuracy: 0.888


Epoch 1/Training:   0%|          | 0/70 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00019, Updated Learning Rate: 0.0032905249432587974


Epoch 1/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.943


Epoch 2/Training:   0%|          | 0/70 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0018167728796290904


Epoch 2/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.956


Epoch 3/Training:   0%|          | 0/70 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00012, Updated Learning Rate: 0.0005149986909490671


Epoch 3/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.964


Epoch 4/Training:   0%|          | 0/70 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[I 2024-03-10 23:03:16,832] Trial 13 finished with value: 0.00025169426053762436 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 868, 'dropout_prob': 0.11599471913971118, 'label_smoothing': 0.010739719762420634, 'lr': 0.004361838616412535, 'warmup_proportion': 0.09800669471991089, 'weight_decay': 1.0436728592947257e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] Loss: 0.00025, Accuracy: 0.962
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1445, 'dropout_prob': 0.1745769427699067, 'label_smoothing': 0.03880282952132687, 'lr': 0.0034334122336878415, 'warmup_proportion': 0.019289416776569458, 'weight_decay': 1.4075059784864738e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00043, Updated Learning Rate: 0.0031762790743964675


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00061, Accuracy: 0.784


Epoch 1/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00022, Updated Learning Rate: 0.002336851894846296


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00039, Accuracy: 0.911


Epoch 2/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00016, Updated Learning Rate: 0.001246907187833379


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00033, Accuracy: 0.942


Epoch 3/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00014, Updated Learning Rate: 0.00034674506142510825


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.945


Epoch 4/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00014, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 23:06:38,870] Trial 14 finished with value: 0.0003169318586587906 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1445, 'dropout_prob': 0.1745769427699067, 'label_smoothing': 0.03880282952132687, 'lr': 0.0034334122336878415, 'warmup_proportion': 0.019289416776569458, 'weight_decay': 1.4075059784864738e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.947
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 768, 'dropout_prob': 0.06100809238217579, 'label_smoothing': 0.06658468571810651, 'lr': 0.0027114201825804746, 'warmup_proportion': 0.08855932290236279, 'weight_decay': 0.00010666774678723855}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/79 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00072, Updated Learning Rate: 0.0026183319454460476


Epoch 0/Testing:   0%|          | 0/14 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00106, Accuracy: 0.881


Epoch 1/Training:   0%|          | 0/79 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00036, Updated Learning Rate: 0.0020126205959436786


Epoch 1/Testing:   0%|          | 0/14 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00084, Accuracy: 0.943


Epoch 2/Training:   0%|          | 0/79 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00030, Updated Learning Rate: 0.001104512298243044


Epoch 2/Testing:   0%|          | 0/14 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00078, Accuracy: 0.958


Epoch 3/Training:   0%|          | 0/79 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00028, Updated Learning Rate: 0.00031203839133615234


Epoch 3/Testing:   0%|          | 0/14 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.961


Epoch 4/Training:   0%|          | 0/79 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00028, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/14 [00:00<?, ?batch/s]

[I 2024-03-10 23:12:00,204] Trial 15 finished with value: 0.000761160358786583 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 768, 'dropout_prob': 0.06100809238217579, 'label_smoothing': 0.06658468571810651, 'lr': 0.0027114201825804746, 'warmup_proportion': 0.08855932290236279, 'weight_decay': 0.00010666774678723855}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00076, Accuracy: 0.962
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1580, 'dropout_prob': 0.12167555371301995, 'label_smoothing': 0.24853464412941306, 'lr': 0.0010749075521783032, 'warmup_proportion': 0.055695902600475605, 'weight_decay': 3.6520396057506486e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00049, Updated Learning Rate: 0.0010216828962820747


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00128, Accuracy: 0.568


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00039, Updated Learning Rate: 0.0007706462300615636


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00111, Accuracy: 0.757


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00036, Updated Learning Rate: 0.00041785905987544577


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00104, Accuracy: 0.821


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00034, Updated Learning Rate: 0.00011725549357132985


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00102, Accuracy: 0.846


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00034, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 23:15:15,776] Trial 16 finished with value: 0.0010118022322654725 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1580, 'dropout_prob': 0.12167555371301995, 'label_smoothing': 0.24853464412941306, 'lr': 0.0010749075521783032, 'warmup_proportion': 0.055695902600475605, 'weight_decay': 3.6520396057506486e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00101, Accuracy: 0.850
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1066, 'dropout_prob': 0.0646171911795538, 'label_smoothing': 0.023495019799586958, 'lr': 0.006120359080901847, 'warmup_proportion': 0.08926686602248009, 'weight_decay': 1.4057358549679689e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/57 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00049, Updated Learning Rate: 0.0059184365268458525


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00066, Accuracy: 0.834


Epoch 1/Training:   0%|          | 0/57 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.004557886025053932


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00038, Accuracy: 0.939


Epoch 2/Training:   0%|          | 0/57 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00014, Updated Learning Rate: 0.002504338165291719


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.959


Epoch 3/Training:   0%|          | 0/57 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00013, Updated Learning Rate: 0.0007079812894745393


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.964


Epoch 4/Training:   0%|          | 0/57 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00012, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-10 23:18:31,369] Trial 17 finished with value: 0.0003036852046847343 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1066, 'dropout_prob': 0.0646171911795538, 'label_smoothing': 0.023495019799586958, 'lr': 0.006120359080901847, 'warmup_proportion': 0.08926686602248009, 'weight_decay': 1.4057358549679689e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.963
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1700, 'dropout_prob': 0.23929617064990163, 'label_smoothing': 0.004612073534762902, 'lr': 0.002748894015773769, 'warmup_proportion': 0.0246652558177621, 'weight_decay': 2.4878399651784663e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00046, Updated Learning Rate: 0.0025521517898568326


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.10945, Accuracy: 0.094


Epoch 1/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00046, Updated Learning Rate: 0.0018831822879986008


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 23:19:55,386] Trial 18 finished with value: 5.086788623046875 and parameters: {'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1700, 'dropout_prob': 0.23929617064990163, 'label_smoothing': 0.004612073534762902, 'lr': 0.002748894015773769, 'warmup_proportion': 0.0246652558177621, 'weight_decay': 2.4878399651784663e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 1] Loss: 5.08679, Accuracy: 0.095
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 2040, 'dropout_prob': 0.1572000276998347, 'label_smoothing': 0.08098572401468691, 'lr': 0.08016730598073825, 'warmup_proportion': 0.09877022658132287, 'weight_decay': 1.076057855022966e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.07802746723395483


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.711


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00019, Updated Learning Rate: 0.060665574382727704


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.954


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00017, Updated Learning Rate: 0.03353489151275318


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.968


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00016, Updated Learning Rate: 0.009512444970094696


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.972


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00016, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-10 23:22:39,092] Trial 19 finished with value: 0.00028162769079208375 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 2040, 'dropout_prob': 0.1572000276998347, 'label_smoothing': 0.08098572401468691, 'lr': 0.08016730598073825, 'warmup_proportion': 0.09877022658132287, 'weight_decay': 1.076057855022966e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.972
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1240, 'dropout_prob': 0.0020425564571031896, 'label_smoothing': 0.029830217046906832, 'lr': 0.017110783300254073, 'warmup_proportion': 0.08534882634013566, 'weight_decay': 4.815358890666891e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00036, Updated Learning Rate: 0.016477040757577038


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00057, Accuracy: 0.884


Epoch 1/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.012618784086308362


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00034, Accuracy: 0.956


Epoch 2/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.006908892992680061


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00031, Accuracy: 0.963


Epoch 3/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.001949289654512278


Epoch 3/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.970


Epoch 4/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-10 23:28:01,748] Trial 20 finished with value: 0.00028082954883575437 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1240, 'dropout_prob': 0.0020425564571031896, 'label_smoothing': 0.029830217046906832, 'lr': 0.017110783300254073, 'warmup_proportion': 0.08534882634013566, 'weight_decay': 4.815358890666891e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.971
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2037, 'dropout_prob': 0.043102425960477274, 'label_smoothing': 0.0024346970621232098, 'lr': 0.005206968960316317, 'warmup_proportion': 0.09987999931887383, 'weight_decay': 6.732090754703305e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00032, Updated Learning Rate: 0.005049959634493417


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.727


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00014, Updated Learning Rate: 0.003905226720237238


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.897


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00008, Updated Learning Rate: 0.002151394144594559


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.921


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0006090996613865004


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.941


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00006, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-10 23:30:41,624] Trial 21 finished with value: 0.00010402768701314926 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2037, 'dropout_prob': 0.043102425960477274, 'label_smoothing': 0.0024346970621232098, 'lr': 0.005206968960316317, 'warmup_proportion': 0.09987999931887383, 'weight_decay': 6.732090754703305e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.941
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1648, 'dropout_prob': 0.06756314823371665, 'label_smoothing': 0.0015061153643863, 'lr': 0.008288598375033563, 'warmup_proportion': 0.0918871087536229, 'weight_decay': 6.4270072447862914e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00035, Updated Learning Rate: 0.008038666552355337


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00081, Accuracy: 0.653


Epoch 1/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00012, Updated Learning Rate: 0.0062164487812751724


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.932


Epoch 2/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00007, Updated Learning Rate: 0.0034246491858979534


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.944


Epoch 3/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00006, Updated Learning Rate: 0.0009695818242970546


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.958


Epoch 4/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00005, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 23:34:01,966] Trial 22 finished with value: 0.00010083994939923287 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1648, 'dropout_prob': 0.06756314823371665, 'label_smoothing': 0.0015061153643863, 'lr': 0.008288598375033563, 'warmup_proportion': 0.0918871087536229, 'weight_decay': 6.4270072447862914e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1628, 'dropout_prob': 0.05221313037960226, 'label_smoothing': 0.05623074102371584, 'lr': 0.0075028331514503015, 'warmup_proportion': 0.0835192644421218, 'weight_decay': 1.9443394981967603e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00038, Updated Learning Rate: 0.007276595049428016


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00056, Accuracy: 0.855


Epoch 1/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.005627124863587726


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00044, Accuracy: 0.924


Epoch 2/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00014, Updated Learning Rate: 0.0030999899236809634


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00038, Accuracy: 0.951


Epoch 3/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00013, Updated Learning Rate: 0.0008776647540664734


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00036, Accuracy: 0.953


Epoch 4/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00013, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 23:37:19,769] Trial 23 finished with value: 0.0003589511275291443 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1628, 'dropout_prob': 0.05221313037960226, 'label_smoothing': 0.05623074102371584, 'lr': 0.0075028331514503015, 'warmup_proportion': 0.0835192644421218, 'weight_decay': 1.9443394981967603e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00036, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1272, 'dropout_prob': 0.11274565342183124, 'label_smoothing': 0.024893962219692096, 'lr': 0.009804392909208334, 'warmup_proportion': 0.09437146866757967, 'weight_decay': 2.284525115373476e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/48 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00054, Updated Learning Rate: 0.009508754288638545


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00044, Accuracy: 0.891


Epoch 1/Training:   0%|          | 0/48 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00020, Updated Learning Rate: 0.00735329468190625


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.936


Epoch 2/Training:   0%|          | 0/48 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00016, Updated Learning Rate: 0.0040509389736968675


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.964


Epoch 3/Training:   0%|          | 0/48 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00015, Updated Learning Rate: 0.0011468961014770883


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.970


Epoch 4/Training:   0%|          | 0/48 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00014, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-10 23:39:59,557] Trial 24 finished with value: 0.00022974773198366165 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1272, 'dropout_prob': 0.11274565342183124, 'label_smoothing': 0.024893962219692096, 'lr': 0.009804392909208334, 'warmup_proportion': 0.09437146866757967, 'weight_decay': 2.284525115373476e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.971
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1766, 'dropout_prob': 0.08781859752580784, 'label_smoothing': 0.04455715317948723, 'lr': 0.0063503817633186466, 'warmup_proportion': 0.05775277708566127, 'weight_decay': 5.341495764784108e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.0060597415290448335


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00049, Accuracy: 0.808


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.004590495244526808


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00036, Accuracy: 0.914


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.0024959125065767873


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.947


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0007014613453475075


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.955


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 23:42:09,472] Trial 25 finished with value: 0.00027549923062324523 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1766, 'dropout_prob': 0.08781859752580784, 'label_smoothing': 0.04455715317948723, 'lr': 0.0063503817633186466, 'warmup_proportion': 0.05775277708566127, 'weight_decay': 5.341495764784108e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.956
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1424, 'dropout_prob': 0.1415047682362926, 'label_smoothing': 0.018320995057560397, 'lr': 0.014579658590104416, 'warmup_proportion': 0.07899151783148109, 'weight_decay': 1.6320845160803314e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00055, Updated Learning Rate: 0.014024753376252124


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.17604, Accuracy: 0.042


Epoch 1/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00055, Updated Learning Rate: 0.010726231036758384


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-10 23:43:11,303] Trial 26 finished with value: 15.5095416015625 and parameters: {'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1424, 'dropout_prob': 0.1415047682362926, 'label_smoothing': 0.018320995057560397, 'lr': 0.014579658590104416, 'warmup_proportion': 0.07899151783148109, 'weight_decay': 1.6320845160803314e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 1] Loss: 15.50954, Accuracy: 0.043
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 907, 'dropout_prob': 0.036209372369698706, 'label_smoothing': 0.08262128613839981, 'lr': 0.022817272088603183, 'warmup_proportion': 0.09091127314537721, 'weight_decay': 5.874084922159233e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/67 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00067, Updated Learning Rate: 0.02209221190146309


Epoch 0/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00081, Accuracy: 0.947


Epoch 1/Training:   0%|          | 0/67 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00038, Updated Learning Rate: 0.017043374236268834


Epoch 1/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00069, Accuracy: 0.973


Epoch 2/Training:   0%|          | 0/67 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00035, Updated Learning Rate: 0.009374926651923401


Epoch 2/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00067, Accuracy: 0.976


Epoch 3/Training:   0%|          | 0/67 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00033, Updated Learning Rate: 0.0026519578308219515


Epoch 3/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00065, Accuracy: 0.979


Epoch 4/Training:   0%|          | 0/67 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00033, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/12 [00:00<?, ?batch/s]

[I 2024-03-10 23:45:19,815] Trial 27 finished with value: 0.0006539548456668854 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 907, 'dropout_prob': 0.036209372369698706, 'label_smoothing': 0.08262128613839981, 'lr': 0.022817272088603183, 'warmup_proportion': 0.09091127314537721, 'weight_decay': 5.874084922159233e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00065, Accuracy: 0.980
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 627, 'dropout_prob': 0.19216664626620464, 'label_smoothing': 0.022847485709152373, 'lr': 0.0034850293045288654, 'warmup_proportion': 0.0649826500583231, 'weight_decay': 5.096795642752917e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/96 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00079, Updated Learning Rate: 0.003316992140252004


Epoch 0/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00089, Accuracy: 0.870


Epoch 1/Training:   0%|          | 0/96 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00028, Updated Learning Rate: 0.0025056488979424162


Epoch 1/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00053, Accuracy: 0.954


Epoch 2/Training:   0%|          | 0/96 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00022, Updated Learning Rate: 0.0013598874048829925


Epoch 2/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00047, Accuracy: 0.966


Epoch 3/Training:   0%|          | 0/96 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00020, Updated Learning Rate: 0.0003817996015881341


Epoch 3/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00046, Accuracy: 0.967


Epoch 4/Training:   0%|          | 0/96 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00019, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[I 2024-03-10 23:47:28,162] Trial 28 finished with value: 0.00045484486371278764 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 627, 'dropout_prob': 0.19216664626620464, 'label_smoothing': 0.022847485709152373, 'lr': 0.0034850293045288654, 'warmup_proportion': 0.0649826500583231, 'weight_decay': 5.096795642752917e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00045, Accuracy: 0.969
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1332, 'dropout_prob': 0.13901221535739477, 'label_smoothing': 0.22455100499862526, 'lr': 0.0019519278708419181, 'warmup_proportion': 0.045089317675399726, 'weight_decay': 2.51052890415051e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00176, Updated Learning Rate: 0.001831477299897316


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.49440, Accuracy: 0.085


Epoch 1/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00176, Updated Learning Rate: 0.0013639513849464857


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-10 23:48:28,697] Trial 29 finished with value: 55.203887109375 and parameters: {'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1332, 'dropout_prob': 0.13901221535739477, 'label_smoothing': 0.22455100499862526, 'lr': 0.0019519278708419181, 'warmup_proportion': 0.045089317675399726, 'weight_decay': 2.51052890415051e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 1] Loss: 55.20389, Accuracy: 0.085
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1089, 'dropout_prob': 0.04587775289362337, 'label_smoothing': 0.18777729858412195, 'lr': 0.005361404692473498, 'warmup_proportion': 0.08123509116173881, 'weight_decay': 1.7947846074654357e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00058, Updated Learning Rate: 0.00516706034304647


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00128, Accuracy: 0.849


Epoch 1/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00039, Updated Learning Rate: 0.003961326984566366


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00110, Accuracy: 0.935


Epoch 2/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00036, Updated Learning Rate: 0.0021703204531964056


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00106, Accuracy: 0.952


Epoch 3/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00035, Updated Learning Rate: 0.0006125694917918013


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00104, Accuracy: 0.957


Epoch 4/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00035, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-10 23:50:41,001] Trial 30 finished with value: 0.0010404990196228028 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1089, 'dropout_prob': 0.04587775289362337, 'label_smoothing': 0.18777729858412195, 'lr': 0.005361404692473498, 'warmup_proportion': 0.08123509116173881, 'weight_decay': 1.7947846074654357e-05}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00104, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1774, 'dropout_prob': 0.038819814958578946, 'label_smoothing': 8.582147054318995e-05, 'lr': 0.003325357117078681, 'warmup_proportion': 0.0936041066135404, 'weight_decay': 9.709687768548523e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00039, Updated Learning Rate: 0.003240653199952516


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00056, Accuracy: 0.694


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00019, Updated Learning Rate: 0.0025245987667753913


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.889


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00011, Updated Learning Rate: 0.0013973152351222115


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.906


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00009, Updated Learning Rate: 0.0003966386900725418


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.934


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 23:52:55,302] Trial 31 finished with value: 0.00012797627449035644 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1774, 'dropout_prob': 0.038819814958578946, 'label_smoothing': 8.582147054318995e-05, 'lr': 0.003325357117078681, 'warmup_proportion': 0.0936041066135404, 'weight_decay': 9.709687768548523e-06}. Best is trial 12 with value: 9.675593301653863e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.935
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1832, 'dropout_prob': 0.07985584476619423, 'label_smoothing': 0.0004311194470224544, 'lr': 0.008730687529148204, 'warmup_proportion': 0.0993370192538024, 'weight_decay': 7.09143369245701e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.008508298352020341


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.758


Epoch 1/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00011, Updated Learning Rate: 0.006628305530250007


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.910


Epoch 2/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00007, Updated Learning Rate: 0.0036686353579634587


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.946


Epoch 3/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00005, Updated Learning Rate: 0.0010413703981472506


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00008, Accuracy: 0.958


Epoch 4/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00005, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-10 23:55:13,177] Trial 32 finished with value: 7.672419399023056e-05 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1832, 'dropout_prob': 0.07985584476619423, 'label_smoothing': 0.0004311194470224544, 'lr': 0.008730687529148204, 'warmup_proportion': 0.0993370192538024, 'weight_decay': 7.09143369245701e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00008, Accuracy: 0.960
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1548, 'dropout_prob': 0.07647214091928793, 'label_smoothing': 0.03544435836714206, 'lr': 0.008694217893494775, 'warmup_proportion': 0.0974848667225722, 'weight_decay': 5.426841681649178e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00038, Updated Learning Rate: 0.008466660525609682


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00083, Accuracy: 0.662


Epoch 1/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.006588277731811744


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00033, Accuracy: 0.938


Epoch 2/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.003643824982110209


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.952


Epoch 3/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.0010339074580306639


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.957


Epoch 4/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-10 23:57:26,360] Trial 33 finished with value: 0.00027399299442768096 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1548, 'dropout_prob': 0.07647214091928793, 'label_smoothing': 0.03544435836714206, 'lr': 0.008694217893494775, 'warmup_proportion': 0.0974848667225722, 'weight_decay': 5.426841681649178e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 264, 'dropout_prob': 0.23296697356891147, 'label_smoothing': 0.05313215822264636, 'lr': 0.010847393682350926, 'warmup_proportion': 0.09953744407804783, 'weight_decay': 3.498010111471705e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/228 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00126, Updated Learning Rate: 0.010520304740207887


Epoch 0/Testing:   0%|          | 0/38 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00197, Accuracy: 0.947


Epoch 1/Training:   0%|          | 0/228 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00065, Updated Learning Rate: 0.008135545261763195


Epoch 1/Testing:   0%|          | 0/38 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00161, Accuracy: 0.977


Epoch 2/Training:   0%|          | 0/228 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00059, Updated Learning Rate: 0.004481881768487458


Epoch 2/Testing:   0%|          | 0/38 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00159, Accuracy: 0.977


Epoch 3/Training:   0%|          | 0/228 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00056, Updated Learning Rate: 0.0012689040148310458


Epoch 3/Testing:   0%|          | 0/38 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00152, Accuracy: 0.981


Epoch 4/Training:   0%|          | 0/228 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00055, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/38 [00:00<?, ?batch/s]

[I 2024-03-10 23:59:42,962] Trial 34 finished with value: 0.0015111255675554274 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 264, 'dropout_prob': 0.23296697356891147, 'label_smoothing': 0.05313215822264636, 'lr': 0.010847393682350926, 'warmup_proportion': 0.09953744407804783, 'weight_decay': 3.498010111471705e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00151, Accuracy: 0.981
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1781, 'dropout_prob': 0.10166356875940774, 'label_smoothing': 0.10803373833328106, 'lr': 0.007007906757396097, 'warmup_proportion': 0.08724099897949009, 'weight_decay': 1.1230029514109688e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00050, Updated Learning Rate: 0.006774321035049212


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00056, Accuracy: 0.883


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00028, Updated Learning Rate: 0.005214500045280918


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00048, Accuracy: 0.944


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00024, Updated Learning Rate: 0.00286423723811967


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00044, Accuracy: 0.961


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00023, Updated Learning Rate: 0.0008095865873835523


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00043, Accuracy: 0.966


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00023, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:02:02,063] Trial 35 finished with value: 0.00043155853748321534 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1781, 'dropout_prob': 0.10166356875940774, 'label_smoothing': 0.10803373833328106, 'lr': 0.007007906757396097, 'warmup_proportion': 0.08724099897949009, 'weight_decay': 1.1230029514109688e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] Loss: 0.00043, Accuracy: 0.968
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1426, 'dropout_prob': 0.08395748942188344, 'label_smoothing': 0.01615457580074229, 'lr': 0.002306260145502806, 'warmup_proportion': 0.0297121833508251, 'weight_decay': 3.3762365241971748e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00055, Updated Learning Rate: 0.0021562452712540607


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.13175, Accuracy: 0.088


Epoch 1/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00055, Updated Learning Rate: 0.0016006504590575811


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 00:03:07,551] Trial 36 finished with value: 11.78404296875 and parameters: {'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1426, 'dropout_prob': 0.08395748942188344, 'label_smoothing': 0.01615457580074229, 'lr': 0.002306260145502806, 'warmup_proportion': 0.0297121833508251, 'weight_decay': 3.3762365241971748e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 1] Loss: 11.78404, Accuracy: 0.088
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1802, 'dropout_prob': 0.16059651826575091, 'label_smoothing': 0.01557981237452252, 'lr': 0.01999277302338321, 'warmup_proportion': 0.07495341872655466, 'weight_decay': 5.194808328343723e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00036, Updated Learning Rate: 0.019192856613316222


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.890


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00012, Updated Learning Rate: 0.01464193896038047


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.959


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.007996852364437004


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.970


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0022531359193882494


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.970


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:05:27,844] Trial 37 finished with value: 0.00013149097561836241 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1802, 'dropout_prob': 0.16059651826575091, 'label_smoothing': 0.01557981237452252, 'lr': 0.01999277302338321, 'warmup_proportion': 0.07495341872655466, 'weight_decay': 5.194808328343723e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.972
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1582, 'dropout_prob': 0.30096110826918593, 'label_smoothing': 0.06437037250910484, 'lr': 0.03679458845079387, 'warmup_proportion': 0.09206875423876412, 'weight_decay': 6.42267565924399e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00031, Updated Learning Rate: 0.03568509585142964


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00060, Accuracy: 0.894


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.027595941338095405


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.951


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.015202637609154421


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00037, Accuracy: 0.961


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00012, Updated Learning Rate: 0.004304149215606746


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00036, Accuracy: 0.967


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00012, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 00:07:43,364] Trial 38 finished with value: 0.000354938668012619 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1582, 'dropout_prob': 0.30096110826918593, 'label_smoothing': 0.06437037250910484, 'lr': 0.03679458845079387, 'warmup_proportion': 0.09206875423876412, 'weight_decay': 6.42267565924399e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00035, Accuracy: 0.969
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 1187, 'dropout_prob': 0.026865211857513366, 'label_smoothing': 0.03506366320822358, 'lr': 0.012377821684166056, 'warmup_proportion': 0.0698320698345305, 'weight_decay': 1.8792414435923372e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/51 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00098, Updated Learning Rate: 0.011805814196892668


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.65865, Accuracy: 0.091


Epoch 1/Training:   0%|          | 0/51 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00098, Updated Learning Rate: 0.008938716378801113


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 00:08:46,611] Trial 39 finished with value: 99.87796640625 and parameters: {'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 1187, 'dropout_prob': 0.026865211857513366, 'label_smoothing': 0.03506366320822358, 'lr': 0.012377821684166056, 'warmup_proportion': 0.0698320698345305, 'weight_decay': 1.8792414435923372e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 1] Loss: 99.87797, Accuracy: 0.090
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1869, 'dropout_prob': 0.3807695268299693, 'label_smoothing': 0.04888299119121421, 'lr': 0.008039197764394468, 'warmup_proportion': 0.0841086859038431, 'weight_decay': 0.00010763741035381019}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.007756927405566921


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00058, Accuracy: 0.766


Epoch 1/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.005956055412110898


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00038, Accuracy: 0.899


Epoch 2/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.003266401159543807


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.955


Epoch 3/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0009224447128175059


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.958


Epoch 4/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:11:07,490] Trial 40 finished with value: 0.00028276586532592775 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1869, 'dropout_prob': 0.3807695268299693, 'label_smoothing': 0.04888299119121421, 'lr': 0.008039197764394468, 'warmup_proportion': 0.0841086859038431, 'weight_decay': 0.00010763741035381019}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.957
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1994, 'dropout_prob': 0.06300913813620652, 'label_smoothing': 0.0015262399516501582, 'lr': 0.0042939261444058475, 'warmup_proportion': 0.09303702943224616, 'weight_decay': 6.740337290041167e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.004164448428251854


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00053, Accuracy: 0.748


Epoch 1/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.0032204446083043854


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.870


Epoch 2/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.001774146847196692


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.924


Epoch 3/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0005022939411602251


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.939


Epoch 4/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:13:26,963] Trial 41 finished with value: 0.00012302398681640625 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1994, 'dropout_prob': 0.06300913813620652, 'label_smoothing': 0.0015262399516501582, 'lr': 0.0042939261444058475, 'warmup_proportion': 0.09303702943224616, 'weight_decay': 6.740337290041167e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.942
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2024, 'dropout_prob': 0.023551620618388114, 'label_smoothing': 0.01261610623746187, 'lr': 0.005442453785293932, 'warmup_proportion': 0.0937276168480096, 'weight_decay': 8.680970962764236e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00033, Updated Learning Rate: 0.005278343723151488


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.760


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.004081840338970449


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.852


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.002248690801720577


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.934


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0006366461530688342


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.943


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-11 00:15:45,459] Trial 42 finished with value: 0.0001395293802022934 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2024, 'dropout_prob': 0.023551620618388114, 'label_smoothing': 0.01261610623746187, 'lr': 0.005442453785293932, 'warmup_proportion': 0.0937276168480096, 'weight_decay': 8.680970962764236e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.947
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1617, 'dropout_prob': 7.066229373137167e-05, 'label_smoothing': 0.0013270250577958771, 'lr': 0.004101979102293477, 'warmup_proportion': 0.0870300233605791, 'weight_decay': 4.549711062958117e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00041, Updated Learning Rate: 0.003978289297668331


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00050, Accuracy: 0.812


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.0030764843267201076


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.885


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.0016948389531761924


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.926


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.00047984040259569207


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.940


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 00:18:03,804] Trial 43 finished with value: 0.00013843236565589905 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1617, 'dropout_prob': 7.066229373137167e-05, 'label_smoothing': 0.0013270250577958771, 'lr': 0.004101979102293477, 'warmup_proportion': 0.0870300233605791, 'weight_decay': 4.549711062958117e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.945
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1858, 'dropout_prob': 0.04617589712207457, 'label_smoothing': 0.031571706419335266, 'lr': 0.009926887702573158, 'warmup_proportion': 0.09922103951289431, 'weight_decay': 2.142312975023968e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00032, Updated Learning Rate: 0.009674028763314874


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00045, Accuracy: 0.816


Epoch 1/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00013, Updated Learning Rate: 0.007536456257020114


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00034, Accuracy: 0.896


Epoch 2/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.004171278733617169


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.949


Epoch 3/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00009, Updated Learning Rate: 0.0011840495911322835


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.960


Epoch 4/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:20:31,478] Trial 44 finished with value: 0.0002163852334022522 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1858, 'dropout_prob': 0.04617589712207457, 'label_smoothing': 0.031571706419335266, 'lr': 0.009926887702573158, 'warmup_proportion': 0.09922103951289431, 'weight_decay': 2.142312975023968e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.963
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1325, 'dropout_prob': 0.3478812348423442, 'label_smoothing': 0.011990221435607917, 'lr': 0.0047353062484082, 'warmup_proportion': 0.07934852987075283, 'weight_decay': 1.0603360840417003e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00042, Updated Learning Rate: 0.004539291327768795


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00046, Accuracy: 0.839


Epoch 1/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.0034569275481253397


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.911


Epoch 2/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00011, Updated Learning Rate: 0.001885939859261922


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.944


Epoch 3/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00009, Updated Learning Rate: 0.000531037863287386


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.955


Epoch 4/Training:   0%|          | 0/46 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00009, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 00:23:00,941] Trial 45 finished with value: 0.00019673039466142655 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1325, 'dropout_prob': 0.3478812348423442, 'label_smoothing': 0.011990221435607917, 'lr': 0.0047353062484082, 'warmup_proportion': 0.07934852987075283, 'weight_decay': 1.0603360840417003e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 379, 'dropout_prob': 0.12532478127071403, 'label_smoothing': 0.023207273543586954, 'lr': 0.0027047054771084504, 'warmup_proportion': 0.09530365432486898, 'weight_decay': 3.7370275617275377e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/159 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00127, Updated Learning Rate: 0.0026204247964557964


Epoch 0/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00117, Accuracy: 0.917


Epoch 1/Training:   0%|          | 0/159 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00043, Updated Learning Rate: 0.0020233910508486476


Epoch 1/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00088, Accuracy: 0.956


Epoch 2/Training:   0%|          | 0/159 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00035, Updated Learning Rate: 0.0011136298769680022


Epoch 2/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00079, Accuracy: 0.966


Epoch 3/Training:   0%|          | 0/159 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00032, Updated Learning Rate: 0.0003151219433363187


Epoch 3/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00078, Accuracy: 0.966


Epoch 4/Training:   0%|          | 0/159 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00031, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/27 [00:00<?, ?batch/s]

[I 2024-03-11 00:26:15,167] Trial 46 finished with value: 0.0007615103632211685 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 379, 'dropout_prob': 0.12532478127071403, 'label_smoothing': 0.023207273543586954, 'lr': 0.0027047054771084504, 'warmup_proportion': 0.09530365432486898, 'weight_decay': 3.7370275617275377e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00076, Accuracy: 0.969
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1456, 'dropout_prob': 0.09776517374201914, 'label_smoothing': 0.12424701195091421, 'lr': 0.0014485607060082983, 'warmup_proportion': 9.547112115332324e-05, 'weight_decay': 7.479046812158262e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00048, Updated Learning Rate: 0.0013253499652461545


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00094, Accuracy: 0.710


Epoch 1/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00033, Updated Learning Rate: 0.0009668270767212436


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00074, Accuracy: 0.854


Epoch 2/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00028, Updated Learning Rate: 0.000513046929023582


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00068, Accuracy: 0.893


Epoch 3/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00026, Updated Learning Rate: 0.00014222612501729937


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00066, Accuracy: 0.908


Epoch 4/Training:   0%|          | 0/42 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00026, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00066, Accuracy: 0.909


[I 2024-03-11 00:29:46,076] Trial 47 finished with value: 0.0006585891544818878 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1456, 'dropout_prob': 0.09776517374201914, 'label_smoothing': 0.12424701195091421, 'lr': 0.0014485607060082983, 'warmup_proportion': 9.547112115332324e-05, 'weight_decay': 7.479046812158262e-06}. Best is trial 32 with value: 7.672419399023056e-05.


{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1697, 'dropout_prob': 0.07082147003631134, 'label_smoothing': 0.1821572554676943, 'lr': 0.00590311909569932, 'warmup_proportion': 0.08903678862653412, 'weight_decay': 0.0005623841211419445}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00098, Updated Learning Rate: 0.005707417402432405


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00075, Accuracy: 0.860


Epoch 1/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00066, Updated Learning Rate: 0.004394374618739986


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00061, Accuracy: 0.957


Epoch 2/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00063, Updated Learning Rate: 0.0024141466705884674


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00059, Accuracy: 0.968


Epoch 3/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00062, Updated Learning Rate: 0.0006824286708925685


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00059, Accuracy: 0.968


Epoch 4/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00061, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00059, Accuracy: 0.969


[I 2024-03-11 00:33:36,409] Trial 48 finished with value: 0.000585165399312973 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1697, 'dropout_prob': 0.07082147003631134, 'label_smoothing': 0.1821572554676943, 'lr': 0.00590311909569932, 'warmup_proportion': 0.08903678862653412, 'weight_decay': 0.0005623841211419445}. Best is trial 32 with value: 7.672419399023056e-05.


{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 968, 'dropout_prob': 0.020115059950453644, 'label_smoothing': 0.009697344358271373, 'lr': 0.003738260280127671, 'warmup_proportion': 0.008456759014071169, 'weight_decay': 1.2713170342649703e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/62 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00046, Updated Learning Rate: 0.00340868886407908


Epoch 0/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00047, Accuracy: 0.888


Epoch 1/Training:   0%|          | 0/62 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00017, Updated Learning Rate: 0.0024804627281497216


Epoch 1/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00034, Accuracy: 0.928


Epoch 2/Training:   0%|          | 0/62 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0013141473173386906


Epoch 2/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.953


Epoch 3/Training:   0%|          | 0/62 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.00036397523081556696


Epoch 3/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.956


Epoch 4/Training:   0%|          | 0/62 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[I 2024-03-11 00:37:03,662] Trial 49 finished with value: 0.0002470990613102913 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 968, 'dropout_prob': 0.020115059950453644, 'label_smoothing': 0.009697344358271373, 'lr': 0.003738260280127671, 'warmup_proportion': 0.008456759014071169, 'weight_decay': 1.2713170342649703e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.957
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2048, 'dropout_prob': 0.2194195323254399, 'label_smoothing': 0.07100021036648316, 'lr': 0.013326781784477762, 'warmup_proportion': 0.07443025547725267, 'weight_decay': 2.3754504934027695e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00029, Updated Learning Rate: 0.012775128324948887


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00059, Accuracy: 0.697


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.009728984074494178


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00034, Accuracy: 0.940


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.005307683947892726


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.947


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0014945233427521838


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.959


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-11 00:40:20,251] Trial 50 finished with value: 0.0002853821396827698 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2048, 'dropout_prob': 0.2194195323254399, 'label_smoothing': 0.07100021036648316, 'lr': 0.013326781784477762, 'warmup_proportion': 0.07443025547725267, 'weight_decay': 2.3754504934027695e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00029, Accuracy: 0.962
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1910, 'dropout_prob': 0.011244867264315426, 'label_smoothing': 0.0006663369310054537, 'lr': 0.00455823515995647, 'warmup_proportion': 0.09907346973852363, 'weight_decay': 8.197211303389555e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.00442078755178722


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00049, Accuracy: 0.757


Epoch 1/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.0034186763699673526


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.876


Epoch 2/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.00188335296552635


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.919


Epoch 3/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0005332122226211353


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.935


Epoch 4/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:43:16,631] Trial 51 finished with value: 0.0001220807082951069 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1910, 'dropout_prob': 0.011244867264315426, 'label_smoothing': 0.0006663369310054537, 'lr': 0.00455823515995647, 'warmup_proportion': 0.09907346973852363, 'weight_decay': 8.197211303389555e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.937
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1531, 'dropout_prob': 0.056588318406691596, 'label_smoothing': 0.010387616273347925, 'lr': 0.005124348014480555, 'warmup_proportion': 0.09488668384971018, 'weight_decay': 3.8123538957438616e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00041, Updated Learning Rate: 0.004990226330323675


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00042, Accuracy: 0.824


Epoch 1/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.003883112699431709


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.914


Epoch 2/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.0021476603808333475


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.949


Epoch 3/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0006093822002874161


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.959


Epoch 4/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 00:46:13,275] Trial 52 finished with value: 0.00015829007774591445 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1531, 'dropout_prob': 0.056588318406691596, 'label_smoothing': 0.010387616273347925, 'lr': 0.005124348014480555, 'warmup_proportion': 0.09488668384971018, 'weight_decay': 3.8123538957438616e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.959
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1871, 'dropout_prob': 0.03663925950208341, 'label_smoothing': 0.03999437996109294, 'lr': 0.006910177816613813, 'warmup_proportion': 0.090355641954154, 'weight_decay': 7.88921137557113e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.006667549331903977


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00084, Accuracy: 0.610


Epoch 1/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.005119590684231843


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00039, Accuracy: 0.881


Epoch 2/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.0028076698066578022


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.950


Epoch 3/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.0007928971494886908


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.956


Epoch 4/Training:   0%|          | 0/33 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:49:20,285] Trial 53 finished with value: 0.00025623578429222107 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1871, 'dropout_prob': 0.03663925950208341, 'label_smoothing': 0.03999437996109294, 'lr': 0.006910177816613813, 'warmup_proportion': 0.090355641954154, 'weight_decay': 7.88921137557113e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.957
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1685, 'dropout_prob': 0.013904941686217556, 'label_smoothing': 0.02435521708040058, 'lr': 0.0028505332450974937, 'warmup_proportion': 0.09604114467978811, 'weight_decay': 2.8687222661720064e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00042, Updated Learning Rate: 0.002764579150410259


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00061, Accuracy: 0.715


Epoch 1/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.0021378999338231204


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.880


Epoch 2/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00014, Updated Learning Rate: 0.0011777716708536567


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.926


Epoch 3/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00012, Updated Learning Rate: 0.0003334490463823251


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.938


Epoch 4/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 00:52:25,480] Trial 54 finished with value: 0.00022504575848579405 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1685, 'dropout_prob': 0.013904941686217556, 'label_smoothing': 0.02435521708040058, 'lr': 0.0028505332450974937, 'warmup_proportion': 0.09604114467978811, 'weight_decay': 2.8687222661720064e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.941
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1156, 'dropout_prob': 0.08782180972457004, 'label_smoothing': 0.009620404991221522, 'lr': 0.009128015822471544, 'warmup_proportion': 0.08259579236844573, 'weight_decay': 1.287457297760135e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/52 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00067, Updated Learning Rate: 0.008823763177095587


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 1.01059, Accuracy: 0.109


Epoch 1/Training:   0%|          | 0/52 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00066, Updated Learning Rate: 0.006792047977717192


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 00:53:50,428] Trial 55 finished with value: 157.5256296875 and parameters: {'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 1156, 'dropout_prob': 0.08782180972457004, 'label_smoothing': 0.009620404991221522, 'lr': 0.009128015822471544, 'warmup_proportion': 0.08259579236844573, 'weight_decay': 1.287457297760135e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 1] Loss: 157.52563, Accuracy: 0.109
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2045, 'dropout_prob': 0.05196682931493239, 'label_smoothing': 0.030693617693221605, 'lr': 0.0031330422076305464, 'warmup_proportion': 0.08724502881929494, 'weight_decay': 1.607234374958887e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00036, Updated Learning Rate: 0.0030385694253758813


Epoch 0/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00053, Accuracy: 0.726


Epoch 1/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00020, Updated Learning Rate: 0.0023497816557229096


Epoch 1/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.876


Epoch 2/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0012944975688609628


Epoch 2/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.903


Epoch 3/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0003664963172089757


Epoch 3/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.925


Epoch 4/Training:   0%|          | 0/30 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/5 [00:00<?, ?batch/s]

[I 2024-03-11 00:56:39,241] Trial 56 finished with value: 0.0002262932389974594 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 2045, 'dropout_prob': 0.05196682931493239, 'label_smoothing': 0.030693617693221605, 'lr': 0.0031330422076305464, 'warmup_proportion': 0.08724502881929494, 'weight_decay': 1.607234374958887e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.929
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1507, 'dropout_prob': 0.2765682978738866, 'label_smoothing': 0.14089553425116288, 'lr': 0.01104412946332945, 'warmup_proportion': 0.09041731837318381, 'weight_decay': 6.227068425213167e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00038, Updated Learning Rate: 0.010664963390497722


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.880


Epoch 1/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00024, Updated Learning Rate: 0.0081977860034261


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00065, Accuracy: 0.939


Epoch 2/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00022, Updated Learning Rate: 0.004498888403208114


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00061, Accuracy: 0.955


Epoch 3/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00021, Updated Learning Rate: 0.0012709916425294745


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00060, Accuracy: 0.959


Epoch 4/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00020, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 00:59:04,654] Trial 57 finished with value: 0.0005987521529197693 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1507, 'dropout_prob': 0.2765682978738866, 'label_smoothing': 0.14089553425116288, 'lr': 0.01104412946332945, 'warmup_proportion': 0.09041731837318381, 'weight_decay': 6.227068425213167e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00060, Accuracy: 0.962
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1702, 'dropout_prob': 0.10959292744083032, 'label_smoothing': 0.019128781297432347, 'lr': 0.00234632539556916, 'warmup_proportion': 0.09946743538298781, 'weight_decay': 2.018304379925148e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00061, Updated Learning Rate: 0.0022755750278740387


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00053, Accuracy: 0.739


Epoch 1/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00026, Updated Learning Rate: 0.00175974404667687


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.896


Epoch 2/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00017, Updated Learning Rate: 0.000969445133207468


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.935


Epoch 3/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00015, Updated Learning Rate: 0.0002744679322722333


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.944


Epoch 4/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00014, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:01:37,337] Trial 58 finished with value: 0.00019985833466053008 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1702, 'dropout_prob': 0.10959292744083032, 'label_smoothing': 0.019128781297432347, 'lr': 0.00234632539556916, 'warmup_proportion': 0.09946743538298781, 'weight_decay': 2.018304379925148e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.945
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1351, 'dropout_prob': 0.07246236468130336, 'label_smoothing': 0.04429345289365813, 'lr': 0.007724008852157527, 'warmup_proportion': 0.032653827346873505, 'weight_decay': 4.250467799400843e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00040, Updated Learning Rate: 0.0072065983685909146


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00063, Accuracy: 0.820


Epoch 1/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.005339929535659754


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.940


Epoch 2/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00014, Updated Learning Rate: 0.0028624441283393487


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00036, Accuracy: 0.956


Epoch 3/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00013, Updated Learning Rate: 0.0007980703144296423


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00035, Accuracy: 0.957


Epoch 4/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00013, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 01:04:23,693] Trial 59 finished with value: 0.0003443235069513321 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1351, 'dropout_prob': 0.07246236468130336, 'label_smoothing': 0.04429345289365813, 'lr': 0.007724008852157527, 'warmup_proportion': 0.032653827346873505, 'weight_decay': 4.250467799400843e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00034, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 798, 'dropout_prob': 0.039162771049601114, 'label_smoothing': 0.09652444906963403, 'lr': 0.006144901520183269, 'warmup_proportion': 0.05173860508328992, 'weight_decay': 3.0318844957758715e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/76 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00097, Updated Learning Rate: 0.0057987963175680874


Epoch 0/Testing:   0%|          | 0/13 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 10.58357, Accuracy: 0.055


Epoch 1/Training:   0%|          | 0/76 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00097, Updated Learning Rate: 0.004342024854334814


Epoch 1/Testing:   0%|          | 0/13 [00:00<?, ?batch/s]

[I 2024-03-11 01:05:50,931] Trial 60 finished with value: 358.36845625 and parameters: {'accumulation_steps': 3, 'batchnorm': True, 'batch_size': 798, 'dropout_prob': 0.039162771049601114, 'label_smoothing': 0.09652444906963403, 'lr': 0.006144901520183269, 'warmup_proportion': 0.05173860508328992, 'weight_decay': 3.0318844957758715e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 1] Loss: 358.36846, Accuracy: 0.055
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1908, 'dropout_prob': 0.012608585144554835, 'label_smoothing': 0.0025294074673229416, 'lr': 0.0052701089239719475, 'warmup_proportion': 0.09681524166760228, 'weight_decay': 8.858445654611237e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00035, Updated Learning Rate: 0.0051111956952831755


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00059, Accuracy: 0.684


Epoch 1/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.003952581692978961


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.891


Epoch 2/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.002177482056608996


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.931


Epoch 3/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0006164856340657506


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.944


Epoch 4/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00006, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:08:34,872] Trial 61 finished with value: 0.00012276450395584107 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1908, 'dropout_prob': 0.012608585144554835, 'label_smoothing': 0.0025294074673229416, 'lr': 0.0052701089239719475, 'warmup_proportion': 0.09681524166760228, 'weight_decay': 8.858445654611237e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.946
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1951, 'dropout_prob': 0.0027115179379815224, 'label_smoothing': 0.000477595046984889, 'lr': 0.004023982723839898, 'warmup_proportion': 0.09658910194567755, 'weight_decay': 6.046506885341318e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.0039026447978011154


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00047, Accuracy: 0.775


Epoch 1/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.0030179870428799238


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.899


Epoch 2/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.0016626127284409447


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.934


Epoch 3/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.00047071655951778777


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.942


Epoch 4/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:11:15,263] Trial 62 finished with value: 0.00011712319999933243 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1951, 'dropout_prob': 0.0027115179379815224, 'label_smoothing': 0.000477595046984889, 'lr': 0.004023982723839898, 'warmup_proportion': 0.09658910194567755, 'weight_decay': 6.046506885341318e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.943
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 637, 'dropout_prob': 0.030826453233809826, 'label_smoothing': 0.028211290714924897, 'lr': 0.0037588123069648774, 'warmup_proportion': 0.092119320736955, 'weight_decay': 5.88969001213547e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/95 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00076, Updated Learning Rate: 0.0036389717304675957


Epoch 0/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00077, Accuracy: 0.913


Epoch 1/Training:   0%|          | 0/95 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00028, Updated Learning Rate: 0.0028069085353067255


Epoch 1/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00057, Accuracy: 0.959


Epoch 2/Training:   0%|          | 0/95 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00023, Updated Learning Rate: 0.0015438252266511646


Epoch 2/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00051, Accuracy: 0.969


Epoch 3/Training:   0%|          | 0/95 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00021, Updated Learning Rate: 0.00043668990476516365


Epoch 3/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00051, Accuracy: 0.967


Epoch 4/Training:   0%|          | 0/95 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00020, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/16 [00:00<?, ?batch/s]

[I 2024-03-11 01:13:49,050] Trial 63 finished with value: 0.0005039022266864776 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 637, 'dropout_prob': 0.030826453233809826, 'label_smoothing': 0.028211290714924897, 'lr': 0.0037588123069648774, 'warmup_proportion': 0.092119320736955, 'weight_decay': 5.88969001213547e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00050, Accuracy: 0.969
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1800, 'dropout_prob': 0.0033103471174727754, 'label_smoothing': 0.007991713259979227, 'lr': 0.0042001047633916455, 'warmup_proportion': 0.095778733744986, 'weight_decay': 1.1783698450478562e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00037, Updated Learning Rate: 0.0040931191635676855


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00048, Accuracy: 0.752


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00017, Updated Learning Rate: 0.003188703929429756


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.893


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.0017648842420125714


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.927


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0005009759832900641


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.940


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.943


[I 2024-03-11 01:16:42,567] Trial 64 finished with value: 0.00015349487066268922 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1800, 'dropout_prob': 0.0033103471174727754, 'label_smoothing': 0.007991713259979227, 'lr': 0.0042001047633916455, 'warmup_proportion': 0.095778733744986, 'weight_decay': 1.1783698450478562e-05}. Best is trial 32 with value: 7.672419399023056e-05.


{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1654, 'dropout_prob': 0.024467625090603715, 'label_smoothing': 0.018970775969641546, 'lr': 0.0018394782514119623, 'warmup_proportion': 0.08603705445054774, 'weight_decay': 1.2310562074035852e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00043, Updated Learning Rate: 0.0017840111951799747


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00084, Accuracy: 0.674


Epoch 1/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00027, Updated Learning Rate: 0.0013796086885589716


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00048, Accuracy: 0.845


Epoch 2/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00019, Updated Learning Rate: 0.000760028102598162


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00037, Accuracy: 0.885


Epoch 3/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00016, Updated Learning Rate: 0.0002151780793398069


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00033, Accuracy: 0.902


Epoch 4/Training:   0%|          | 0/37 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00015, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 01:19:16,788] Trial 65 finished with value: 0.0003228820353746414 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1654, 'dropout_prob': 0.024467625090603715, 'label_smoothing': 0.018970775969641546, 'lr': 0.0018394782514119623, 'warmup_proportion': 0.08603705445054774, 'weight_decay': 1.2310562074035852e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.903
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1893, 'dropout_prob': 0.05285432067544883, 'label_smoothing': 0.008691567191083804, 'lr': 0.006503340994859891, 'warmup_proportion': 0.09019985121159034, 'weight_decay': 1.5346723340406505e-05}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00034, Updated Learning Rate: 0.00630724126909211


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00063, Accuracy: 0.645


Epoch 1/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00014, Updated Learning Rate: 0.004877505746144918


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.878


Epoch 2/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.002687023841177915


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.930


Epoch 3/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0007607463820198131


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.946


Epoch 4/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:21:57,539] Trial 66 finished with value: 0.0001488666906952858 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1893, 'dropout_prob': 0.05285432067544883, 'label_smoothing': 0.008691567191083804, 'lr': 0.006503340994859891, 'warmup_proportion': 0.09019985121159034, 'weight_decay': 1.5346723340406505e-05}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.947
{'epochs': 5, 'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1784, 'dropout_prob': 0.079996902208114, 'label_smoothing': 0.01655763892667384, 'lr': 0.0030763152399638977, 'warmup_proportion': 0.07811644606779555, 'weight_decay': 4.9313713889771346e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00041, Updated Learning Rate: 0.0029683004066310015


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00055, Accuracy: 0.762


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.0022791707047558024


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00037, Accuracy: 0.833


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0012499356231096819


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.904


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0003529868043093675


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.932


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:24:39,123] Trial 67 finished with value: 0.00020747385025024414 and parameters: {'accumulation_steps': 3, 'batchnorm': False, 'batch_size': 1784, 'dropout_prob': 0.079996902208114, 'label_smoothing': 0.01655763892667384, 'lr': 0.0030763152399638977, 'warmup_proportion': 0.07811644606779555, 'weight_decay': 4.9313713889771346e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.934
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1930, 'dropout_prob': 0.06617749925302711, 'label_smoothing': 0.0006590912614831109, 'lr': 0.004851705448029092, 'warmup_proportion': 0.0994046091609695, 'weight_decay': 2.8768462739162755e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00046, Updated Learning Rate: 0.00470540862788441


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00052, Accuracy: 0.706


Epoch 1/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00017, Updated Learning Rate: 0.003638779086021819


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.911


Epoch 2/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.002004607819201061


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.949


Epoch 3/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00007, Updated Learning Rate: 0.0005675417249581675


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.956


Epoch 4/Training:   0%|          | 0/32 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00009, Accuracy: 0.956


[I 2024-03-11 01:27:20,984] Trial 68 finished with value: 9.192864671349526e-05 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1930, 'dropout_prob': 0.06617749925302711, 'label_smoothing': 0.0006590912614831109, 'lr': 0.004851705448029092, 'warmup_proportion': 0.0994046091609695, 'weight_decay': 2.8768462739162755e-06}. Best is trial 32 with value: 7.672419399023056e-05.


{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1579, 'dropout_prob': 0.13009080475648893, 'label_smoothing': 0.037833809788747036, 'lr': 0.00859070775167085, 'warmup_proportion': 0.09983366211202707, 'weight_decay': 1.7629226701210268e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00047, Updated Learning Rate: 0.008355252895502966


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00044, Accuracy: 0.894


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00019, Updated Learning Rate: 0.006488695550587501


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.950


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00015, Updated Learning Rate: 0.003584241392108344


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.965


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00014, Updated Learning Rate: 0.0010162881032573206


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.972


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00014, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 01:29:58,767] Trial 69 finished with value: 0.00025559661388397216 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1579, 'dropout_prob': 0.13009080475648893, 'label_smoothing': 0.037833809788747036, 'lr': 0.00859070775167085, 'warmup_proportion': 0.09983366211202707, 'weight_decay': 1.7629226701210268e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.971
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 472, 'dropout_prob': 0.09513497578138688, 'label_smoothing': 0.025634104988118273, 'lr': 0.00497057921772258, 'warmup_proportion': 0.09246544247975522, 'weight_decay': 2.5797672441499613e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/128 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00246, Updated Learning Rate: 0.004803895484118803


Epoch 0/Testing:   0%|          | 0/22 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 437.32650, Accuracy: 0.057


Epoch 1/Training:   0%|          | 0/128 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00245, Updated Learning Rate: 0.0036967174775946144


Epoch 1/Testing:   0%|          | 0/22 [00:00<?, ?batch/s]

[I 2024-03-11 01:31:09,757] Trial 70 finished with value: 499.1525765625 and parameters: {'accumulation_steps': 2, 'batchnorm': True, 'batch_size': 472, 'dropout_prob': 0.09513497578138688, 'label_smoothing': 0.025634104988118273, 'lr': 0.00497057921772258, 'warmup_proportion': 0.09246544247975522, 'weight_decay': 2.5797672441499613e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 1] Loss: 499.15258, Accuracy: 0.057
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1942, 'dropout_prob': 0.061320308395246374, 'label_smoothing': 0.005485139548162462, 'lr': 0.003817606155518297, 'warmup_proportion': 0.09569847139182544, 'weight_decay': 3.043452993656249e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00046, Updated Learning Rate: 0.003715705992208584


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00035, Accuracy: 0.853


Epoch 1/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.0028889241986901756


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.908


Epoch 2/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00011, Updated Learning Rate: 0.0015969478666837682


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.948


Epoch 3/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00009, Updated Learning Rate: 0.00045298726242076667


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.957


Epoch 4/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00008, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:34:26,906] Trial 71 finished with value: 0.000112393319606781 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1942, 'dropout_prob': 0.061320308395246374, 'label_smoothing': 0.005485139548162462, 'lr': 0.003817606155518297, 'warmup_proportion': 0.09569847139182544, 'weight_decay': 3.043452993656249e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.958
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1694, 'dropout_prob': 0.06302729759580003, 'label_smoothing': 0.008858948102414693, 'lr': 0.005827351205432264, 'warmup_proportion': 0.09649740402444897, 'weight_decay': 3.231662262492706e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00047, Updated Learning Rate: 0.005651635065952415


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00039, Accuracy: 0.794


Epoch 1/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.004370513404074198


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.945


Epoch 2/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00011, Updated Learning Rate: 0.0024077211439918807


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.954


Epoch 3/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00009, Updated Learning Rate: 0.0006816705982040993


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.962


Epoch 4/Training:   0%|          | 0/36 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00009, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:37:43,205] Trial 72 finished with value: 0.00011883199065923691 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1694, 'dropout_prob': 0.06302729759580003, 'label_smoothing': 0.008858948102414693, 'lr': 0.005827351205432264, 'warmup_proportion': 0.09649740402444897, 'weight_decay': 3.231662262492706e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.965
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1956, 'dropout_prob': 0.06849107920029406, 'label_smoothing': 0.015629455811888, 'lr': 0.003529193382400453, 'warmup_proportion': 0.08127852216203038, 'weight_decay': 4.045510746461231e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00047, Updated Learning Rate: 0.003410034101843251


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00041, Accuracy: 0.824


Epoch 1/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.0026232573440025126


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00026, Accuracy: 0.902


Epoch 2/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0014403529000514695


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.943


Epoch 3/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0004070325115759073


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.952


Epoch 4/Training:   0%|          | 0/31 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.953


[I 2024-03-11 01:40:27,655] Trial 73 finished with value: 0.0001704613596200943 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1956, 'dropout_prob': 0.06849107920029406, 'label_smoothing': 0.015629455811888, 'lr': 0.003529193382400453, 'warmup_proportion': 0.08127852216203038, 'weight_decay': 4.045510746461231e-06}. Best is trial 32 with value: 7.672419399023056e-05.


{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1769, 'dropout_prob': 0.10661053906196688, 'label_smoothing': 0.005536829671775631, 'lr': 0.007440382344339986, 'warmup_proportion': 0.09392746585129103, 'weight_decay': 2.1655362852746594e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00043, Updated Learning Rate: 0.007192381458397058


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00037, Accuracy: 0.816


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00014, Updated Learning Rate: 0.0055362999843742485


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.939


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00009, Updated Learning Rate: 0.003040996536378743


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.958


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0008595482159670207


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.962


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:43:38,828] Trial 74 finished with value: 9.842046275734901e-05 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1769, 'dropout_prob': 0.10661053906196688, 'label_smoothing': 0.005536829671775631, 'lr': 0.007440382344339986, 'warmup_proportion': 0.09392746585129103, 'weight_decay': 2.1655362852746594e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.963
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1751, 'dropout_prob': 0.1128891922628885, 'label_smoothing': 0.019951781839721707, 'lr': 0.007049327095982044, 'warmup_proportion': 0.09994578514027992, 'weight_decay': 1.5452988117834795e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/35 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00050, Updated Learning Rate: 0.006858351102874662


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00083, Accuracy: 0.614


Epoch 1/Training:   0%|          | 0/35 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.005328886121278733


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.898


Epoch 2/Training:   0%|          | 0/35 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.0029445229954320018


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.953


Epoch 3/Training:   0%|          | 0/35 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0008350489487544695


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.961


Epoch 4/Training:   0%|          | 0/35 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:46:27,999] Trial 75 finished with value: 0.00017520414441823959 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1751, 'dropout_prob': 0.1128891922628885, 'label_smoothing': 0.019951781839721707, 'lr': 0.007049327095982044, 'warmup_proportion': 0.09994578514027992, 'weight_decay': 1.5452988117834795e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.960
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1478, 'dropout_prob': 0.045181094193727775, 'label_smoothing': 0.030958704137732985, 'lr': 0.016093282539320387, 'warmup_proportion': 0.08469556387315039, 'weight_decay': 1.2497648289007854e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/41 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00044, Updated Learning Rate: 0.015520009560906883


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00051, Accuracy: 0.855


Epoch 1/Training:   0%|          | 0/41 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.01190852598666804


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00027, Accuracy: 0.948


Epoch 2/Training:   0%|          | 0/41 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00015, Updated Learning Rate: 0.006527937544952325


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.966


Epoch 3/Training:   0%|          | 0/41 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00013, Updated Learning Rate: 0.0018430576363793608


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.968


Epoch 4/Training:   0%|          | 0/41 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00013, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 01:49:03,701] Trial 76 finished with value: 0.00022508572340011596 and parameters: {'accumulation_steps': 2, 'batchnorm': False, 'batch_size': 1478, 'dropout_prob': 0.045181094193727775, 'label_smoothing': 0.030958704137732985, 'lr': 0.016093282539320387, 'warmup_proportion': 0.08469556387315039, 'weight_decay': 1.2497648289007854e-06}. Best is trial 32 with value: 7.672419399023056e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00023, Accuracy: 0.971
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1818, 'dropout_prob': 0.09099453479465104, 'label_smoothing': 0.00041236524150670356, 'lr': 0.01128750591525955, 'warmup_proportion': 0.08838801161035545, 'weight_decay': 9.558261298622317e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00064, Updated Learning Rate: 0.010911273708698299


Epoch 0/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00025, Accuracy: 0.871


Epoch 1/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00015, Updated Learning Rate: 0.008398898864359246


Epoch 1/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.928


Epoch 2/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00016, Updated Learning Rate: 0.00461337399129369


Epoch 2/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00006, Accuracy: 0.968


Epoch 3/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00011, Updated Learning Rate: 0.0013039861559747826


Epoch 3/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.971


Epoch 4/Training:   0%|          | 0/34 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/6 [00:00<?, ?batch/s]

[I 2024-03-11 01:51:54,048] Trial 77 finished with value: 4.9139298126101495e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1818, 'dropout_prob': 0.09099453479465104, 'label_smoothing': 0.00041236524150670356, 'lr': 0.01128750591525955, 'warmup_proportion': 0.08838801161035545, 'weight_decay': 9.558261298622317e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.974
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1393, 'dropout_prob': 0.09019808641768803, 'label_smoothing': 0.00027204710879494927, 'lr': 0.011100221041937443, 'warmup_proportion': 0.0640075239753762, 'weight_decay': 2.048117691844822e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00064, Updated Learning Rate: 0.01056108328800287


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.947


Epoch 1/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00016, Updated Learning Rate: 0.007974621251909293


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00007, Accuracy: 0.971


Epoch 2/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.004326943704797737


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00006, Accuracy: 0.973


Epoch 3/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.0012146498874086558


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.980


Epoch 4/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00009, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 01:54:54,440] Trial 78 finished with value: 5.441967248916626e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1393, 'dropout_prob': 0.09019808641768803, 'label_smoothing': 0.00027204710879494927, 'lr': 0.011100221041937443, 'warmup_proportion': 0.0640075239753762, 'weight_decay': 2.048117691844822e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] Loss: 0.00005, Accuracy: 0.978
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1383, 'dropout_prob': 0.16556461055700467, 'label_smoothing': 0.01424757139731482, 'lr': 0.011290478236395417, 'warmup_proportion': 0.08885295093197397, 'weight_decay': 2.191987990444896e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00073, Updated Learning Rate: 0.010894049707514465


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.950


Epoch 1/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00025, Updated Learning Rate: 0.008364853804572182


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.968


Epoch 2/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00020, Updated Learning Rate: 0.0045874267910790655


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.967


Epoch 3/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00018, Updated Learning Rate: 0.0012955076189904557


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.973


Epoch 4/Training:   0%|          | 0/44 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00017, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 01:57:04,504] Trial 79 finished with value: 0.00015299172848463058 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1383, 'dropout_prob': 0.16556461055700467, 'label_smoothing': 0.01424757139731482, 'lr': 0.011290478236395417, 'warmup_proportion': 0.08885295093197397, 'weight_decay': 2.191987990444896e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.979
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1590, 'dropout_prob': 0.1484471288141354, 'label_smoothing': 0.006203593404504267, 'lr': 0.014213681860476713, 'warmup_proportion': 0.0401314436840497, 'weight_decay': 2.1401542058879345e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00054, Updated Learning Rate: 0.013281889510164898


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.940


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.009854819965336385


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.962


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00014, Updated Learning Rate: 0.005287211845276396


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.970


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00012, Updated Learning Rate: 0.0014748340735943153


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00009, Accuracy: 0.981


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00011, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 01:59:36,993] Trial 80 finished with value: 8.808303549885749e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1590, 'dropout_prob': 0.1484471288141354, 'label_smoothing': 0.006203593404504267, 'lr': 0.014213681860476713, 'warmup_proportion': 0.0401314436840497, 'weight_decay': 2.1401542058879345e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00009, Accuracy: 0.981
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1278, 'dropout_prob': 0.1330949043780857, 'label_smoothing': 0.0001751482485685057, 'lr': 0.015027348719658981, 'warmup_proportion': 0.04111802111306783, 'weight_decay': 1.4156428486034392e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00062, Updated Learning Rate: 0.014046776117654605


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.935


Epoch 1/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00017, Updated Learning Rate: 0.010425340285788945


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.939


Epoch 2/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.00559433811810013


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00006, Accuracy: 0.974


Epoch 3/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.0015606679809931434


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.978


Epoch 4/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00009, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 02:01:48,723] Trial 81 finished with value: 5.434997007250786e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1278, 'dropout_prob': 0.1330949043780857, 'label_smoothing': 0.0001751482485685057, 'lr': 0.015027348719658981, 'warmup_proportion': 0.04111802111306783, 'weight_decay': 1.4156428486034392e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] Loss: 0.00005, Accuracy: 0.978
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1240, 'dropout_prob': 0.15101485052790634, 'label_smoothing': 0.006079533570475484, 'lr': 0.019555245558801237, 'warmup_proportion': 0.041549708806068016, 'weight_decay': 1.5861836771535662e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00060, Updated Learning Rate: 0.01831016267197363


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.935


Epoch 1/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.013610124432732927


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.964


Epoch 2/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00017, Updated Learning Rate: 0.0073104431348988775


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.972


Epoch 3/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00014, Updated Learning Rate: 0.0020405348546964036


Epoch 3/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.979


Epoch 4/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00013, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 02:04:06,773] Trial 82 finished with value: 0.0001076039381325245 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1240, 'dropout_prob': 0.15101485052790634, 'label_smoothing': 0.006079533570475484, 'lr': 0.019555245558801237, 'warmup_proportion': 0.041549708806068016, 'weight_decay': 1.5861836771535662e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.979
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1299, 'dropout_prob': 0.10640080167532477, 'label_smoothing': 0.021862240952618444, 'lr': 0.015570401585579645, 'warmup_proportion': 0.03610467147737083, 'weight_decay': 1.0176268466175279e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00062, Updated Learning Rate: 0.01450927331762368


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.937


Epoch 1/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00028, Updated Learning Rate: 0.010739454438713714


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.971


Epoch 2/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00024, Updated Learning Rate: 0.005752825749528555


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.975


Epoch 3/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00022, Updated Learning Rate: 0.0016032998237472435


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.975


Epoch 4/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00021, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 02:06:12,862] Trial 83 finished with value: 0.00018890149295330047 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1299, 'dropout_prob': 0.10640080167532477, 'label_smoothing': 0.021862240952618444, 'lr': 0.015570401585579645, 'warmup_proportion': 0.03610467147737083, 'weight_decay': 1.0176268466175279e-06}. Best is trial 77 with value: 4.9139298126101495e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.978
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1609, 'dropout_prob': 0.12931721102481436, 'label_smoothing': 8.83890730090731e-07, 'lr': 0.028147621640854794, 'warmup_proportion': 0.04584907346713653, 'weight_decay': 2.1415275778699216e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00045, Updated Learning Rate: 0.026402075025726137


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00012, Accuracy: 0.945


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00013, Updated Learning Rate: 0.019656468746473504


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.960


Epoch 2/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.010569064550042183


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00006, Accuracy: 0.973


Epoch 3/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.0029518216325127794


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.977


Epoch 4/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00007, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 02:08:22,236] Trial 84 finished with value: 4.6606420353055e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1609, 'dropout_prob': 0.12931721102481436, 'label_smoothing': 8.83890730090731e-07, 'lr': 0.028147621640854794, 'warmup_proportion': 0.04584907346713653, 'weight_decay': 2.1415275778699216e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] Loss: 0.00005, Accuracy: 0.978
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1412, 'dropout_prob': 0.1344738440515692, 'label_smoothing': 0.012565473661815428, 'lr': 0.028152168453605124, 'warmup_proportion': 0.04487391523795066, 'weight_decay': 1.3023121195979873e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00057, Updated Learning Rate: 0.026390208778437323


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.926


Epoch 1/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.019636660795594955


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00016, Accuracy: 0.972


Epoch 2/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00018, Updated Learning Rate: 0.01055461487496293


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.973


Epoch 3/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00016, Updated Learning Rate: 0.0029471883615642147


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.980


Epoch 4/Training:   0%|          | 0/43 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00015, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 02:10:29,759] Trial 85 finished with value: 0.00013243851214647293 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1412, 'dropout_prob': 0.1344738440515692, 'label_smoothing': 0.012565473661815428, 'lr': 0.028152168453605124, 'warmup_proportion': 0.04487391523795066, 'weight_decay': 1.3023121195979873e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] Loss: 0.00013, Accuracy: 0.979
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1127, 'dropout_prob': 0.17956987689599113, 'label_smoothing': 0.007105180504132021, 'lr': 0.06745294746314857, 'warmup_proportion': 0.049586309260943755, 'weight_decay': 1.970315313608002e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/54 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00056, Updated Learning Rate: 0.06347056808693587


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.953


Epoch 1/Training:   0%|          | 0/54 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00023, Updated Learning Rate: 0.04739383426331222


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.961


Epoch 2/Training:   0%|          | 0/54 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00018, Updated Learning Rate: 0.025531609076773664


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.977


Epoch 3/Training:   0%|          | 0/54 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00016, Updated Learning Rate: 0.007138328648845122


Epoch 3/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.981


Epoch 4/Training:   0%|          | 0/54 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00014, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 02:12:36,772] Trial 86 finished with value: 0.00010937424078583718 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1127, 'dropout_prob': 0.17956987689599113, 'label_smoothing': 0.007105180504132021, 'lr': 0.06745294746314857, 'warmup_proportion': 0.049586309260943755, 'weight_decay': 1.970315313608002e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00011, Accuracy: 0.982
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1014, 'dropout_prob': 0.1226996516311242, 'label_smoothing': 0.0005011892549716046, 'lr': 0.036302127256330224, 'warmup_proportion': 0.06364242819916421, 'weight_decay': 2.0680402907745966e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00063, Updated Learning Rate: 0.03450460687645781


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.931


Epoch 1/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00021, Updated Learning Rate: 0.0260265149841141


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00015, Accuracy: 0.956


Epoch 2/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00015, Updated Learning Rate: 0.014112071997325323


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.970


Epoch 3/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00012, Updated Learning Rate: 0.003959990643385249


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00007, Accuracy: 0.978


Epoch 4/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00010, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-11 02:15:02,289] Trial 87 finished with value: 6.671954840421677e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1014, 'dropout_prob': 0.1226996516311242, 'label_smoothing': 0.0005011892549716046, 'lr': 0.036302127256330224, 'warmup_proportion': 0.06364242819916421, 'weight_decay': 2.0680402907745966e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00007, Accuracy: 0.979
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1027, 'dropout_prob': 0.11985673508254018, 'label_smoothing': 0.23850194634012017, 'lr': 0.03715924455621446, 'warmup_proportion': 0.06002000726607524, 'weight_decay': 2.88094601225188e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00142, Updated Learning Rate: 0.035186480363961256


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00122, Accuracy: 0.951


Epoch 1/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00119, Updated Learning Rate: 0.026437014495890917


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00116, Accuracy: 0.966


Epoch 2/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00116, Updated Learning Rate: 0.014298663650893


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00114, Accuracy: 0.976


Epoch 3/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00114, Updated Learning Rate: 0.00400668574700645


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00113, Accuracy: 0.979


Epoch 4/Training:   0%|          | 0/59 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00113, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-11 02:17:05,768] Trial 88 finished with value: 0.0011234356999397278 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1027, 'dropout_prob': 0.11985673508254018, 'label_smoothing': 0.23850194634012017, 'lr': 0.03715924455621446, 'warmup_proportion': 0.06002000726607524, 'weight_decay': 2.88094601225188e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00112, Accuracy: 0.979
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1542, 'dropout_prob': 0.14588098218295464, 'label_smoothing': 0.000730932224792607, 'lr': 0.04105490292483175, 'warmup_proportion': 0.051794173761270816, 'weight_decay': 1.3914106054110367e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00045, Updated Learning Rate: 0.03875347218736715


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00033, Accuracy: 0.877


Epoch 1/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00014, Updated Learning Rate: 0.029025869932212806


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00009, Accuracy: 0.962


Epoch 2/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00010, Updated Learning Rate: 0.015667281313263864


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00005, Accuracy: 0.979


Epoch 3/Training:   0%|          | 0/39 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00008, Updated Learning Rate: 0.004385221592016308


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 02:18:46,714] Trial 89 finished with value: 5.446032136678696e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1542, 'dropout_prob': 0.14588098218295464, 'label_smoothing': 0.000730932224792607, 'lr': 0.04105490292483175, 'warmup_proportion': 0.051794173761270816, 'weight_decay': 1.3914106054110367e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 3] Loss: 0.00005, Accuracy: 0.979
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1218, 'dropout_prob': 0.14544103903577663, 'label_smoothing': 0.0004250191535122064, 'lr': 0.04895058704616252, 'warmup_proportion': 0.04656483952580117, 'weight_decay': 1.4051783210808624e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00052, Updated Learning Rate: 0.045935596105459664


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00084, Accuracy: 0.798


Epoch 1/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.03421339177895887


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00008, Accuracy: 0.976


Epoch 2/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00012, Updated Learning Rate: 0.018401053313675093


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00007, Accuracy: 0.976


Epoch 3/Training:   0%|          | 0/50 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00010, Updated Learning Rate: 0.005139978616208111


Epoch 3/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 02:21:30,761] Trial 90 finished with value: 7.076191119849682e-05 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1218, 'dropout_prob': 0.14544103903577663, 'label_smoothing': 0.0004250191535122064, 'lr': 0.04895058704616252, 'warmup_proportion': 0.04656483952580117, 'weight_decay': 1.4051783210808624e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 3] Loss: 0.00007, Accuracy: 0.974
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1174, 'dropout_prob': 0.14599915242657968, 'label_smoothing': 0.0003123981787481759, 'lr': 0.03531056475481153, 'warmup_proportion': 0.052252020555818984, 'weight_decay': 1.4004933112543e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/52 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00055, Updated Learning Rate: 0.03327210818152333


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.933


Epoch 1/Training:   0%|          | 0/52 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00018, Updated Learning Rate: 0.02487750467840632


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00009, Accuracy: 0.970


Epoch 2/Training:   0%|          | 0/52 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00013, Updated Learning Rate: 0.013413258897067517


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 02:22:49,533] Trial 91 finished with value: 0.00015124109983444213 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1174, 'dropout_prob': 0.14599915242657968, 'label_smoothing': 0.0003123981787481759, 'lr': 0.03531056475481153, 'warmup_proportion': 0.052252020555818984, 'weight_decay': 1.4004933112543e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 2] Loss: 0.00015, Accuracy: 0.949
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1007, 'dropout_prob': 0.1753022615660737, 'label_smoothing': 0.015025790070462843, 'lr': 0.046274419795688616, 'warmup_proportion': 0.03816183693898994, 'weight_decay': 1.7230050771506873e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00065, Updated Learning Rate: 0.04317462144207024


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00032, Accuracy: 0.940


Epoch 1/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00030, Updated Learning Rate: 0.031991436796902896


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.974


Epoch 2/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00025, Updated Learning Rate: 0.01714885932574765


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.977


Epoch 3/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00022, Updated Learning Rate: 0.004781227140370101


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.983


Epoch 4/Training:   0%|          | 0/60 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00021, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-11 02:24:54,990] Trial 92 finished with value: 0.00017207796722650527 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1007, 'dropout_prob': 0.1753022615660737, 'label_smoothing': 0.015025790070462843, 'lr': 0.046274419795688616, 'warmup_proportion': 0.03816183693898994, 'weight_decay': 1.7230050771506873e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.983
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1245, 'dropout_prob': 0.19163072717145666, 'label_smoothing': 0.00579984198477846, 'lr': 0.05087704994205054, 'warmup_proportion': 0.04765431539396159, 'weight_decay': 1.131120556720482e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00053, Updated Learning Rate: 0.047776582105869714


Epoch 0/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.943


Epoch 1/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00022, Updated Learning Rate: 0.03560744552818607


Epoch 1/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00020, Accuracy: 0.943


Epoch 2/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00016, Updated Learning Rate: 0.019158742790386096


Epoch 2/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.981


Epoch 3/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00014, Updated Learning Rate: 0.005352871527382155


Epoch 3/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.982


Epoch 4/Training:   0%|          | 0/49 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00012, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/9 [00:00<?, ?batch/s]

[I 2024-03-11 02:27:35,975] Trial 93 finished with value: 0.00010051075294613838 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1245, 'dropout_prob': 0.19163072717145666, 'label_smoothing': 0.00579984198477846, 'lr': 0.05087704994205054, 'warmup_proportion': 0.04765431539396159, 'weight_decay': 1.131120556720482e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00010, Accuracy: 0.982
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1526, 'dropout_prob': 0.15507945728762118, 'label_smoothing': 0.026144513538877725, 'lr': 0.0472792832862768, 'warmup_proportion': 0.04355204138242804, 'weight_decay': 1.466634472382698e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00053, Updated Learning Rate: 0.044272255269991664


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00033, Accuracy: 0.912


Epoch 1/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00025, Updated Learning Rate: 0.03291017770274255


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.963


Epoch 2/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00021, Updated Learning Rate: 0.017677885831472763


Epoch 2/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.978


Epoch 3/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00020, Updated Learning Rate: 0.004934476207253098


Epoch 3/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.982


Epoch 4/Training:   0%|          | 0/40 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00019, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 02:29:41,127] Trial 94 finished with value: 0.00017635799646377565 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1526, 'dropout_prob': 0.15507945728762118, 'label_smoothing': 0.026144513538877725, 'lr': 0.0472792832862768, 'warmup_proportion': 0.04355204138242804, 'weight_decay': 1.466634472382698e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] Loss: 0.00018, Accuracy: 0.979
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1077, 'dropout_prob': 0.12335619173633919, 'label_smoothing': 0.014040408215465799, 'lr': 0.02553775723719236, 'warmup_proportion': 0.06354969074060182, 'weight_decay': 2.2122013936583115e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00070, Updated Learning Rate: 0.024235010059777563


Epoch 0/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00031, Accuracy: 0.935


Epoch 1/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00027, Updated Learning Rate: 0.018249894535788666


Epoch 1/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00022, Accuracy: 0.970


Epoch 2/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00023, Updated Learning Rate: 0.00988490388057156


Epoch 2/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.980


Epoch 3/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00020, Updated Learning Rate: 0.002772144538273329


Epoch 3/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.982


Epoch 4/Training:   0%|          | 0/56 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00019, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/10 [00:00<?, ?batch/s]

[I 2024-03-11 02:31:47,560] Trial 95 finished with value: 0.00017212972193956375 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1077, 'dropout_prob': 0.12335619173633919, 'label_smoothing': 0.014040408215465799, 'lr': 0.02553775723719236, 'warmup_proportion': 0.06354969074060182, 'weight_decay': 2.2122013936583115e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00017, Accuracy: 0.983
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1357, 'dropout_prob': 0.1405084330328094, 'label_smoothing': 0.020636429042130255, 'lr': 0.01949452294128528, 'warmup_proportion': 0.05750315146067203, 'weight_decay': 1.967939745099535e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00061, Updated Learning Rate: 0.018418981755779756


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.945


Epoch 1/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00026, Updated Learning Rate: 0.013808361089906403


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.969


Epoch 2/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00022, Updated Learning Rate: 0.007457765766610055


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00021, Accuracy: 0.971


Epoch 3/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00020, Updated Learning Rate: 0.0020881029189415055


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00019, Accuracy: 0.975


Epoch 4/Training:   0%|          | 0/45 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00020, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 02:35:13,603] Trial 96 finished with value: 0.00018337733000516892 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1357, 'dropout_prob': 0.1405084330328094, 'label_smoothing': 0.020636429042130255, 'lr': 0.01949452294128528, 'warmup_proportion': 0.05750315146067203, 'weight_decay': 1.967939745099535e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.979
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1585, 'dropout_prob': 0.09148700728392728, 'label_smoothing': 0.010905647870567836, 'lr': 0.04146692352903154, 'warmup_proportion': 0.041519777841391346, 'weight_decay': 2.545948198339146e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00146, Updated Learning Rate: 0.038748517241723464


Epoch 0/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.07844, Accuracy: 0.117


Epoch 1/Training:   0%|          | 0/38 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00146, Updated Learning Rate: 0.028750401894901523


Epoch 1/Testing:   0%|          | 0/7 [00:00<?, ?batch/s]

[I 2024-03-11 02:36:14,017] Trial 97 finished with value: 4.390048291015625 and parameters: {'accumulation_steps': 1, 'batchnorm': True, 'batch_size': 1585, 'dropout_prob': 0.09148700728392728, 'label_smoothing': 0.010905647870567836, 'lr': 0.04146692352903154, 'warmup_proportion': 0.041519777841391346, 'weight_decay': 2.545948198339146e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 1] Loss: 4.39005, Accuracy: 0.117
Early stopping triggered.
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 915, 'dropout_prob': 0.13049371377289745, 'label_smoothing': 0.00592473410669996, 'lr': 0.06597907541445124, 'warmup_proportion': 0.06772061854474638, 'weight_decay': 1.0118648237154745e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/66 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00072, Updated Learning Rate: 0.06283648848352925


Epoch 0/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00030, Accuracy: 0.935


Epoch 1/Training:   0%|          | 0/66 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00030, Updated Learning Rate: 0.047498379911508154


Epoch 1/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.952


Epoch 2/Training:   0%|          | 0/66 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00023, Updated Learning Rate: 0.025789798473896224


Epoch 2/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00018, Accuracy: 0.968


Epoch 3/Training:   0%|          | 0/66 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00019, Updated Learning Rate: 0.007242444016319718


Epoch 3/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00014, Accuracy: 0.977


Epoch 4/Training:   0%|          | 0/66 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00017, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/11 [00:00<?, ?batch/s]

[I 2024-03-11 02:38:21,643] Trial 98 finished with value: 0.00012755308225750924 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 915, 'dropout_prob': 0.13049371377289745, 'label_smoothing': 0.00592473410669996, 'lr': 0.06597907541445124, 'warmup_proportion': 0.06772061854474638, 'weight_decay': 1.0118648237154745e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00013, Accuracy: 0.982
{'epochs': 5, 'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1287, 'dropout_prob': 0.16847003099338972, 'label_smoothing': 0.03425209877627631, 'lr': 0.05775113013921722, 'warmup_proportion': 0.0540230239045516, 'weight_decay': 1.5959801171624955e-06}




[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.


Epoch 0/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 0] Average Loss: 0.00062, Updated Learning Rate: 0.05447276270308113


Epoch 0/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 0] New best model found! Saving checkpoint. Loss: 0.00035, Accuracy: 0.952


Epoch 1/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 1] Average Loss: 0.00033, Updated Learning Rate: 0.04076944636553103


Epoch 1/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 1] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.970


Epoch 2/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 2] Average Loss: 0.00029, Updated Learning Rate: 0.021995713609575716


Epoch 2/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 2] New best model found! Saving checkpoint. Loss: 0.00028, Accuracy: 0.965


Epoch 3/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 3] Average Loss: 0.00027, Updated Learning Rate: 0.006154893242034694


Epoch 3/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[Test][Epoch 3] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.977


Epoch 4/Training:   0%|          | 0/47 [00:00<?, ?batch/s]

[Train][Epoch 4] Average Loss: 0.00026, Updated Learning Rate: 0.0


Epoch 4/Testing:   0%|          | 0/8 [00:00<?, ?batch/s]

[I 2024-03-11 02:40:27,976] Trial 99 finished with value: 0.00023607323169708253 and parameters: {'accumulation_steps': 1, 'batchnorm': False, 'batch_size': 1287, 'dropout_prob': 0.16847003099338972, 'label_smoothing': 0.03425209877627631, 'lr': 0.05775113013921722, 'warmup_proportion': 0.0540230239045516, 'weight_decay': 1.5959801171624955e-06}. Best is trial 84 with value: 4.6606420353055e-05.


[Test][Epoch 4] New best model found! Saving checkpoint. Loss: 0.00024, Accuracy: 0.980


In [31]:
# Print the best hyperparameters and the corresponding value
print('Best trial:')
trial = study.best_trial

print('Value: {}'.format(trial.value))
print('Params: ')
for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))

Best trial:
Value: 4.6606420353055e-05
Params: 
    accumulation_steps: 1
    batchnorm: False
    batch_size: 1609
    dropout_prob: 0.12931721102481436
    label_smoothing: 8.83890730090731e-07
    lr: 0.028147621640854794
    warmup_proportion: 0.04584907346713653
    weight_decay: 2.1415275778699216e-06
