In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets

from resnet import ResNet20

import os
import torch
import torch.optim as optim
import torch.nn as nn
from datetime import datetime

from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [39]:
hyperparameters = {
    'sparsity_type': "nm", # nm / entropy / feather / spartan / ses / base (no sparsity)
    'epochs': 200,
    'lr': 0.1,
    'momentum': 0.9,
    'weight_decay': 5e-4,
    'batch_size': 128,
}

In [40]:
transform_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.201]),
])

transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.201]),
])


train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)

train_loader = DataLoader(train_dataset, batch_size=hyperparameters['batch_size'], shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=hyperparameters['batch_size'], shuffle=False, num_workers=2)

In [41]:
class Pruner:
    def __init__(self, model, sparsity_ratio=0.5):
        self.sparsity_ratio = 0.5
        self.model = model

    def apply_nm_sparsity(self):
        """
        Apply N:M sparsity (2:4 pattern) to the ResNet-20 model.
        This applies after training is completed and should only affect inference time.
        """
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Conv2d):
                # For each convolutional layer, apply 2:4 sparsity
                weight = module.weight.data
                num_filters = weight.size(0)  # Number of filters in the convolution
                
                # For every 4 weights, we keep the 2 largest magnitude weights and zero out the others
                for i in range(num_filters):
                    # Get the weights of the i-th filter
                    filter_weights = weight[i].view(-1)  # Flatten the filter weights
                    topk_values, topk_indices = torch.topk(torch.abs(filter_weights),
                                                           int(len(filter_weights) * self.sparsity_ratio))
                    
                    # Set the smallest 2 values in each 4-group to zero
                    threshold = topk_values[-1]  # The smallest non-zero magnitude in the top-k
                    weight[i].data[torch.abs(weight[i]) < threshold] = 0  # Prune the less important weights
                
                print(f"Applied N:M sparsity (2:4) on {name} filters")

    def print_sparsity(self):
        tot_params = 0
        tot_zeros = 0

        for name, module in self.model.named_modules():
            if isinstance(module, nn.Conv2d):
                weight = module.weight.data
                n_params = weight.numel()
                n_zeros = torch.sum(weight == 0).item()

                tot_params += n_params
                tot_zeros += n_zeros

                print(f"{name}: Total Params = {n_params}. Total zero params = {n_zeros}. Sparsity = {n_zeros / n_params:.2%}")
            

In [42]:
def train(model, train_loader, criterion, optimizer, epoch, log_file):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}", ncols=100)
    
    for batch_idx, (inputs, targets) in enumerate(pbar):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += targets.size(0)

        pbar.set_postfix(loss=running_loss/(batch_idx+1), accuracy=100.0 * correct / total)

    avg_loss = running_loss / len(train_loader)
    accuracy = 100.0 * correct / total
    log_file.write(f'Epoch [{epoch+1}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%\n')

    return avg_loss, accuracy


In [43]:
def test(model, test_loader, criterion, log_file):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    pbar = tqdm(test_loader, desc="Testing", ncols=100)
    
    with torch.no_grad():
        for inputs, targets in pbar:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()

            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            total += targets.size(0)

            pbar.set_postfix(loss=test_loss/(total + inputs.size(0)), accuracy=100.0 * correct / total)

    avg_test_loss = test_loss / len(test_loader)
    accuracy = 100.0 * correct / total
    log_file.write(f'Test Loss: {avg_test_loss:.4f}, Accuracy: {accuracy:.2f}%\n')

    return avg_test_loss, accuracy

In [44]:
resnet20_model = ResNet20()
resnet20_model.to(device)

ResNet20(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (residual_layers): Sequential(
    (0): ResBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bnorm2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (residual): Sequential()
    )
    (1): ResBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bnorm2): BatchNorm2d(16, eps=1e-05,

In [45]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet20_model.parameters(), lr=hyperparameters['lr'], 
                      momentum=hyperparameters['momentum'], weight_decay=hyperparameters['weight_decay'])

In [46]:
current_learning_rate = hyperparameters['lr']
DECAY = 0.1
DECAY_EPOCHS = [100, 150]
EPOCHS = 200

In [47]:
sparsity_type = hyperparameters['sparsity_type']
output_base_path = "/home/sg666/Class/ECE661/outputs"
sparsity_folder_path = os.path.join(output_base_path, sparsity_type)

hyperparameter_str = f"epochs_{hyperparameters['epochs']}_lr_{hyperparameters['lr']}_momentum_{hyperparameters['momentum']}_wd_{hyperparameters['weight_decay']}_batch_{hyperparameters['batch_size']}"
output_folder = os.path.join(sparsity_folder_path, hyperparameter_str)

os.makedirs(output_folder, exist_ok=True)

hyperparameter_file = os.path.join(output_folder, 'hyperparameters.txt')
with open(hyperparameter_file, 'w') as f:
    for key, value in hyperparameters.items():
        f.write(f"{key}: {value}\n")

log_file_path = os.path.join(output_folder, 'training_log.txt')

In [48]:
pruner = Pruner(resnet20_model, sparsity_ratio=0.5)

In [49]:
# Apply N:M sparsity on the trained base ResNet-20, then train model again with sparsity
resnet20_model.load_state_dict(torch.load("/home/sg666/Class/ECE661/outputs/base/epochs_200_lr_0.1_momentum_0.9_wd_0.0005_batch_128/best_model.pth"))
pruner.apply_nm_sparsity()
pruner.print_sparsity()

Applied N:M sparsity (2:4) on conv1 filters
Applied N:M sparsity (2:4) on residual_layers.0.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.0.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.1.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.1.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.2.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.2.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.3.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.3.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.3.residual.0 filters
Applied N:M sparsity (2:4) on residual_layers.4.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.4.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.5.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.5.conv2 filters
Applied N:M sparsity (2:4) on residual_layers.6.conv1 filters
Applied N:M sparsity (2:4) on residual_layers.6.conv2 filters
Applied N:M sparsity 

In [50]:
with open(log_file_path, 'w') as log_file:
    log_file.write(f"Training started at {datetime.now()}\n")

    best_accuracy = 0.0
    
    for epoch in range(hyperparameters['epochs']):
        train_loss, train_accuracy = train(resnet20_model, train_loader, criterion, optimizer, epoch, log_file)
        test_loss, test_accuracy = test(resnet20_model, test_loader, criterion, log_file)

        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            model_checkpoint_path = os.path.join(output_folder, f"model_best.pth")
            torch.save(resnet20_model.state_dict(), model_checkpoint_path)
            print(f"Saved best model at epoch {epoch+1} with accuracy: {best_accuracy:.2f}%")


        if epoch+1 in DECAY_EPOCHS:
            current_learning_rate = current_learning_rate * DECAY
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_learning_rate
            print("Current learning rate has decayed to %f" %current_learning_rate)

    log_file.write(f"Training completed at {datetime.now()}\n")
    log_file.write(f"Best model accuracy: {best_accuracy:.2f}%\n")


Epoch 1: 100%|█████████████████████████| 391/391 [00:06<00:00, 56.84it/s, accuracy=67.8, loss=0.931]
Testing: 100%|██████████████████████████| 79/79 [00:00<00:00, 79.95it/s, accuracy=61.6, loss=0.0103]


Saved best model at epoch 1 with accuracy: 61.60%


Epoch 2: 100%|███████████████████████████| 391/391 [00:07<00:00, 50.47it/s, accuracy=78, loss=0.643]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.45it/s, accuracy=74.7, loss=0.00606]


Saved best model at epoch 2 with accuracy: 74.71%


Epoch 3: 100%|█████████████████████████| 391/391 [00:07<00:00, 49.15it/s, accuracy=80.7, loss=0.568]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 69.12it/s, accuracy=76.4, loss=0.00597]


Saved best model at epoch 3 with accuracy: 76.44%


Epoch 4: 100%|█████████████████████████| 391/391 [00:08<00:00, 48.62it/s, accuracy=81.8, loss=0.531]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.42it/s, accuracy=73.5, loss=0.00686]
Epoch 5: 100%|██████████████████████████| 391/391 [00:07<00:00, 51.41it/s, accuracy=82.5, loss=0.51]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.98it/s, accuracy=75.7, loss=0.00574]
Epoch 6: 100%|█████████████████████████| 391/391 [00:08<00:00, 48.48it/s, accuracy=83.2, loss=0.492]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 70.22it/s, accuracy=75.1, loss=0.00591]
Epoch 7: 100%|█████████████████████████| 391/391 [00:07<00:00, 49.20it/s, accuracy=83.4, loss=0.484]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 68.88it/s, accuracy=69.5, loss=0.00749]
Epoch 8: 100%|█████████████████████████| 391/391 [00:08<00:00, 45.69it/s, accuracy=83.8, loss=0.473]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 66.95it/s, accuracy=79.1, loss

Saved best model at epoch 8 with accuracy: 79.14%


Epoch 9: 100%|█████████████████████████| 391/391 [00:08<00:00, 46.81it/s, accuracy=83.8, loss=0.468]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 73.88it/s, accuracy=78.8, loss=0.00496]
Epoch 10: 100%|████████████████████████| 391/391 [00:08<00:00, 47.65it/s, accuracy=84.2, loss=0.461]
Testing: 100%|███████████████████████████| 79/79 [00:00<00:00, 79.26it/s, accuracy=80, loss=0.00486]


Saved best model at epoch 10 with accuracy: 79.99%


Epoch 11: 100%|████████████████████████| 391/391 [00:07<00:00, 51.91it/s, accuracy=84.1, loss=0.457]
Testing: 100%|██████████████████████████| 79/79 [00:01<00:00, 64.80it/s, accuracy=75.5, loss=0.0064]
Epoch 12: 100%|████████████████████████| 391/391 [00:08<00:00, 46.20it/s, accuracy=84.5, loss=0.453]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 79.20it/s, accuracy=78.5, loss=0.00502]
Epoch 13: 100%|████████████████████████| 391/391 [00:08<00:00, 48.14it/s, accuracy=84.4, loss=0.452]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.03it/s, accuracy=74.8, loss=0.00667]
Epoch 14: 100%|████████████████████████| 391/391 [00:08<00:00, 47.86it/s, accuracy=84.6, loss=0.451]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 67.33it/s, accuracy=78.1, loss=0.00531]
Epoch 15: 100%|████████████████████████| 391/391 [00:07<00:00, 49.17it/s, accuracy=84.7, loss=0.445]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 66.07it/s, accuracy=80.8, loss

Saved best model at epoch 15 with accuracy: 80.80%


Epoch 16: 100%|████████████████████████| 391/391 [00:08<00:00, 48.20it/s, accuracy=84.7, loss=0.446]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.99it/s, accuracy=79.6, loss=0.00517]
Epoch 17: 100%|██████████████████████████| 391/391 [00:08<00:00, 46.75it/s, accuracy=85, loss=0.443]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.22it/s, accuracy=80.6, loss=0.00459]
Epoch 18: 100%|████████████████████████| 391/391 [00:07<00:00, 49.45it/s, accuracy=84.7, loss=0.443]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.23it/s, accuracy=79.9, loss=0.00489]
Epoch 19: 100%|████████████████████████| 391/391 [00:07<00:00, 49.24it/s, accuracy=85.1, loss=0.435]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 69.63it/s, accuracy=76.8, loss=0.00555]
Epoch 20: 100%|████████████████████████| 391/391 [00:07<00:00, 52.03it/s, accuracy=84.6, loss=0.448]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.91it/s, accuracy=75.5, loss

Saved best model at epoch 30 with accuracy: 83.80%


Epoch 31: 100%|████████████████████████| 391/391 [00:08<00:00, 48.01it/s, accuracy=85.2, loss=0.428]
Testing: 100%|███████████████████████████| 79/79 [00:00<00:00, 85.77it/s, accuracy=78, loss=0.00533]
Epoch 32: 100%|█████████████████████████| 391/391 [00:07<00:00, 52.00it/s, accuracy=85.2, loss=0.43]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 77.06it/s, accuracy=75.9, loss=0.00609]
Epoch 33: 100%|████████████████████████| 391/391 [00:08<00:00, 48.10it/s, accuracy=85.3, loss=0.425]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 76.53it/s, accuracy=70.7, loss=0.00755]
Epoch 34: 100%|████████████████████████| 391/391 [00:08<00:00, 44.54it/s, accuracy=85.6, loss=0.421]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.64it/s, accuracy=74.7, loss=0.00653]
Epoch 35: 100%|█████████████████████████| 391/391 [00:08<00:00, 45.20it/s, accuracy=85.3, loss=0.43]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.49it/s, accuracy=70.5, loss

Current learning rate has decayed to 0.010000


Epoch 101: 100%|███████████████████████| 391/391 [00:09<00:00, 43.39it/s, accuracy=91.2, loss=0.264]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.85it/s, accuracy=90.2, loss=0.00228]


Saved best model at epoch 101 with accuracy: 90.15%


Epoch 102: 100%|████████████████████████| 391/391 [00:07<00:00, 50.93it/s, accuracy=92.6, loss=0.22]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.50it/s, accuracy=90.3, loss=0.00222]


Saved best model at epoch 102 with accuracy: 90.33%


Epoch 103: 100%|███████████████████████| 391/391 [00:08<00:00, 47.59it/s, accuracy=93.1, loss=0.205]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.23it/s, accuracy=90.8, loss=0.00218]


Saved best model at epoch 103 with accuracy: 90.79%


Epoch 104: 100%|███████████████████████| 391/391 [00:08<00:00, 47.35it/s, accuracy=93.3, loss=0.195]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.73it/s, accuracy=90.8, loss=0.00216]


Saved best model at epoch 104 with accuracy: 90.85%


Epoch 105: 100%|███████████████████████| 391/391 [00:08<00:00, 47.99it/s, accuracy=93.8, loss=0.184]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 73.86it/s, accuracy=90.8, loss=0.00219]
Epoch 106: 100%|█████████████████████████| 391/391 [00:07<00:00, 50.23it/s, accuracy=94, loss=0.175]
Testing: 100%|██████████████████████████| 79/79 [00:01<00:00, 72.56it/s, accuracy=90.7, loss=0.0022]
Epoch 107: 100%|████████████████████████| 391/391 [00:07<00:00, 49.76it/s, accuracy=94.2, loss=0.17]
Testing: 100%|███████████████████████████| 79/79 [00:01<00:00, 75.09it/s, accuracy=91, loss=0.00216]


Saved best model at epoch 107 with accuracy: 90.95%


Epoch 108: 100%|███████████████████████| 391/391 [00:08<00:00, 46.75it/s, accuracy=94.4, loss=0.166]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 67.97it/s, accuracy=90.7, loss=0.00219]
Epoch 109: 100%|███████████████████████| 391/391 [00:07<00:00, 49.17it/s, accuracy=94.4, loss=0.161]
Testing: 100%|███████████████████████████| 79/79 [00:01<00:00, 69.41it/s, accuracy=91, loss=0.00219]


Saved best model at epoch 109 with accuracy: 91.03%


Epoch 110: 100%|███████████████████████| 391/391 [00:08<00:00, 46.88it/s, accuracy=94.6, loss=0.157]
Testing: 100%|███████████████████████████| 79/79 [00:01<00:00, 72.59it/s, accuracy=91, loss=0.00215]
Epoch 111: 100%|███████████████████████| 391/391 [00:07<00:00, 49.55it/s, accuracy=94.9, loss=0.153]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.32it/s, accuracy=90.8, loss=0.00219]
Epoch 112: 100%|███████████████████████| 391/391 [00:08<00:00, 47.86it/s, accuracy=94.9, loss=0.149]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.07it/s, accuracy=91.1, loss=0.00217]


Saved best model at epoch 112 with accuracy: 91.07%


Epoch 113: 100%|███████████████████████| 391/391 [00:07<00:00, 51.88it/s, accuracy=94.9, loss=0.147]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.45it/s, accuracy=91.3, loss=0.00218]


Saved best model at epoch 113 with accuracy: 91.27%


Epoch 114: 100%|███████████████████████| 391/391 [00:08<00:00, 48.17it/s, accuracy=95.1, loss=0.143]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 65.35it/s, accuracy=91.1, loss=0.00222]
Epoch 115: 100%|█████████████████████████| 391/391 [00:08<00:00, 46.29it/s, accuracy=95, loss=0.143]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.54it/s, accuracy=90.6, loss=0.00226]
Epoch 116: 100%|███████████████████████| 391/391 [00:07<00:00, 50.49it/s, accuracy=95.3, loss=0.137]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 71.23it/s, accuracy=90.7, loss=0.00221]
Epoch 117: 100%|███████████████████████| 391/391 [00:07<00:00, 50.19it/s, accuracy=95.1, loss=0.141]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 74.92it/s, accuracy=90.7, loss=0.00239]
Epoch 118: 100%|███████████████████████| 391/391 [00:08<00:00, 46.51it/s, accuracy=95.2, loss=0.139]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 66.77it/s, accuracy=90.8, loss

Current learning rate has decayed to 0.001000


Epoch 151: 100%|██████████████████████| 391/391 [00:08<00:00, 47.97it/s, accuracy=97.1, loss=0.0904]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.10it/s, accuracy=91.7, loss=0.00209]


Saved best model at epoch 151 with accuracy: 91.70%


Epoch 152: 100%|██████████████████████| 391/391 [00:07<00:00, 48.99it/s, accuracy=97.8, loss=0.0725]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 80.52it/s, accuracy=92.1, loss=0.00204]


Saved best model at epoch 152 with accuracy: 92.08%


Epoch 153: 100%|████████████████████████| 391/391 [00:07<00:00, 49.80it/s, accuracy=98, loss=0.0682]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 76.50it/s, accuracy=92.2, loss=0.00202]


Saved best model at epoch 153 with accuracy: 92.25%


Epoch 154: 100%|██████████████████████| 391/391 [00:08<00:00, 44.92it/s, accuracy=98.1, loss=0.0641]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.70it/s, accuracy=92.3, loss=0.00201]


Saved best model at epoch 154 with accuracy: 92.26%


Epoch 155: 100%|██████████████████████| 391/391 [00:09<00:00, 43.09it/s, accuracy=98.2, loss=0.0607]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 76.51it/s, accuracy=92.3, loss=0.00205]


Saved best model at epoch 155 with accuracy: 92.32%


Epoch 156: 100%|██████████████████████| 391/391 [00:08<00:00, 45.55it/s, accuracy=98.3, loss=0.0583]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 76.60it/s, accuracy=92.3, loss=0.00203]
Epoch 157: 100%|██████████████████████| 391/391 [00:08<00:00, 47.97it/s, accuracy=98.4, loss=0.0571]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 77.86it/s, accuracy=92.3, loss=0.00205]
Epoch 158: 100%|██████████████████████| 391/391 [00:08<00:00, 47.42it/s, accuracy=98.5, loss=0.0555]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 79.42it/s, accuracy=92.3, loss=0.00205]


Saved best model at epoch 158 with accuracy: 92.34%


Epoch 159: 100%|██████████████████████| 391/391 [00:07<00:00, 55.22it/s, accuracy=98.5, loss=0.0542]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 80.21it/s, accuracy=92.3, loss=0.00208]


Saved best model at epoch 159 with accuracy: 92.35%


Epoch 160: 100%|██████████████████████| 391/391 [00:06<00:00, 57.84it/s, accuracy=98.5, loss=0.0516]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 87.96it/s, accuracy=92.2, loss=0.00208]
Epoch 161: 100%|███████████████████████| 391/391 [00:07<00:00, 51.02it/s, accuracy=98.6, loss=0.052]
Testing: 100%|██████████████████████████| 79/79 [00:01<00:00, 74.30it/s, accuracy=92.3, loss=0.0021]
Epoch 162: 100%|████████████████████████| 391/391 [00:07<00:00, 52.97it/s, accuracy=98.6, loss=0.05]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 82.95it/s, accuracy=92.3, loss=0.00208]
Epoch 163: 100%|██████████████████████| 391/391 [00:07<00:00, 49.89it/s, accuracy=98.7, loss=0.0485]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 83.76it/s, accuracy=92.3, loss=0.00209]
Epoch 164: 100%|██████████████████████| 391/391 [00:07<00:00, 50.94it/s, accuracy=98.6, loss=0.0485]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.98it/s, accuracy=92.4, loss

Saved best model at epoch 164 with accuracy: 92.41%


Epoch 165: 100%|██████████████████████| 391/391 [00:06<00:00, 56.64it/s, accuracy=98.7, loss=0.0475]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 85.72it/s, accuracy=92.4, loss=0.00207]
Epoch 166: 100%|██████████████████████| 391/391 [00:07<00:00, 50.65it/s, accuracy=98.7, loss=0.0471]
Testing: 100%|██████████████████████████| 79/79 [00:00<00:00, 80.81it/s, accuracy=92.3, loss=0.0021]
Epoch 167: 100%|██████████████████████| 391/391 [00:07<00:00, 50.86it/s, accuracy=98.7, loss=0.0459]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.54it/s, accuracy=92.2, loss=0.00213]
Epoch 168: 100%|██████████████████████| 391/391 [00:08<00:00, 47.66it/s, accuracy=98.8, loss=0.0439]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.82it/s, accuracy=92.3, loss=0.00214]
Epoch 169: 100%|██████████████████████| 391/391 [00:07<00:00, 50.75it/s, accuracy=98.9, loss=0.0443]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 85.36it/s, accuracy=92.2, loss

Saved best model at epoch 173 with accuracy: 92.46%


Epoch 174: 100%|██████████████████████| 391/391 [00:07<00:00, 54.59it/s, accuracy=98.9, loss=0.0405]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 83.44it/s, accuracy=92.5, loss=0.00212]


Saved best model at epoch 174 with accuracy: 92.48%


Epoch 175: 100%|██████████████████████| 391/391 [00:07<00:00, 49.04it/s, accuracy=98.9, loss=0.0403]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 80.91it/s, accuracy=92.3, loss=0.00214]
Epoch 176: 100%|████████████████████████| 391/391 [00:08<00:00, 48.09it/s, accuracy=99, loss=0.0388]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 75.05it/s, accuracy=92.5, loss=0.00215]
Epoch 177: 100%|██████████████████████| 391/391 [00:07<00:00, 49.70it/s, accuracy=98.9, loss=0.0391]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 70.41it/s, accuracy=92.4, loss=0.00217]
Epoch 178: 100%|████████████████████████| 391/391 [00:08<00:00, 46.38it/s, accuracy=99, loss=0.0393]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 79.10it/s, accuracy=92.4, loss=0.00217]
Epoch 179: 100%|██████████████████████| 391/391 [00:07<00:00, 50.33it/s, accuracy=99.1, loss=0.0375]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 68.15it/s, accuracy=92.4, loss

Saved best model at epoch 185 with accuracy: 92.50%


Epoch 186: 100%|███████████████████████| 391/391 [00:08<00:00, 45.17it/s, accuracy=99.1, loss=0.036]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 69.57it/s, accuracy=92.5, loss=0.00222]
Epoch 187: 100%|████████████████████████| 391/391 [00:08<00:00, 46.64it/s, accuracy=99, loss=0.0368]
Testing: 100%|█████████████████████████| 79/79 [00:01<00:00, 72.02it/s, accuracy=92.5, loss=0.00219]
Epoch 188: 100%|██████████████████████| 391/391 [00:08<00:00, 47.11it/s, accuracy=99.1, loss=0.0349]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 82.00it/s, accuracy=92.4, loss=0.00219]
Epoch 189: 100%|██████████████████████| 391/391 [00:07<00:00, 52.42it/s, accuracy=99.1, loss=0.0351]
Testing: 100%|█████████████████████████| 79/79 [00:00<00:00, 84.12it/s, accuracy=92.4, loss=0.00219]
Epoch 190: 100%|██████████████████████| 391/391 [00:07<00:00, 55.26it/s, accuracy=99.1, loss=0.0345]
Testing: 100%|██████████████████████████| 79/79 [00:00<00:00, 79.33it/s, accuracy=92.3, los