In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import v2
import torch.nn.functional as F
from torch.nn import init

FLIP_PROBABILITY = 0.1

data_transform = v2.Compose([
    v2.Resize(size=(224,224)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomRotation(degrees=(-10,10)),
    v2.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
    v2.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    v2.ToTensor(),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Add normalization
])



In [4]:
train_data = datasets.ImageFolder(root="/content/drive/MyDrive/dataset_v4/train",
                                  transform=data_transform,
                                  target_transform=None)
test_data = datasets.ImageFolder(root="/content/drive/MyDrive/dataset_v4/test",
                                  transform=data_transform)
class_names = train_data.classes

In [5]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

class PatchEmbed(nn.Module):
    """Split image into patches and then embed them."""
    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96):
        super().__init__()
        self.img_size = (img_size, img_size)
        self.patch_size = (patch_size, patch_size)
        self.patches_resolution = [img_size // patch_size, img_size // patch_size]
        self.num_patches = (img_size // patch_size) ** 2

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        B, C, H, W = x.shape
        x = self.proj(x).flatten(2).transpose(1, 2)  # B Ph*Pw C
        x = self.norm(x)
        return x

In [7]:
class WindowAttention(nn.Module):
    """Window based multi-head self attention."""
    def __init__(self, dim, window_size, num_heads, qkv_bias=True):
        super().__init__()
        self.dim = dim
        self.window_size = window_size  # Wh, Ww
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

        self.relative_position_bias_table = nn.Parameter(
            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))

        # Get pair-wise relative position index for each token inside the window
        coords_h = torch.arange(window_size[0])
        coords_w = torch.arange(window_size[1])
        coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing="ij"))
        coords_flatten = torch.flatten(coords, 1)
        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
        relative_coords = relative_coords.permute(1, 2, 0).contiguous()
        relative_coords[:, :, 0] += window_size[0] - 1
        relative_coords[:, :, 1] += window_size[1] - 1
        relative_coords[:, :, 0] *= 2 * window_size[1] - 1
        relative_position_index = relative_coords.sum(-1)
        self.register_buffer("relative_position_index", relative_position_index)

        init.trunc_normal_(self.relative_position_bias_table, std=.02)

    def forward(self, x, mask=None):
        B_, N, C = x.shape
        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))

        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
            self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)
        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()
        attn = attn + relative_position_bias.unsqueeze(0)

        if mask is not None:
            nW = mask.shape[0]
            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(-1, self.num_heads, N, N)

        attn = F.softmax(attn, dim=-1)
        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
        x = self.proj(x)
        return x

In [8]:
class SwinTransformerBlock(nn.Module):
    """Swin Transformer Block."""
    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio

        self.norm1 = nn.LayerNorm(dim)
        self.attn = WindowAttention(
            dim, window_size=(window_size, window_size), num_heads=num_heads, qkv_bias=qkv_bias)

        self.norm2 = nn.LayerNorm(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Linear(mlp_hidden_dim, dim)
        )

    def forward(self, x, H, W):
        B, L, C = x.shape
        shortcut = x
        x = self.norm1(x)
        x = x.view(B, H, W, C)

        # pad feature maps to multiples of window size
        pad_l = pad_t = 0
        pad_r = (self.window_size - W % self.window_size) % self.window_size
        pad_b = (self.window_size - H % self.window_size) % self.window_size
        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            shifted_x = x

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # nW*B, window_size, window_size, C
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # nW*B, window_size*window_size, C

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows)  # nW*B, window_size*window_size, C

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        else:
            x = shifted_x

        if pad_r > 0 or pad_b > 0:
            x = x[:, :H, :W, :].contiguous()

        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + x
        x = x + self.mlp(self.norm2(x))

        return x

In [9]:
class SwinTransformer(nn.Module):
    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000,
                 embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24],
                 window_size=7, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0., attn_drop_rate=0.):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))

        # split image into non-overlapping patches
        self.patch_embed = PatchEmbed(
            img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)

        # stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_rate, sum(depths))]

        # build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            layer = BasicLayer(
                dim=int(embed_dim * 2 ** i_layer),
                depth=depths[i_layer],
                num_heads=num_heads[i_layer],
                window_size=window_size,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                attn_drop=attn_drop_rate,
                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None
            )
            self.layers.append(layer)

        self.norm = nn.LayerNorm(self.num_features)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.head = nn.Linear(self.num_features, num_classes)

    def forward(self, x):
        x = self.patch_embed(x)
        for layer in self.layers:
            x = layer(x)
        x = self.norm(x)
        x = self.avgpool(x.transpose(1, 2))
        x = torch.flatten(x, 1)
        x = self.head(x)
        return x

def window_partition(x, window_size):
    """
    Args:
        x: (B, H, W, C)
        window_size (int): window size
    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
    return windows

def window_reverse(windows, window_size, H, W):
    """
    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): Window size
        H (int): Height of image
        W (int): Width of image
    Returns:
        x: (B, H, W, C)
    """
    B = int(windows.shape[0] / (H * W / window_size / window_size))
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
    return x

class PatchMerging(nn.Module):
    """Patch Merging Layer."""
    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = nn.LayerNorm(4 * dim)

    def forward(self, x, H, W):
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"
        x = x.view(B, H, W, C)

        # padding
        pad_input = (H % 2 == 1) or (W % 2 == 1)
        if pad_input:
            x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2))

        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C

        x = self.norm(x)
        x = self.reduction(x)

        return x

In [10]:
class BasicLayer(nn.Module):
    """A basic Swin Transformer layer for one stage."""
    def __init__(self, dim, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
                 downsample=None):
        super().__init__()
        self.dim = dim
        self.depth = depth
        self.window_size = window_size
        self.shift_size = window_size // 2

        # build blocks
        self.blocks = nn.ModuleList([
            SwinTransformerBlock(
                dim=dim,
                num_heads=num_heads,
                window_size=window_size,
                shift_size=0 if (i % 2 == 0) else self.shift_size,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop[i] if isinstance(drop, list) else drop,
                attn_drop=attn_drop)
            for i in range(depth)])

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim)
        else:
            self.downsample = None

    def forward(self, x):
        B, L, C = x.shape
        H = W = int(L ** 0.5)

        for blk in self.blocks:
            x = blk(x, H, W)

        if self.downsample is not None:
            x = self.downsample(x, H, W)
            H, W = H // 2, W // 2

        return x

In [12]:
# Create the Swin Transformer model
model = SwinTransformer(
    img_size=224,
    patch_size=4,
    in_chans=3,
    num_classes=len(class_names),
    embed_dim=96,
    depths=[2, 2, 6, 2],
    num_heads=[3, 6, 12, 24],
    window_size=7,
    mlp_ratio=4.,
    qkv_bias=True,
    drop_rate=0.1,
    attn_drop_rate=0.1
).to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.05)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)

from tqdm import tqdm

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    progress_bar = tqdm(dataloader, desc='Training')
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    return total_loss / len(dataloader), 100. * correct / total

def test(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    progress_bar = tqdm(dataloader, desc='Testing')
    with torch.no_grad():
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    return total_loss / len(dataloader), 100. * correct / total

# Training loop
num_epochs = 100
best_acc = 0
best_results = None

for epoch in range(num_epochs):
    print(f'\nEpoch: {epoch+1}')

    # Train
    train_loss, train_acc = train_one_epoch(model, train_dataloader, criterion, optimizer, device)

    # Test
    test_loss, test_acc = test(model, test_dataloader, criterion, device)

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

    # Update learning rate
    scheduler.step()

    # Save best model
    if test_acc > best_acc:
        best_acc = test_acc
        best_results = (test_loss, test_acc)
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
        }, 'best_swin_transformer.pth')

print('\nTraining finished!')
print(f'Best model performance:')
print(f'Test Loss: {best_results[0]:.4f}, Test Acc: {best_results[1]:.2f}%')


Epoch: 1


Training: 100%|██████████| 121/121 [00:49<00:00,  2.44it/s]
Testing: 100%|██████████| 46/46 [03:21<00:00,  4.38s/it]


Train Loss: 0.6135, Train Acc: 74.77%
Test Loss: 0.5114, Test Acc: 81.24%

Epoch: 2


Training: 100%|██████████| 121/121 [00:49<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.62it/s]


Train Loss: 0.5519, Train Acc: 78.41%
Test Loss: 0.5154, Test Acc: 81.93%

Epoch: 3


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.5107, Train Acc: 81.17%
Test Loss: 0.4914, Test Acc: 83.03%

Epoch: 4


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.5127, Train Acc: 81.22%
Test Loss: 0.5088, Test Acc: 82.90%

Epoch: 5


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.4994, Train Acc: 82.99%
Test Loss: 0.4762, Test Acc: 84.41%

Epoch: 6


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.4950, Train Acc: 82.93%
Test Loss: 0.4646, Test Acc: 84.97%

Epoch: 7


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.4793, Train Acc: 83.77%
Test Loss: 0.4853, Test Acc: 84.55%

Epoch: 8


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.4772, Train Acc: 84.08%
Test Loss: 0.4770, Test Acc: 83.59%

Epoch: 9


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.4698, Train Acc: 83.77%
Test Loss: 0.4840, Test Acc: 83.86%

Epoch: 10


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.4624, Train Acc: 84.70%
Test Loss: 0.5584, Test Acc: 76.69%

Epoch: 11


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.4621, Train Acc: 84.29%
Test Loss: 0.4509, Test Acc: 86.62%

Epoch: 12


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.73it/s]


Train Loss: 0.4607, Train Acc: 85.48%
Test Loss: 0.4857, Test Acc: 83.03%

Epoch: 13


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.4522, Train Acc: 84.76%
Test Loss: 0.4861, Test Acc: 82.76%

Epoch: 14


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.75it/s]


Train Loss: 0.4665, Train Acc: 84.24%
Test Loss: 0.5137, Test Acc: 83.45%

Epoch: 15


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.4573, Train Acc: 84.50%
Test Loss: 0.4440, Test Acc: 85.24%

Epoch: 16


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.73it/s]


Train Loss: 0.4537, Train Acc: 84.91%
Test Loss: 0.4315, Test Acc: 86.21%

Epoch: 17


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.4221, Train Acc: 86.68%
Test Loss: 0.4471, Test Acc: 84.55%

Epoch: 18


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.72it/s]


Train Loss: 0.4397, Train Acc: 86.58%
Test Loss: 0.4354, Test Acc: 87.31%

Epoch: 19


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.4169, Train Acc: 87.83%
Test Loss: 0.4388, Test Acc: 86.34%

Epoch: 20


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.4024, Train Acc: 88.55%
Test Loss: 0.4381, Test Acc: 86.21%

Epoch: 21


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.4130, Train Acc: 87.57%
Test Loss: 0.4069, Test Acc: 88.41%

Epoch: 22


Training: 100%|██████████| 121/121 [00:49<00:00,  2.44it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.4096, Train Acc: 87.51%
Test Loss: 0.4044, Test Acc: 88.83%

Epoch: 23


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.61it/s]


Train Loss: 0.4120, Train Acc: 88.50%
Test Loss: 0.3823, Test Acc: 88.83%

Epoch: 24


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.3835, Train Acc: 89.39%
Test Loss: 0.3684, Test Acc: 90.48%

Epoch: 25


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.3806, Train Acc: 90.17%
Test Loss: 0.4005, Test Acc: 88.55%

Epoch: 26


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.3751, Train Acc: 89.65%
Test Loss: 0.3638, Test Acc: 90.34%

Epoch: 27


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.3637, Train Acc: 90.17%
Test Loss: 0.3584, Test Acc: 90.62%

Epoch: 28


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.3562, Train Acc: 90.69%
Test Loss: 0.3813, Test Acc: 88.69%

Epoch: 29


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.69it/s]


Train Loss: 0.3569, Train Acc: 90.37%
Test Loss: 0.4373, Test Acc: 83.72%

Epoch: 30


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.58it/s]


Train Loss: 0.3641, Train Acc: 90.17%
Test Loss: 0.3525, Test Acc: 90.62%

Epoch: 31


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.3396, Train Acc: 91.88%
Test Loss: 0.3345, Test Acc: 91.59%

Epoch: 32


Training: 100%|██████████| 121/121 [00:48<00:00,  2.52it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.3472, Train Acc: 91.05%
Test Loss: 0.3833, Test Acc: 89.93%

Epoch: 33


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.3498, Train Acc: 90.37%
Test Loss: 0.3570, Test Acc: 90.90%

Epoch: 34


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.3427, Train Acc: 91.42%
Test Loss: 0.3321, Test Acc: 91.86%

Epoch: 35


Training: 100%|██████████| 121/121 [00:49<00:00,  2.43it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.3384, Train Acc: 91.42%
Test Loss: 0.3297, Test Acc: 91.17%

Epoch: 36


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.3174, Train Acc: 92.98%
Test Loss: 0.3186, Test Acc: 92.69%

Epoch: 37


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.3296, Train Acc: 91.78%
Test Loss: 0.3366, Test Acc: 92.00%

Epoch: 38


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.3260, Train Acc: 92.66%
Test Loss: 0.3093, Test Acc: 93.79%

Epoch: 39


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.3234, Train Acc: 92.35%
Test Loss: 0.3342, Test Acc: 92.14%

Epoch: 40


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.3203, Train Acc: 92.56%
Test Loss: 0.3242, Test Acc: 93.52%

Epoch: 41


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.3291, Train Acc: 92.51%
Test Loss: 0.3300, Test Acc: 92.00%

Epoch: 42


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.76it/s]


Train Loss: 0.3305, Train Acc: 92.30%
Test Loss: 0.4144, Test Acc: 88.55%

Epoch: 43


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.3445, Train Acc: 90.69%
Test Loss: 0.3227, Test Acc: 92.41%

Epoch: 44


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.3103, Train Acc: 93.44%
Test Loss: 0.3152, Test Acc: 92.97%

Epoch: 45


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.3033, Train Acc: 93.70%
Test Loss: 0.2986, Test Acc: 94.21%

Epoch: 46


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.74it/s]


Train Loss: 0.3101, Train Acc: 92.82%
Test Loss: 0.2944, Test Acc: 94.07%

Epoch: 47


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.3032, Train Acc: 93.60%
Test Loss: 0.3047, Test Acc: 93.79%

Epoch: 48


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.71it/s]


Train Loss: 0.3026, Train Acc: 93.76%
Test Loss: 0.3201, Test Acc: 92.69%

Epoch: 49


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2966, Train Acc: 93.91%
Test Loss: 0.2936, Test Acc: 94.48%

Epoch: 50


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.2963, Train Acc: 93.86%
Test Loss: 0.3109, Test Acc: 92.55%

Epoch: 51


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.69it/s]


Train Loss: 0.2972, Train Acc: 93.86%
Test Loss: 0.2929, Test Acc: 94.21%

Epoch: 52


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.2892, Train Acc: 94.22%
Test Loss: 0.3085, Test Acc: 93.10%

Epoch: 53


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2918, Train Acc: 94.64%
Test Loss: 0.2960, Test Acc: 94.48%

Epoch: 54


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.2887, Train Acc: 94.48%
Test Loss: 0.2905, Test Acc: 94.62%

Epoch: 55


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.3009, Train Acc: 94.12%
Test Loss: 0.2993, Test Acc: 93.52%

Epoch: 56


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2826, Train Acc: 95.53%
Test Loss: 0.2802, Test Acc: 94.90%

Epoch: 57


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2852, Train Acc: 94.80%
Test Loss: 0.2991, Test Acc: 94.21%

Epoch: 58


Training: 100%|██████████| 121/121 [00:48<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2891, Train Acc: 95.21%
Test Loss: 0.3190, Test Acc: 92.97%

Epoch: 59


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.71it/s]


Train Loss: 0.2836, Train Acc: 94.64%
Test Loss: 0.2839, Test Acc: 95.17%

Epoch: 60


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.2703, Train Acc: 95.79%
Test Loss: 0.2830, Test Acc: 94.76%

Epoch: 61


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.76it/s]


Train Loss: 0.2764, Train Acc: 95.47%
Test Loss: 0.2873, Test Acc: 94.90%

Epoch: 62


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.68it/s]


Train Loss: 0.2779, Train Acc: 95.21%
Test Loss: 0.2771, Test Acc: 95.45%

Epoch: 63


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2738, Train Acc: 95.73%
Test Loss: 0.2869, Test Acc: 94.34%

Epoch: 64


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2838, Train Acc: 95.42%
Test Loss: 0.2905, Test Acc: 95.17%

Epoch: 65


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2690, Train Acc: 95.94%
Test Loss: 0.2827, Test Acc: 94.90%

Epoch: 66


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2701, Train Acc: 95.37%
Test Loss: 0.2902, Test Acc: 93.66%

Epoch: 67


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.2724, Train Acc: 95.11%
Test Loss: 0.2758, Test Acc: 95.59%

Epoch: 68


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.62it/s]


Train Loss: 0.2642, Train Acc: 95.94%
Test Loss: 0.2659, Test Acc: 96.14%

Epoch: 69


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2680, Train Acc: 95.53%
Test Loss: 0.2778, Test Acc: 95.45%

Epoch: 70


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2650, Train Acc: 96.20%
Test Loss: 0.2700, Test Acc: 96.00%

Epoch: 71


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2659, Train Acc: 95.99%
Test Loss: 0.2583, Test Acc: 96.69%

Epoch: 72


Training: 100%|██████████| 121/121 [00:49<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.71it/s]


Train Loss: 0.2640, Train Acc: 96.05%
Test Loss: 0.2512, Test Acc: 97.10%

Epoch: 73


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2580, Train Acc: 96.88%
Test Loss: 0.2805, Test Acc: 95.03%

Epoch: 74


Training: 100%|██████████| 121/121 [00:48<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2610, Train Acc: 96.41%
Test Loss: 0.2537, Test Acc: 96.83%

Epoch: 75


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.60it/s]


Train Loss: 0.2545, Train Acc: 96.72%
Test Loss: 0.2525, Test Acc: 96.83%

Epoch: 76


Training: 100%|██████████| 121/121 [00:49<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2548, Train Acc: 96.83%
Test Loss: 0.2653, Test Acc: 95.59%

Epoch: 77


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.59it/s]


Train Loss: 0.2547, Train Acc: 96.72%
Test Loss: 0.2606, Test Acc: 96.69%

Epoch: 78


Training: 100%|██████████| 121/121 [00:48<00:00,  2.48it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2561, Train Acc: 96.31%
Test Loss: 0.2695, Test Acc: 95.72%

Epoch: 79


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.2497, Train Acc: 96.88%
Test Loss: 0.2592, Test Acc: 96.83%

Epoch: 80


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2532, Train Acc: 96.67%
Test Loss: 0.2467, Test Acc: 97.24%

Epoch: 81


Training: 100%|██████████| 121/121 [00:49<00:00,  2.46it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.73it/s]


Train Loss: 0.2478, Train Acc: 97.24%
Test Loss: 0.2493, Test Acc: 96.97%

Epoch: 82


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.2468, Train Acc: 97.24%
Test Loss: 0.2575, Test Acc: 96.97%

Epoch: 83


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.64it/s]


Train Loss: 0.2437, Train Acc: 97.29%
Test Loss: 0.2563, Test Acc: 96.69%

Epoch: 84


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.2450, Train Acc: 97.71%
Test Loss: 0.2505, Test Acc: 97.24%

Epoch: 85


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2429, Train Acc: 97.35%
Test Loss: 0.2681, Test Acc: 95.86%

Epoch: 86


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.61it/s]


Train Loss: 0.2472, Train Acc: 96.83%
Test Loss: 0.2482, Test Acc: 97.10%

Epoch: 87


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2454, Train Acc: 97.14%
Test Loss: 0.2643, Test Acc: 96.00%

Epoch: 88


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2420, Train Acc: 97.35%
Test Loss: 0.2599, Test Acc: 96.41%

Epoch: 89


Training: 100%|██████████| 121/121 [00:47<00:00,  2.52it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.71it/s]


Train Loss: 0.2419, Train Acc: 97.29%
Test Loss: 0.2548, Test Acc: 97.10%

Epoch: 90


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.70it/s]


Train Loss: 0.2427, Train Acc: 97.45%
Test Loss: 0.2563, Test Acc: 96.83%

Epoch: 91


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.58it/s]


Train Loss: 0.2367, Train Acc: 98.13%
Test Loss: 0.2529, Test Acc: 96.83%

Epoch: 92


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2487, Train Acc: 97.24%
Test Loss: 0.2475, Test Acc: 96.83%

Epoch: 93


Training: 100%|██████████| 121/121 [00:48<00:00,  2.51it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.67it/s]


Train Loss: 0.2385, Train Acc: 97.87%
Test Loss: 0.2596, Test Acc: 96.41%

Epoch: 94


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]


Train Loss: 0.2372, Train Acc: 97.97%
Test Loss: 0.2423, Test Acc: 97.10%

Epoch: 95


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.69it/s]


Train Loss: 0.2381, Train Acc: 97.66%
Test Loss: 0.2613, Test Acc: 96.69%

Epoch: 96


Training: 100%|██████████| 121/121 [00:48<00:00,  2.49it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.63it/s]


Train Loss: 0.2469, Train Acc: 97.40%
Test Loss: 0.2583, Test Acc: 96.69%

Epoch: 97


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.2393, Train Acc: 97.81%
Test Loss: 0.2607, Test Acc: 95.59%

Epoch: 98


Training: 100%|██████████| 121/121 [00:49<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.60it/s]


Train Loss: 0.2437, Train Acc: 97.29%
Test Loss: 0.2377, Test Acc: 97.66%

Epoch: 99


Training: 100%|██████████| 121/121 [00:49<00:00,  2.47it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.65it/s]


Train Loss: 0.2406, Train Acc: 97.61%
Test Loss: 0.2532, Test Acc: 96.83%

Epoch: 100


Training: 100%|██████████| 121/121 [00:48<00:00,  2.50it/s]
Testing: 100%|██████████| 46/46 [00:12<00:00,  3.66it/s]

Train Loss: 0.2382, Train Acc: 98.13%
Test Loss: 0.2668, Test Acc: 96.14%

Training finished!
Best model performance:
Test Loss: 0.2377, Test Acc: 97.66%



