<a href="https://colab.research.google.com/github/kanaad-lims/AIRL-Internship-Questions/blob/main/q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import numpy as np
from tqdm import tqdm

# Seed the PyTorch and NumPy random number generators with a fixed value so
# that repeated runs start from the same random state.
torch.manual_seed(42)
np.random.seed(42)

In [31]:
class PatchEmbedding(nn.Module):
    """Turn a batch of images into a sequence of patch embeddings.

    A convolution whose kernel size equals its stride cuts the image into
    non-overlapping ``patch_size`` x ``patch_size`` tiles and linearly projects
    every tile to ``embed_dim`` channels in a single step.

    Attributes:
        img_size: Side length the module was configured for.
        patch_size: Side length of one square patch.
        n_patches: ``(img_size // patch_size) ** 2``.
        proj: The strided convolution performing the projection.
    """

    def __init__(self, img_size=32, patch_size=4, in_channels=3, embed_dim=256):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size

        patches_per_side = img_size // patch_size
        self.n_patches = patches_per_side * patches_per_side

        # kernel == stride  ->  one output position per non-overlapping patch
        self.proj = nn.Conv2d(
            in_channels,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
        )

    def forward(self, x):
        """Map ``(B, C, H, W)`` images to ``(B, n_patches, embed_dim)`` tokens."""
        feature_map = self.proj(x)        # (B, embed_dim, n_patches_h, n_patches_w)
        tokens = feature_map.flatten(2)   # (B, embed_dim, n_patches)
        return tokens.transpose(1, 2)     # (B, n_patches, embed_dim)

In [32]:
class MultiHeadSelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    A single linear layer produces queries, keys and values for all heads at
    once; the per-head results are concatenated and passed through an output
    projection. The same dropout module is applied to the attention weights
    and to the projected output.

    Args:
        embed_dim: Size of the token embedding (last input dimension).
        n_heads: Number of attention heads; must divide ``embed_dim``.
        dropout: Dropout probability.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``embed_dim`` evenly.
    """

    def __init__(self, embed_dim=256, n_heads=8, dropout=0.1):
        super().__init__()
        # Validate before dividing, and with a real exception rather than an
        # ``assert`` so the check still runs under ``python -O``.
        if n_heads <= 0 or embed_dim % n_heads != 0:
            raise ValueError(
                "embed_dim must be divisible by n_heads "
                f"(got embed_dim={embed_dim}, n_heads={n_heads})"
            )

        self.embed_dim = embed_dim
        self.n_heads = n_heads
        self.head_dim = embed_dim // n_heads

        self.qkv = nn.Linear(embed_dim, embed_dim * 3)
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply self-attention to ``x`` of shape ``(B, N, C)``; same shape out."""
        B, N, C = x.shape

        # Generate Q, K, V in one projection, then split per head
        qkv = self.qkv(x).reshape(B, N, 3, self.n_heads, self.head_dim)
        qkv = qkv.permute(2, 0, 3, 1, 4)  # (3, B, n_heads, N, head_dim)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Attention scores, scaled by 1/sqrt(head_dim) before the softmax
        attn = (q @ k.transpose(-2, -1)) * (self.head_dim ** -0.5)
        attn = F.softmax(attn, dim=-1)
        attn = self.dropout(attn)

        # Weighted sum of values, then merge heads back into the channel axis
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.dropout(x)

        return x

In [33]:
class MLP(nn.Module):
    """Two-layer position-wise feed-forward network.

    Expands ``embed_dim`` to ``hidden_dim``, applies GELU, and projects back
    to ``embed_dim``. The same dropout module follows both the activation and
    the output projection.
    """

    def __init__(self, embed_dim=256, hidden_dim=512, dropout=0.1):
        super().__init__()
        self.fc1 = nn.Linear(embed_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the transformed features; the last dimension stays ``embed_dim``."""
        hidden = self.dropout(F.gelu(self.fc1(x)))
        return self.dropout(self.fc2(hidden))

In [34]:
class TransformerBlock(nn.Module):
    """One pre-norm Transformer encoder layer.

    Each sub-layer (self-attention, then MLP) receives a LayerNorm-ed copy of
    the input and its output is added back onto the un-normalized input.
    The MLP hidden width is ``int(embed_dim * mlp_ratio)``.
    """

    def __init__(self, embed_dim=256, n_heads=8, mlp_ratio=2, dropout=0.1):
        super().__init__()
        hidden_dim = int(embed_dim * mlp_ratio)

        self.norm1 = nn.LayerNorm(embed_dim)
        self.attn = MultiHeadSelfAttention(embed_dim, n_heads, dropout)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.mlp = MLP(embed_dim, hidden_dim, dropout)

    def forward(self, x):
        """Run attention and MLP sub-layers, each wrapped in a residual connection."""
        attended = self.attn(self.norm1(x))
        x = x + attended

        transformed = self.mlp(self.norm2(x))
        return x + transformed

In [35]:
class VisionTransformer(nn.Module):
    """Vision Transformer classifier.

    Pipeline: patch embedding -> prepend a learnable CLS token -> add learnable
    positional embeddings -> ``depth`` Transformer blocks -> LayerNorm ->
    linear head applied to the CLS token.

    Args:
        img_size: Input image side length passed to the patch embedding.
        patch_size: Side length of one patch.
        in_channels: Number of input image channels.
        n_classes: Number of output logits.
        embed_dim: Token embedding width.
        depth: Number of Transformer blocks.
        n_heads: Attention heads per block.
        mlp_ratio: MLP hidden width as a multiple of ``embed_dim``.
        dropout: Dropout probability used throughout.
    """

    def __init__(
        self,
        img_size=32,
        patch_size=4,
        in_channels=3,
        n_classes=10,
        embed_dim=256,
        depth=6,
        n_heads=8,
        mlp_ratio=2,
        dropout=0.1
    ):
        super().__init__()

        # Image -> patch tokens
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        seq_len = self.patch_embed.n_patches + 1  # +1 slot for the CLS token

        # Learnable CLS token and positional table (filled in below)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, seq_len, embed_dim))
        self.pos_drop = nn.Dropout(dropout)

        # Encoder stack
        encoder_layers = [
            TransformerBlock(embed_dim, n_heads, mlp_ratio, dropout)
            for _ in range(depth)
        ]
        self.blocks = nn.ModuleList(encoder_layers)

        # Final norm + classifier
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, n_classes)

        # Parameter initialisation: CLS token first, then positions, then modules
        for param in (self.cls_token, self.pos_embed):
            nn.init.trunc_normal_(param, std=0.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Initialise Linear layers (truncated normal, zero bias) and LayerNorms (identity)."""
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
            return

        if isinstance(m, nn.LayerNorm):
            nn.init.zeros_(m.bias)
            nn.init.ones_(m.weight)

    def forward(self, x):
        """Return class logits of shape ``(B, n_classes)`` for an image batch ``x``."""
        batch = x.shape[0]

        patches = self.patch_embed(x)

        # One copy of the CLS token per sample, placed at sequence position 0
        cls = self.cls_token.expand(batch, -1, -1)
        tokens = torch.cat([cls, patches], dim=1)

        tokens = self.pos_drop(tokens + self.pos_embed)

        for layer in self.blocks:
            tokens = layer(tokens)

        # Normalise every token, then classify from the CLS position only
        cls_final = self.norm(tokens)[:, 0]
        return self.head(cls_final)

In [36]:
def get_data_loaders(batch_size=128, num_workers=2):
    """Build the CIFAR-10 train and test ``DataLoader`` objects.

    The training pipeline applies random crop (with 4-pixel padding), random
    horizontal flip, the CIFAR10 AutoAugment policy, tensor conversion,
    normalization and random erasing. The test pipeline only converts to a
    tensor and normalizes. Both datasets are stored under ``./data`` and are
    downloaded if missing.

    Args:
        batch_size: Batch size used by both loaders.
        num_workers: Worker process count used by both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # Per-channel statistics shared by both pipelines
    # (presumably the CIFAR-10 training-set mean/std).
    normalize = transforms.Normalize(
        (0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)
    )

    augmenting_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
        transforms.ToTensor(),
        normalize,
        transforms.RandomErasing(p=0.25),
    ]
    transform_train = transforms.Compose(augmenting_steps)
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train
    )
    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test
    )

    # Settings common to both loaders; only ``shuffle`` differs.
    shared_kwargs = {
        'batch_size': batch_size,
        'num_workers': num_workers,
        'pin_memory': True,
    }
    train_loader = DataLoader(train_dataset, shuffle=True, **shared_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared_kwargs)

    return train_loader, test_loader

def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    For every batch: move the data to ``device``, run forward/backward,
    clip the global gradient norm to 1.0 and take an optimizer step. A tqdm
    progress bar shows the running mean batch loss and running accuracy.

    Args:
        model: Module to train; put into training mode here.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` for the epoch.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    n_batches = 0

    pbar = tqdm(loader, desc='Training')
    # Count batches ourselves: tqdm only synchronizes ``pbar.n`` when it
    # redraws the bar (rate-limited), so dividing by ``pbar.n + 1`` overstates
    # the running loss whenever batches finish faster than the refresh rate.
    for n_batches, (inputs, targets) in enumerate(pbar, start=1):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        pbar.set_postfix({
            'loss': running_loss / n_batches,
            'acc': 100. * correct / total
        })

    # ``n_batches`` equals ``len(loader)`` after a full pass, and also works
    # for loaders that do not define ``__len__``.
    return running_loss / n_batches, 100. * correct / total


In [37]:
def evaluate(model, loader, criterion, device):
    """Compute loss and accuracy of ``model`` over ``loader`` without gradients.

    Args:
        model: Module to evaluate; put into eval mode here.
        loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)``, where the loss is the sum of
        per-batch losses divided by ``len(loader)``.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_inputs, batch_targets in tqdm(loader, desc='Evaluating'):
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_targets).item()

            # Index of the largest logit along dim 1 is the predicted class
            predictions = logits.max(1)[1]
            n_seen += batch_targets.size(0)
            n_correct += (predictions == batch_targets).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

In [None]:
def main():
    """Train a Vision Transformer on CIFAR-10 and keep the best checkpoint.

    Prints the configuration, builds the data loaders and model, then trains
    with AdamW, label-smoothed cross-entropy and a cosine learning-rate
    schedule stepped once per epoch. After every epoch the model is evaluated
    on the test loader; whenever the test accuracy improves, the weights are
    written to ``best_vit_cifar10.pth``.
    """
    # Hyper-parameters for the whole run
    config = dict(
        img_size=32,
        patch_size=4,
        embed_dim=384,
        depth=7,
        n_heads=6,
        mlp_ratio=3,
        dropout=0.1,
        batch_size=128,
        epochs=200,
        lr=3e-4,
        weight_decay=0.05,
    )

    print("Configuration:")
    for name, value in config.items():
        print(f"  {name}: {value}")

    # Prefer the GPU when one is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\nUsing device: {device}")

    train_loader, test_loader = get_data_loaders(config['batch_size'])

    # Architecture settings are forwarded from the config by name
    model_keys = ('img_size', 'patch_size', 'embed_dim', 'depth',
                  'n_heads', 'mlp_ratio', 'dropout')
    model_kwargs = {key: config[key] for key in model_keys}
    model = VisionTransformer(n_classes=10, **model_kwargs).to(device)

    n_params = sum(p.numel() for p in model.parameters())
    print(f"\nModel parameters: {n_params:,}")

    n_epochs = config['epochs']
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = AdamW(
        model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay'],
    )
    scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs)

    best_acc = 0.0
    separator = '=' * 50

    for epoch_number in range(1, n_epochs + 1):
        print(f"\nEpoch {epoch_number}/{n_epochs}")

        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        scheduler.step()

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

        # Only a strictly better test accuracy replaces the saved checkpoint
        if test_acc <= best_acc:
            continue
        best_acc = test_acc
        torch.save(model.state_dict(), 'best_vit_cifar10.pth')
        print(f"✓ Best model saved! (Acc: {best_acc:.2f}%)")

    print(f"\n{separator}")
    print("Training complete!")
    print(f"Best test accuracy: {best_acc:.2f}%")
    print(f"{separator}")

# Start training only when this file is executed as the main program,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Configuration:
  img_size: 32
  patch_size: 4
  embed_dim: 384
  depth: 7
  n_heads: 6
  mlp_ratio: 3
  dropout: 0.1
  batch_size: 128
  epochs: 200
  lr: 0.0003
  weight_decay: 0.05

Using device: cuda

Model parameters: 10,402,954

Epoch 1/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.05it/s, loss=2.07, acc=24.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.90it/s]


Train Loss: 2.0720, Train Acc: 24.88%
Test Loss: 1.8015, Test Acc: 37.41%
✓ Best model saved! (Acc: 37.41%)

Epoch 2/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.89, acc=34.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.97it/s]


Train Loss: 1.8860, Train Acc: 34.93%
Test Loss: 1.6705, Test Acc: 45.14%
✓ Best model saved! (Acc: 45.14%)

Epoch 3/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.8, acc=39.3]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.59it/s]


Train Loss: 1.8016, Train Acc: 39.25%
Test Loss: 1.5999, Test Acc: 48.51%
✓ Best model saved! (Acc: 48.51%)

Epoch 4/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.76, acc=41.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.54it/s]


Train Loss: 1.7595, Train Acc: 41.42%
Test Loss: 1.5279, Test Acc: 51.82%
✓ Best model saved! (Acc: 51.82%)

Epoch 5/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.72, acc=43.5]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 1.7208, Train Acc: 43.51%
Test Loss: 1.4790, Test Acc: 54.16%
✓ Best model saved! (Acc: 54.16%)

Epoch 6/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.69, acc=44.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.80it/s]


Train Loss: 1.6904, Train Acc: 44.91%
Test Loss: 1.4729, Test Acc: 55.26%
✓ Best model saved! (Acc: 55.26%)

Epoch 7/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.66, acc=46.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.36it/s]


Train Loss: 1.6590, Train Acc: 46.52%
Test Loss: 1.4568, Test Acc: 56.03%
✓ Best model saved! (Acc: 56.03%)

Epoch 8/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.64, acc=47.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 1.6397, Train Acc: 47.05%
Test Loss: 1.4015, Test Acc: 58.79%
✓ Best model saved! (Acc: 58.79%)

Epoch 9/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.61, acc=48.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.76it/s]


Train Loss: 1.6109, Train Acc: 48.84%
Test Loss: 1.3782, Test Acc: 59.19%
✓ Best model saved! (Acc: 59.19%)

Epoch 10/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.58, acc=50.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.63it/s]


Train Loss: 1.5832, Train Acc: 50.25%
Test Loss: 1.3524, Test Acc: 61.10%
✓ Best model saved! (Acc: 61.10%)

Epoch 11/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.56, acc=51.3]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Train Loss: 1.5607, Train Acc: 51.32%
Test Loss: 1.3091, Test Acc: 61.76%
✓ Best model saved! (Acc: 61.76%)

Epoch 12/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.54, acc=52.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.92it/s]


Train Loss: 1.5403, Train Acc: 52.39%
Test Loss: 1.2925, Test Acc: 63.14%
✓ Best model saved! (Acc: 63.14%)

Epoch 13/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.52, acc=53.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.63it/s]


Train Loss: 1.5190, Train Acc: 53.14%
Test Loss: 1.2900, Test Acc: 63.34%
✓ Best model saved! (Acc: 63.34%)

Epoch 14/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.5, acc=54.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.92it/s]


Train Loss: 1.5029, Train Acc: 54.08%
Test Loss: 1.2598, Test Acc: 65.17%
✓ Best model saved! (Acc: 65.17%)

Epoch 15/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.47, acc=55.5]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 1.4726, Train Acc: 55.53%
Test Loss: 1.2507, Test Acc: 65.50%
✓ Best model saved! (Acc: 65.50%)

Epoch 16/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.46, acc=56.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.73it/s]


Train Loss: 1.4623, Train Acc: 56.11%
Test Loss: 1.2061, Test Acc: 67.24%
✓ Best model saved! (Acc: 67.24%)

Epoch 17/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=1.44, acc=56.9]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.35it/s]


Train Loss: 1.4407, Train Acc: 56.90%
Test Loss: 1.1833, Test Acc: 68.20%
✓ Best model saved! (Acc: 68.20%)

Epoch 18/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.42, acc=57.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.92it/s]


Train Loss: 1.4248, Train Acc: 57.74%
Test Loss: 1.1752, Test Acc: 69.28%
✓ Best model saved! (Acc: 69.28%)

Epoch 19/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=1.41, acc=58.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.80it/s]


Train Loss: 1.4083, Train Acc: 58.57%
Test Loss: 1.1764, Test Acc: 68.49%

Epoch 20/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.39, acc=59.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.69it/s]


Train Loss: 1.3865, Train Acc: 59.46%
Test Loss: 1.1437, Test Acc: 70.58%
✓ Best model saved! (Acc: 70.58%)

Epoch 21/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=1.37, acc=60.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.82it/s]


Train Loss: 1.3709, Train Acc: 60.16%
Test Loss: 1.1201, Test Acc: 71.12%
✓ Best model saved! (Acc: 71.12%)

Epoch 22/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=1.36, acc=60.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 1.3602, Train Acc: 60.77%
Test Loss: 1.1439, Test Acc: 70.52%

Epoch 23/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.35, acc=61.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.60it/s]


Train Loss: 1.3467, Train Acc: 61.58%
Test Loss: 1.0959, Test Acc: 72.75%
✓ Best model saved! (Acc: 72.75%)

Epoch 24/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.33, acc=62.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.58it/s]


Train Loss: 1.3302, Train Acc: 62.24%
Test Loss: 1.1069, Test Acc: 73.03%
✓ Best model saved! (Acc: 73.03%)

Epoch 25/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.32, acc=62.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.90it/s]


Train Loss: 1.3175, Train Acc: 62.71%
Test Loss: 1.0764, Test Acc: 74.06%
✓ Best model saved! (Acc: 74.06%)

Epoch 26/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.3, acc=63.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.59it/s]


Train Loss: 1.3005, Train Acc: 63.64%
Test Loss: 1.1038, Test Acc: 72.96%

Epoch 27/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=1.29, acc=64.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.35it/s]


Train Loss: 1.2900, Train Acc: 64.09%
Test Loss: 1.0665, Test Acc: 74.27%
✓ Best model saved! (Acc: 74.27%)

Epoch 28/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.28, acc=64.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Train Loss: 1.2782, Train Acc: 64.39%
Test Loss: 1.0810, Test Acc: 73.51%

Epoch 29/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.27, acc=65]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.78it/s]


Train Loss: 1.2672, Train Acc: 64.98%
Test Loss: 1.0571, Test Acc: 74.38%
✓ Best model saved! (Acc: 74.38%)

Epoch 30/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.25, acc=65.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 14.78it/s]


Train Loss: 1.2527, Train Acc: 65.73%
Test Loss: 1.0247, Test Acc: 76.20%
✓ Best model saved! (Acc: 76.20%)

Epoch 31/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.25, acc=66.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.61it/s]


Train Loss: 1.2473, Train Acc: 66.12%
Test Loss: 1.0128, Test Acc: 76.96%
✓ Best model saved! (Acc: 76.96%)

Epoch 32/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.24, acc=66.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.92it/s]


Train Loss: 1.2351, Train Acc: 66.77%
Test Loss: 1.0424, Test Acc: 75.46%

Epoch 33/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.22, acc=67.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.65it/s]


Train Loss: 1.2249, Train Acc: 67.06%
Test Loss: 0.9988, Test Acc: 77.47%
✓ Best model saved! (Acc: 77.47%)

Epoch 34/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.22, acc=67.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.62it/s]


Train Loss: 1.2163, Train Acc: 67.67%
Test Loss: 1.0113, Test Acc: 76.70%

Epoch 35/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.21, acc=67.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.91it/s]


Train Loss: 1.2117, Train Acc: 67.73%
Test Loss: 0.9820, Test Acc: 78.55%
✓ Best model saved! (Acc: 78.55%)

Epoch 36/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.2, acc=68.3]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.82it/s]


Train Loss: 1.1992, Train Acc: 68.34%
Test Loss: 1.0043, Test Acc: 77.30%

Epoch 37/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.05it/s, loss=1.2, acc=68.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.47it/s]


Train Loss: 1.1961, Train Acc: 68.48%
Test Loss: 0.9785, Test Acc: 78.60%
✓ Best model saved! (Acc: 78.60%)

Epoch 38/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.19, acc=68.9]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.50it/s]


Train Loss: 1.1878, Train Acc: 68.89%
Test Loss: 0.9582, Test Acc: 79.51%
✓ Best model saved! (Acc: 79.51%)

Epoch 39/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.18, acc=69.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.85it/s]


Train Loss: 1.1753, Train Acc: 69.38%
Test Loss: 0.9698, Test Acc: 78.99%

Epoch 40/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.16, acc=70]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.79it/s]


Train Loss: 1.1609, Train Acc: 70.00%
Test Loss: 0.9571, Test Acc: 79.96%
✓ Best model saved! (Acc: 79.96%)

Epoch 41/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.15, acc=70.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.69it/s]


Train Loss: 1.1530, Train Acc: 70.36%
Test Loss: 0.9368, Test Acc: 80.39%
✓ Best model saved! (Acc: 80.39%)

Epoch 42/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.15, acc=70.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.85it/s]


Train Loss: 1.1486, Train Acc: 70.78%
Test Loss: 0.9350, Test Acc: 80.23%

Epoch 43/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.15, acc=70.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 1.1462, Train Acc: 70.75%
Test Loss: 0.9250, Test Acc: 80.91%
✓ Best model saved! (Acc: 80.91%)

Epoch 44/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=1.14, acc=71]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.78it/s]


Train Loss: 1.1372, Train Acc: 71.03%
Test Loss: 0.9287, Test Acc: 81.01%
✓ Best model saved! (Acc: 81.01%)

Epoch 45/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.13, acc=71.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.55it/s]


Train Loss: 1.1319, Train Acc: 71.45%
Test Loss: 0.9311, Test Acc: 80.85%

Epoch 46/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.12, acc=71.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.94it/s]


Train Loss: 1.1226, Train Acc: 71.80%
Test Loss: 0.9198, Test Acc: 81.26%
✓ Best model saved! (Acc: 81.26%)

Epoch 47/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.11, acc=72.3]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 1.1132, Train Acc: 72.31%
Test Loss: 0.9252, Test Acc: 81.10%

Epoch 48/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.11, acc=72.3]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.62it/s]


Train Loss: 1.1134, Train Acc: 72.27%
Test Loss: 0.9194, Test Acc: 81.38%
✓ Best model saved! (Acc: 81.38%)

Epoch 49/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.1, acc=72.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.51it/s]


Train Loss: 1.1025, Train Acc: 72.76%
Test Loss: 0.9185, Test Acc: 81.36%

Epoch 50/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.1, acc=72.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.89it/s]


Train Loss: 1.0995, Train Acc: 72.83%
Test Loss: 0.9209, Test Acc: 81.24%

Epoch 51/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.09, acc=72.9]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.70it/s]


Train Loss: 1.0937, Train Acc: 72.89%
Test Loss: 0.9207, Test Acc: 81.26%

Epoch 52/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=1.09, acc=73.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.43it/s]


Train Loss: 1.0855, Train Acc: 73.66%
Test Loss: 0.9128, Test Acc: 81.95%
✓ Best model saved! (Acc: 81.95%)

Epoch 53/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.08, acc=73.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.88it/s]


Train Loss: 1.0771, Train Acc: 73.62%
Test Loss: 0.9050, Test Acc: 82.24%
✓ Best model saved! (Acc: 82.24%)

Epoch 54/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.08, acc=74]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 1.0784, Train Acc: 74.04%
Test Loss: 0.9197, Test Acc: 81.51%

Epoch 55/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.07, acc=74.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.43it/s]


Train Loss: 1.0669, Train Acc: 74.42%
Test Loss: 0.9017, Test Acc: 82.55%
✓ Best model saved! (Acc: 82.55%)

Epoch 56/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.06, acc=74.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.53it/s]


Train Loss: 1.0637, Train Acc: 74.37%
Test Loss: 0.8815, Test Acc: 82.83%
✓ Best model saved! (Acc: 82.83%)

Epoch 57/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.06, acc=74.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.80it/s]


Train Loss: 1.0590, Train Acc: 74.57%
Test Loss: 0.9023, Test Acc: 81.99%

Epoch 58/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.06, acc=74.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.81it/s]


Train Loss: 1.0558, Train Acc: 74.92%
Test Loss: 0.8883, Test Acc: 82.71%

Epoch 59/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=1.05, acc=75.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.56it/s]


Train Loss: 1.0459, Train Acc: 75.39%
Test Loss: 0.8799, Test Acc: 82.82%

Epoch 60/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=1.05, acc=75.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.95it/s]


Train Loss: 1.0463, Train Acc: 75.36%
Test Loss: 0.8700, Test Acc: 83.51%
✓ Best model saved! (Acc: 83.51%)

Epoch 61/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.04, acc=75.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 1.0430, Train Acc: 75.37%
Test Loss: 0.8660, Test Acc: 83.67%
✓ Best model saved! (Acc: 83.67%)

Epoch 62/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.03, acc=76]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.64it/s]


Train Loss: 1.0317, Train Acc: 76.01%
Test Loss: 0.8618, Test Acc: 84.15%
✓ Best model saved! (Acc: 84.15%)

Epoch 63/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=1.03, acc=76]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.44it/s]


Train Loss: 1.0299, Train Acc: 75.96%
Test Loss: 0.8667, Test Acc: 84.05%

Epoch 64/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1.02, acc=76.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.88it/s]


Train Loss: 1.0231, Train Acc: 76.23%
Test Loss: 0.8857, Test Acc: 83.23%

Epoch 65/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.02, acc=76.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.89it/s]


Train Loss: 1.0169, Train Acc: 76.39%
Test Loss: 0.8625, Test Acc: 83.92%

Epoch 66/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.01, acc=77]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.52it/s]


Train Loss: 1.0118, Train Acc: 76.99%
Test Loss: 0.8557, Test Acc: 84.58%
✓ Best model saved! (Acc: 84.58%)

Epoch 67/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=1.01, acc=76.9]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.61it/s]


Train Loss: 1.0086, Train Acc: 76.90%
Test Loss: 0.8700, Test Acc: 83.96%

Epoch 68/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=1, acc=77.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.89it/s]


Train Loss: 1.0016, Train Acc: 77.24%
Test Loss: 0.8711, Test Acc: 83.68%

Epoch 69/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.997, acc=77.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.77it/s]


Train Loss: 0.9968, Train Acc: 77.50%
Test Loss: 0.8548, Test Acc: 84.51%

Epoch 70/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.05it/s, loss=0.996, acc=77.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.35it/s]


Train Loss: 0.9964, Train Acc: 77.57%
Test Loss: 0.8346, Test Acc: 85.23%
✓ Best model saved! (Acc: 85.23%)

Epoch 71/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.991, acc=77.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.70it/s]


Train Loss: 0.9915, Train Acc: 77.66%
Test Loss: 0.8268, Test Acc: 85.47%
✓ Best model saved! (Acc: 85.47%)

Epoch 72/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.987, acc=78]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.85it/s]


Train Loss: 0.9873, Train Acc: 78.02%
Test Loss: 0.8307, Test Acc: 85.69%
✓ Best model saved! (Acc: 85.69%)

Epoch 73/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.977, acc=78.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.68it/s]


Train Loss: 0.9774, Train Acc: 78.42%
Test Loss: 0.8351, Test Acc: 85.27%

Epoch 74/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.973, acc=78.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.38it/s]


Train Loss: 0.9734, Train Acc: 78.53%
Test Loss: 0.8526, Test Acc: 84.71%

Epoch 75/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.972, acc=78.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 0.9718, Train Acc: 78.66%
Test Loss: 0.8300, Test Acc: 85.69%

Epoch 76/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.962, acc=79.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.82it/s]


Train Loss: 0.9622, Train Acc: 79.08%
Test Loss: 0.8232, Test Acc: 85.67%

Epoch 77/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.962, acc=79.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.46it/s]


Train Loss: 0.9616, Train Acc: 79.19%
Test Loss: 0.8308, Test Acc: 85.57%

Epoch 78/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.95, acc=79.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 0.9497, Train Acc: 79.80%
Test Loss: 0.8209, Test Acc: 85.72%
✓ Best model saved! (Acc: 85.72%)

Epoch 79/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.947, acc=79.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.88it/s]


Train Loss: 0.9470, Train Acc: 79.65%
Test Loss: 0.8195, Test Acc: 86.35%
✓ Best model saved! (Acc: 86.35%)

Epoch 80/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.943, acc=80.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.55it/s]


Train Loss: 0.9431, Train Acc: 80.06%
Test Loss: 0.8207, Test Acc: 86.37%
✓ Best model saved! (Acc: 86.37%)

Epoch 81/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.94, acc=80.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.47it/s]


Train Loss: 0.9402, Train Acc: 80.07%
Test Loss: 0.8119, Test Acc: 86.72%
✓ Best model saved! (Acc: 86.72%)

Epoch 82/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.938, acc=80.3]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 0.9382, Train Acc: 80.28%
Test Loss: 0.8320, Test Acc: 85.59%

Epoch 83/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.931, acc=80.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.61it/s]


Train Loss: 0.9313, Train Acc: 80.44%
Test Loss: 0.8116, Test Acc: 86.81%
✓ Best model saved! (Acc: 86.81%)

Epoch 84/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.05it/s, loss=0.927, acc=80.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.48it/s]


Train Loss: 0.9266, Train Acc: 80.69%
Test Loss: 0.8231, Test Acc: 86.29%

Epoch 85/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.919, acc=81.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.45it/s]


Train Loss: 0.9190, Train Acc: 81.07%
Test Loss: 0.8033, Test Acc: 86.87%
✓ Best model saved! (Acc: 86.87%)

Epoch 86/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.922, acc=80.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.84it/s]


Train Loss: 0.9216, Train Acc: 80.93%
Test Loss: 0.8200, Test Acc: 86.28%

Epoch 87/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.919, acc=81]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.67it/s]


Train Loss: 0.9187, Train Acc: 80.98%
Test Loss: 0.8229, Test Acc: 86.23%

Epoch 88/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.913, acc=81.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.26it/s]


Train Loss: 0.9130, Train Acc: 81.42%
Test Loss: 0.8188, Test Acc: 86.62%

Epoch 89/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.908, acc=81.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.42it/s]


Train Loss: 0.9082, Train Acc: 81.70%
Test Loss: 0.8088, Test Acc: 86.91%
✓ Best model saved! (Acc: 86.91%)

Epoch 90/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.909, acc=81.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.78it/s]


Train Loss: 0.9092, Train Acc: 81.57%
Test Loss: 0.8121, Test Acc: 86.69%

Epoch 91/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.901, acc=81.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.65it/s]


Train Loss: 0.9012, Train Acc: 81.83%
Test Loss: 0.8125, Test Acc: 87.21%
✓ Best model saved! (Acc: 87.21%)

Epoch 92/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.896, acc=82]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.88it/s]


Train Loss: 0.8960, Train Acc: 82.02%
Test Loss: 0.8131, Test Acc: 86.74%

Epoch 93/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.9, acc=82]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.76it/s]


Train Loss: 0.9003, Train Acc: 81.97%
Test Loss: 0.8002, Test Acc: 87.22%
✓ Best model saved! (Acc: 87.22%)

Epoch 94/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.885, acc=82.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.66it/s]


Train Loss: 0.8851, Train Acc: 82.62%
Test Loss: 0.8165, Test Acc: 86.79%

Epoch 95/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.885, acc=82.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.43it/s]


Train Loss: 0.8854, Train Acc: 82.53%
Test Loss: 0.8185, Test Acc: 86.86%

Epoch 96/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.882, acc=82.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.67it/s]


Train Loss: 0.8817, Train Acc: 82.76%
Test Loss: 0.8095, Test Acc: 86.94%

Epoch 97/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.881, acc=82.9]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.89it/s]


Train Loss: 0.8806, Train Acc: 82.90%
Test Loss: 0.8074, Test Acc: 87.34%
✓ Best model saved! (Acc: 87.34%)

Epoch 98/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.874, acc=83.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.50it/s]


Train Loss: 0.8742, Train Acc: 83.18%
Test Loss: 0.8145, Test Acc: 87.02%

Epoch 99/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.871, acc=83.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.33it/s]


Train Loss: 0.8711, Train Acc: 83.17%
Test Loss: 0.8062, Test Acc: 87.22%

Epoch 100/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.871, acc=83.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.26it/s]


Train Loss: 0.8713, Train Acc: 83.15%
Test Loss: 0.7930, Test Acc: 87.69%
✓ Best model saved! (Acc: 87.69%)

Epoch 101/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.866, acc=83.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.79it/s]


Train Loss: 0.8656, Train Acc: 83.52%
Test Loss: 0.8038, Test Acc: 87.30%

Epoch 102/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.859, acc=83.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.45it/s]


Train Loss: 0.8594, Train Acc: 83.73%
Test Loss: 0.8061, Test Acc: 87.44%

Epoch 103/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.861, acc=83.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.48it/s]


Train Loss: 0.8612, Train Acc: 83.67%
Test Loss: 0.8060, Test Acc: 87.58%

Epoch 104/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.854, acc=84.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 0.8536, Train Acc: 84.11%
Test Loss: 0.8044, Test Acc: 87.49%

Epoch 105/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.852, acc=84.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.64it/s]


Train Loss: 0.8519, Train Acc: 84.10%
Test Loss: 0.8045, Test Acc: 87.77%
✓ Best model saved! (Acc: 87.77%)

Epoch 106/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.848, acc=84.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.52it/s]


Train Loss: 0.8479, Train Acc: 84.22%
Test Loss: 0.8105, Test Acc: 87.63%

Epoch 107/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.846, acc=84.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.91it/s]


Train Loss: 0.8463, Train Acc: 84.38%
Test Loss: 0.8031, Test Acc: 87.53%

Epoch 108/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.844, acc=84.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.91it/s]


Train Loss: 0.8440, Train Acc: 84.45%
Test Loss: 0.8132, Test Acc: 87.64%

Epoch 109/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.84, acc=84.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.71it/s]


Train Loss: 0.8396, Train Acc: 84.70%
Test Loss: 0.7954, Test Acc: 87.76%

Epoch 110/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.838, acc=84.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.43it/s]


Train Loss: 0.8375, Train Acc: 84.64%
Test Loss: 0.8003, Test Acc: 87.62%

Epoch 111/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.835, acc=84.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.83it/s]


Train Loss: 0.8351, Train Acc: 84.78%
Test Loss: 0.8021, Test Acc: 87.77%

Epoch 112/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.829, acc=85.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.82it/s]


Train Loss: 0.8293, Train Acc: 85.07%
Test Loss: 0.7921, Test Acc: 88.13%
✓ Best model saved! (Acc: 88.13%)

Epoch 113/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.829, acc=85.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.57it/s]


Train Loss: 0.8288, Train Acc: 85.13%
Test Loss: 0.7946, Test Acc: 88.10%

Epoch 114/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.822, acc=85.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.42it/s]


Train Loss: 0.8219, Train Acc: 85.52%
Test Loss: 0.8022, Test Acc: 87.82%

Epoch 115/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.82, acc=85.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.83it/s]


Train Loss: 0.8203, Train Acc: 85.63%
Test Loss: 0.7876, Test Acc: 88.33%
✓ Best model saved! (Acc: 88.33%)

Epoch 116/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.821, acc=85.5]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.91it/s]


Train Loss: 0.8209, Train Acc: 85.53%
Test Loss: 0.8038, Test Acc: 87.53%

Epoch 117/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.819, acc=85.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.49it/s]


Train Loss: 0.8189, Train Acc: 85.49%
Test Loss: 0.7942, Test Acc: 88.56%
✓ Best model saved! (Acc: 88.56%)

Epoch 118/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.06it/s, loss=0.815, acc=85.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.62it/s]


Train Loss: 0.8151, Train Acc: 85.81%
Test Loss: 0.7921, Test Acc: 88.03%

Epoch 119/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.811, acc=86.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.76it/s]


Train Loss: 0.8112, Train Acc: 86.10%
Test Loss: 0.8090, Test Acc: 87.58%

Epoch 120/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.806, acc=86.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.89it/s]


Train Loss: 0.8061, Train Acc: 86.23%
Test Loss: 0.7994, Test Acc: 88.36%

Epoch 121/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.803, acc=86.2]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.49it/s]


Train Loss: 0.8030, Train Acc: 86.25%
Test Loss: 0.7951, Test Acc: 88.31%

Epoch 122/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.05it/s, loss=0.8, acc=86.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.57it/s]


Train Loss: 0.7999, Train Acc: 86.50%
Test Loss: 0.7965, Test Acc: 88.30%

Epoch 123/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.801, acc=86.5]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Train Loss: 0.8010, Train Acc: 86.49%
Test Loss: 0.8049, Test Acc: 87.97%

Epoch 124/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.795, acc=86.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.39it/s]


Train Loss: 0.7947, Train Acc: 86.79%
Test Loss: 0.8003, Test Acc: 88.41%

Epoch 125/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.794, acc=86.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.71it/s]


Train Loss: 0.7938, Train Acc: 86.78%
Test Loss: 0.7927, Test Acc: 88.50%

Epoch 126/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.799, acc=86.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.44it/s]


Train Loss: 0.7989, Train Acc: 86.54%
Test Loss: 0.7936, Test Acc: 88.31%

Epoch 127/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.792, acc=86.8]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 0.7921, Train Acc: 86.81%
Test Loss: 0.8023, Test Acc: 88.26%

Epoch 128/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.785, acc=87.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.90it/s]


Train Loss: 0.7854, Train Acc: 87.07%
Test Loss: 0.7973, Test Acc: 88.34%

Epoch 129/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.786, acc=87.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.73it/s]


Train Loss: 0.7857, Train Acc: 87.07%
Test Loss: 0.7880, Test Acc: 88.75%
✓ Best model saved! (Acc: 88.75%)

Epoch 130/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.782, acc=87.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Train Loss: 0.7824, Train Acc: 87.21%
Test Loss: 0.7944, Test Acc: 88.66%

Epoch 131/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.778, acc=87.4]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.84it/s]


Train Loss: 0.7782, Train Acc: 87.38%
Test Loss: 0.8081, Test Acc: 88.22%

Epoch 132/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.777, acc=87.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.78it/s]


Train Loss: 0.7768, Train Acc: 87.70%
Test Loss: 0.7887, Test Acc: 88.67%

Epoch 133/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.775, acc=87.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.63it/s]


Train Loss: 0.7751, Train Acc: 87.75%
Test Loss: 0.7830, Test Acc: 88.96%
✓ Best model saved! (Acc: 88.96%)

Epoch 134/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.774, acc=87.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.94it/s]


Train Loss: 0.7736, Train Acc: 87.63%
Test Loss: 0.7928, Test Acc: 88.53%

Epoch 135/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.775, acc=87.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.74it/s]


Train Loss: 0.7750, Train Acc: 87.65%
Test Loss: 0.7896, Test Acc: 88.76%

Epoch 136/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.77, acc=87.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.54it/s]


Train Loss: 0.7695, Train Acc: 87.84%
Test Loss: 0.7944, Test Acc: 88.66%

Epoch 137/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.763, acc=88.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Train Loss: 0.7633, Train Acc: 88.23%
Test Loss: 0.7944, Test Acc: 88.73%

Epoch 138/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.08it/s, loss=0.766, acc=88]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.80it/s]


Train Loss: 0.7658, Train Acc: 88.04%
Test Loss: 0.8052, Test Acc: 88.41%

Epoch 139/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.765, acc=87.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.54it/s]


Train Loss: 0.7653, Train Acc: 87.81%
Test Loss: 0.7972, Test Acc: 88.78%

Epoch 140/200


Training: 100%|██████████| 391/391 [01:17<00:00,  5.07it/s, loss=0.76, acc=88.3]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.94it/s]


Train Loss: 0.7601, Train Acc: 88.31%
Test Loss: 0.7869, Test Acc: 89.00%
✓ Best model saved! (Acc: 89.00%)

Epoch 141/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.761, acc=88.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.82it/s]


Train Loss: 0.7613, Train Acc: 88.22%
Test Loss: 0.7909, Test Acc: 88.75%

Epoch 142/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.754, acc=88.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.75it/s]


Train Loss: 0.7537, Train Acc: 88.72%
Test Loss: 0.7936, Test Acc: 88.87%

Epoch 143/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.11it/s, loss=0.753, acc=88.5]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.96it/s]


Train Loss: 0.7532, Train Acc: 88.45%
Test Loss: 0.8075, Test Acc: 88.36%

Epoch 144/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.754, acc=88.6]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.88it/s]


Train Loss: 0.7540, Train Acc: 88.56%
Test Loss: 0.8050, Test Acc: 88.43%

Epoch 145/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.751, acc=88.7]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.72it/s]


Train Loss: 0.7512, Train Acc: 88.65%
Test Loss: 0.7895, Test Acc: 89.12%
✓ Best model saved! (Acc: 89.12%)

Epoch 146/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.747, acc=88.9]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.61it/s]


Train Loss: 0.7474, Train Acc: 88.86%
Test Loss: 0.7977, Test Acc: 88.91%

Epoch 147/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.745, acc=89]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.93it/s]


Train Loss: 0.7452, Train Acc: 88.99%
Test Loss: 0.7868, Test Acc: 89.06%

Epoch 148/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.748, acc=88.8]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.76it/s]


Train Loss: 0.7481, Train Acc: 88.80%
Test Loss: 0.7943, Test Acc: 88.91%

Epoch 149/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.741, acc=89.1]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.59it/s]


Train Loss: 0.7415, Train Acc: 89.10%
Test Loss: 0.7994, Test Acc: 88.80%

Epoch 150/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.742, acc=89.1]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.95it/s]


Train Loss: 0.7416, Train Acc: 89.12%
Test Loss: 0.8001, Test Acc: 88.77%

Epoch 151/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.743, acc=89.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.87it/s]


Train Loss: 0.7425, Train Acc: 89.24%
Test Loss: 0.8037, Test Acc: 88.55%

Epoch 152/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.08it/s, loss=0.74, acc=89.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.83it/s]


Train Loss: 0.7398, Train Acc: 89.18%
Test Loss: 0.7853, Test Acc: 89.10%

Epoch 153/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.739, acc=89.2]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.84it/s]


Train Loss: 0.7395, Train Acc: 89.25%
Test Loss: 0.7880, Test Acc: 89.26%
✓ Best model saved! (Acc: 89.26%)

Epoch 154/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.10it/s, loss=0.735, acc=89.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.78it/s]


Train Loss: 0.7350, Train Acc: 89.37%
Test Loss: 0.7863, Test Acc: 89.20%

Epoch 155/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.732, acc=89.5]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.69it/s]


Train Loss: 0.7325, Train Acc: 89.55%
Test Loss: 0.7933, Test Acc: 89.05%

Epoch 156/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.737, acc=89.4]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.60it/s]


Train Loss: 0.7366, Train Acc: 89.45%
Test Loss: 0.7818, Test Acc: 89.33%
✓ Best model saved! (Acc: 89.33%)

Epoch 157/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.73, acc=89.7]
Evaluating: 100%|██████████| 79/79 [00:04<00:00, 15.91it/s]


Train Loss: 0.7299, Train Acc: 89.66%
Test Loss: 0.7920, Test Acc: 89.23%

Epoch 158/200


Training: 100%|██████████| 391/391 [01:16<00:00,  5.09it/s, loss=0.732, acc=89.6]
Evaluating: 100%|██████████| 79/79 [00:05<00:00, 15.68it/s]


Train Loss: 0.7318, Train Acc: 89.57%
Test Loss: 0.7823, Test Acc: 89.39%
✓ Best model saved! (Acc: 89.39%)

Epoch 159/200


Training:  33%|███▎      | 129/391 [00:25<00:52,  5.01it/s, loss=0.731, acc=89.6]