In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import v2

FLIP_PROBABILITY = 0.1

data_transform = v2.Compose([
    v2.Resize(size=(224,224)),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomRotation(degrees=(-10,10)),
    v2.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
    v2.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    v2.ToTensor(),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Add normalization
])



In [4]:
train_data = datasets.ImageFolder(root="/content/drive/MyDrive/dataset_v3/train",
                                  transform=data_transform,
                                  target_transform=None)
test_data = datasets.ImageFolder(root="/content/drive/MyDrive/dataset_v3/test",
                                  transform=data_transform)
class_names = train_data.classes

In [5]:
from sklearn.utils import resample

def balance_dataset(dataset, size):
    # Separate data by class
    class_data = {i: [] for i in range(8)}  # For 8 classes

    # Group data by class
    for idx in range(len(dataset)):
        _, label = dataset[idx]
        class_data[label].append(idx)

    # Randomly sample indices from each class
    balanced_indices = []
    for class_idx, indices in class_data.items():
        # If class has more samples than min_size, downsample it
        if len(indices) > size:
            balanced_indices.extend(np.random.choice(indices, size=size, replace=False))
        else:
            balanced_indices.extend(indices)

    # Create a subset dataset
    from torch.utils.data import Subset
    balanced_dataset = Subset(dataset, balanced_indices)

    return balanced_dataset

# Use it like this:
balanced_train_data = balance_dataset(train_data, 200)

balanced_test_data = balance_dataset(test_data, 50)

In [6]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16
train_dataloader = DataLoader(balanced_train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(balanced_test_data, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
train_transform = v2.Compose([
    v2.Resize(size=(224,224)),
    v2.TrivialAugmentWide(num_magnitude_bins=5),
    v2.ToTensor()
])

test_transform = v2.Compose([
    v2.Resize(size=(224,224)),
    v2.ToTensor()
])



In [8]:
import torch
import torch.nn as nn

class PatchEmbed(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.n_patches = (img_size // patch_size) ** 2

        self.proj = nn.Conv2d(
            in_channels,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size
        )

    def forward(self, x):
        x = self.proj(x)  # (B, E, H', W')
        x = x.flatten(2)  # (B, E, N)
        x = x.transpose(1, 2)  # (B, N, E)
        return x

In [9]:
class Attention(nn.Module):
    def __init__(self, dim, n_heads=12, qkv_bias=True, attn_drop=0.1, proj_drop=0.1):
        super().__init__()
        self.n_heads = n_heads
        self.scale = (dim // n_heads) ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.n_heads, C // self.n_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

In [10]:
class MLP(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, drop=0.1):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

In [11]:
class Block(nn.Module):
    def __init__(self, dim, n_heads, mlp_ratio=4., qkv_bias=True, drop=0.1, attn_drop=0.1):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, n_heads=n_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim, int(dim * mlp_ratio), dim, drop)

    def forward(self, x):
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))
        return x

In [12]:
class VisionTransformer(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=len(class_names),
                 embed_dim=768, depth=12, n_heads=12, mlp_ratio=4., qkv_bias=True,
                 drop_rate=0.1, attn_drop_rate=0.1):
        super().__init__()
        self.patch_embed = PatchEmbed(img_size, patch_size, in_channels, embed_dim)
        num_patches = self.patch_embed.n_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        torch.nn.init.trunc_normal_(self.pos_embed, std=0.02)
        torch.nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.pos_drop = nn.Dropout(drop_rate)

        self.blocks = nn.Sequential(*[
            Block(embed_dim, n_heads, mlp_ratio, qkv_bias, drop_rate, attn_drop_rate)
            for _ in range(depth)
        ])

        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_token = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_token, x), dim=1)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        x = self.blocks(x)
        x = self.norm(x)

        x = x[:, 0]  # take cls token
        x = self.head(x)
        return x

In [13]:
import torch
from torch.nn import functional as F
from tqdm import tqdm
import numpy as np
from sklearn.metrics import accuracy_score

class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    losses = AverageMeter()
    progress_bar = tqdm(train_loader, desc='Training')

    all_predictions = []
    all_labels = []

    for batch in progress_bar:
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        losses.update(loss.item(), images.size(0))
        progress_bar.set_postfix({'train_loss': f'{losses.avg:.4f}'})

        predictions = torch.argmax(outputs, dim=1)
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    epoch_accuracy = accuracy_score(all_labels, all_predictions)
    return losses.avg, epoch_accuracy

def evaluate(model, test_loader, criterion, device):
    model.eval()
    losses = AverageMeter()

    all_predictions = []
    all_labels = []

    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc='Testing')
        for batch in progress_bar:
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            losses.update(loss.item(), images.size(0))
            progress_bar.set_postfix({'test_loss': f'{losses.avg:.4f}'})

            predictions = torch.argmax(outputs, dim=1)
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_predictions)
    return losses.avg, accuracy

def train_model(model, train_loader, test_loader, num_epochs=100):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=5e-5,
        weight_decay=0.02,
        betas=(0.9, 0.999)
    )
    steps_per_epoch = len(train_loader)
    total_steps = steps_per_epoch * num_epochs
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=5e-4,
        total_steps=total_steps,
        pct_start=0.2,
        anneal_strategy='cos',
        cycle_momentum=True,
        base_momentum=0.85,
        max_momentum=0.95,
        div_factor=10.0,
        final_div_factor=1000.0
    )

    # Add tracking for best model
    best_accuracy = 0.0
    best_model_state = None
    best_epoch = 0
    best_test_loss = float('inf')

    print("Starting training...")
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        train_loss, train_accuracy = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        test_loss, test_accuracy = evaluate(
            model, test_loader, criterion, device
        )
        scheduler.step()

        # Save best model
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_test_loss = test_loss
            best_model_state = model.state_dict().copy()
            best_epoch = epoch + 1
            print(f"New best model saved! Accuracy: {best_accuracy:.4f}")

        if epoch % 5 == 0:
            print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f}")
            print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f}")

    # Load best model before returning
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\nLoaded best model from epoch {best_epoch}")
        print(f"Best Test Metrics:")
        print(f"Test Loss: {best_test_loss:.4f}")
        print(f"Test Accuracy: {best_accuracy:.4f}")

    return model, best_accuracy

# Initialize model
model = VisionTransformer(
    img_size=224,
    patch_size=16,
    in_channels=3,
    num_classes=len(class_names),  # set this to your number of classes
    embed_dim=384,
    depth=8,
    n_heads=8,
    mlp_ratio=4,
    qkv_bias=True,
    drop_rate=0.15
)

# Train the model
trained_model, best_accuracy = train_model(
    model,
    train_dataloader,
    test_dataloader,
    num_epochs=100
)
print(f"\nTraining completed!")
print(f"Best test accuracy achieved: {best_accuracy:.4f}")

Starting training...

Epoch 1/100


Training: 100%|██████████| 64/64 [00:21<00:00,  2.95it/s, train_loss=2.0345]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.45it/s, test_loss=2.0483]


New best model saved! Accuracy: 0.2355
Train Loss: 2.0345 | Train Accuracy: 0.2434
Test Loss: 2.0483 | Test Accuracy: 0.2355

Epoch 2/100


Training: 100%|██████████| 64/64 [00:22<00:00,  2.90it/s, train_loss=1.9094]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.40it/s, test_loss=1.9282]


New best model saved! Accuracy: 0.3050

Epoch 3/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.09it/s, train_loss=1.8563]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.44it/s, test_loss=1.8775]


New best model saved! Accuracy: 0.3205

Epoch 4/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=1.8143]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.63it/s, test_loss=1.8875]


New best model saved! Accuracy: 0.3475

Epoch 5/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.23it/s, train_loss=1.7891]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.34it/s, test_loss=1.8458]


New best model saved! Accuracy: 0.3514

Epoch 6/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s, train_loss=1.7674]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.44it/s, test_loss=1.7804]


New best model saved! Accuracy: 0.3745
Train Loss: 1.7674 | Train Accuracy: 0.3685
Test Loss: 1.7804 | Test Accuracy: 0.3745

Epoch 7/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=1.7190]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.31it/s, test_loss=1.7405]


New best model saved! Accuracy: 0.4054

Epoch 8/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=1.6357]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.65it/s, test_loss=1.6950]


New best model saved! Accuracy: 0.4208

Epoch 9/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.20it/s, train_loss=1.6430]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.49it/s, test_loss=1.7008]


New best model saved! Accuracy: 0.4324

Epoch 10/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=1.6056]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.44it/s, test_loss=1.7578]



Epoch 11/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.09it/s, train_loss=1.5594]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.37it/s, test_loss=1.7121]


Train Loss: 1.5594 | Train Accuracy: 0.4761
Test Loss: 1.7121 | Test Accuracy: 0.4131

Epoch 12/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=1.5521]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.61it/s, test_loss=1.6126]


New best model saved! Accuracy: 0.4710

Epoch 13/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=1.4868]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.27it/s, test_loss=1.7490]



Epoch 14/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=1.4256]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.35it/s, test_loss=1.5883]



Epoch 15/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=1.4609]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.42it/s, test_loss=1.5302]



Epoch 16/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=1.4241]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.84it/s, test_loss=1.6678]


New best model saved! Accuracy: 0.4942
Train Loss: 1.4241 | Train Accuracy: 0.5367
Test Loss: 1.6678 | Test Accuracy: 0.4942

Epoch 17/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=1.3760]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.08it/s, test_loss=1.4599]


New best model saved! Accuracy: 0.5598

Epoch 18/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=1.3356]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.37it/s, test_loss=1.4805]



Epoch 19/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=1.3299]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.50it/s, test_loss=1.5457]



Epoch 20/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=1.3123]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.05it/s, test_loss=1.5359]



Epoch 21/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.20it/s, train_loss=1.2415]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.81it/s, test_loss=1.3806]


New best model saved! Accuracy: 0.5946
Train Loss: 1.2415 | Train Accuracy: 0.6373
Test Loss: 1.3806 | Test Accuracy: 0.5946

Epoch 22/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=1.2690]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.43it/s, test_loss=1.4091]



Epoch 23/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=1.2365]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.33it/s, test_loss=1.3644]


New best model saved! Accuracy: 0.6062

Epoch 24/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=1.2001]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.49it/s, test_loss=1.4609]



Epoch 25/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=1.1622]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.71it/s, test_loss=1.3459]



Epoch 26/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=1.1257]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.21it/s, test_loss=1.4991]


Train Loss: 1.1257 | Train Accuracy: 0.6813
Test Loss: 1.4991 | Test Accuracy: 0.5753

Epoch 27/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s, train_loss=1.0951]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.45it/s, test_loss=1.3837]



Epoch 28/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=1.0775]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.36it/s, test_loss=1.2991]


New best model saved! Accuracy: 0.6564

Epoch 29/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=1.0842]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.81it/s, test_loss=1.2648]



Epoch 30/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=1.0440]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.88it/s, test_loss=1.3632]



Epoch 31/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=1.1087]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.41it/s, test_loss=1.2971]


Train Loss: 1.1087 | Train Accuracy: 0.6989
Test Loss: 1.2971 | Test Accuracy: 0.6448

Epoch 32/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.09it/s, train_loss=1.0515]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.55it/s, test_loss=1.3490]



Epoch 33/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=1.0431]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.19it/s, test_loss=1.3646]



Epoch 34/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=1.0123]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.72it/s, test_loss=1.2202]



Epoch 35/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=1.0421]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.25it/s, test_loss=1.2090]


New best model saved! Accuracy: 0.6834

Epoch 36/100


Training: 100%|██████████| 64/64 [00:22<00:00,  2.88it/s, train_loss=1.0060]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.10it/s, test_loss=1.5028]


Train Loss: 1.0060 | Train Accuracy: 0.7595
Test Loss: 1.5028 | Test Accuracy: 0.5792

Epoch 37/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.07it/s, train_loss=0.9752]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.43it/s, test_loss=1.2287]



Epoch 38/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.14it/s, train_loss=1.0061]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.00it/s, test_loss=1.1867]



Epoch 39/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=0.9942]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.70it/s, test_loss=1.2532]



Epoch 40/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.20it/s, train_loss=1.0018]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.46it/s, test_loss=1.3539]



Epoch 41/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.9719]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.33it/s, test_loss=1.2086]


Train Loss: 0.9719 | Train Accuracy: 0.7615
Test Loss: 1.2086 | Test Accuracy: 0.6602

Epoch 42/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=0.9504]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.47it/s, test_loss=1.1916]



Epoch 43/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=0.9457]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.69it/s, test_loss=1.2873]



Epoch 44/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.20it/s, train_loss=0.9486]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.99it/s, test_loss=1.1292]


New best model saved! Accuracy: 0.7104

Epoch 45/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=0.9005]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.48it/s, test_loss=1.1593]



Epoch 46/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.08it/s, train_loss=0.9086]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.51it/s, test_loss=1.3083]


Train Loss: 0.9086 | Train Accuracy: 0.7879
Test Loss: 1.3083 | Test Accuracy: 0.6448

Epoch 47/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.9055]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.88it/s, test_loss=1.1599]



Epoch 48/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=0.9076]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.05it/s, test_loss=1.3521]



Epoch 49/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=0.9112]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.45it/s, test_loss=1.1497]


New best model saved! Accuracy: 0.7220

Epoch 50/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.9169]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.46it/s, test_loss=1.2072]



Epoch 51/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s, train_loss=0.8511]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.11it/s, test_loss=1.0520]


New best model saved! Accuracy: 0.7645
Train Loss: 0.8511 | Train Accuracy: 0.8240
Test Loss: 1.0520 | Test Accuracy: 0.7645

Epoch 52/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=0.8891]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.82it/s, test_loss=1.1421]



Epoch 53/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.8453]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.34it/s, test_loss=1.1470]



Epoch 54/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.8810]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.56it/s, test_loss=1.2755]



Epoch 55/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=0.8886]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.29it/s, test_loss=1.1010]



Epoch 56/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.20it/s, train_loss=0.8751]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.68it/s, test_loss=1.0498]


Train Loss: 0.8751 | Train Accuracy: 0.8143
Test Loss: 1.0498 | Test Accuracy: 0.7375

Epoch 57/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.21it/s, train_loss=0.8843]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.24it/s, test_loss=1.0937]



Epoch 58/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=0.8399]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.37it/s, test_loss=1.1699]



Epoch 59/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=0.8235]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.21it/s, test_loss=1.2127]



Epoch 60/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=0.8098]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.83it/s, test_loss=0.9512]


New best model saved! Accuracy: 0.7838

Epoch 61/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.17it/s, train_loss=0.8333]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.83it/s, test_loss=1.1917]


Train Loss: 0.8333 | Train Accuracy: 0.8289
Test Loss: 1.1917 | Test Accuracy: 0.6873

Epoch 62/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=0.8415]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.45it/s, test_loss=1.2258]



Epoch 63/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=0.8109]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.46it/s, test_loss=1.1911]



Epoch 64/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.17it/s, train_loss=0.8278]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.96it/s, test_loss=1.0143]



Epoch 65/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.20it/s, train_loss=0.8176]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.86it/s, test_loss=1.3385]



Epoch 66/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.8088]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.41it/s, test_loss=1.0063]


Train Loss: 0.8088 | Train Accuracy: 0.8368
Test Loss: 1.0063 | Test Accuracy: 0.7722

Epoch 67/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=0.8288]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.54it/s, test_loss=1.0931]



Epoch 68/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=0.8581]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.06it/s, test_loss=1.2650]



Epoch 69/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.7930]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.71it/s, test_loss=1.0751]



Epoch 70/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=0.8274]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.29it/s, test_loss=1.1300]



Epoch 71/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=0.7998]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.48it/s, test_loss=1.2363]


Train Loss: 0.7998 | Train Accuracy: 0.8446
Test Loss: 1.2363 | Test Accuracy: 0.6834

Epoch 72/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, train_loss=0.8188]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.60it/s, test_loss=1.0969]



Epoch 73/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.20it/s, train_loss=0.7555]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.64it/s, test_loss=1.0529]



Epoch 74/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.17it/s, train_loss=0.7592]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.96it/s, test_loss=1.0477]



Epoch 75/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=0.7586]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.47it/s, test_loss=1.2458]



Epoch 76/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.8102]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.46it/s, test_loss=1.0691]


Train Loss: 0.8102 | Train Accuracy: 0.8514
Test Loss: 1.0691 | Test Accuracy: 0.7490

Epoch 77/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.17it/s, train_loss=0.7865]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.97it/s, test_loss=0.9559]



Epoch 78/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.7904]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.73it/s, test_loss=1.0829]



Epoch 79/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=0.7574]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.44it/s, test_loss=1.0674]



Epoch 80/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.7841]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.47it/s, test_loss=1.0919]



Epoch 81/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.10it/s, train_loss=0.7596]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.53it/s, test_loss=1.0980]


Train Loss: 0.7596 | Train Accuracy: 0.8641
Test Loss: 1.0980 | Test Accuracy: 0.7375

Epoch 82/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.20it/s, train_loss=0.7728]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.60it/s, test_loss=1.0262]



Epoch 83/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.7575]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.18it/s, test_loss=1.1199]



Epoch 84/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.16it/s, train_loss=0.7672]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.43it/s, test_loss=1.1867]



Epoch 85/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.09it/s, train_loss=0.7409]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.46it/s, test_loss=1.0072]



Epoch 86/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=0.7500]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.86it/s, test_loss=1.3100]


Train Loss: 0.7500 | Train Accuracy: 0.8749
Test Loss: 1.3100 | Test Accuracy: 0.6757

Epoch 87/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.20it/s, train_loss=0.7505]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.99it/s, test_loss=1.1235]



Epoch 88/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=0.7335]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.49it/s, test_loss=1.0898]



Epoch 89/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s, train_loss=0.7302]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.47it/s, test_loss=1.0554]



Epoch 90/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=0.7575]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.98it/s, test_loss=1.2029]



Epoch 91/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=0.7430]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.85it/s, test_loss=1.0518]


Train Loss: 0.7430 | Train Accuracy: 0.8788
Test Loss: 1.0518 | Test Accuracy: 0.7568

Epoch 92/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, train_loss=0.7596]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.53it/s, test_loss=1.3516]



Epoch 93/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.09it/s, train_loss=0.7745]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.35it/s, test_loss=1.0959]



Epoch 94/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.13it/s, train_loss=0.6905]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.25it/s, test_loss=0.9997]


New best model saved! Accuracy: 0.8224

Epoch 95/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=0.7414]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.59it/s, test_loss=1.1507]



Epoch 96/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, train_loss=0.7064]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.28it/s, test_loss=1.2012]


Train Loss: 0.7064 | Train Accuracy: 0.8876
Test Loss: 1.2012 | Test Accuracy: 0.6950

Epoch 97/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.15it/s, train_loss=0.7522]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.39it/s, test_loss=1.0006]



Epoch 98/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.11it/s, train_loss=0.7215]
Testing: 100%|██████████| 17/17 [00:03<00:00,  4.49it/s, test_loss=1.0738]



Epoch 99/100


Training: 100%|██████████| 64/64 [00:19<00:00,  3.23it/s, train_loss=0.7190]
Testing: 100%|██████████| 17/17 [00:04<00:00,  3.79it/s, test_loss=1.1110]



Epoch 100/100


Training: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, train_loss=0.7357]
Testing: 100%|██████████| 17/17 [00:04<00:00,  4.15it/s, test_loss=1.1176]


Loaded best model from epoch 94
Best Test Metrics:
Test Loss: 0.9997
Test Accuracy: 0.8224

Training completed!
Best test accuracy achieved: 0.8224



