# Three-Stream Model Experiments (SimCLR + SwAV + DINO)

This notebook implements a multi-stream architecture that fuses features from three different self-supervised learning models:
1. **SimCLR** (ResNet50 backbone)
2. **SwAV** (ResNet50 backbone)
3. **DINO** (ViT-S/16 backbone)

The features are fused using a custom **Self-Attention** mechanism before final classification on the **Ouhands** dataset.

In [9]:
# 1. Import Libraries and Configure Device
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision import transforms
import timm
from tqdm import tqdm
from torch.optim import AdamW
from torch.amp import autocast, GradScaler
import os
import gc

# Set memory management environment variable
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Import custom dataset loader
from ouhands_loader import OuhandsDS

# Setup Device
device = (
    torch.device("cuda") if torch.cuda.is_available()
    else torch.device("mps") if torch.backends.mps.is_available()
    else torch.device("cpu")
)
print("Using device:", device)

Using device: cuda


In [10]:
# 2. Prepare Dataset and DataLoaders

# Define transforms (Standard ImageNet normalization)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

batch_size = 16
num_workers = 0
use_bbox = False

# Instantiate Datasets
train_ds = OuhandsDS(split='train',
                     transform=transform,
                     use_bounding_box=use_bbox,
                     crop_to_bbox=use_bbox)
val_ds = OuhandsDS(split='validation',
                   transform=transform,
                   use_bounding_box=use_bbox,
                   crop_to_bbox=use_bbox)
test_ds = OuhandsDS(split='test',
                    transform=transform,
                    use_bounding_box=use_bbox,
                    crop_to_bbox=use_bbox)

# Create DataLoaders
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, 
                          num_workers=num_workers, pin_memory=False)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, 
                        num_workers=num_workers, pin_memory=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, 
                         num_workers=num_workers, pin_memory=False)

print(f"Train size: {len(train_ds)}")
print(f"Val size: {len(val_ds)}")
print(f"Test size: {len(test_ds)}")

Loaded 1600 samples for train split
Class distribution: {'A': 160, 'B': 160, 'C': 160, 'D': 160, 'E': 160, 'F': 160, 'H': 160, 'I': 160, 'J': 160, 'K': 160}
Loaded 400 samples for validation split
Class distribution: {'A': 40, 'B': 40, 'C': 40, 'D': 40, 'E': 40, 'F': 40, 'H': 40, 'I': 40, 'J': 40, 'K': 40}
Loaded 1000 samples for test split
Class distribution: {'A': 100, 'B': 100, 'C': 100, 'D': 100, 'E': 100, 'F': 100, 'H': 100, 'I': 100, 'J': 100, 'K': 100}
Train size: 1600
Val size: 400
Test size: 1000
Loaded 400 samples for validation split
Class distribution: {'A': 40, 'B': 40, 'C': 40, 'D': 40, 'E': 40, 'F': 40, 'H': 40, 'I': 40, 'J': 40, 'K': 40}
Loaded 1000 samples for test split
Class distribution: {'A': 100, 'B': 100, 'C': 100, 'D': 100, 'E': 100, 'F': 100, 'H': 100, 'I': 100, 'J': 100, 'K': 100}
Train size: 1600
Val size: 400
Test size: 1000


In [11]:
# 4. Define Three-Stream Model

class ThreeStreamModel(nn.Module):
    def __init__(self, num_classes=10, common_dim=512, simclr_path=None):
        super(ThreeStreamModel, self).__init__()
        
        # --- Stream 1: SimCLR (ResNet50) ---
        # Load ResNet50 structure
        self.simclr_backbone = models.resnet50(weights=None)
        self.simclr_backbone.fc = nn.Identity() # Remove classification head
        
        if simclr_path and os.path.exists(simclr_path):
            print(f"Loading SimCLR weights from {simclr_path}")
            try:
                checkpoint = torch.load(simclr_path, map_location="cpu")
                # Try to find state_dict
                if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
                    state_dict = checkpoint['state_dict']
                else:
                    state_dict = checkpoint
                
                # Clean up state dict keys
                new_state_dict = {}
                for k, v in state_dict.items():
                    # Remove 'module.' prefix if present (DataParallel)
                    name = k.replace("module.", "")
                    # Remove 'backbone.' prefix if present (common in SimCLR repos)
                    if name.startswith("backbone."):
                        name = name.replace("backbone.", "")
                    # Remove 'resnet.' prefix if present
                    if name.startswith("resnet."):
                        name = name.replace("resnet.", "")
                    new_state_dict[name] = v
                
                msg = self.simclr_backbone.load_state_dict(new_state_dict, strict=False)
                print(f"SimCLR weights loaded: {msg}")
            except Exception as e:
                print(f"Error loading SimCLR weights: {e}")
        else:
            print(f"Warning: SimCLR path {simclr_path} not found. Using random init.")
            
        self.simclr_proj = nn.Linear(2048, common_dim)
        
        # --- Stream 2: SwAV (ResNet50) ---
        # Using torch.hub as requested
        print("Loading SwAV weights from torch.hub (facebookresearch/swav:main)...")
        self.swav_backbone = torch.hub.load('facebookresearch/swav:main', 'resnet50')
        self.swav_backbone.fc = nn.Identity()
        self.swav_proj = nn.Linear(2048, common_dim)
        
        # --- Stream 3: DINO (ViT-S/16) ---
        # Loading from torch hub
        print("Loading DINO weights from torch.hub (facebookresearch/dino:main)...")
        self.dino_backbone = torch.hub.load('facebookresearch/dino:main', 'dino_vits16')
        self.dino_proj = nn.Linear(384, common_dim) # DINO ViT-S/16 has embed dim 384
        
        # --- Fusion & Classification ---
        self.attention = CustomSelfAttention(embed_dim=common_dim, num_heads=8)
        
        # Final classification head
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(3 * common_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        # x shape: (B, 3, H, W)
        
        # Stream 1: SimCLR
        # Use autocast to reduce memory usage
        with torch.cuda.amp.autocast():
            f1 = self.simclr_backbone(x) # (B, 2048)
            p1 = self.simclr_proj(f1)    # (B, common_dim)
            
            # Stream 2: SwAV
            f2 = self.swav_backbone(x)   # (B, 2048)
            p2 = self.swav_proj(f2)      # (B, common_dim)
            
            # Stream 3: DINO
            f3 = self.dino_backbone(x)   # (B, 384)
            p3 = self.dino_proj(f3)      # (B, common_dim)
            
            # Stack features for attention: (B, 3, common_dim)
            stacked_features = torch.stack([p1, p2, p3], dim=1)
            
            # Apply Self-Attention
            attn_out = self.attention(stacked_features) # (B, 3, common_dim)
            
            # Classification
            logits = self.classifier(attn_out)
        
        return logits.float() # Ensure output is float32 for loss calculation

# Initialize Model
simclr_checkpoint_path = r"D:\Courses\Csc2503\proj\CSC2503-Project\notebooks\checkpoint_100"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ThreeStreamModel(num_classes=10, simclr_path=simclr_checkpoint_path).to(device)

print(model)

Loading SimCLR weights from D:\Courses\Csc2503\proj\CSC2503-Project\notebooks\checkpoint_100
SimCLR weights loaded: _IncompatibleKeys(missing_keys=['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.conv3.weight', 'layer1.0.bn3.weight', 'layer1.0.bn3.bias', 'layer1.0.bn3.running_mean', 'layer1.0.bn3.running_var', 'layer1.0.downsample.0.weight', 'layer1.0.downsample.1.weight', 'layer1.0.downsample.1.bias', 'layer1.0.downsample.1.running_mean', 'layer1.0.downsample.1.running_var', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 

  checkpoint = torch.load(simclr_path, map_location="cpu")
Using cache found in C:\Users\24912/.cache\torch\hub\facebookresearch_swav_main


Loading DINO weights from torch.hub (facebookresearch/dino:main)...


Using cache found in C:\Users\24912/.cache\torch\hub\facebookresearch_dino_main


ThreeStreamModel(
  (simclr_backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Seque

In [12]:
# 5. Training and Evaluation Loop

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    # Clear cache before training
    torch.cuda.empty_cache()
    
    for images, labels in tqdm(loader, desc="Training"):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def evaluate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Evaluating"):
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# Main Training Loop
num_epochs = 20
best_acc = 0.0

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)
    
    scheduler.step()
    
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "best_three_stream_model.pth")
        print("Saved Best Model!")
        
print(f"Training Complete. Best Validation Accuracy: {best_acc:.2f}%")

# Final Test
model.load_state_dict(torch.load("best_three_stream_model.pth"))
test_loss, test_acc = evaluate(model, test_loader, criterion, device)
print(f"Test Accuracy: {test_acc:.2f}%")

Epoch 1/20


Training:   0%|          | 0/100 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast():
Training: 100%|██████████| 100/100 [00:33<00:00,  2.99it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  2.99it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.35it/s]



Train Loss: 2.3071 | Train Acc: 11.75%
Val Loss: 1.9068 | Val Acc: 25.25%
Saved Best Model!
Epoch 2/20
Saved Best Model!
Epoch 2/20


Training: 100%|██████████| 100/100 [00:32<00:00,  3.04it/s]
Training: 100%|██████████| 100/100 [00:32<00:00,  3.04it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.31it/s]



Train Loss: 1.3593 | Train Acc: 48.31%
Val Loss: 0.7689 | Val Acc: 65.25%
Saved Best Model!
Epoch 3/20
Saved Best Model!
Epoch 3/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.31it/s]



Train Loss: 0.5922 | Train Acc: 77.81%
Val Loss: 0.3035 | Val Acc: 91.00%
Saved Best Model!
Epoch 4/20
Saved Best Model!
Epoch 4/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.14it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.14it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.42it/s]



Train Loss: 0.1983 | Train Acc: 95.06%
Val Loss: 0.1159 | Val Acc: 96.75%
Saved Best Model!
Epoch 5/20
Saved Best Model!
Epoch 5/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.20it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.20it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.33it/s]



Train Loss: 0.0813 | Train Acc: 98.50%
Val Loss: 0.0892 | Val Acc: 97.50%
Saved Best Model!
Epoch 6/20
Saved Best Model!
Epoch 6/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.29it/s]



Train Loss: 0.0489 | Train Acc: 99.12%
Val Loss: 0.0580 | Val Acc: 98.50%
Saved Best Model!
Epoch 7/20
Saved Best Model!
Epoch 7/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.15it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.15it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.37it/s]



Train Loss: 0.0430 | Train Acc: 99.31%
Val Loss: 0.0933 | Val Acc: 97.00%
Epoch 8/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.18it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.18it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.32it/s]



Train Loss: 0.0303 | Train Acc: 99.56%
Val Loss: 0.0684 | Val Acc: 97.25%
Epoch 9/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.37it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.37it/s]


Train Loss: 0.0198 | Train Acc: 99.69%
Val Loss: 0.0804 | Val Acc: 96.75%
Epoch 10/20


Training: 100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
Training: 100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.35it/s]



Train Loss: 0.0234 | Train Acc: 99.56%
Val Loss: 0.0865 | Val Acc: 96.75%
Epoch 11/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.32it/s]



Train Loss: 0.0170 | Train Acc: 99.81%
Val Loss: 0.0597 | Val Acc: 97.75%
Epoch 12/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.38it/s]



Train Loss: 0.0178 | Train Acc: 99.69%
Val Loss: 0.0485 | Val Acc: 98.25%
Epoch 13/20


Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Training: 100%|██████████| 100/100 [00:31<00:00,  3.19it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.32it/s]



Train Loss: 0.0159 | Train Acc: 99.94%
Val Loss: 0.0526 | Val Acc: 98.25%
Epoch 14/20


Training: 100%|██████████| 100/100 [00:32<00:00,  3.11it/s]
Training: 100%|██████████| 100/100 [00:32<00:00,  3.11it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.33it/s]



Train Loss: 0.0142 | Train Acc: 99.88%
Val Loss: 0.0557 | Val Acc: 98.00%
Epoch 15/20


Training: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Evaluating: 100%|██████████| 25/25 [00:06<00:00,  4.08it/s]



Train Loss: 0.0242 | Train Acc: 99.44%
Val Loss: 0.1035 | Val Acc: 97.50%
Epoch 16/20


Training: 100%|██████████| 100/100 [00:33<00:00,  3.00it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  3.00it/s]
Evaluating: 100%|██████████| 25/25 [00:06<00:00,  4.16it/s]



Train Loss: 0.0195 | Train Acc: 99.75%
Val Loss: 0.0543 | Val Acc: 98.25%
Epoch 17/20


Training: 100%|██████████| 100/100 [00:33<00:00,  3.02it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  3.02it/s]
Evaluating: 100%|██████████| 25/25 [00:06<00:00,  4.10it/s]



Train Loss: 0.0328 | Train Acc: 99.06%
Val Loss: 0.1310 | Val Acc: 96.50%
Epoch 18/20


Training: 100%|██████████| 100/100 [00:33<00:00,  2.96it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  2.96it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.22it/s]



Train Loss: 0.0388 | Train Acc: 99.31%
Val Loss: 0.0700 | Val Acc: 97.75%
Epoch 19/20


Training: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Training: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.25it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.25it/s]


Train Loss: 0.0755 | Train Acc: 97.62%
Val Loss: 0.0696 | Val Acc: 98.00%
Epoch 20/20


Training: 100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
Training: 100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
Evaluating: 100%|██████████| 25/25 [00:05<00:00,  4.40it/s]
  model.load_state_dict(torch.load("best_three_stream_model.pth"))

  model.load_state_dict(torch.load("best_three_stream_model.pth"))


Train Loss: 0.0768 | Train Acc: 98.06%
Val Loss: 0.0998 | Val Acc: 96.50%
Training Complete. Best Validation Accuracy: 98.50%


Evaluating: 100%|██████████| 63/63 [00:14<00:00,  4.21it/s]

Test Accuracy: 85.00%





In [13]:
# 6. Calculate Final Metrics (Top-1, Macro-F1, Params, FLOPs)
from sklearn.metrics import f1_score
import numpy as np

def calculate_metrics(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Calculating Metrics"):
            images = images.to(device)
            outputs = model(images)
            _, preds = outputs.max(1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())
            
    # Top-1 Accuracy
    top1_acc = np.mean(np.array(all_preds) == np.array(all_labels)) * 100
    
    # Macro-F1
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    
    return top1_acc, macro_f1

# Load best model
if os.path.exists("best_three_stream_model.pth"):
    model.load_state_dict(torch.load("best_three_stream_model.pth"))
    print("Loaded best model for evaluation.")
else:
    print("Warning: Best model file not found. Using current model weights.")

model.to(device)

# Calculate Accuracy and F1
test_acc, test_f1 = calculate_metrics(model, test_loader, device)

# Calculate Parameters
params_m = sum(p.numel() for p in model.parameters()) / 1e6

# Calculate FLOPs (requires thop)
flops_g = "N/A"
try:
    from thop import profile
    # Create a dummy input
    dummy_input = torch.randn(1, 3, 224, 224).to(device)
    flops, params = profile(model, inputs=(dummy_input,), verbose=False)
    flops_g = flops / 1e9
except ImportError:
    print("Library 'thop' not found. To calculate FLOPs, please install it: pip install thop")

print("\n" + "="*30)
print(f"FINAL RESULTS")
print("="*30)
print(f"{'Metric':<15} | {'Value':<10}")
print("-" * 30)
print(f"{'Top-1 Acc (%)':<15} | {test_acc:.2f}")
print(f"{'Macro-F1':<15} | {test_f1:.4f}")
print(f"{'Params (M)':<15} | {params_m:.2f}")
print(f"{'FLOPs (G)':<15} | {flops_g if isinstance(flops_g, str) else f'{flops_g:.2f}'}")
print("="*30)

  model.load_state_dict(torch.load("best_three_stream_model.pth"))


Loaded best model for evaluation.


  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
Calculating Metrics: 100%|██████████| 63/63 [00:15<00:00,  4.11it/s]

  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():



FINAL RESULTS
Metric          | Value     
------------------------------
Top-1 Acc (%)   | 85.00
Macro-F1        | 0.8513
Params (M)      | 73.87
FLOPs (G)       | 12.52
