In [1]:
# Advanced PyTorch ResNet-50 Training Pipeline (From-scratch, >78% recipe)
# -----------------------------------------------------------
# Pure PyTorch implementation with full recipe to reach >78% Top-1 on ImageNet-100
# Features included:
# - Full customizable ResNet-50 (all classes visible and editable)
# - Stochastic Depth (DropPath) in Bottleneck blocks
# - Strong augmentations: RandomResizedCrop, RandAugment, MixUp, CutMix, ColorJitter, RandomErasing
# - Label smoothing
# - AMP (torch.amp) mixed precision training
# - Gradient accumulation to simulate large global batch
# - EMA (Exponential Moving Average) of model weights
# - Cosine LR scheduler with linear warmup
# - Checkpointing and resume support
# - Designed to run on single GPU (RTX 3070 / Colab Free) with sensible defaults

# %%
# 0) Install dependencies (run in Colab/local once)
!pip install --upgrade pip
!pip install torch torchvision datasets --quiet

# %%
# 1) Imports
import os
import math
import time
from pathlib import Path
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from datasets import load_dataset

# %%
# 2) Configuration / Hyperparameters (tweakable)
CFG = {
    'data_hf_repo': 'clane9/imagenet-100',
    'data_dir': './imagenet100_hf',
    'num_classes': 100,
    'img_size': 224,
    'batch_size': 32,                # per-step batch size
    'accum_steps': 8,                # accumulation to simulate effective batch (32*8=256)
    'epochs': 120,
    'base_lr': 0.002,                # good starting LR for AdamW with effective batch 256
    'weight_decay': 0.05,
    'opt': 'adamw',                  # 'sgd' or 'adamw'
    'momentum': 0.9,
    'warmup_epochs': 5,
    'min_lr': 1e-6,
    'label_smoothing': 0.1,
    'mixup_alpha': 0.8,
    'cutmix_alpha': 1.0,
    'use_mixup': True,
    'use_cutmix': True,
    'randaugment_n': 2,
    'randaugment_m': 9,
    'reprob': 0.25,                  # random erase prob
    'drop_path_prob': 0.2,           # stochastic depth max probability
    'ema_decay': 0.9999,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'save_dir': './checkpoints',
    'save_every': 5,
}

os.makedirs(CFG['data_dir'], exist_ok=True)
os.makedirs(CFG['save_dir'], exist_ok=True)

# %%
# 3) Prepare dataset from Hugging Face (robust conversion)
print('Loading HuggingFace dataset...')
dataset = load_dataset(CFG['data_hf_repo'])

TRAIN_DIR = Path(CFG['data_dir']) / 'train'
VAL_DIR = Path(CFG['data_dir']) / 'val'

for split_name, split_path in [('train', TRAIN_DIR), ('validation', VAL_DIR)]:
    split_path.mkdir(parents=True, exist_ok=True)
    split_data = dataset[split_name]
    print(f'Converting {split_name}, num_samples={len(split_data)} ->', split_path)
    for idx, item in enumerate(split_data):
        label = item['label']
        label_dir = split_path / str(label)
        label_dir.mkdir(parents=True, exist_ok=True)
        img = item['image']
        # ensure PIL Image
        if not isinstance(img, Image.Image):
            img = Image.fromarray(img)
        img.save(label_dir / f'{idx}.jpg')

print('Dataset conversion done.')


Collecting pip
  Using cached pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Using cached pip-25.2-py3-none-any.whl (1.8 MB)



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: To modify pip, please run the following command:
E:\ML\ERA\s9\venv_RESNET\Scripts\python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Loading HuggingFace dataset...


Resolving data files:   0%|          | 0/17 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/17 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/17 [00:00<?, ?it/s]

Converting train, num_samples=126689 -> imagenet100_hf\train
Converting validation, num_samples=5000 -> imagenet100_hf\val
Dataset conversion done.


In [2]:
import torch

# Check if a CUDA-enabled GPU is available
if torch.cuda.is_available():
    # Get the name of the GPU
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Device: {gpu_name}")
else:
    print("No GPU available.")

GPU Device: NVIDIA GeForce RTX 3070 Laptop GPU


In [3]:

# %%
# 4) Transforms & Dataloaders
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(CFG['img_size']),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
    transforms.RandAugment(num_ops=CFG['randaugment_n'], magnitude=CFG['randaugment_m']),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=CFG['reprob']),
])

val_transform = transforms.Compose([
    transforms.Resize(int(CFG['img_size'] * 256 / 224)),
    transforms.CenterCrop(CFG['img_size']),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class ImageFolderFromHF(Dataset):
    def __init__(self, root, transform=None):
        self.root = Path(root)
        self.transform = transform
        self.samples = []
        for class_dir in sorted([p for p in self.root.iterdir() if p.is_dir()], key=lambda x: int(x.name)):
            label = int(class_dir.name)
            for img_path in class_dir.glob('*.*'):
                self.samples.append((img_path, label))
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        p, label = self.samples[idx]
        img = Image.open(p).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

train_ds = ImageFolderFromHF(TRAIN_DIR, transform=train_transform)
val_ds = ImageFolderFromHF(VAL_DIR, transform=val_transform)

train_loader = DataLoader(train_ds, batch_size=CFG['batch_size'], shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=CFG['batch_size'], shuffle=False, num_workers=4, pin_memory=True)

print('Train samples:', len(train_ds), 'Val samples:', len(val_ds))


Train samples: 126689 Val samples: 5000


In [4]:

# %%
# 5) Utilities: Mixup / CutMix / Label Smoothing
import random
from torch.distributions.beta import Beta

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = math.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = random.randint(0, W)
    cy = random.randint(0, H)

    bbx1 = max(0, cx - cut_w // 2)
    bby1 = max(0, cy - cut_h // 2)
    bbx2 = min(W, cx + cut_w // 2)
    bby2 = min(H, cy + cut_h // 2)

    return bbx1, bby1, bbx2, bby2


def mixup_data(x, y, alpha=0.8):
    if alpha > 0:
        lam = Beta(alpha, alpha).sample().item()
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def cutmix_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = Beta(alpha, alpha).sample().item()
    else:
        lam = 1
    index = torch.randperm(x.size(0)).to(x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    # adjust lambda to exactly match pixel ratio
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (x.size()[-1] * x.size()[-2]))
    y_a, y_b = y, y[index]
    return x, y_a, y_b, lam

class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.0):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
    def forward(self, x, target):
        logprobs = F.log_softmax(x, dim=-1)
        with torch.no_grad():
            true_dist = torch.zeros_like(logprobs)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * logprobs, dim=-1))


In [5]:

# %%
# 6) Model: ResNet-50 with Stochastic Depth (DropPath)
class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample  (as in https://arxiv.org/abs/1603.09382)
    """
    def __init__(self, drop_prob: float = 0.0):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob
    def forward(self, x):
        if self.drop_prob == 0. or not self.training:
            return x
        keep_prob = 1 - self.drop_prob
        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
        random_tensor.floor_()
        return x.div(keep_prob) * random_tensor

class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_planes, planes, stride=1, downsample=None, drop_prob=0.0):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.drop_path = DropPath(drop_prob) if drop_prob > 0.0 else nn.Identity()
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out = self.drop_path(out)
        out += identity
        out = self.relu(out)
        return out

class ResNet50Custom(nn.Module):
    def __init__(self, num_classes=100, drop_path_prob=0.0):
        super(ResNet50Custom, self).__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # layers
        self.layer1 = self._make_layer(64, 3, drop_path_prob)
        self.layer2 = self._make_layer(128, 4, drop_path_prob, stride=2)
        self.layer3 = self._make_layer(256, 6, drop_path_prob, stride=2)
        self.layer4 = self._make_layer(512, 3, drop_path_prob, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)
    def _make_layer(self, planes, blocks, drop_path_prob, stride=1):
        downsample = None
        if stride != 1 or self.in_planes != planes * Bottleneck.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_planes, planes * Bottleneck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * Bottleneck.expansion),
            )
        layers = []
        # linearly scale drop path probability across blocks
        for i in range(blocks):
            prob = drop_path_prob * (i / max(1, blocks - 1))
            if i == 0:
                layers.append(Bottleneck(self.in_planes, planes, stride, downsample, drop_prob=prob))
            else:
                layers.append(Bottleneck(self.in_planes, planes, 1, None, drop_prob=prob))
            self.in_planes = planes * Bottleneck.expansion
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# %%
# 7) EMA Helper
class ModelEMA:
    """Simple EMA of model parameters"""
    def __init__(self, model, decay=0.9999, device=None):
        self.ema_model = self._clone_model(model)
        self.decay = decay
        self.device = device
        if device:
            self.ema_model.to(device)
        for p in self.ema_model.parameters():
            p.requires_grad_(False)
    def _clone_model(self, model):
        import copy
        m = copy.deepcopy(model)
        return m
    def update(self, model):
        with torch.no_grad():
            msd = model.state_dict()
            for k, ema_v in self.ema_model.state_dict().items():
                model_v = msd[k].detach()
                ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v)
    def state_dict(self):
        return self.ema_model.state_dict()
    def load_state_dict(self, sd):
        self.ema_model.load_state_dict(sd)

# %%


In [6]:
# 8) Create model, optimizer, criterion, scaler, scheduler
device = torch.device(CFG['device'])
model = ResNet50Custom(num_classes=CFG['num_classes'], drop_path_prob=CFG['drop_path_prob']).to(device)

if CFG['opt'] == 'adamw':
    optimizer = optim.AdamW(model.parameters(), lr=CFG['base_lr'], weight_decay=CFG['weight_decay'])
else:
    optimizer = optim.SGD(model.parameters(), lr=CFG['base_lr'], momentum=CFG['momentum'], weight_decay=CFG['weight_decay'])

# label smoothing criterion
criterion = LabelSmoothingLoss(CFG['num_classes'], smoothing=CFG['label_smoothing'])
scaler = torch.cuda.amp.GradScaler()

# LR scheduler: cosine with linear warmup
total_steps = math.ceil(len(train_loader) / 1) * CFG['epochs']  # approximate steps
warmup_steps = max(1, int(len(train_loader) * CFG['warmup_epochs']))

def get_lr(step):
    if step < warmup_steps:
        return float(step) / float(max(1, warmup_steps))
    # cosine decay after warmup
    progress = float(step - warmup_steps) / float(max(1, total_steps - warmup_steps))
    return 0.5 * (1. + math.cos(math.pi * progress))

# EMA
ema = ModelEMA(model, decay=CFG['ema_decay'], device=device)

# optionally load checkpoint
start_epoch = 0
ckpt_path = os.path.join(CFG['save_dir'], 'last_checkpoint.pth')
if os.path.exists(ckpt_path):
    print('Loading checkpoint', ckpt_path)
    ckpt = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(ckpt['model_state'])
    optimizer.load_state_dict(ckpt['opt_state'])
    start_epoch = ckpt.get('epoch', 0) + 1
    ema.load_state_dict(ckpt.get('ema_state', ema.state_dict()))


  scaler = torch.cuda.amp.GradScaler()


Loading checkpoint ./checkpoints\last_checkpoint.pth


In [7]:


# %%
# 9) Training loop (with AMP, accumulation, mixup/cutmix, dynamic LR)
# Phase 2 fixes applied:
# - LR warmup logging & per-step lr set
# - Disable MixUp/CutMix during warmup_epochs
# - EMA decay ramp (start smaller during warmup, then increase)\# - Log both EMA and raw model validation accuracies
# - Ensure correct GradScaler usage order and gradient clipping

def get_ema_decay(epoch, base_decay=CFG['ema_decay'], warmup_epochs=CFG['warmup_epochs']):
    # Ramp EMA decay from a smaller value to base_decay over warmup_epochs
    if epoch < warmup_epochs:
        # start with weaker EMA (so it doesn't dominate early noisy weights)
        start = 0.9
        return start + (base_decay - start) * (epoch / max(1, warmup_epochs))
    return base_decay


def train_one_epoch(epoch):
    import sys
    print(f"  [DEBUG] Entered train_one_epoch function", flush=True)
    sys.stdout.flush()
    
    model.train()
    print(f"  [DEBUG] Set model to train mode", flush=True)
    sys.stdout.flush()
    
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    from tqdm.auto import tqdm
    print(f"  [DEBUG] Imported tqdm", flush=True)
    sys.stdout.flush()
    
    print(f"  Training epoch {epoch+1}...", flush=True)
    print(f"  Loading first batch (this may take 10-30 seconds)...", flush=True)
    print(f"  [DEBUG] About to create iterator from train_loader...", flush=True)
    sys.stdout.flush()
    
    # Test: Try to get first batch without tqdm
    try:
        print(f"  [DEBUG] Calling iter(train_loader)...", flush=True)
        sys.stdout.flush()
        train_iter = iter(train_loader)
        print(f"  [DEBUG] Iterator created successfully!", flush=True)
        sys.stdout.flush()
        
        print(f"  [DEBUG] Calling next(train_iter)...", flush=True)
        sys.stdout.flush()
        first_images, first_labels = next(train_iter)
        print(f"  ✓ First batch loaded! Shape: {first_images.shape}, Labels: {first_labels.shape}", flush=True)
        print(f"  Batch size actual: {first_images.size(0)}, Expected: {CFG['batch_size']}", flush=True)
        del train_iter  # Clean up
    except Exception as e:
        print(f"  ✗ ERROR loading first batch: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return
    
    sys.stdout.flush()
    
    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CFG["epochs"]}', ncols=100, mininterval=2.0)
    for i, (images, labels) in enumerate(pbar):
        
        step = epoch * len(train_loader) + i
        # compute LR multiplier (cosine schedule with warmup)
        lr_scale = get_lr(step)
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(CFG['min_lr'], CFG['base_lr'] * lr_scale)
        current_lr = optimizer.param_groups[0]['lr']

        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # Disable heavy regularization (mixup/cutmix) during warmup epochs
        use_mix = (CFG['use_mixup'] and (epoch >= CFG['warmup_epochs']) and random.random() < 0.5)
        use_cut = (CFG['use_cutmix'] and (epoch >= CFG['warmup_epochs']) and random.random() < 0.5)

        if use_mix:
            images, y_a, y_b, lam = mixup_data(images, labels, alpha=CFG['mixup_alpha'])
        elif use_cut:
            images, y_a, y_b, lam = cutmix_data(images, labels, alpha=CFG['cutmix_alpha'])
        else:
            y_a, y_b, lam = labels, None, None

        with torch.amp.autocast(device_type='cuda' if device.type=='cuda' else None):
            outputs = model(images)
            if use_mix or use_cut:
                loss = lam * criterion(outputs, y_a) + (1 - lam) * criterion(outputs, y_b)
            else:
                loss = criterion(outputs, labels)
            loss = loss / CFG['accum_steps']

        # Backward + optimization (correct scaler order)
        scaler.scale(loss).backward()

        if (i + 1) % CFG['accum_steps'] == 0:
            # unscale before clipping
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            scaler.step(optimizer)       # scaler steps the optimizer
            scaler.update()              # then scaler updates
            optimizer.zero_grad()

            # update EMA with ramped decay
            ema.decay = get_ema_decay(epoch)
            ema.update(model)

        running_loss += loss.item() * images.size(0) * CFG['accum_steps']
        _, preds = outputs.max(1)
        running_total += labels.size(0)
        running_correct += (preds == labels).sum().item()
        
        # Update progress bar and print status
        if i % 10 == 0:
            pbar.set_postfix({
                'loss': f'{running_loss/running_total:.4f}',
                'acc': f'{running_correct/running_total:.4f}',
                'lr': f'{current_lr:.6f}'
            })
        
        # Print every 100 batches for extra visibility
        if i > 0 and i % 100 == 0:
            print(f"    Batch {i}/{len(train_loader)}: loss={running_loss/running_total:.4f}, acc={running_correct/running_total:.4f}", flush=True)
            sys.stdout.flush()

    epoch_loss = running_loss / running_total
    epoch_acc = running_correct / running_total
    print(f'Epoch {epoch+1} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} LR: {current_lr:.6f}', flush=True)


def validate(epoch, use_ema=True):
    # Evaluate both EMA-weighted model and raw model for debugging
    import sys
    from tqdm.auto import tqdm
    print(f"  Starting validation...", flush=True)
    results = {}
    for name, model_to_eval in [('raw', model), ('ema', ema.ema_model)]:
        model_to_eval.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            val_pbar = tqdm(val_loader, desc=f'Validation ({name})', leave=False)
            for images, labels in val_pbar:
                images = images.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                with torch.amp.autocast(device_type='cuda' if device.type=='cuda' else None):
                    outputs = model_to_eval(images)
                _, preds = outputs.max(1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()
                
                # Update validation progress
                val_pbar.set_postfix({'acc': f'{correct/total:.4f}'})
        acc = correct / total
        print(f'Validation Acc ({name}): {acc:.4f}', flush=True)
        results[name] = acc
    return results


### PATCH: Fix DataLoader for Windows (Run this cell before training!)


In [8]:
# CRITICAL FIX: Recreate DataLoaders with num_workers=0 for Windows
print("Recreating DataLoaders with num_workers=0 for Windows compatibility...", flush=True)

train_loader = DataLoader(
    train_ds, 
    batch_size=CFG['batch_size'], 
    shuffle=True, 
    num_workers=0,  # CHANGED FROM 4 TO 0
    pin_memory=True
)

val_loader = DataLoader(
    val_ds, 
    batch_size=CFG['batch_size'], 
    shuffle=False, 
    num_workers=0,  # CHANGED FROM 4 TO 0
    pin_memory=True
)

print(f"✓ DataLoaders recreated successfully", flush=True)
print(f"  Train batches: {len(train_loader)}", flush=True)
print(f"  Val batches: {len(val_loader)}", flush=True)
print(f"  Num workers: {train_loader.num_workers}", flush=True)
print("Now run the training cell!", flush=True)


Recreating DataLoaders with num_workers=0 for Windows compatibility...
✓ DataLoaders recreated successfully
  Train batches: 3960
  Val batches: 157
  Num workers: 0
Now run the training cell!


In [None]:

# 10) Run training with new debug guards
import sys
sys.stdout.flush()  # Clear any buffered output
print("="*60, flush=True)
print("Training Started - Outputs will appear in real-time", flush=True)
print("="*60, flush=True)

# Diagnostic checks
print(f"Device: {device}", flush=True)
print(f"Model on device: {next(model.parameters()).device}", flush=True)
print(f"Total epochs to run: {CFG['epochs']}", flush=True)
print(f"Start epoch: {start_epoch}", flush=True)
print(f"Train loader batches: {len(train_loader)}", flush=True)
print(f"Train loader num_workers: {train_loader.num_workers}", flush=True)
print(f"Train dataset length: {len(train_loader.dataset)}", flush=True)

# Quick test: Can we access the dataset directly?
print("\n[DEBUG] Testing dataset access...", flush=True)
try:
    test_sample = train_loader.dataset[0]
    print(f"  ✓ Dataset[0] accessible, type: {type(test_sample)}", flush=True)
except Exception as e:
    print(f"  ✗ ERROR accessing dataset[0]: {e}", flush=True)

print("Starting training loop...", flush=True)
sys.stdout.flush()

best_val = 0.0
for epoch in range(start_epoch, CFG['epochs']):
    print(f"\n>>> Starting Epoch {epoch+1}/{CFG['epochs']} <<<", flush=True)
    sys.stdout.flush()
    
    tic = time.time()
    
    try:
        train_one_epoch(epoch)
        print(f">>> Training epoch {epoch+1} completed <<<", flush=True)
    except Exception as e:
        print(f"ERROR in train_one_epoch: {e}", flush=True)
        import traceback
        traceback.print_exc()
        break
    
    try:
        val_results = validate(epoch, use_ema=True)
        print(f">>> Validation completed <<<", flush=True)
    except Exception as e:
        print(f"ERROR in validate: {e}", flush=True)
        import traceback
        traceback.print_exc()
        val_results = {'ema': 0.0}
    toc = time.time()
    print(f'Epoch time: {(toc-tic)/60:.2f} mins', flush=True)

    # checkpoint
    val_acc = val_results.get('ema', 0.0)
    if (epoch + 1) % CFG['save_every'] == 0 or val_acc > best_val:
        ckpt = {
            'epoch': epoch,
            'model_state': model.state_dict(),
            'opt_state': optimizer.state_dict(),
            'ema_state': ema.state_dict(),
            'cfg': CFG,
        }
        torch.save(ckpt, os.path.join(CFG['save_dir'], f'checkpoint_epoch{epoch+1}.pth'))
        torch.save(ckpt, os.path.join(CFG['save_dir'], 'last_checkpoint.pth'))
        print('Saved checkpoint', flush=True)
    if val_acc > best_val:
        best_val = val_acc

print('Training finished. Best Val Acc (ema):', best_val, flush=True)

# End of Phase 2 updates: debug guards applied (EMA ramp, disabled mixup in warmup, raw+ema eval).


Training Started - Outputs will appear in real-time
Device: cuda
Model on device: cuda:0
Total epochs to run: 120
Start epoch: 15
Train loader batches: 3960
Train loader num_workers: 0
Train dataset length: 126689

[DEBUG] Testing dataset access...
  ✓ Dataset[0] accessible, type: <class 'tuple'>
Starting training loop...

>>> Starting Epoch 16/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 16...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 16/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.0444, acc=0.3530
    Batch 200/3960: loss=3.0807, acc=0.3134
    Batch 300/3960: loss=3.1121, acc=0.3173
    Batch 400/3960: loss=3.0747, acc=0.3321
    Batch 500/3960: loss=3.0763, acc=0.3379
    Batch 600/3960: loss=3.0734, acc=0.3396
    Batch 700/3960: loss=3.0764, acc=0.3361
    Batch 800/3960: loss=3.0825, acc=0.3333
    Batch 900/3960: loss=3.0905, acc=0.3345
    Batch 1000/3960: loss=3.0917, acc=0.3313
    Batch 1100/3960: loss=3.0892, acc=0.3349
    Batch 1200/3960: loss=3.0901, acc=0.3336
    Batch 1300/3960: loss=3.0887, acc=0.3325
    Batch 1400/3960: loss=3.0899, acc=0.3308
    Batch 1500/3960: loss=3.0900, acc=0.3303
    Batch 1600/3960: loss=3.0891, acc=0.3309
    Batch 1700/3960: loss=3.0935, acc=0.3284
    Batch 1800/3960: loss=3.0961, acc=0.3274
    Batch 1900/3960: loss=3.0934, acc=0.3254
    Batch 2000/3960: loss=3.0905, acc=0.3263
    Batch 2100/3960: loss=3.0887, acc=0.3263
    Batch 2200/3960: loss=3.0918, acc=0.3256
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6324


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4898
>>> Validation completed <<<
Epoch time: 45.37 mins
Saved checkpoint

>>> Starting Epoch 17/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 17...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 17/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.1769, acc=0.3048
    Batch 200/3960: loss=3.1629, acc=0.3066
    Batch 300/3960: loss=3.1491, acc=0.3129
    Batch 400/3960: loss=3.1431, acc=0.3164
    Batch 500/3960: loss=3.1483, acc=0.3180
    Batch 600/3960: loss=3.1381, acc=0.3159
    Batch 700/3960: loss=3.1350, acc=0.3141
    Batch 800/3960: loss=3.1370, acc=0.3097
    Batch 900/3960: loss=3.1298, acc=0.3139
    Batch 1000/3960: loss=3.1233, acc=0.3174
    Batch 1100/3960: loss=3.1248, acc=0.3184
    Batch 1200/3960: loss=3.1215, acc=0.3218
    Batch 1300/3960: loss=3.1150, acc=0.3240
    Batch 1400/3960: loss=3.1175, acc=0.3234
    Batch 1500/3960: loss=3.1121, acc=0.3246
    Batch 1600/3960: loss=3.1095, acc=0.3266
    Batch 1700/3960: loss=3.1142, acc=0.3261
    Batch 1800/3960: loss=3.1149, acc=0.3262
    Batch 1900/3960: loss=3.1124, acc=0.3258
    Batch 2000/3960: loss=3.1098, acc=0.3256
    Batch 2100/3960: loss=3.1081, acc=0.3257
    Batch 2200/3960: loss=3.1067, acc=0.3264
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6580


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4932
>>> Validation completed <<<
Epoch time: 44.44 mins
Saved checkpoint

>>> Starting Epoch 18/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 18...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 18/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.1261, acc=0.3326
    Batch 200/3960: loss=3.0792, acc=0.3419
    Batch 300/3960: loss=3.0874, acc=0.3316
    Batch 400/3960: loss=3.1065, acc=0.3275
    Batch 500/3960: loss=3.0841, acc=0.3331
    Batch 600/3960: loss=3.0788, acc=0.3319
    Batch 700/3960: loss=3.0796, acc=0.3300
    Batch 800/3960: loss=3.0716, acc=0.3353
    Batch 900/3960: loss=3.0751, acc=0.3349
    Batch 1000/3960: loss=3.0745, acc=0.3379
    Batch 1100/3960: loss=3.0717, acc=0.3377
    Batch 1200/3960: loss=3.0697, acc=0.3397
    Batch 1300/3960: loss=3.0675, acc=0.3406
    Batch 1400/3960: loss=3.0638, acc=0.3397
    Batch 1500/3960: loss=3.0691, acc=0.3385
    Batch 1600/3960: loss=3.0670, acc=0.3409
    Batch 1700/3960: loss=3.0671, acc=0.3413
    Batch 1800/3960: loss=3.0652, acc=0.3403
    Batch 1900/3960: loss=3.0657, acc=0.3402
    Batch 2000/3960: loss=3.0645, acc=0.3413
    Batch 2100/3960: loss=3.0665, acc=0.3404
    Batch 2200/3960: loss=3.0661, acc=0.3398
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6590


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4962
>>> Validation completed <<<
Epoch time: 44.41 mins
Saved checkpoint

>>> Starting Epoch 19/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 19...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 19/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.0717, acc=0.3428
    Batch 200/3960: loss=3.0499, acc=0.3372
    Batch 300/3960: loss=3.0326, acc=0.3511
    Batch 400/3960: loss=3.0457, acc=0.3529
    Batch 500/3960: loss=3.0304, acc=0.3534
    Batch 600/3960: loss=3.0201, acc=0.3585
    Batch 700/3960: loss=3.0236, acc=0.3569
    Batch 800/3960: loss=3.0343, acc=0.3558
    Batch 900/3960: loss=3.0322, acc=0.3539
    Batch 1000/3960: loss=3.0388, acc=0.3506
    Batch 1100/3960: loss=3.0452, acc=0.3516
    Batch 1200/3960: loss=3.0422, acc=0.3514
    Batch 1300/3960: loss=3.0402, acc=0.3483
    Batch 1400/3960: loss=3.0331, acc=0.3497
    Batch 1500/3960: loss=3.0249, acc=0.3482
    Batch 1600/3960: loss=3.0198, acc=0.3502
    Batch 1700/3960: loss=3.0192, acc=0.3513
    Batch 1800/3960: loss=3.0206, acc=0.3516
    Batch 1900/3960: loss=3.0191, acc=0.3502
    Batch 2000/3960: loss=3.0203, acc=0.3512
    Batch 2100/3960: loss=3.0196, acc=0.3501
    Batch 2200/3960: loss=3.0212, acc=0.3489
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6606


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4986
>>> Validation completed <<<
Epoch time: 43.87 mins
Saved checkpoint

>>> Starting Epoch 20/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 20...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 20/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.0426, acc=0.3255
    Batch 200/3960: loss=2.9575, acc=0.3507
    Batch 300/3960: loss=2.9434, acc=0.3483
    Batch 400/3960: loss=2.9471, acc=0.3570
    Batch 500/3960: loss=2.9704, acc=0.3510
    Batch 600/3960: loss=2.9937, acc=0.3518
    Batch 700/3960: loss=2.9930, acc=0.3532
    Batch 800/3960: loss=3.0024, acc=0.3537
    Batch 900/3960: loss=3.0069, acc=0.3508
    Batch 1000/3960: loss=3.0016, acc=0.3533
    Batch 1100/3960: loss=3.0046, acc=0.3522
    Batch 1200/3960: loss=3.0090, acc=0.3512
    Batch 1300/3960: loss=3.0091, acc=0.3511
    Batch 1400/3960: loss=3.0160, acc=0.3492
    Batch 1500/3960: loss=3.0163, acc=0.3494
    Batch 1600/3960: loss=3.0175, acc=0.3476
    Batch 1700/3960: loss=3.0191, acc=0.3467
    Batch 1800/3960: loss=3.0152, acc=0.3468
    Batch 1900/3960: loss=3.0131, acc=0.3485
    Batch 2000/3960: loss=3.0114, acc=0.3476
    Batch 2100/3960: loss=3.0161, acc=0.3458
    Batch 2200/3960: loss=3.0199, acc=0.3453
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6754


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5044
>>> Validation completed <<<
Epoch time: 38.82 mins
Saved checkpoint

>>> Starting Epoch 21/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 21...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 21/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=3.1116, acc=0.3456
    Batch 200/3960: loss=3.0993, acc=0.3329
    Batch 300/3960: loss=3.0565, acc=0.3419
    Batch 400/3960: loss=3.0428, acc=0.3453
    Batch 500/3960: loss=3.0351, acc=0.3467
    Batch 600/3960: loss=3.0309, acc=0.3480
    Batch 700/3960: loss=3.0204, acc=0.3467
    Batch 800/3960: loss=3.0121, acc=0.3496
    Batch 900/3960: loss=3.0180, acc=0.3479
    Batch 1000/3960: loss=3.0043, acc=0.3496
    Batch 1100/3960: loss=3.0062, acc=0.3476
    Batch 1200/3960: loss=3.0013, acc=0.3494
    Batch 1300/3960: loss=3.0000, acc=0.3487
    Batch 1400/3960: loss=3.0014, acc=0.3480
    Batch 1500/3960: loss=2.9927, acc=0.3506
    Batch 1600/3960: loss=2.9953, acc=0.3489
    Batch 1700/3960: loss=2.9977, acc=0.3493
    Batch 1800/3960: loss=2.9968, acc=0.3511
    Batch 1900/3960: loss=2.9937, acc=0.3526
    Batch 2000/3960: loss=2.9940, acc=0.3510
    Batch 2100/3960: loss=2.9931, acc=0.3511
    Batch 2200/3960: loss=2.9955, acc=0.3523
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6856


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5098
>>> Validation completed <<<
Epoch time: 38.09 mins
Saved checkpoint

>>> Starting Epoch 22/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 22...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 22/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.9985, acc=0.3394
    Batch 200/3960: loss=2.9566, acc=0.3573
    Batch 300/3960: loss=2.9697, acc=0.3526
    Batch 400/3960: loss=2.9663, acc=0.3541
    Batch 500/3960: loss=2.9507, acc=0.3600
    Batch 600/3960: loss=2.9594, acc=0.3562
    Batch 700/3960: loss=2.9747, acc=0.3520
    Batch 800/3960: loss=2.9781, acc=0.3505
    Batch 900/3960: loss=2.9732, acc=0.3508
    Batch 1000/3960: loss=2.9671, acc=0.3538
    Batch 1100/3960: loss=2.9680, acc=0.3549
    Batch 1200/3960: loss=2.9743, acc=0.3525
    Batch 1300/3960: loss=2.9786, acc=0.3527
    Batch 1400/3960: loss=2.9818, acc=0.3519
    Batch 1500/3960: loss=2.9867, acc=0.3521
    Batch 1600/3960: loss=2.9857, acc=0.3528
    Batch 1700/3960: loss=2.9848, acc=0.3523
    Batch 1800/3960: loss=2.9875, acc=0.3510
    Batch 1900/3960: loss=2.9826, acc=0.3531
    Batch 2000/3960: loss=2.9835, acc=0.3527
    Batch 2100/3960: loss=2.9816, acc=0.3525
    Batch 2200/3960: loss=2.9827, acc=0.3526
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.6996


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5102
>>> Validation completed <<<
Epoch time: 28.01 mins
Saved checkpoint

>>> Starting Epoch 23/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 23...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 23/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.9432, acc=0.3886
    Batch 200/3960: loss=2.9616, acc=0.3669
    Batch 300/3960: loss=2.9677, acc=0.3656
    Batch 400/3960: loss=2.9463, acc=0.3662
    Batch 500/3960: loss=2.9498, acc=0.3678
    Batch 600/3960: loss=2.9539, acc=0.3622
    Batch 700/3960: loss=2.9381, acc=0.3717
    Batch 800/3960: loss=2.9318, acc=0.3718
    Batch 900/3960: loss=2.9252, acc=0.3731
    Batch 1000/3960: loss=2.9224, acc=0.3723
    Batch 1100/3960: loss=2.9215, acc=0.3698
    Batch 1200/3960: loss=2.9290, acc=0.3672
    Batch 1300/3960: loss=2.9256, acc=0.3655
    Batch 1400/3960: loss=2.9276, acc=0.3657
    Batch 1500/3960: loss=2.9294, acc=0.3645
    Batch 1600/3960: loss=2.9287, acc=0.3660
    Batch 1700/3960: loss=2.9296, acc=0.3661
    Batch 1800/3960: loss=2.9265, acc=0.3666
    Batch 1900/3960: loss=2.9246, acc=0.3670
    Batch 2000/3960: loss=2.9287, acc=0.3669
    Batch 2100/3960: loss=2.9277, acc=0.3686
    Batch 2200/3960: loss=2.9290, acc=0.3685
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7070


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5142
>>> Validation completed <<<
Epoch time: 26.57 mins
Saved checkpoint

>>> Starting Epoch 24/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 24...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 24/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8741, acc=0.3756
    Batch 200/3960: loss=2.8731, acc=0.3706
    Batch 300/3960: loss=2.9030, acc=0.3576
    Batch 400/3960: loss=2.9338, acc=0.3530
    Batch 500/3960: loss=2.9288, acc=0.3608
    Batch 600/3960: loss=2.9170, acc=0.3687
    Batch 700/3960: loss=2.9045, acc=0.3709
    Batch 800/3960: loss=2.9163, acc=0.3668
    Batch 900/3960: loss=2.9193, acc=0.3681
    Batch 1000/3960: loss=2.9118, acc=0.3698
    Batch 1100/3960: loss=2.9003, acc=0.3714
    Batch 1200/3960: loss=2.9116, acc=0.3663
    Batch 1300/3960: loss=2.9054, acc=0.3670
    Batch 1400/3960: loss=2.9087, acc=0.3671
    Batch 1500/3960: loss=2.9070, acc=0.3672
    Batch 1600/3960: loss=2.9047, acc=0.3668
    Batch 1700/3960: loss=2.9061, acc=0.3679
    Batch 1800/3960: loss=2.9115, acc=0.3676
    Batch 1900/3960: loss=2.9115, acc=0.3680
    Batch 2000/3960: loss=2.9159, acc=0.3676
    Batch 2100/3960: loss=2.9175, acc=0.3671
    Batch 2200/3960: loss=2.9131, acc=0.3666
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7068


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5174
>>> Validation completed <<<
Epoch time: 34.39 mins
Saved checkpoint

>>> Starting Epoch 25/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 25...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 25/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8922, acc=0.3834
    Batch 200/3960: loss=2.8841, acc=0.3682
    Batch 300/3960: loss=2.9171, acc=0.3726
    Batch 400/3960: loss=2.8799, acc=0.3833
    Batch 500/3960: loss=2.8783, acc=0.3789
    Batch 600/3960: loss=2.8829, acc=0.3803
    Batch 700/3960: loss=2.8817, acc=0.3815
    Batch 800/3960: loss=2.8785, acc=0.3809
    Batch 900/3960: loss=2.8878, acc=0.3780
    Batch 1000/3960: loss=2.8910, acc=0.3797
    Batch 1100/3960: loss=2.8990, acc=0.3799
    Batch 1200/3960: loss=2.9024, acc=0.3786
    Batch 1300/3960: loss=2.9079, acc=0.3780
    Batch 1400/3960: loss=2.9138, acc=0.3742
    Batch 1500/3960: loss=2.9126, acc=0.3751
    Batch 1600/3960: loss=2.9110, acc=0.3751
    Batch 1700/3960: loss=2.9127, acc=0.3732
    Batch 1800/3960: loss=2.9128, acc=0.3751
    Batch 1900/3960: loss=2.9086, acc=0.3756
    Batch 2000/3960: loss=2.9141, acc=0.3743
    Batch 2100/3960: loss=2.9134, acc=0.3756
    Batch 2200/3960: loss=2.9120, acc=0.3757
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7062


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5168
>>> Validation completed <<<
Epoch time: 42.21 mins
Saved checkpoint

>>> Starting Epoch 26/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 26...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 26/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.9715, acc=0.3533
    Batch 200/3960: loss=2.9393, acc=0.3850
    Batch 300/3960: loss=2.9255, acc=0.3833
    Batch 400/3960: loss=2.9134, acc=0.3852
    Batch 500/3960: loss=2.9055, acc=0.3860
    Batch 600/3960: loss=2.9026, acc=0.3858
    Batch 700/3960: loss=2.8939, acc=0.3871
    Batch 800/3960: loss=2.8973, acc=0.3879
    Batch 900/3960: loss=2.8950, acc=0.3884
    Batch 1000/3960: loss=2.8851, acc=0.3874
    Batch 1100/3960: loss=2.8861, acc=0.3871
    Batch 1200/3960: loss=2.8697, acc=0.3888
    Batch 1300/3960: loss=2.8735, acc=0.3884
    Batch 1400/3960: loss=2.8798, acc=0.3862
    Batch 1500/3960: loss=2.8808, acc=0.3857
    Batch 1600/3960: loss=2.8825, acc=0.3856
    Batch 1700/3960: loss=2.8781, acc=0.3875
    Batch 1800/3960: loss=2.8814, acc=0.3873
    Batch 1900/3960: loss=2.8859, acc=0.3862
    Batch 2000/3960: loss=2.8925, acc=0.3828
    Batch 2100/3960: loss=2.8957, acc=0.3824
    Batch 2200/3960: loss=2.9017, acc=0.3795
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7046


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5184
>>> Validation completed <<<
Epoch time: 30.32 mins
Saved checkpoint

>>> Starting Epoch 27/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 27...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 27/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8252, acc=0.3998
    Batch 200/3960: loss=2.8799, acc=0.4003
    Batch 300/3960: loss=2.8859, acc=0.3831
    Batch 400/3960: loss=2.8777, acc=0.3825
    Batch 500/3960: loss=2.8849, acc=0.3786
    Batch 600/3960: loss=2.8852, acc=0.3811
    Batch 700/3960: loss=2.8904, acc=0.3830
    Batch 800/3960: loss=2.8801, acc=0.3860
    Batch 900/3960: loss=2.8707, acc=0.3834
    Batch 1000/3960: loss=2.8871, acc=0.3790
    Batch 1100/3960: loss=2.8821, acc=0.3804
    Batch 1200/3960: loss=2.8895, acc=0.3797
    Batch 1300/3960: loss=2.8905, acc=0.3795
    Batch 1400/3960: loss=2.8850, acc=0.3783
    Batch 1500/3960: loss=2.8864, acc=0.3779
    Batch 1600/3960: loss=2.8915, acc=0.3789
    Batch 1700/3960: loss=2.8888, acc=0.3784
    Batch 1800/3960: loss=2.8910, acc=0.3800
    Batch 1900/3960: loss=2.8871, acc=0.3810
    Batch 2000/3960: loss=2.8857, acc=0.3811
    Batch 2100/3960: loss=2.8857, acc=0.3806
    Batch 2200/3960: loss=2.8874, acc=0.3804
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7186


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5226
>>> Validation completed <<<
Epoch time: 37.58 mins
Saved checkpoint

>>> Starting Epoch 28/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 28...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 28/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7884, acc=0.3462
    Batch 200/3960: loss=2.7809, acc=0.3750
    Batch 300/3960: loss=2.8079, acc=0.3819
    Batch 400/3960: loss=2.8387, acc=0.3780
    Batch 500/3960: loss=2.8492, acc=0.3827
    Batch 600/3960: loss=2.8465, acc=0.3857
    Batch 700/3960: loss=2.8481, acc=0.3854
    Batch 800/3960: loss=2.8527, acc=0.3871
    Batch 900/3960: loss=2.8580, acc=0.3893
    Batch 1000/3960: loss=2.8684, acc=0.3884
    Batch 1100/3960: loss=2.8611, acc=0.3910
    Batch 1200/3960: loss=2.8525, acc=0.3922
    Batch 1300/3960: loss=2.8470, acc=0.3926
    Batch 1400/3960: loss=2.8469, acc=0.3916
    Batch 1500/3960: loss=2.8490, acc=0.3920
    Batch 1600/3960: loss=2.8478, acc=0.3925
    Batch 1700/3960: loss=2.8511, acc=0.3911
    Batch 1800/3960: loss=2.8523, acc=0.3916
    Batch 1900/3960: loss=2.8585, acc=0.3891
    Batch 2000/3960: loss=2.8589, acc=0.3891
    Batch 2100/3960: loss=2.8603, acc=0.3872
    Batch 2200/3960: loss=2.8603, acc=0.3873
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7234


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5222
>>> Validation completed <<<
Epoch time: 48.95 mins

>>> Starting Epoch 29/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 29...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 29/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8678, acc=0.3895
    Batch 200/3960: loss=2.8942, acc=0.3752
    Batch 300/3960: loss=2.8821, acc=0.3753
    Batch 400/3960: loss=2.9057, acc=0.3683
    Batch 500/3960: loss=2.9077, acc=0.3691
    Batch 600/3960: loss=2.9071, acc=0.3748
    Batch 700/3960: loss=2.8900, acc=0.3738
    Batch 800/3960: loss=2.8858, acc=0.3731
    Batch 900/3960: loss=2.8873, acc=0.3765
    Batch 1000/3960: loss=2.8786, acc=0.3788
    Batch 1100/3960: loss=2.8734, acc=0.3834
    Batch 1200/3960: loss=2.8760, acc=0.3853
    Batch 1300/3960: loss=2.8788, acc=0.3831
    Batch 1400/3960: loss=2.8755, acc=0.3817
    Batch 1500/3960: loss=2.8683, acc=0.3853
    Batch 1600/3960: loss=2.8636, acc=0.3876
    Batch 1700/3960: loss=2.8658, acc=0.3848
    Batch 1800/3960: loss=2.8649, acc=0.3848
    Batch 1900/3960: loss=2.8612, acc=0.3840
    Batch 2000/3960: loss=2.8650, acc=0.3831
    Batch 2100/3960: loss=2.8617, acc=0.3859
    Batch 2200/3960: loss=2.8597, acc=0.3862
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7204


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5208
>>> Validation completed <<<
Epoch time: 37.21 mins

>>> Starting Epoch 30/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 30...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 30/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8392, acc=0.3515
    Batch 200/3960: loss=2.8634, acc=0.3711
    Batch 300/3960: loss=2.8367, acc=0.3785
    Batch 400/3960: loss=2.8294, acc=0.3836
    Batch 500/3960: loss=2.8387, acc=0.3814
    Batch 600/3960: loss=2.8253, acc=0.3919
    Batch 700/3960: loss=2.8194, acc=0.3964
    Batch 800/3960: loss=2.8296, acc=0.3944
    Batch 900/3960: loss=2.8384, acc=0.3914
    Batch 1000/3960: loss=2.8454, acc=0.3910
    Batch 1100/3960: loss=2.8396, acc=0.3950
    Batch 1200/3960: loss=2.8488, acc=0.3936
    Batch 1300/3960: loss=2.8514, acc=0.3947
    Batch 1400/3960: loss=2.8563, acc=0.3929
    Batch 1500/3960: loss=2.8559, acc=0.3931
    Batch 1600/3960: loss=2.8495, acc=0.3947
    Batch 1700/3960: loss=2.8512, acc=0.3947
    Batch 1800/3960: loss=2.8551, acc=0.3941
    Batch 1900/3960: loss=2.8489, acc=0.3947
    Batch 2000/3960: loss=2.8455, acc=0.3974
    Batch 2100/3960: loss=2.8451, acc=0.3950
    Batch 2200/3960: loss=2.8460, acc=0.3947
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7290


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5196
>>> Validation completed <<<
Epoch time: 31.97 mins
Saved checkpoint

>>> Starting Epoch 31/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 31...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 31/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.9470, acc=0.3530
    Batch 200/3960: loss=2.9118, acc=0.3638
    Batch 300/3960: loss=2.9083, acc=0.3665
    Batch 400/3960: loss=2.8824, acc=0.3720
    Batch 500/3960: loss=2.8647, acc=0.3810
    Batch 600/3960: loss=2.8578, acc=0.3886
    Batch 700/3960: loss=2.8572, acc=0.3888
    Batch 800/3960: loss=2.8506, acc=0.3886
    Batch 900/3960: loss=2.8526, acc=0.3918
    Batch 1000/3960: loss=2.8489, acc=0.3910
    Batch 1100/3960: loss=2.8436, acc=0.3931
    Batch 1200/3960: loss=2.8447, acc=0.3936
    Batch 1300/3960: loss=2.8431, acc=0.3910
    Batch 1400/3960: loss=2.8487, acc=0.3922
    Batch 1500/3960: loss=2.8515, acc=0.3932
    Batch 1600/3960: loss=2.8468, acc=0.3946
    Batch 1700/3960: loss=2.8479, acc=0.3940
    Batch 1800/3960: loss=2.8515, acc=0.3935
    Batch 1900/3960: loss=2.8470, acc=0.3941
    Batch 2000/3960: loss=2.8486, acc=0.3956
    Batch 2100/3960: loss=2.8425, acc=0.3952
    Batch 2200/3960: loss=2.8385, acc=0.3945
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7282


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5206
>>> Validation completed <<<
Epoch time: 40.78 mins

>>> Starting Epoch 32/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 32...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 32/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.9061, acc=0.3889
    Batch 200/3960: loss=2.8333, acc=0.3896
    Batch 300/3960: loss=2.8344, acc=0.3985
    Batch 400/3960: loss=2.8483, acc=0.3999
    Batch 500/3960: loss=2.8156, acc=0.4071
    Batch 600/3960: loss=2.8240, acc=0.3975
    Batch 700/3960: loss=2.8113, acc=0.4029
    Batch 800/3960: loss=2.8083, acc=0.4012
    Batch 900/3960: loss=2.8243, acc=0.3973
    Batch 1000/3960: loss=2.8330, acc=0.3954
    Batch 1100/3960: loss=2.8349, acc=0.3962
    Batch 1200/3960: loss=2.8441, acc=0.3938
    Batch 1300/3960: loss=2.8392, acc=0.3952
    Batch 1400/3960: loss=2.8413, acc=0.3937
    Batch 1500/3960: loss=2.8325, acc=0.3944
    Batch 1600/3960: loss=2.8281, acc=0.3937
    Batch 1700/3960: loss=2.8232, acc=0.3933
    Batch 1800/3960: loss=2.8155, acc=0.3942
    Batch 1900/3960: loss=2.8173, acc=0.3940
    Batch 2000/3960: loss=2.8242, acc=0.3937
    Batch 2100/3960: loss=2.8299, acc=0.3921
    Batch 2200/3960: loss=2.8316, acc=0.3922
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7408


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5234
>>> Validation completed <<<
Epoch time: 43.40 mins
Saved checkpoint

>>> Starting Epoch 33/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 33...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 33/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8298, acc=0.4032
    Batch 200/3960: loss=2.8407, acc=0.4047
    Batch 300/3960: loss=2.8462, acc=0.4011
    Batch 400/3960: loss=2.8355, acc=0.4108
    Batch 500/3960: loss=2.8340, acc=0.4122
    Batch 600/3960: loss=2.8399, acc=0.4122
    Batch 700/3960: loss=2.8333, acc=0.4111
    Batch 800/3960: loss=2.8287, acc=0.4136
    Batch 900/3960: loss=2.8244, acc=0.4123
    Batch 1000/3960: loss=2.8151, acc=0.4111
    Batch 1100/3960: loss=2.8131, acc=0.4116
    Batch 1200/3960: loss=2.8130, acc=0.4110
    Batch 1300/3960: loss=2.8146, acc=0.4105
    Batch 1400/3960: loss=2.8211, acc=0.4089
    Batch 1500/3960: loss=2.8196, acc=0.4080
    Batch 1600/3960: loss=2.8296, acc=0.4049
    Batch 1700/3960: loss=2.8258, acc=0.4044
    Batch 1800/3960: loss=2.8238, acc=0.4057
    Batch 1900/3960: loss=2.8238, acc=0.4065
    Batch 2000/3960: loss=2.8202, acc=0.4071
    Batch 2100/3960: loss=2.8202, acc=0.4053
    Batch 2200/3960: loss=2.8179, acc=0.4051
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7342


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5256
>>> Validation completed <<<
Epoch time: 38.55 mins
Saved checkpoint

>>> Starting Epoch 34/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 34...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 34/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.8764, acc=0.3834
    Batch 200/3960: loss=2.8501, acc=0.3966
    Batch 300/3960: loss=2.8409, acc=0.3988
    Batch 400/3960: loss=2.8205, acc=0.4006
    Batch 500/3960: loss=2.8056, acc=0.4038
    Batch 600/3960: loss=2.8223, acc=0.3979
    Batch 700/3960: loss=2.8309, acc=0.3924
    Batch 800/3960: loss=2.8195, acc=0.3948
    Batch 900/3960: loss=2.8150, acc=0.3948
    Batch 1000/3960: loss=2.8177, acc=0.3958
    Batch 1100/3960: loss=2.8132, acc=0.3948
    Batch 1200/3960: loss=2.8108, acc=0.3992
    Batch 1300/3960: loss=2.8065, acc=0.3999
    Batch 1400/3960: loss=2.8053, acc=0.4009
    Batch 1500/3960: loss=2.8071, acc=0.4023
    Batch 1600/3960: loss=2.8066, acc=0.4021
    Batch 1700/3960: loss=2.8007, acc=0.4035
    Batch 1800/3960: loss=2.7944, acc=0.4045
    Batch 1900/3960: loss=2.7937, acc=0.4041
    Batch 2000/3960: loss=2.7915, acc=0.4035
    Batch 2100/3960: loss=2.7915, acc=0.4026
    Batch 2200/3960: loss=2.7896, acc=0.4018
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7320


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5246
>>> Validation completed <<<
Epoch time: 38.74 mins

>>> Starting Epoch 35/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 35...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 35/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7813, acc=0.3537
    Batch 200/3960: loss=2.8118, acc=0.3618
    Batch 300/3960: loss=2.7940, acc=0.3794
    Batch 400/3960: loss=2.8026, acc=0.3758
    Batch 500/3960: loss=2.8103, acc=0.3805
    Batch 600/3960: loss=2.7898, acc=0.3899
    Batch 700/3960: loss=2.8026, acc=0.3908
    Batch 800/3960: loss=2.7981, acc=0.3892
    Batch 900/3960: loss=2.8130, acc=0.3867
    Batch 1000/3960: loss=2.8087, acc=0.3906
    Batch 1100/3960: loss=2.8099, acc=0.3912
    Batch 1200/3960: loss=2.8043, acc=0.3941
    Batch 1300/3960: loss=2.8006, acc=0.3972
    Batch 1400/3960: loss=2.7962, acc=0.4015
    Batch 1500/3960: loss=2.7952, acc=0.3998
    Batch 1600/3960: loss=2.8016, acc=0.3995
    Batch 1700/3960: loss=2.8001, acc=0.4006
    Batch 1800/3960: loss=2.7999, acc=0.3993
    Batch 1900/3960: loss=2.7956, acc=0.4013
    Batch 2000/3960: loss=2.7982, acc=0.4000
    Batch 2100/3960: loss=2.7922, acc=0.4017
    Batch 2200/3960: loss=2.7955, acc=0.4017
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7400


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5282
>>> Validation completed <<<
Epoch time: 47.27 mins
Saved checkpoint

>>> Starting Epoch 36/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 36...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 36/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7281, acc=0.4353
    Batch 200/3960: loss=2.7220, acc=0.4213
    Batch 300/3960: loss=2.7548, acc=0.4136
    Batch 400/3960: loss=2.7707, acc=0.4129
    Batch 500/3960: loss=2.7877, acc=0.4002
    Batch 600/3960: loss=2.7809, acc=0.4080
    Batch 700/3960: loss=2.7809, acc=0.4097
    Batch 800/3960: loss=2.7947, acc=0.4065
    Batch 900/3960: loss=2.7851, acc=0.4115
    Batch 1000/3960: loss=2.7874, acc=0.4092
    Batch 1100/3960: loss=2.7924, acc=0.4061
    Batch 1200/3960: loss=2.7926, acc=0.4056
    Batch 1300/3960: loss=2.7969, acc=0.4048
    Batch 1400/3960: loss=2.7987, acc=0.4036
    Batch 1500/3960: loss=2.7908, acc=0.4069
    Batch 1600/3960: loss=2.7910, acc=0.4061
    Batch 1700/3960: loss=2.7870, acc=0.4065
    Batch 1800/3960: loss=2.7894, acc=0.4062
    Batch 1900/3960: loss=2.7941, acc=0.4047
    Batch 2000/3960: loss=2.7923, acc=0.4053
    Batch 2100/3960: loss=2.7938, acc=0.4035
    Batch 2200/3960: loss=2.7958, acc=0.4052
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7328


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5282
>>> Validation completed <<<
Epoch time: 35.66 mins

>>> Starting Epoch 37/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 37...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 37/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7633, acc=0.3994
    Batch 200/3960: loss=2.7651, acc=0.4036
    Batch 300/3960: loss=2.7910, acc=0.3978
    Batch 400/3960: loss=2.7852, acc=0.4051
    Batch 500/3960: loss=2.7739, acc=0.4065
    Batch 600/3960: loss=2.7821, acc=0.4053
    Batch 700/3960: loss=2.7857, acc=0.4040
    Batch 800/3960: loss=2.7746, acc=0.4086
    Batch 900/3960: loss=2.7684, acc=0.4110
    Batch 1000/3960: loss=2.7753, acc=0.4138
    Batch 1100/3960: loss=2.7776, acc=0.4116
    Batch 1200/3960: loss=2.7648, acc=0.4134
    Batch 1300/3960: loss=2.7585, acc=0.4134
    Batch 1400/3960: loss=2.7507, acc=0.4144
    Batch 1500/3960: loss=2.7481, acc=0.4161
    Batch 1600/3960: loss=2.7461, acc=0.4181
    Batch 1700/3960: loss=2.7460, acc=0.4187
    Batch 1800/3960: loss=2.7479, acc=0.4184
    Batch 1900/3960: loss=2.7481, acc=0.4179
    Batch 2000/3960: loss=2.7472, acc=0.4158
    Batch 2100/3960: loss=2.7508, acc=0.4142
    Batch 2200/3960: loss=2.7512, acc=0.4145
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7386


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5284
>>> Validation completed <<<
Epoch time: 26.50 mins
Saved checkpoint

>>> Starting Epoch 38/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 38...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 38/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6819, acc=0.4251
    Batch 200/3960: loss=2.7401, acc=0.4014
    Batch 300/3960: loss=2.7506, acc=0.4018
    Batch 400/3960: loss=2.7638, acc=0.4021
    Batch 500/3960: loss=2.7659, acc=0.4074
    Batch 600/3960: loss=2.7834, acc=0.4042
    Batch 700/3960: loss=2.7764, acc=0.3988
    Batch 800/3960: loss=2.7688, acc=0.4060
    Batch 900/3960: loss=2.7659, acc=0.4062
    Batch 1000/3960: loss=2.7735, acc=0.4049
    Batch 1100/3960: loss=2.7656, acc=0.4049
    Batch 1200/3960: loss=2.7715, acc=0.4058
    Batch 1300/3960: loss=2.7769, acc=0.4058
    Batch 1400/3960: loss=2.7739, acc=0.4058
    Batch 1500/3960: loss=2.7729, acc=0.4045
    Batch 1600/3960: loss=2.7767, acc=0.4047
    Batch 1700/3960: loss=2.7713, acc=0.4066
    Batch 1800/3960: loss=2.7692, acc=0.4063
    Batch 1900/3960: loss=2.7702, acc=0.4055
    Batch 2000/3960: loss=2.7689, acc=0.4071
    Batch 2100/3960: loss=2.7640, acc=0.4074
    Batch 2200/3960: loss=2.7626, acc=0.4070
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7568


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5302
>>> Validation completed <<<
Epoch time: 38.43 mins
Saved checkpoint

>>> Starting Epoch 39/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 39...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 39/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6953, acc=0.3756
    Batch 200/3960: loss=2.7021, acc=0.4072
    Batch 300/3960: loss=2.7144, acc=0.4128
    Batch 400/3960: loss=2.7157, acc=0.4052
    Batch 500/3960: loss=2.7506, acc=0.4062
    Batch 600/3960: loss=2.7365, acc=0.4047
    Batch 700/3960: loss=2.7386, acc=0.4108
    Batch 800/3960: loss=2.7338, acc=0.4131
    Batch 900/3960: loss=2.7423, acc=0.4094
    Batch 1000/3960: loss=2.7442, acc=0.4065
    Batch 1100/3960: loss=2.7435, acc=0.4068
    Batch 1200/3960: loss=2.7478, acc=0.4062
    Batch 1300/3960: loss=2.7497, acc=0.4055
    Batch 1400/3960: loss=2.7523, acc=0.4071
    Batch 1500/3960: loss=2.7603, acc=0.4067
    Batch 1600/3960: loss=2.7645, acc=0.4049
    Batch 1700/3960: loss=2.7644, acc=0.4039
    Batch 1800/3960: loss=2.7618, acc=0.4039
    Batch 1900/3960: loss=2.7613, acc=0.4057
    Batch 2000/3960: loss=2.7673, acc=0.4050
    Batch 2100/3960: loss=2.7649, acc=0.4053
    Batch 2200/3960: loss=2.7620, acc=0.4057
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7510


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5302
>>> Validation completed <<<
Epoch time: 40.09 mins

>>> Starting Epoch 40/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 40...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 40/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7423, acc=0.4270
    Batch 200/3960: loss=2.7645, acc=0.4188
    Batch 300/3960: loss=2.7697, acc=0.4279
    Batch 400/3960: loss=2.7601, acc=0.4244
    Batch 500/3960: loss=2.7568, acc=0.4260
    Batch 600/3960: loss=2.7481, acc=0.4277
    Batch 700/3960: loss=2.7603, acc=0.4241
    Batch 800/3960: loss=2.7529, acc=0.4235
    Batch 900/3960: loss=2.7650, acc=0.4211
    Batch 1000/3960: loss=2.7682, acc=0.4188
    Batch 1100/3960: loss=2.7605, acc=0.4199
    Batch 1200/3960: loss=2.7675, acc=0.4187
    Batch 1300/3960: loss=2.7587, acc=0.4208
    Batch 1400/3960: loss=2.7590, acc=0.4199
    Batch 1500/3960: loss=2.7562, acc=0.4190
    Batch 1600/3960: loss=2.7591, acc=0.4181
    Batch 1700/3960: loss=2.7562, acc=0.4193
    Batch 1800/3960: loss=2.7524, acc=0.4210
    Batch 1900/3960: loss=2.7526, acc=0.4186
    Batch 2000/3960: loss=2.7542, acc=0.4183
    Batch 2100/3960: loss=2.7485, acc=0.4200
    Batch 2200/3960: loss=2.7527, acc=0.4197
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7606


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5318
>>> Validation completed <<<
Epoch time: 30.17 mins
Saved checkpoint

>>> Starting Epoch 41/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 41...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 41/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7496, acc=0.4028
    Batch 200/3960: loss=2.7685, acc=0.4008
    Batch 300/3960: loss=2.7596, acc=0.4083
    Batch 400/3960: loss=2.7532, acc=0.4031
    Batch 500/3960: loss=2.7404, acc=0.4034
    Batch 600/3960: loss=2.7498, acc=0.4058
    Batch 700/3960: loss=2.7559, acc=0.4109
    Batch 800/3960: loss=2.7523, acc=0.4119
    Batch 900/3960: loss=2.7434, acc=0.4143
    Batch 1000/3960: loss=2.7434, acc=0.4129
    Batch 1100/3960: loss=2.7449, acc=0.4141
    Batch 1200/3960: loss=2.7444, acc=0.4146
    Batch 1300/3960: loss=2.7460, acc=0.4176
    Batch 1400/3960: loss=2.7410, acc=0.4185
    Batch 1500/3960: loss=2.7377, acc=0.4204
    Batch 1600/3960: loss=2.7358, acc=0.4209
    Batch 1700/3960: loss=2.7362, acc=0.4204
    Batch 1800/3960: loss=2.7464, acc=0.4162
    Batch 1900/3960: loss=2.7486, acc=0.4141
    Batch 2000/3960: loss=2.7451, acc=0.4163
    Batch 2100/3960: loss=2.7482, acc=0.4164
    Batch 2200/3960: loss=2.7446, acc=0.4163
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7640


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5318
>>> Validation completed <<<
Epoch time: 32.25 mins

>>> Starting Epoch 42/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 42...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 42/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6069, acc=0.4325
    Batch 200/3960: loss=2.6424, acc=0.4403
    Batch 300/3960: loss=2.6847, acc=0.4353
    Batch 400/3960: loss=2.7035, acc=0.4298
    Batch 500/3960: loss=2.7164, acc=0.4191
    Batch 600/3960: loss=2.7197, acc=0.4203
    Batch 700/3960: loss=2.7066, acc=0.4245
    Batch 800/3960: loss=2.7131, acc=0.4261
    Batch 900/3960: loss=2.7253, acc=0.4235
    Batch 1000/3960: loss=2.7317, acc=0.4234
    Batch 1100/3960: loss=2.7283, acc=0.4226
    Batch 1200/3960: loss=2.7354, acc=0.4234
    Batch 1300/3960: loss=2.7408, acc=0.4200
    Batch 1400/3960: loss=2.7389, acc=0.4202
    Batch 1500/3960: loss=2.7357, acc=0.4204
    Batch 1600/3960: loss=2.7351, acc=0.4209
    Batch 1700/3960: loss=2.7391, acc=0.4196
    Batch 1800/3960: loss=2.7358, acc=0.4190
    Batch 1900/3960: loss=2.7357, acc=0.4201
    Batch 2000/3960: loss=2.7355, acc=0.4211
    Batch 2100/3960: loss=2.7351, acc=0.4203
    Batch 2200/3960: loss=2.7353, acc=0.4212
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7652


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5300
>>> Validation completed <<<
Epoch time: 24.52 mins

>>> Starting Epoch 43/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 43...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 43/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7249, acc=0.4548
    Batch 200/3960: loss=2.7747, acc=0.4244
    Batch 300/3960: loss=2.7470, acc=0.4329
    Batch 400/3960: loss=2.7392, acc=0.4361
    Batch 500/3960: loss=2.7345, acc=0.4357
    Batch 600/3960: loss=2.7355, acc=0.4301
    Batch 700/3960: loss=2.7403, acc=0.4246
    Batch 800/3960: loss=2.7430, acc=0.4223
    Batch 900/3960: loss=2.7406, acc=0.4271
    Batch 1000/3960: loss=2.7280, acc=0.4307
    Batch 1100/3960: loss=2.7233, acc=0.4345
    Batch 1200/3960: loss=2.7239, acc=0.4326
    Batch 1300/3960: loss=2.7276, acc=0.4317
    Batch 1400/3960: loss=2.7340, acc=0.4269
    Batch 1500/3960: loss=2.7311, acc=0.4252
    Batch 1600/3960: loss=2.7291, acc=0.4249
    Batch 1700/3960: loss=2.7239, acc=0.4268
    Batch 1800/3960: loss=2.7260, acc=0.4266
    Batch 1900/3960: loss=2.7276, acc=0.4258
    Batch 2000/3960: loss=2.7308, acc=0.4247
    Batch 2100/3960: loss=2.7314, acc=0.4260
    Batch 2200/3960: loss=2.7288, acc=0.4261
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7600


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5304
>>> Validation completed <<<
Epoch time: 33.81 mins

>>> Starting Epoch 44/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 44...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 44/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7078, acc=0.3883
    Batch 200/3960: loss=2.7135, acc=0.3750
    Batch 300/3960: loss=2.7294, acc=0.3885
    Batch 400/3960: loss=2.7039, acc=0.4038
    Batch 500/3960: loss=2.6973, acc=0.4169
    Batch 600/3960: loss=2.7017, acc=0.4235
    Batch 700/3960: loss=2.6931, acc=0.4301
    Batch 800/3960: loss=2.6874, acc=0.4315
    Batch 900/3960: loss=2.6871, acc=0.4376
    Batch 1000/3960: loss=2.6870, acc=0.4411
    Batch 1100/3960: loss=2.6901, acc=0.4397
    Batch 1200/3960: loss=2.6765, acc=0.4432
    Batch 1300/3960: loss=2.6807, acc=0.4404
    Batch 1400/3960: loss=2.6848, acc=0.4401
    Batch 1500/3960: loss=2.6889, acc=0.4407
    Batch 1600/3960: loss=2.6856, acc=0.4392
    Batch 1700/3960: loss=2.6849, acc=0.4377
    Batch 1800/3960: loss=2.6883, acc=0.4369
    Batch 1900/3960: loss=2.6880, acc=0.4352
    Batch 2000/3960: loss=2.6888, acc=0.4325
    Batch 2100/3960: loss=2.6899, acc=0.4316
    Batch 2200/3960: loss=2.6921, acc=0.4308
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7634


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5276
>>> Validation completed <<<
Epoch time: 36.82 mins

>>> Starting Epoch 45/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 45...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 45/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6779, acc=0.4186
    Batch 200/3960: loss=2.6840, acc=0.3954
    Batch 300/3960: loss=2.7230, acc=0.3958
    Batch 400/3960: loss=2.7304, acc=0.3957
    Batch 500/3960: loss=2.7115, acc=0.4001
    Batch 600/3960: loss=2.7035, acc=0.4051
    Batch 700/3960: loss=2.6917, acc=0.4116
    Batch 800/3960: loss=2.7065, acc=0.4028
    Batch 900/3960: loss=2.6994, acc=0.4082
    Batch 1000/3960: loss=2.6986, acc=0.4049
    Batch 1100/3960: loss=2.6966, acc=0.4030
    Batch 1200/3960: loss=2.6897, acc=0.4048
    Batch 1300/3960: loss=2.6868, acc=0.4108
    Batch 1400/3960: loss=2.6933, acc=0.4115
    Batch 1500/3960: loss=2.6902, acc=0.4125
    Batch 1600/3960: loss=2.6934, acc=0.4153
    Batch 1700/3960: loss=2.6968, acc=0.4156
    Batch 1800/3960: loss=2.6938, acc=0.4184
    Batch 1900/3960: loss=2.6918, acc=0.4191
    Batch 2000/3960: loss=2.6952, acc=0.4174
    Batch 2100/3960: loss=2.6937, acc=0.4195
    Batch 2200/3960: loss=2.6934, acc=0.4197
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7796


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5280
>>> Validation completed <<<
Epoch time: 25.27 mins
Saved checkpoint

>>> Starting Epoch 46/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 46...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 46/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7275, acc=0.4288
    Batch 200/3960: loss=2.7424, acc=0.4213
    Batch 300/3960: loss=2.6985, acc=0.4282
    Batch 400/3960: loss=2.7091, acc=0.4251
    Batch 500/3960: loss=2.7224, acc=0.4187
    Batch 600/3960: loss=2.7125, acc=0.4224
    Batch 700/3960: loss=2.7162, acc=0.4200
    Batch 800/3960: loss=2.7184, acc=0.4196
    Batch 900/3960: loss=2.7076, acc=0.4186
    Batch 1000/3960: loss=2.7083, acc=0.4197
    Batch 1100/3960: loss=2.7093, acc=0.4184
    Batch 1200/3960: loss=2.7176, acc=0.4149
    Batch 1300/3960: loss=2.7226, acc=0.4138
    Batch 1400/3960: loss=2.7288, acc=0.4141
    Batch 1500/3960: loss=2.7194, acc=0.4149
    Batch 1600/3960: loss=2.7237, acc=0.4128
    Batch 1700/3960: loss=2.7213, acc=0.4139
    Batch 1800/3960: loss=2.7184, acc=0.4143
    Batch 1900/3960: loss=2.7179, acc=0.4142
    Batch 2000/3960: loss=2.7223, acc=0.4130
    Batch 2100/3960: loss=2.7242, acc=0.4136
    Batch 2200/3960: loss=2.7282, acc=0.4122
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7720


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5284
>>> Validation completed <<<
Epoch time: 34.88 mins

>>> Starting Epoch 47/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 47...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 47/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6414, acc=0.4675
    Batch 200/3960: loss=2.7111, acc=0.4303
    Batch 300/3960: loss=2.6875, acc=0.4309
    Batch 400/3960: loss=2.6814, acc=0.4314
    Batch 500/3960: loss=2.6954, acc=0.4264
    Batch 600/3960: loss=2.6868, acc=0.4306
    Batch 700/3960: loss=2.6805, acc=0.4285
    Batch 800/3960: loss=2.6826, acc=0.4330
    Batch 900/3960: loss=2.6828, acc=0.4320
    Batch 1000/3960: loss=2.6887, acc=0.4274
    Batch 1100/3960: loss=2.6908, acc=0.4276
    Batch 1200/3960: loss=2.6891, acc=0.4291
    Batch 1300/3960: loss=2.6876, acc=0.4308
    Batch 1400/3960: loss=2.6793, acc=0.4336
    Batch 1500/3960: loss=2.6729, acc=0.4368
    Batch 1600/3960: loss=2.6805, acc=0.4346
    Batch 1700/3960: loss=2.6776, acc=0.4351
    Batch 1800/3960: loss=2.6782, acc=0.4344
    Batch 1900/3960: loss=2.6809, acc=0.4322
    Batch 2000/3960: loss=2.6811, acc=0.4321
    Batch 2100/3960: loss=2.6890, acc=0.4317
    Batch 2200/3960: loss=2.6909, acc=0.4303
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7710


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5282
>>> Validation completed <<<
Epoch time: 24.36 mins

>>> Starting Epoch 48/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 48...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 48/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6904, acc=0.4189
    Batch 200/3960: loss=2.6908, acc=0.4142
    Batch 300/3960: loss=2.7077, acc=0.4200
    Batch 400/3960: loss=2.7102, acc=0.4101
    Batch 500/3960: loss=2.7041, acc=0.4174
    Batch 600/3960: loss=2.7037, acc=0.4151
    Batch 700/3960: loss=2.7056, acc=0.4151
    Batch 800/3960: loss=2.6975, acc=0.4186
    Batch 900/3960: loss=2.6931, acc=0.4218
    Batch 1000/3960: loss=2.6917, acc=0.4255
    Batch 1100/3960: loss=2.7001, acc=0.4236
    Batch 1200/3960: loss=2.6976, acc=0.4258
    Batch 1300/3960: loss=2.6970, acc=0.4261
    Batch 1400/3960: loss=2.7006, acc=0.4282
    Batch 1500/3960: loss=2.7044, acc=0.4285
    Batch 1600/3960: loss=2.6987, acc=0.4283
    Batch 1700/3960: loss=2.6957, acc=0.4295
    Batch 1800/3960: loss=2.6918, acc=0.4306
    Batch 1900/3960: loss=2.6909, acc=0.4298
    Batch 2000/3960: loss=2.6850, acc=0.4307
    Batch 2100/3960: loss=2.6860, acc=0.4291
    Batch 2200/3960: loss=2.6832, acc=0.4300
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7732


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5262
>>> Validation completed <<<
Epoch time: 24.27 mins

>>> Starting Epoch 49/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 49...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 49/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.7119, acc=0.4319
    Batch 200/3960: loss=2.7093, acc=0.4297
    Batch 300/3960: loss=2.6275, acc=0.4338
    Batch 400/3960: loss=2.6218, acc=0.4428
    Batch 500/3960: loss=2.6494, acc=0.4335
    Batch 600/3960: loss=2.6744, acc=0.4252
    Batch 700/3960: loss=2.6772, acc=0.4288
    Batch 800/3960: loss=2.6618, acc=0.4345
    Batch 900/3960: loss=2.6603, acc=0.4322
    Batch 1000/3960: loss=2.6581, acc=0.4320
    Batch 1100/3960: loss=2.6632, acc=0.4314
    Batch 1200/3960: loss=2.6687, acc=0.4304
    Batch 1300/3960: loss=2.6798, acc=0.4270
    Batch 1400/3960: loss=2.6865, acc=0.4255
    Batch 1500/3960: loss=2.6908, acc=0.4249
    Batch 1600/3960: loss=2.6910, acc=0.4262
    Batch 1700/3960: loss=2.6888, acc=0.4265
    Batch 1800/3960: loss=2.6851, acc=0.4278
    Batch 1900/3960: loss=2.6806, acc=0.4301
    Batch 2000/3960: loss=2.6814, acc=0.4311
    Batch 2100/3960: loss=2.6802, acc=0.4322
    Batch 2200/3960: loss=2.6797, acc=0.4292
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7776


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5224
>>> Validation completed <<<
Epoch time: 36.85 mins

>>> Starting Epoch 50/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 50...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 50/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6192, acc=0.4511
    Batch 200/3960: loss=2.6593, acc=0.4414
    Batch 300/3960: loss=2.6304, acc=0.4349
    Batch 400/3960: loss=2.6251, acc=0.4332
    Batch 500/3960: loss=2.6482, acc=0.4260
    Batch 600/3960: loss=2.6516, acc=0.4235
    Batch 700/3960: loss=2.6580, acc=0.4217
    Batch 800/3960: loss=2.6411, acc=0.4251
    Batch 900/3960: loss=2.6444, acc=0.4257
    Batch 1000/3960: loss=2.6501, acc=0.4271
    Batch 1100/3960: loss=2.6461, acc=0.4284
    Batch 1200/3960: loss=2.6447, acc=0.4313
    Batch 1300/3960: loss=2.6465, acc=0.4327
    Batch 1400/3960: loss=2.6473, acc=0.4331
    Batch 1500/3960: loss=2.6457, acc=0.4305
    Batch 1600/3960: loss=2.6429, acc=0.4328
    Batch 1700/3960: loss=2.6473, acc=0.4297
    Batch 1800/3960: loss=2.6448, acc=0.4310
    Batch 1900/3960: loss=2.6374, acc=0.4333
    Batch 2000/3960: loss=2.6384, acc=0.4330
    Batch 2100/3960: loss=2.6386, acc=0.4334
    Batch 2200/3960: loss=2.6432, acc=0.4313
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7828


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5204
>>> Validation completed <<<
Epoch time: 32.90 mins
Saved checkpoint

>>> Starting Epoch 51/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 51...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 51/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6638, acc=0.4672
    Batch 200/3960: loss=2.6290, acc=0.4540
    Batch 300/3960: loss=2.6356, acc=0.4471
    Batch 400/3960: loss=2.6552, acc=0.4384
    Batch 500/3960: loss=2.6508, acc=0.4336
    Batch 600/3960: loss=2.6562, acc=0.4305
    Batch 700/3960: loss=2.6591, acc=0.4364
    Batch 800/3960: loss=2.6651, acc=0.4406
    Batch 900/3960: loss=2.6568, acc=0.4393
    Batch 1000/3960: loss=2.6576, acc=0.4409
    Batch 1100/3960: loss=2.6533, acc=0.4412
    Batch 1200/3960: loss=2.6563, acc=0.4358
    Batch 1300/3960: loss=2.6538, acc=0.4352
    Batch 1400/3960: loss=2.6573, acc=0.4348
    Batch 1500/3960: loss=2.6515, acc=0.4359
    Batch 1600/3960: loss=2.6503, acc=0.4340
    Batch 1700/3960: loss=2.6555, acc=0.4329
    Batch 1800/3960: loss=2.6542, acc=0.4325
    Batch 1900/3960: loss=2.6518, acc=0.4340
    Batch 2000/3960: loss=2.6550, acc=0.4332
    Batch 2100/3960: loss=2.6536, acc=0.4323
    Batch 2200/3960: loss=2.6507, acc=0.4336
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7728


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5170
>>> Validation completed <<<
Epoch time: 39.41 mins

>>> Starting Epoch 52/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 52...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 52/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6825, acc=0.4613
    Batch 200/3960: loss=2.6686, acc=0.4445
    Batch 300/3960: loss=2.7046, acc=0.4433
    Batch 400/3960: loss=2.6932, acc=0.4416
    Batch 500/3960: loss=2.6794, acc=0.4467
    Batch 600/3960: loss=2.6539, acc=0.4504
    Batch 700/3960: loss=2.6439, acc=0.4494
    Batch 800/3960: loss=2.6458, acc=0.4498
    Batch 900/3960: loss=2.6304, acc=0.4492
    Batch 1000/3960: loss=2.6359, acc=0.4472
    Batch 1100/3960: loss=2.6365, acc=0.4447
    Batch 1200/3960: loss=2.6328, acc=0.4460
    Batch 1300/3960: loss=2.6303, acc=0.4467
    Batch 1400/3960: loss=2.6335, acc=0.4436
    Batch 1500/3960: loss=2.6297, acc=0.4471
    Batch 1600/3960: loss=2.6351, acc=0.4461
    Batch 1700/3960: loss=2.6323, acc=0.4467
    Batch 1800/3960: loss=2.6379, acc=0.4456
    Batch 1900/3960: loss=2.6396, acc=0.4450
    Batch 2000/3960: loss=2.6437, acc=0.4452
    Batch 2100/3960: loss=2.6410, acc=0.4441
    Batch 2200/3960: loss=2.6458, acc=0.4430
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7832


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5148
>>> Validation completed <<<
Epoch time: 35.31 mins

>>> Starting Epoch 53/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 53...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 53/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6498, acc=0.4127
    Batch 200/3960: loss=2.6798, acc=0.4052
    Batch 300/3960: loss=2.6640, acc=0.4259
    Batch 400/3960: loss=2.6578, acc=0.4320
    Batch 500/3960: loss=2.6714, acc=0.4249
    Batch 600/3960: loss=2.6706, acc=0.4273
    Batch 700/3960: loss=2.6856, acc=0.4298
    Batch 800/3960: loss=2.6873, acc=0.4310
    Batch 900/3960: loss=2.6860, acc=0.4278
    Batch 1000/3960: loss=2.6896, acc=0.4286
    Batch 1100/3960: loss=2.6830, acc=0.4305
    Batch 1200/3960: loss=2.6818, acc=0.4290
    Batch 1300/3960: loss=2.6671, acc=0.4328
    Batch 1400/3960: loss=2.6625, acc=0.4354
    Batch 1500/3960: loss=2.6675, acc=0.4345
    Batch 1600/3960: loss=2.6713, acc=0.4332
    Batch 1700/3960: loss=2.6713, acc=0.4333
    Batch 1800/3960: loss=2.6675, acc=0.4339
    Batch 1900/3960: loss=2.6669, acc=0.4351
    Batch 2000/3960: loss=2.6700, acc=0.4363
    Batch 2100/3960: loss=2.6631, acc=0.4374
    Batch 2200/3960: loss=2.6604, acc=0.4388
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7856


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5106
>>> Validation completed <<<
Epoch time: 26.28 mins

>>> Starting Epoch 54/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 54...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 54/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6241, acc=0.4180
    Batch 200/3960: loss=2.6245, acc=0.4209
    Batch 300/3960: loss=2.6189, acc=0.4218
    Batch 400/3960: loss=2.6138, acc=0.4256
    Batch 500/3960: loss=2.6235, acc=0.4346
    Batch 600/3960: loss=2.6192, acc=0.4418
    Batch 700/3960: loss=2.6224, acc=0.4443
    Batch 800/3960: loss=2.6417, acc=0.4426
    Batch 900/3960: loss=2.6373, acc=0.4425
    Batch 1000/3960: loss=2.6313, acc=0.4419
    Batch 1100/3960: loss=2.6297, acc=0.4392
    Batch 1200/3960: loss=2.6288, acc=0.4403
    Batch 1300/3960: loss=2.6347, acc=0.4384
    Batch 1400/3960: loss=2.6413, acc=0.4361
    Batch 1500/3960: loss=2.6403, acc=0.4378
    Batch 1600/3960: loss=2.6357, acc=0.4390
    Batch 1700/3960: loss=2.6293, acc=0.4430
    Batch 1800/3960: loss=2.6289, acc=0.4433
    Batch 1900/3960: loss=2.6247, acc=0.4446
    Batch 2000/3960: loss=2.6172, acc=0.4464
    Batch 2100/3960: loss=2.6142, acc=0.4459
    Batch 2200/3960: loss=2.6121, acc=0.4476
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7864


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5048
>>> Validation completed <<<
Epoch time: 26.86 mins

>>> Starting Epoch 55/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 55...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 55/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6635, acc=0.3976
    Batch 200/3960: loss=2.6444, acc=0.4153
    Batch 300/3960: loss=2.6775, acc=0.4215
    Batch 400/3960: loss=2.6482, acc=0.4200
    Batch 500/3960: loss=2.6371, acc=0.4204
    Batch 600/3960: loss=2.6255, acc=0.4244
    Batch 700/3960: loss=2.6251, acc=0.4289
    Batch 800/3960: loss=2.6290, acc=0.4328
    Batch 900/3960: loss=2.6337, acc=0.4315
    Batch 1000/3960: loss=2.6301, acc=0.4356
    Batch 1100/3960: loss=2.6340, acc=0.4326
    Batch 1200/3960: loss=2.6322, acc=0.4335
    Batch 1300/3960: loss=2.6265, acc=0.4356
    Batch 1400/3960: loss=2.6292, acc=0.4352
    Batch 1500/3960: loss=2.6224, acc=0.4371
    Batch 1600/3960: loss=2.6241, acc=0.4392
    Batch 1700/3960: loss=2.6181, acc=0.4442
    Batch 1800/3960: loss=2.6219, acc=0.4433
    Batch 1900/3960: loss=2.6309, acc=0.4417
    Batch 2000/3960: loss=2.6274, acc=0.4400
    Batch 2100/3960: loss=2.6232, acc=0.4414
    Batch 2200/3960: loss=2.6265, acc=0.4406
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7874


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.5012
>>> Validation completed <<<
Epoch time: 28.92 mins
Saved checkpoint

>>> Starting Epoch 56/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 56...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 56/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6370, acc=0.4115
    Batch 200/3960: loss=2.6080, acc=0.4285
    Batch 300/3960: loss=2.6049, acc=0.4385
    Batch 400/3960: loss=2.5952, acc=0.4504
    Batch 500/3960: loss=2.6078, acc=0.4537
    Batch 600/3960: loss=2.5940, acc=0.4537
    Batch 700/3960: loss=2.5857, acc=0.4595
    Batch 800/3960: loss=2.5906, acc=0.4581
    Batch 900/3960: loss=2.5956, acc=0.4567
    Batch 1000/3960: loss=2.5984, acc=0.4582
    Batch 1100/3960: loss=2.6003, acc=0.4586
    Batch 1200/3960: loss=2.6001, acc=0.4572
    Batch 1300/3960: loss=2.6060, acc=0.4578
    Batch 1400/3960: loss=2.6146, acc=0.4566
    Batch 1500/3960: loss=2.6089, acc=0.4535
    Batch 1600/3960: loss=2.6095, acc=0.4522
    Batch 1700/3960: loss=2.6150, acc=0.4498
    Batch 1800/3960: loss=2.6130, acc=0.4494
    Batch 1900/3960: loss=2.6127, acc=0.4490
    Batch 2000/3960: loss=2.6133, acc=0.4499
    Batch 2100/3960: loss=2.6105, acc=0.4500
    Batch 2200/3960: loss=2.6078, acc=0.4496
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7904


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4946
>>> Validation completed <<<
Epoch time: 38.53 mins

>>> Starting Epoch 57/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 57...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 57/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6710, acc=0.4678
    Batch 200/3960: loss=2.6291, acc=0.4723
    Batch 300/3960: loss=2.6171, acc=0.4600
    Batch 400/3960: loss=2.6243, acc=0.4525
    Batch 500/3960: loss=2.6211, acc=0.4512
    Batch 600/3960: loss=2.6061, acc=0.4574
    Batch 700/3960: loss=2.6034, acc=0.4541
    Batch 800/3960: loss=2.6034, acc=0.4579
    Batch 900/3960: loss=2.5990, acc=0.4570
    Batch 1000/3960: loss=2.6082, acc=0.4521
    Batch 1100/3960: loss=2.6023, acc=0.4532
    Batch 1200/3960: loss=2.6081, acc=0.4507
    Batch 1300/3960: loss=2.6110, acc=0.4510
    Batch 1400/3960: loss=2.6166, acc=0.4492
    Batch 1500/3960: loss=2.6210, acc=0.4481
    Batch 1600/3960: loss=2.6113, acc=0.4530
    Batch 1700/3960: loss=2.6057, acc=0.4520
    Batch 1800/3960: loss=2.6091, acc=0.4517
    Batch 1900/3960: loss=2.6105, acc=0.4527
    Batch 2000/3960: loss=2.6119, acc=0.4513
    Batch 2100/3960: loss=2.6123, acc=0.4490
    Batch 2200/3960: loss=2.6111, acc=0.4498
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7896


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4840
>>> Validation completed <<<
Epoch time: 23.57 mins

>>> Starting Epoch 58/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 58...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 58/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6046, acc=0.4452
    Batch 200/3960: loss=2.5765, acc=0.4555
    Batch 300/3960: loss=2.5712, acc=0.4540
    Batch 400/3960: loss=2.5977, acc=0.4454
    Batch 500/3960: loss=2.5658, acc=0.4530
    Batch 600/3960: loss=2.5777, acc=0.4487
    Batch 700/3960: loss=2.5788, acc=0.4507
    Batch 800/3960: loss=2.5865, acc=0.4492
    Batch 900/3960: loss=2.5865, acc=0.4461
    Batch 1000/3960: loss=2.5841, acc=0.4481
    Batch 1100/3960: loss=2.5830, acc=0.4492
    Batch 1200/3960: loss=2.5811, acc=0.4498
    Batch 1300/3960: loss=2.5822, acc=0.4492
    Batch 1400/3960: loss=2.5816, acc=0.4490
    Batch 1500/3960: loss=2.5840, acc=0.4475
    Batch 1600/3960: loss=2.5877, acc=0.4453
    Batch 1700/3960: loss=2.5914, acc=0.4443
    Batch 1800/3960: loss=2.5943, acc=0.4417
    Batch 1900/3960: loss=2.5921, acc=0.4409
    Batch 2000/3960: loss=2.5943, acc=0.4400
    Batch 2100/3960: loss=2.5950, acc=0.4412
    Batch 2200/3960: loss=2.5948, acc=0.4402
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7972


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4738
>>> Validation completed <<<
Epoch time: 22.19 mins

>>> Starting Epoch 59/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 59...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 59/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6371, acc=0.4564
    Batch 200/3960: loss=2.6111, acc=0.4576
    Batch 300/3960: loss=2.5823, acc=0.4512
    Batch 400/3960: loss=2.5925, acc=0.4427
    Batch 500/3960: loss=2.5988, acc=0.4416
    Batch 600/3960: loss=2.5894, acc=0.4438
    Batch 700/3960: loss=2.5987, acc=0.4416
    Batch 800/3960: loss=2.6091, acc=0.4384
    Batch 900/3960: loss=2.6211, acc=0.4373
    Batch 1000/3960: loss=2.6129, acc=0.4387
    Batch 1100/3960: loss=2.6133, acc=0.4410
    Batch 1200/3960: loss=2.6184, acc=0.4389
    Batch 1300/3960: loss=2.6114, acc=0.4420
    Batch 1400/3960: loss=2.5999, acc=0.4439
    Batch 1500/3960: loss=2.6010, acc=0.4443
    Batch 1600/3960: loss=2.5934, acc=0.4462
    Batch 1700/3960: loss=2.5970, acc=0.4460
    Batch 1800/3960: loss=2.5939, acc=0.4450
    Batch 1900/3960: loss=2.5927, acc=0.4445
    Batch 2000/3960: loss=2.5929, acc=0.4455
    Batch 2100/3960: loss=2.5932, acc=0.4443
    Batch 2200/3960: loss=2.5900, acc=0.4459
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7968


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4642
>>> Validation completed <<<
Epoch time: 27.57 mins

>>> Starting Epoch 60/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 60...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 60/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6207, acc=0.4341
    Batch 200/3960: loss=2.6290, acc=0.4437
    Batch 300/3960: loss=2.6021, acc=0.4441
    Batch 400/3960: loss=2.5953, acc=0.4451
    Batch 500/3960: loss=2.6034, acc=0.4451
    Batch 600/3960: loss=2.6076, acc=0.4481
    Batch 700/3960: loss=2.5899, acc=0.4557
    Batch 800/3960: loss=2.5866, acc=0.4583
    Batch 900/3960: loss=2.5881, acc=0.4578
    Batch 1000/3960: loss=2.5821, acc=0.4575
    Batch 1100/3960: loss=2.5872, acc=0.4547
    Batch 1200/3960: loss=2.5943, acc=0.4535
    Batch 1300/3960: loss=2.5996, acc=0.4527
    Batch 1400/3960: loss=2.5904, acc=0.4532
    Batch 1500/3960: loss=2.5873, acc=0.4534
    Batch 1600/3960: loss=2.5879, acc=0.4534
    Batch 1700/3960: loss=2.5960, acc=0.4534
    Batch 1800/3960: loss=2.5934, acc=0.4542
    Batch 1900/3960: loss=2.5955, acc=0.4558
    Batch 2000/3960: loss=2.5922, acc=0.4570
    Batch 2100/3960: loss=2.5879, acc=0.4580
    Batch 2200/3960: loss=2.5858, acc=0.4579
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7904


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4522
>>> Validation completed <<<
Epoch time: 31.79 mins
Saved checkpoint

>>> Starting Epoch 61/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 61...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 61/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5589, acc=0.4629
    Batch 200/3960: loss=2.5489, acc=0.4636
    Batch 300/3960: loss=2.5244, acc=0.4665
    Batch 400/3960: loss=2.5439, acc=0.4645
    Batch 500/3960: loss=2.5622, acc=0.4629
    Batch 600/3960: loss=2.5614, acc=0.4632
    Batch 700/3960: loss=2.5682, acc=0.4642
    Batch 800/3960: loss=2.5713, acc=0.4597
    Batch 900/3960: loss=2.5719, acc=0.4600
    Batch 1000/3960: loss=2.5689, acc=0.4593
    Batch 1100/3960: loss=2.5685, acc=0.4591
    Batch 1200/3960: loss=2.5713, acc=0.4583
    Batch 1300/3960: loss=2.5648, acc=0.4596
    Batch 1400/3960: loss=2.5655, acc=0.4581
    Batch 1500/3960: loss=2.5622, acc=0.4612
    Batch 1600/3960: loss=2.5589, acc=0.4622
    Batch 1700/3960: loss=2.5631, acc=0.4620
    Batch 1800/3960: loss=2.5624, acc=0.4616
    Batch 1900/3960: loss=2.5696, acc=0.4614
    Batch 2000/3960: loss=2.5736, acc=0.4630
    Batch 2100/3960: loss=2.5731, acc=0.4624
    Batch 2200/3960: loss=2.5794, acc=0.4608
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8006


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4422
>>> Validation completed <<<
Epoch time: 25.72 mins

>>> Starting Epoch 62/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 62...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 62/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5807, acc=0.4790
    Batch 200/3960: loss=2.6240, acc=0.4311
    Batch 300/3960: loss=2.5958, acc=0.4406
    Batch 400/3960: loss=2.6077, acc=0.4433
    Batch 500/3960: loss=2.6117, acc=0.4407
    Batch 600/3960: loss=2.6075, acc=0.4385
    Batch 700/3960: loss=2.5908, acc=0.4420
    Batch 800/3960: loss=2.5976, acc=0.4364
    Batch 900/3960: loss=2.5999, acc=0.4423
    Batch 1000/3960: loss=2.6021, acc=0.4435
    Batch 1100/3960: loss=2.5968, acc=0.4428
    Batch 1200/3960: loss=2.5950, acc=0.4446
    Batch 1300/3960: loss=2.5919, acc=0.4455
    Batch 1400/3960: loss=2.5864, acc=0.4472
    Batch 1500/3960: loss=2.5884, acc=0.4465
    Batch 1600/3960: loss=2.5831, acc=0.4482
    Batch 1700/3960: loss=2.5816, acc=0.4493
    Batch 1800/3960: loss=2.5877, acc=0.4478
    Batch 1900/3960: loss=2.5904, acc=0.4476
    Batch 2000/3960: loss=2.5876, acc=0.4487
    Batch 2100/3960: loss=2.5858, acc=0.4499
    Batch 2200/3960: loss=2.5870, acc=0.4479
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7912


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4330
>>> Validation completed <<<
Epoch time: 23.61 mins

>>> Starting Epoch 63/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 63...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 63/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4969, acc=0.4796
    Batch 200/3960: loss=2.5820, acc=0.4621
    Batch 300/3960: loss=2.5726, acc=0.4539
    Batch 400/3960: loss=2.5813, acc=0.4601
    Batch 500/3960: loss=2.5743, acc=0.4590
    Batch 600/3960: loss=2.5806, acc=0.4617
    Batch 700/3960: loss=2.5725, acc=0.4608
    Batch 800/3960: loss=2.5802, acc=0.4599
    Batch 900/3960: loss=2.5753, acc=0.4613
    Batch 1000/3960: loss=2.5778, acc=0.4604
    Batch 1100/3960: loss=2.5819, acc=0.4564
    Batch 1200/3960: loss=2.5793, acc=0.4583
    Batch 1300/3960: loss=2.5845, acc=0.4558
    Batch 1400/3960: loss=2.5883, acc=0.4552
    Batch 1500/3960: loss=2.5856, acc=0.4552
    Batch 1600/3960: loss=2.5788, acc=0.4561
    Batch 1700/3960: loss=2.5791, acc=0.4553
    Batch 1800/3960: loss=2.5844, acc=0.4526
    Batch 1900/3960: loss=2.5826, acc=0.4518
    Batch 2000/3960: loss=2.5777, acc=0.4536
    Batch 2100/3960: loss=2.5796, acc=0.4511
    Batch 2200/3960: loss=2.5794, acc=0.4522
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.7942


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4220
>>> Validation completed <<<
Epoch time: 30.94 mins

>>> Starting Epoch 64/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 64...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 64/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5493, acc=0.4638
    Batch 200/3960: loss=2.5680, acc=0.4557
    Batch 300/3960: loss=2.5602, acc=0.4535
    Batch 400/3960: loss=2.5719, acc=0.4527
    Batch 500/3960: loss=2.5771, acc=0.4510
    Batch 600/3960: loss=2.5844, acc=0.4502
    Batch 700/3960: loss=2.5786, acc=0.4485
    Batch 800/3960: loss=2.5731, acc=0.4486
    Batch 900/3960: loss=2.5687, acc=0.4502
    Batch 1000/3960: loss=2.5691, acc=0.4498
    Batch 1100/3960: loss=2.5738, acc=0.4504
    Batch 1200/3960: loss=2.5858, acc=0.4489
    Batch 1300/3960: loss=2.5859, acc=0.4491
    Batch 1400/3960: loss=2.5825, acc=0.4506
    Batch 1500/3960: loss=2.5842, acc=0.4504
    Batch 1600/3960: loss=2.5842, acc=0.4501
    Batch 1700/3960: loss=2.5835, acc=0.4509
    Batch 1800/3960: loss=2.5800, acc=0.4533
    Batch 1900/3960: loss=2.5816, acc=0.4514
    Batch 2000/3960: loss=2.5832, acc=0.4492
    Batch 2100/3960: loss=2.5800, acc=0.4523
    Batch 2200/3960: loss=2.5803, acc=0.4497
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8044


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.4106
>>> Validation completed <<<
Epoch time: 23.82 mins

>>> Starting Epoch 65/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 65...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 65/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4808, acc=0.4814
    Batch 200/3960: loss=2.4990, acc=0.4683
    Batch 300/3960: loss=2.5455, acc=0.4708
    Batch 400/3960: loss=2.5743, acc=0.4635
    Batch 500/3960: loss=2.5874, acc=0.4598
    Batch 600/3960: loss=2.6015, acc=0.4567
    Batch 700/3960: loss=2.5791, acc=0.4599
    Batch 800/3960: loss=2.5710, acc=0.4580
    Batch 900/3960: loss=2.5715, acc=0.4569
    Batch 1000/3960: loss=2.5707, acc=0.4562
    Batch 1100/3960: loss=2.5720, acc=0.4603
    Batch 1200/3960: loss=2.5687, acc=0.4600
    Batch 1300/3960: loss=2.5695, acc=0.4568
    Batch 1400/3960: loss=2.5683, acc=0.4590
    Batch 1500/3960: loss=2.5688, acc=0.4609
    Batch 1600/3960: loss=2.5657, acc=0.4599
    Batch 1700/3960: loss=2.5653, acc=0.4600
    Batch 1800/3960: loss=2.5645, acc=0.4581
    Batch 1900/3960: loss=2.5616, acc=0.4591
    Batch 2000/3960: loss=2.5552, acc=0.4621
    Batch 2100/3960: loss=2.5558, acc=0.4597
    Batch 2200/3960: loss=2.5589, acc=0.4593
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8056


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3990
>>> Validation completed <<<
Epoch time: 23.90 mins
Saved checkpoint

>>> Starting Epoch 66/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 66...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 66/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5146, acc=0.4623
    Batch 200/3960: loss=2.5443, acc=0.4434
    Batch 300/3960: loss=2.5663, acc=0.4472
    Batch 400/3960: loss=2.5623, acc=0.4556
    Batch 500/3960: loss=2.5734, acc=0.4479
    Batch 600/3960: loss=2.5387, acc=0.4571
    Batch 700/3960: loss=2.5458, acc=0.4500
    Batch 800/3960: loss=2.5412, acc=0.4585
    Batch 900/3960: loss=2.5375, acc=0.4632
    Batch 1000/3960: loss=2.5315, acc=0.4659
    Batch 1100/3960: loss=2.5345, acc=0.4621
    Batch 1200/3960: loss=2.5364, acc=0.4616
    Batch 1300/3960: loss=2.5295, acc=0.4632
    Batch 1400/3960: loss=2.5327, acc=0.4626
    Batch 1500/3960: loss=2.5267, acc=0.4639
    Batch 1600/3960: loss=2.5297, acc=0.4629
    Batch 1700/3960: loss=2.5327, acc=0.4620
    Batch 1800/3960: loss=2.5317, acc=0.4614
    Batch 1900/3960: loss=2.5295, acc=0.4617
    Batch 2000/3960: loss=2.5288, acc=0.4611
    Batch 2100/3960: loss=2.5297, acc=0.4610
    Batch 2200/3960: loss=2.5317, acc=0.4604
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8038


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3882
>>> Validation completed <<<
Epoch time: 24.11 mins

>>> Starting Epoch 67/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 67...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 67/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5884, acc=0.4493
    Batch 200/3960: loss=2.6012, acc=0.4504
    Batch 300/3960: loss=2.5902, acc=0.4545
    Batch 400/3960: loss=2.5736, acc=0.4603
    Batch 500/3960: loss=2.5596, acc=0.4623
    Batch 600/3960: loss=2.5491, acc=0.4700
    Batch 700/3960: loss=2.5429, acc=0.4713
    Batch 800/3960: loss=2.5465, acc=0.4693
    Batch 900/3960: loss=2.5404, acc=0.4668
    Batch 1000/3960: loss=2.5374, acc=0.4653
    Batch 1100/3960: loss=2.5220, acc=0.4690
    Batch 1200/3960: loss=2.5199, acc=0.4693
    Batch 1300/3960: loss=2.5105, acc=0.4693
    Batch 1400/3960: loss=2.5121, acc=0.4659
    Batch 1500/3960: loss=2.5116, acc=0.4657
    Batch 1600/3960: loss=2.5081, acc=0.4654
    Batch 1700/3960: loss=2.5153, acc=0.4650
    Batch 1800/3960: loss=2.5195, acc=0.4668
    Batch 1900/3960: loss=2.5252, acc=0.4658
    Batch 2000/3960: loss=2.5295, acc=0.4642
    Batch 2100/3960: loss=2.5329, acc=0.4607
    Batch 2200/3960: loss=2.5342, acc=0.4572
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8062


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3752
>>> Validation completed <<<
Epoch time: 24.05 mins

>>> Starting Epoch 68/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 68...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 68/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4830, acc=0.4638
    Batch 200/3960: loss=2.4874, acc=0.4781
    Batch 300/3960: loss=2.4855, acc=0.4814
    Batch 400/3960: loss=2.4989, acc=0.4794
    Batch 500/3960: loss=2.4933, acc=0.4781
    Batch 600/3960: loss=2.4966, acc=0.4760
    Batch 700/3960: loss=2.5029, acc=0.4744
    Batch 800/3960: loss=2.5114, acc=0.4719
    Batch 900/3960: loss=2.5164, acc=0.4721
    Batch 1000/3960: loss=2.5227, acc=0.4732
    Batch 1100/3960: loss=2.5271, acc=0.4771
    Batch 1200/3960: loss=2.5271, acc=0.4746
    Batch 1300/3960: loss=2.5299, acc=0.4727
    Batch 1400/3960: loss=2.5269, acc=0.4728
    Batch 1500/3960: loss=2.5270, acc=0.4734
    Batch 1600/3960: loss=2.5295, acc=0.4712
    Batch 1700/3960: loss=2.5286, acc=0.4710
    Batch 1800/3960: loss=2.5300, acc=0.4725
    Batch 1900/3960: loss=2.5270, acc=0.4733
    Batch 2000/3960: loss=2.5267, acc=0.4733
    Batch 2100/3960: loss=2.5267, acc=0.4719
    Batch 2200/3960: loss=2.5275, acc=0.4707
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8120


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3588
>>> Validation completed <<<
Epoch time: 24.11 mins

>>> Starting Epoch 69/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 69...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 69/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4786, acc=0.4322
    Batch 200/3960: loss=2.4722, acc=0.4325
    Batch 300/3960: loss=2.4911, acc=0.4492
    Batch 400/3960: loss=2.5103, acc=0.4556
    Batch 500/3960: loss=2.5326, acc=0.4557
    Batch 600/3960: loss=2.5192, acc=0.4567
    Batch 700/3960: loss=2.5196, acc=0.4575
    Batch 800/3960: loss=2.5197, acc=0.4554
    Batch 900/3960: loss=2.5240, acc=0.4523
    Batch 1000/3960: loss=2.5139, acc=0.4579
    Batch 1100/3960: loss=2.5181, acc=0.4569
    Batch 1200/3960: loss=2.5235, acc=0.4570
    Batch 1300/3960: loss=2.5250, acc=0.4567
    Batch 1400/3960: loss=2.5209, acc=0.4585
    Batch 1500/3960: loss=2.5207, acc=0.4615
    Batch 1600/3960: loss=2.5288, acc=0.4581
    Batch 1700/3960: loss=2.5248, acc=0.4598
    Batch 1800/3960: loss=2.5250, acc=0.4610
    Batch 1900/3960: loss=2.5271, acc=0.4617
    Batch 2000/3960: loss=2.5276, acc=0.4610
    Batch 2100/3960: loss=2.5320, acc=0.4597
    Batch 2200/3960: loss=2.5307, acc=0.4601
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8064


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3406
>>> Validation completed <<<
Epoch time: 24.11 mins

>>> Starting Epoch 70/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 70...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 70/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5884, acc=0.4564
    Batch 200/3960: loss=2.5132, acc=0.4639
    Batch 300/3960: loss=2.4891, acc=0.4827
    Batch 400/3960: loss=2.5078, acc=0.4901
    Batch 500/3960: loss=2.5140, acc=0.4916
    Batch 600/3960: loss=2.5147, acc=0.4880
    Batch 700/3960: loss=2.5102, acc=0.4854
    Batch 800/3960: loss=2.4921, acc=0.4884
    Batch 900/3960: loss=2.5018, acc=0.4857
    Batch 1000/3960: loss=2.5097, acc=0.4839
    Batch 1100/3960: loss=2.5060, acc=0.4825
    Batch 1200/3960: loss=2.5156, acc=0.4807
    Batch 1300/3960: loss=2.5088, acc=0.4778
    Batch 1400/3960: loss=2.5080, acc=0.4778
    Batch 1500/3960: loss=2.5081, acc=0.4770
    Batch 1600/3960: loss=2.5024, acc=0.4791
    Batch 1700/3960: loss=2.5026, acc=0.4783
    Batch 1800/3960: loss=2.5040, acc=0.4760
    Batch 1900/3960: loss=2.5055, acc=0.4757
    Batch 2000/3960: loss=2.5039, acc=0.4760
    Batch 2100/3960: loss=2.5031, acc=0.4763
    Batch 2200/3960: loss=2.4981, acc=0.4756
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8104


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3220
>>> Validation completed <<<
Epoch time: 24.04 mins
Saved checkpoint

>>> Starting Epoch 71/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 71...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 71/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4257, acc=0.4867
    Batch 200/3960: loss=2.4498, acc=0.4997
    Batch 300/3960: loss=2.5129, acc=0.4783
    Batch 400/3960: loss=2.5112, acc=0.4752
    Batch 500/3960: loss=2.5155, acc=0.4737
    Batch 600/3960: loss=2.5138, acc=0.4716
    Batch 700/3960: loss=2.5091, acc=0.4695
    Batch 800/3960: loss=2.4916, acc=0.4774
    Batch 900/3960: loss=2.5032, acc=0.4772
    Batch 1000/3960: loss=2.5062, acc=0.4772
    Batch 1100/3960: loss=2.4983, acc=0.4751
    Batch 1200/3960: loss=2.5003, acc=0.4744
    Batch 1300/3960: loss=2.5004, acc=0.4748
    Batch 1400/3960: loss=2.4948, acc=0.4743
    Batch 1500/3960: loss=2.4935, acc=0.4750
    Batch 1600/3960: loss=2.4963, acc=0.4755
    Batch 1700/3960: loss=2.4939, acc=0.4756
    Batch 1800/3960: loss=2.4898, acc=0.4775
    Batch 1900/3960: loss=2.4917, acc=0.4770
    Batch 2000/3960: loss=2.4930, acc=0.4779
    Batch 2100/3960: loss=2.4896, acc=0.4783
    Batch 2200/3960: loss=2.4874, acc=0.4793
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8210


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3064
>>> Validation completed <<<
Epoch time: 25.81 mins

>>> Starting Epoch 72/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 72...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 72/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4358, acc=0.4836
    Batch 200/3960: loss=2.4331, acc=0.5002
    Batch 300/3960: loss=2.4639, acc=0.4971
    Batch 400/3960: loss=2.4560, acc=0.4984
    Batch 500/3960: loss=2.4429, acc=0.5057
    Batch 600/3960: loss=2.4652, acc=0.4979
    Batch 700/3960: loss=2.4630, acc=0.4981
    Batch 800/3960: loss=2.4574, acc=0.4966
    Batch 900/3960: loss=2.4756, acc=0.4908
    Batch 1000/3960: loss=2.4697, acc=0.4921
    Batch 1100/3960: loss=2.4713, acc=0.4918
    Batch 1200/3960: loss=2.4783, acc=0.4888
    Batch 1300/3960: loss=2.4790, acc=0.4890
    Batch 1400/3960: loss=2.4672, acc=0.4909
    Batch 1500/3960: loss=2.4701, acc=0.4897
    Batch 1600/3960: loss=2.4699, acc=0.4908
    Batch 1700/3960: loss=2.4740, acc=0.4886
    Batch 1800/3960: loss=2.4698, acc=0.4888
    Batch 1900/3960: loss=2.4703, acc=0.4887
    Batch 2000/3960: loss=2.4709, acc=0.4858
    Batch 2100/3960: loss=2.4768, acc=0.4844
    Batch 2200/3960: loss=2.4805, acc=0.4840
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8234


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2908
>>> Validation completed <<<
Epoch time: 24.04 mins

>>> Starting Epoch 73/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 73...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 73/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4399, acc=0.4882
    Batch 200/3960: loss=2.4608, acc=0.4695
    Batch 300/3960: loss=2.4602, acc=0.4746
    Batch 400/3960: loss=2.4478, acc=0.4814
    Batch 500/3960: loss=2.4556, acc=0.4797
    Batch 600/3960: loss=2.4608, acc=0.4804
    Batch 700/3960: loss=2.4802, acc=0.4749
    Batch 800/3960: loss=2.4817, acc=0.4749
    Batch 900/3960: loss=2.4829, acc=0.4739
    Batch 1000/3960: loss=2.4833, acc=0.4771
    Batch 1100/3960: loss=2.4807, acc=0.4758
    Batch 1200/3960: loss=2.4828, acc=0.4735
    Batch 1300/3960: loss=2.4828, acc=0.4769
    Batch 1400/3960: loss=2.4783, acc=0.4760
    Batch 1500/3960: loss=2.4806, acc=0.4775
    Batch 1600/3960: loss=2.4835, acc=0.4775
    Batch 1700/3960: loss=2.4811, acc=0.4748
    Batch 1800/3960: loss=2.4839, acc=0.4740
    Batch 1900/3960: loss=2.4863, acc=0.4749
    Batch 2000/3960: loss=2.4867, acc=0.4739
    Batch 2100/3960: loss=2.4856, acc=0.4735
    Batch 2200/3960: loss=2.4804, acc=0.4754
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8190


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2716
>>> Validation completed <<<
Epoch time: 24.05 mins

>>> Starting Epoch 74/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 74...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 74/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4056, acc=0.4839
    Batch 200/3960: loss=2.4053, acc=0.5093
    Batch 300/3960: loss=2.4087, acc=0.4930
    Batch 400/3960: loss=2.4416, acc=0.4881
    Batch 500/3960: loss=2.4606, acc=0.4836
    Batch 600/3960: loss=2.4631, acc=0.4864
    Batch 700/3960: loss=2.4543, acc=0.4930
    Batch 800/3960: loss=2.4643, acc=0.4882
    Batch 900/3960: loss=2.4608, acc=0.4920
    Batch 1000/3960: loss=2.4590, acc=0.4966
    Batch 1100/3960: loss=2.4588, acc=0.4936
    Batch 1200/3960: loss=2.4539, acc=0.4957
    Batch 1300/3960: loss=2.4616, acc=0.4948
    Batch 1400/3960: loss=2.4596, acc=0.4934
    Batch 1500/3960: loss=2.4655, acc=0.4927
    Batch 1600/3960: loss=2.4695, acc=0.4920
    Batch 1700/3960: loss=2.4658, acc=0.4912
    Batch 1800/3960: loss=2.4647, acc=0.4893
    Batch 1900/3960: loss=2.4671, acc=0.4899
    Batch 2000/3960: loss=2.4700, acc=0.4872
    Batch 2100/3960: loss=2.4688, acc=0.4867
    Batch 2200/3960: loss=2.4669, acc=0.4872
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8170


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2532
>>> Validation completed <<<
Epoch time: 35.59 mins

>>> Starting Epoch 75/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 75...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 75/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4547, acc=0.4830
    Batch 200/3960: loss=2.5037, acc=0.4862
    Batch 300/3960: loss=2.5198, acc=0.4716
    Batch 400/3960: loss=2.5005, acc=0.4676
    Batch 500/3960: loss=2.5006, acc=0.4622
    Batch 600/3960: loss=2.5059, acc=0.4572
    Batch 700/3960: loss=2.4887, acc=0.4663
    Batch 800/3960: loss=2.4725, acc=0.4753
    Batch 900/3960: loss=2.4737, acc=0.4737
    Batch 1000/3960: loss=2.4769, acc=0.4712
    Batch 1100/3960: loss=2.4892, acc=0.4703
    Batch 1200/3960: loss=2.4809, acc=0.4686
    Batch 1300/3960: loss=2.4788, acc=0.4696
    Batch 1400/3960: loss=2.4691, acc=0.4736
    Batch 1500/3960: loss=2.4734, acc=0.4735
    Batch 1600/3960: loss=2.4749, acc=0.4739
    Batch 1700/3960: loss=2.4782, acc=0.4742
    Batch 1800/3960: loss=2.4754, acc=0.4759
    Batch 1900/3960: loss=2.4753, acc=0.4747
    Batch 2000/3960: loss=2.4759, acc=0.4752
    Batch 2100/3960: loss=2.4753, acc=0.4767
    Batch 2200/3960: loss=2.4767, acc=0.4765
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8208


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2358
>>> Validation completed <<<
Epoch time: 40.73 mins
Saved checkpoint

>>> Starting Epoch 76/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 76...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 76/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.5051, acc=0.4530
    Batch 200/3960: loss=2.5019, acc=0.4790
    Batch 300/3960: loss=2.4623, acc=0.4849
    Batch 400/3960: loss=2.4796, acc=0.4876
    Batch 500/3960: loss=2.4905, acc=0.4813
    Batch 600/3960: loss=2.4690, acc=0.4889
    Batch 700/3960: loss=2.4710, acc=0.4864
    Batch 800/3960: loss=2.4758, acc=0.4885
    Batch 900/3960: loss=2.4700, acc=0.4879
    Batch 1000/3960: loss=2.4693, acc=0.4858
    Batch 1100/3960: loss=2.4576, acc=0.4920
    Batch 1200/3960: loss=2.4596, acc=0.4904
    Batch 1300/3960: loss=2.4630, acc=0.4898
    Batch 1400/3960: loss=2.4625, acc=0.4890
    Batch 1500/3960: loss=2.4678, acc=0.4884
    Batch 1600/3960: loss=2.4663, acc=0.4874
    Batch 1700/3960: loss=2.4599, acc=0.4903
    Batch 1800/3960: loss=2.4603, acc=0.4892
    Batch 1900/3960: loss=2.4610, acc=0.4915
    Batch 2000/3960: loss=2.4595, acc=0.4939
    Batch 2100/3960: loss=2.4574, acc=0.4960
    Batch 2200/3960: loss=2.4565, acc=0.4954
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8182


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2216
>>> Validation completed <<<
Epoch time: 28.58 mins

>>> Starting Epoch 77/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 77...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 77/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.6070, acc=0.4886
    Batch 200/3960: loss=2.4919, acc=0.5236
    Batch 300/3960: loss=2.4510, acc=0.5183
    Batch 400/3960: loss=2.4536, acc=0.5125
    Batch 500/3960: loss=2.4534, acc=0.5037
    Batch 600/3960: loss=2.4777, acc=0.4959
    Batch 700/3960: loss=2.4776, acc=0.4983
    Batch 800/3960: loss=2.4922, acc=0.4945
    Batch 900/3960: loss=2.4903, acc=0.4976
    Batch 1000/3960: loss=2.4767, acc=0.4995
    Batch 1100/3960: loss=2.4808, acc=0.4982
    Batch 1200/3960: loss=2.4926, acc=0.4917
    Batch 1300/3960: loss=2.4988, acc=0.4889
    Batch 1400/3960: loss=2.4901, acc=0.4889
    Batch 1500/3960: loss=2.4778, acc=0.4936
    Batch 1600/3960: loss=2.4791, acc=0.4922
    Batch 1700/3960: loss=2.4784, acc=0.4930
    Batch 1800/3960: loss=2.4718, acc=0.4921
    Batch 1900/3960: loss=2.4762, acc=0.4878
    Batch 2000/3960: loss=2.4749, acc=0.4888
    Batch 2100/3960: loss=2.4771, acc=0.4885
    Batch 2200/3960: loss=2.4735, acc=0.4880
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8248


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2072
>>> Validation completed <<<
Epoch time: 22.75 mins

>>> Starting Epoch 78/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 78...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 78/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4647, acc=0.5266
    Batch 200/3960: loss=2.4480, acc=0.4980
    Batch 300/3960: loss=2.4473, acc=0.4981
    Batch 400/3960: loss=2.4344, acc=0.4958
    Batch 500/3960: loss=2.4262, acc=0.5014
    Batch 600/3960: loss=2.4209, acc=0.5046
    Batch 700/3960: loss=2.4150, acc=0.4962
    Batch 800/3960: loss=2.4210, acc=0.4882
    Batch 900/3960: loss=2.4166, acc=0.4893
    Batch 1000/3960: loss=2.4141, acc=0.4928
    Batch 1100/3960: loss=2.4082, acc=0.4967
    Batch 1200/3960: loss=2.4179, acc=0.4944
    Batch 1300/3960: loss=2.4238, acc=0.4928
    Batch 1400/3960: loss=2.4253, acc=0.4915
    Batch 1500/3960: loss=2.4286, acc=0.4917
    Batch 1600/3960: loss=2.4256, acc=0.4918
    Batch 1700/3960: loss=2.4286, acc=0.4921
    Batch 1800/3960: loss=2.4318, acc=0.4912
    Batch 1900/3960: loss=2.4366, acc=0.4917
    Batch 2000/3960: loss=2.4377, acc=0.4912
    Batch 2100/3960: loss=2.4353, acc=0.4909
    Batch 2200/3960: loss=2.4328, acc=0.4944
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8230


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1940
>>> Validation completed <<<
Epoch time: 22.79 mins

>>> Starting Epoch 79/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 79...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 79/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3779, acc=0.4855
    Batch 200/3960: loss=2.4249, acc=0.4725
    Batch 300/3960: loss=2.4440, acc=0.4739
    Batch 400/3960: loss=2.4539, acc=0.4778
    Batch 500/3960: loss=2.4361, acc=0.4813
    Batch 600/3960: loss=2.4269, acc=0.4830
    Batch 700/3960: loss=2.4182, acc=0.4885
    Batch 800/3960: loss=2.4284, acc=0.4877
    Batch 900/3960: loss=2.4343, acc=0.4851
    Batch 1000/3960: loss=2.4352, acc=0.4913
    Batch 1100/3960: loss=2.4450, acc=0.4896
    Batch 1200/3960: loss=2.4439, acc=0.4873
    Batch 1300/3960: loss=2.4451, acc=0.4880
    Batch 1400/3960: loss=2.4398, acc=0.4917
    Batch 1500/3960: loss=2.4392, acc=0.4934
    Batch 1600/3960: loss=2.4420, acc=0.4920
    Batch 1700/3960: loss=2.4467, acc=0.4927
    Batch 1800/3960: loss=2.4459, acc=0.4930
    Batch 1900/3960: loss=2.4457, acc=0.4927
    Batch 2000/3960: loss=2.4429, acc=0.4932
    Batch 2100/3960: loss=2.4389, acc=0.4956
    Batch 2200/3960: loss=2.4336, acc=0.4965
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8164


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1826
>>> Validation completed <<<
Epoch time: 22.79 mins

>>> Starting Epoch 80/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 80...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 80/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4081, acc=0.4725
    Batch 200/3960: loss=2.4500, acc=0.4605
    Batch 300/3960: loss=2.4560, acc=0.4676
    Batch 400/3960: loss=2.4450, acc=0.4786
    Batch 500/3960: loss=2.4550, acc=0.4792
    Batch 600/3960: loss=2.4686, acc=0.4735
    Batch 700/3960: loss=2.4584, acc=0.4765
    Batch 800/3960: loss=2.4501, acc=0.4775
    Batch 900/3960: loss=2.4500, acc=0.4800
    Batch 1000/3960: loss=2.4410, acc=0.4830
    Batch 1100/3960: loss=2.4411, acc=0.4844
    Batch 1200/3960: loss=2.4468, acc=0.4843
    Batch 1300/3960: loss=2.4572, acc=0.4825
    Batch 1400/3960: loss=2.4556, acc=0.4843
    Batch 1500/3960: loss=2.4564, acc=0.4832
    Batch 1600/3960: loss=2.4535, acc=0.4850
    Batch 1700/3960: loss=2.4533, acc=0.4835
    Batch 1800/3960: loss=2.4459, acc=0.4837
    Batch 1900/3960: loss=2.4467, acc=0.4828
    Batch 2000/3960: loss=2.4446, acc=0.4867
    Batch 2100/3960: loss=2.4397, acc=0.4878
    Batch 2200/3960: loss=2.4357, acc=0.4875
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8294


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1718
>>> Validation completed <<<
Epoch time: 22.79 mins
Saved checkpoint

>>> Starting Epoch 81/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 81...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 81/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4364, acc=0.4650
    Batch 200/3960: loss=2.4281, acc=0.4667
    Batch 300/3960: loss=2.4290, acc=0.4705
    Batch 400/3960: loss=2.4224, acc=0.4766
    Batch 500/3960: loss=2.4235, acc=0.4789
    Batch 600/3960: loss=2.4088, acc=0.4765
    Batch 700/3960: loss=2.4096, acc=0.4831
    Batch 800/3960: loss=2.4080, acc=0.4918
    Batch 900/3960: loss=2.4042, acc=0.4895
    Batch 1000/3960: loss=2.3945, acc=0.4898
    Batch 1100/3960: loss=2.4018, acc=0.4871
    Batch 1200/3960: loss=2.4073, acc=0.4874
    Batch 1300/3960: loss=2.4029, acc=0.4907
    Batch 1400/3960: loss=2.4033, acc=0.4911
    Batch 1500/3960: loss=2.4067, acc=0.4940
    Batch 1600/3960: loss=2.4035, acc=0.4959
    Batch 1700/3960: loss=2.4052, acc=0.4963
    Batch 1800/3960: loss=2.4048, acc=0.4984
    Batch 1900/3960: loss=2.4016, acc=0.4974
    Batch 2000/3960: loss=2.4068, acc=0.4974
    Batch 2100/3960: loss=2.4054, acc=0.4979
    Batch 2200/3960: loss=2.4101, acc=0.4975
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8304


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1618
>>> Validation completed <<<
Epoch time: 34.81 mins

>>> Starting Epoch 82/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 82...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 82/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3177, acc=0.4700
    Batch 200/3960: loss=2.3246, acc=0.5098
    Batch 300/3960: loss=2.3344, acc=0.5090
    Batch 400/3960: loss=2.3294, acc=0.5039
    Batch 500/3960: loss=2.3345, acc=0.5102
    Batch 600/3960: loss=2.3468, acc=0.5014
    Batch 700/3960: loss=2.3512, acc=0.4975
    Batch 800/3960: loss=2.3511, acc=0.4987
    Batch 900/3960: loss=2.3515, acc=0.5024
    Batch 1000/3960: loss=2.3470, acc=0.5044
    Batch 1100/3960: loss=2.3611, acc=0.5027
    Batch 1200/3960: loss=2.3666, acc=0.5051
    Batch 1300/3960: loss=2.3719, acc=0.5049
    Batch 1400/3960: loss=2.3732, acc=0.5088
    Batch 1500/3960: loss=2.3809, acc=0.5068
    Batch 1600/3960: loss=2.3871, acc=0.5057
    Batch 1700/3960: loss=2.3956, acc=0.5018
    Batch 1800/3960: loss=2.4018, acc=0.5014
    Batch 1900/3960: loss=2.4017, acc=0.5010
    Batch 2000/3960: loss=2.4035, acc=0.5000
    Batch 2100/3960: loss=2.4016, acc=0.4995
    Batch 2200/3960: loss=2.4024, acc=0.4978
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8308


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1542
>>> Validation completed <<<
Epoch time: 39.14 mins

>>> Starting Epoch 83/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 83...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 83/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4191, acc=0.5019
    Batch 200/3960: loss=2.3896, acc=0.5034
    Batch 300/3960: loss=2.3761, acc=0.5086
    Batch 400/3960: loss=2.3886, acc=0.5027
    Batch 500/3960: loss=2.3848, acc=0.5011
    Batch 600/3960: loss=2.3880, acc=0.4975
    Batch 700/3960: loss=2.3801, acc=0.5058
    Batch 800/3960: loss=2.3731, acc=0.5085
    Batch 900/3960: loss=2.3808, acc=0.5073
    Batch 1000/3960: loss=2.3792, acc=0.5037
    Batch 1100/3960: loss=2.3774, acc=0.5030
    Batch 1200/3960: loss=2.3866, acc=0.5021
    Batch 1300/3960: loss=2.3813, acc=0.5055
    Batch 1400/3960: loss=2.3851, acc=0.5031
    Batch 1500/3960: loss=2.3841, acc=0.5044
    Batch 1600/3960: loss=2.3795, acc=0.5041
    Batch 1700/3960: loss=2.3781, acc=0.5046
    Batch 1800/3960: loss=2.3798, acc=0.5052
    Batch 1900/3960: loss=2.3833, acc=0.5051
    Batch 2000/3960: loss=2.3836, acc=0.5034
    Batch 2100/3960: loss=2.3869, acc=0.5022
    Batch 2200/3960: loss=2.3889, acc=0.4998
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8356


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1468
>>> Validation completed <<<
Epoch time: 22.65 mins

>>> Starting Epoch 84/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 84...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 84/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.4487, acc=0.4691
    Batch 200/3960: loss=2.4198, acc=0.4810
    Batch 300/3960: loss=2.4047, acc=0.4863
    Batch 400/3960: loss=2.4070, acc=0.4828
    Batch 500/3960: loss=2.4299, acc=0.4767
    Batch 600/3960: loss=2.4334, acc=0.4790
    Batch 700/3960: loss=2.4192, acc=0.4807
    Batch 800/3960: loss=2.4166, acc=0.4841
    Batch 900/3960: loss=2.4202, acc=0.4890
    Batch 1000/3960: loss=2.4133, acc=0.4868
    Batch 1100/3960: loss=2.4071, acc=0.4888
    Batch 1200/3960: loss=2.4096, acc=0.4881
    Batch 1300/3960: loss=2.4060, acc=0.4870
    Batch 1400/3960: loss=2.3992, acc=0.4893
    Batch 1500/3960: loss=2.3957, acc=0.4912
    Batch 1600/3960: loss=2.3925, acc=0.4937
    Batch 1700/3960: loss=2.3942, acc=0.4956
    Batch 1800/3960: loss=2.3949, acc=0.4954
    Batch 1900/3960: loss=2.3973, acc=0.4954
    Batch 2000/3960: loss=2.3944, acc=0.4965
    Batch 2100/3960: loss=2.3933, acc=0.4957
    Batch 2200/3960: loss=2.3876, acc=0.4959
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8356


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1386
>>> Validation completed <<<
Epoch time: 36.01 mins

>>> Starting Epoch 85/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 85...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 85/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3732, acc=0.5139
    Batch 200/3960: loss=2.3866, acc=0.5036
    Batch 300/3960: loss=2.3898, acc=0.4877
    Batch 400/3960: loss=2.4072, acc=0.4868
    Batch 500/3960: loss=2.4107, acc=0.4890
    Batch 600/3960: loss=2.4058, acc=0.4936
    Batch 700/3960: loss=2.3985, acc=0.4963
    Batch 800/3960: loss=2.3907, acc=0.5017
    Batch 900/3960: loss=2.3863, acc=0.5036
    Batch 1000/3960: loss=2.3793, acc=0.5046
    Batch 1100/3960: loss=2.3790, acc=0.5007
    Batch 1200/3960: loss=2.3825, acc=0.4966
    Batch 1300/3960: loss=2.3752, acc=0.5008
    Batch 1400/3960: loss=2.3751, acc=0.5018
    Batch 1500/3960: loss=2.3818, acc=0.4991
    Batch 1600/3960: loss=2.3719, acc=0.5029
    Batch 1700/3960: loss=2.3758, acc=0.5034
    Batch 1800/3960: loss=2.3741, acc=0.5035
    Batch 1900/3960: loss=2.3718, acc=0.5048
    Batch 2000/3960: loss=2.3740, acc=0.5034
    Batch 2100/3960: loss=2.3753, acc=0.5030
    Batch 2200/3960: loss=2.3776, acc=0.5010
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8356


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1338
>>> Validation completed <<<
Epoch time: 36.96 mins
Saved checkpoint

>>> Starting Epoch 86/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 86...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 86/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3163, acc=0.5483
    Batch 200/3960: loss=2.4494, acc=0.4998
    Batch 300/3960: loss=2.4588, acc=0.5035
    Batch 400/3960: loss=2.4375, acc=0.5044
    Batch 500/3960: loss=2.4164, acc=0.5082
    Batch 600/3960: loss=2.3943, acc=0.5067
    Batch 700/3960: loss=2.3790, acc=0.5070
    Batch 800/3960: loss=2.3770, acc=0.5100
    Batch 900/3960: loss=2.3723, acc=0.5110
    Batch 1000/3960: loss=2.3743, acc=0.5123
    Batch 1100/3960: loss=2.3709, acc=0.5088
    Batch 1200/3960: loss=2.3705, acc=0.5125
    Batch 1300/3960: loss=2.3685, acc=0.5144
    Batch 1400/3960: loss=2.3617, acc=0.5145
    Batch 1500/3960: loss=2.3631, acc=0.5135
    Batch 1600/3960: loss=2.3660, acc=0.5129
    Batch 1700/3960: loss=2.3635, acc=0.5136
    Batch 1800/3960: loss=2.3662, acc=0.5138
    Batch 1900/3960: loss=2.3640, acc=0.5135
    Batch 2000/3960: loss=2.3658, acc=0.5134
    Batch 2100/3960: loss=2.3644, acc=0.5135
    Batch 2200/3960: loss=2.3675, acc=0.5131
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8324


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1284
>>> Validation completed <<<
Epoch time: 39.37 mins

>>> Starting Epoch 87/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 87...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 87/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3873, acc=0.5291
    Batch 200/3960: loss=2.3211, acc=0.5494
    Batch 300/3960: loss=2.3418, acc=0.5411
    Batch 400/3960: loss=2.3481, acc=0.5333
    Batch 500/3960: loss=2.3402, acc=0.5221
    Batch 600/3960: loss=2.3403, acc=0.5161
    Batch 700/3960: loss=2.3344, acc=0.5151
    Batch 800/3960: loss=2.3411, acc=0.5064
    Batch 900/3960: loss=2.3426, acc=0.5092
    Batch 1000/3960: loss=2.3549, acc=0.5118
    Batch 1100/3960: loss=2.3445, acc=0.5089
    Batch 1200/3960: loss=2.3466, acc=0.5085
    Batch 1300/3960: loss=2.3498, acc=0.5104
    Batch 1400/3960: loss=2.3469, acc=0.5097
    Batch 1500/3960: loss=2.3458, acc=0.5100
    Batch 1600/3960: loss=2.3467, acc=0.5094
    Batch 1700/3960: loss=2.3475, acc=0.5085
    Batch 1800/3960: loss=2.3552, acc=0.5065
    Batch 1900/3960: loss=2.3570, acc=0.5049
    Batch 2000/3960: loss=2.3656, acc=0.5048
    Batch 2100/3960: loss=2.3622, acc=0.5060
    Batch 2200/3960: loss=2.3621, acc=0.5060
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8410


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1254
>>> Validation completed <<<
Epoch time: 24.58 mins

>>> Starting Epoch 88/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 88...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 88/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3333, acc=0.5179
    Batch 200/3960: loss=2.3630, acc=0.5118
    Batch 300/3960: loss=2.3567, acc=0.5054
    Batch 400/3960: loss=2.3523, acc=0.5082
    Batch 500/3960: loss=2.3445, acc=0.5152
    Batch 600/3960: loss=2.3455, acc=0.5077
    Batch 700/3960: loss=2.3506, acc=0.5045
    Batch 800/3960: loss=2.3564, acc=0.5057
    Batch 900/3960: loss=2.3465, acc=0.5095
    Batch 1000/3960: loss=2.3498, acc=0.5149
    Batch 1100/3960: loss=2.3437, acc=0.5194
    Batch 1200/3960: loss=2.3421, acc=0.5220
    Batch 1300/3960: loss=2.3399, acc=0.5209
    Batch 1400/3960: loss=2.3414, acc=0.5183
    Batch 1500/3960: loss=2.3450, acc=0.5179
    Batch 1600/3960: loss=2.3415, acc=0.5197
    Batch 1700/3960: loss=2.3475, acc=0.5184
    Batch 1800/3960: loss=2.3497, acc=0.5165
    Batch 1900/3960: loss=2.3506, acc=0.5165
    Batch 2000/3960: loss=2.3511, acc=0.5154
    Batch 2100/3960: loss=2.3510, acc=0.5176
    Batch 2200/3960: loss=2.3549, acc=0.5149
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8358


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1226
>>> Validation completed <<<
Epoch time: 32.36 mins

>>> Starting Epoch 89/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 89...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 89/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2963, acc=0.5353
    Batch 200/3960: loss=2.3534, acc=0.5165
    Batch 300/3960: loss=2.3868, acc=0.5129
    Batch 400/3960: loss=2.3909, acc=0.5164
    Batch 500/3960: loss=2.3992, acc=0.5148
    Batch 600/3960: loss=2.3911, acc=0.5116
    Batch 700/3960: loss=2.3765, acc=0.5149
    Batch 800/3960: loss=2.3610, acc=0.5168
    Batch 900/3960: loss=2.3567, acc=0.5160
    Batch 1000/3960: loss=2.3639, acc=0.5162
    Batch 1100/3960: loss=2.3584, acc=0.5168
    Batch 1200/3960: loss=2.3582, acc=0.5169
    Batch 1300/3960: loss=2.3556, acc=0.5150
    Batch 1400/3960: loss=2.3550, acc=0.5132
    Batch 1500/3960: loss=2.3503, acc=0.5137
    Batch 1600/3960: loss=2.3608, acc=0.5119
    Batch 1700/3960: loss=2.3635, acc=0.5109
    Batch 1800/3960: loss=2.3633, acc=0.5100
    Batch 1900/3960: loss=2.3577, acc=0.5113
    Batch 2000/3960: loss=2.3595, acc=0.5113
    Batch 2100/3960: loss=2.3605, acc=0.5129
    Batch 2200/3960: loss=2.3565, acc=0.5125
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8408


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1192
>>> Validation completed <<<
Epoch time: 41.72 mins

>>> Starting Epoch 90/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 90...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 90/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3728, acc=0.4892
    Batch 200/3960: loss=2.3487, acc=0.4874
    Batch 300/3960: loss=2.3322, acc=0.4984
    Batch 400/3960: loss=2.3224, acc=0.5067
    Batch 500/3960: loss=2.3098, acc=0.5094
    Batch 600/3960: loss=2.3163, acc=0.5128
    Batch 700/3960: loss=2.3068, acc=0.5155
    Batch 800/3960: loss=2.2985, acc=0.5217
    Batch 900/3960: loss=2.3037, acc=0.5208
    Batch 1000/3960: loss=2.3119, acc=0.5177
    Batch 1100/3960: loss=2.3034, acc=0.5204
    Batch 1200/3960: loss=2.3053, acc=0.5223
    Batch 1300/3960: loss=2.3010, acc=0.5229
    Batch 1400/3960: loss=2.3035, acc=0.5201
    Batch 1500/3960: loss=2.3038, acc=0.5189
    Batch 1600/3960: loss=2.3081, acc=0.5186
    Batch 1700/3960: loss=2.3202, acc=0.5133
    Batch 1800/3960: loss=2.3212, acc=0.5126
    Batch 1900/3960: loss=2.3225, acc=0.5119
    Batch 2000/3960: loss=2.3232, acc=0.5134
    Batch 2100/3960: loss=2.3227, acc=0.5143
    Batch 2200/3960: loss=2.3230, acc=0.5126
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8416


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1182
>>> Validation completed <<<
Epoch time: 41.07 mins
Saved checkpoint

>>> Starting Epoch 91/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 91...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 91/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2405, acc=0.5480
    Batch 200/3960: loss=2.3604, acc=0.5160
    Batch 300/3960: loss=2.3351, acc=0.5256
    Batch 400/3960: loss=2.3166, acc=0.5195
    Batch 500/3960: loss=2.3082, acc=0.5157
    Batch 600/3960: loss=2.3094, acc=0.5102
    Batch 700/3960: loss=2.3094, acc=0.5062
    Batch 800/3960: loss=2.3012, acc=0.5066
    Batch 900/3960: loss=2.3074, acc=0.5091
    Batch 1000/3960: loss=2.3109, acc=0.5096
    Batch 1100/3960: loss=2.3166, acc=0.5077
    Batch 1200/3960: loss=2.2997, acc=0.5118
    Batch 1300/3960: loss=2.3018, acc=0.5125
    Batch 1400/3960: loss=2.2989, acc=0.5134
    Batch 1500/3960: loss=2.3050, acc=0.5115
    Batch 1600/3960: loss=2.3036, acc=0.5138
    Batch 1700/3960: loss=2.2987, acc=0.5170
    Batch 1800/3960: loss=2.2963, acc=0.5181
    Batch 1900/3960: loss=2.2988, acc=0.5164
    Batch 2000/3960: loss=2.3000, acc=0.5169
    Batch 2100/3960: loss=2.3053, acc=0.5166
    Batch 2200/3960: loss=2.3077, acc=0.5150
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8486


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1154
>>> Validation completed <<<
Epoch time: 23.75 mins

>>> Starting Epoch 92/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 92...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 92/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2074, acc=0.5733
    Batch 200/3960: loss=2.2763, acc=0.5491
    Batch 300/3960: loss=2.3115, acc=0.5295
    Batch 400/3960: loss=2.3022, acc=0.5297
    Batch 500/3960: loss=2.3308, acc=0.5185
    Batch 600/3960: loss=2.3255, acc=0.5193
    Batch 700/3960: loss=2.3184, acc=0.5207
    Batch 800/3960: loss=2.3169, acc=0.5237
    Batch 900/3960: loss=2.3227, acc=0.5243
    Batch 1000/3960: loss=2.3207, acc=0.5229
    Batch 1100/3960: loss=2.3237, acc=0.5190
    Batch 1200/3960: loss=2.3284, acc=0.5162
    Batch 1300/3960: loss=2.3195, acc=0.5159
    Batch 1400/3960: loss=2.3232, acc=0.5153
    Batch 1500/3960: loss=2.3244, acc=0.5158
    Batch 1600/3960: loss=2.3281, acc=0.5122
    Batch 1700/3960: loss=2.3237, acc=0.5151
    Batch 1800/3960: loss=2.3240, acc=0.5148
    Batch 1900/3960: loss=2.3265, acc=0.5135
    Batch 2000/3960: loss=2.3246, acc=0.5154
    Batch 2100/3960: loss=2.3240, acc=0.5161
    Batch 2200/3960: loss=2.3198, acc=0.5169
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8392


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1138
>>> Validation completed <<<
Epoch time: 23.70 mins

>>> Starting Epoch 93/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 93...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 93/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2920, acc=0.5294
    Batch 200/3960: loss=2.3123, acc=0.5224
    Batch 300/3960: loss=2.3391, acc=0.5196
    Batch 400/3960: loss=2.3507, acc=0.5243
    Batch 500/3960: loss=2.3309, acc=0.5323
    Batch 600/3960: loss=2.3039, acc=0.5427
    Batch 700/3960: loss=2.3122, acc=0.5386
    Batch 800/3960: loss=2.3110, acc=0.5314
    Batch 900/3960: loss=2.3199, acc=0.5307
    Batch 1000/3960: loss=2.3259, acc=0.5255
    Batch 1100/3960: loss=2.3292, acc=0.5252
    Batch 1200/3960: loss=2.3243, acc=0.5244
    Batch 1300/3960: loss=2.3248, acc=0.5241
    Batch 1400/3960: loss=2.3195, acc=0.5276
    Batch 1500/3960: loss=2.3199, acc=0.5267
    Batch 1600/3960: loss=2.3182, acc=0.5255
    Batch 1700/3960: loss=2.3201, acc=0.5231
    Batch 1800/3960: loss=2.3235, acc=0.5237
    Batch 1900/3960: loss=2.3203, acc=0.5233
    Batch 2000/3960: loss=2.3185, acc=0.5216
    Batch 2100/3960: loss=2.3224, acc=0.5216
    Batch 2200/3960: loss=2.3225, acc=0.5222
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8414


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1120
>>> Validation completed <<<
Epoch time: 23.73 mins

>>> Starting Epoch 94/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 94...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 94/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1731, acc=0.5622
    Batch 200/3960: loss=2.2436, acc=0.5359
    Batch 300/3960: loss=2.2613, acc=0.5380
    Batch 400/3960: loss=2.2697, acc=0.5358
    Batch 500/3960: loss=2.2707, acc=0.5322
    Batch 600/3960: loss=2.3051, acc=0.5307
    Batch 700/3960: loss=2.3135, acc=0.5320
    Batch 800/3960: loss=2.3134, acc=0.5323
    Batch 900/3960: loss=2.3117, acc=0.5313
    Batch 1000/3960: loss=2.3041, acc=0.5314
    Batch 1100/3960: loss=2.2917, acc=0.5332
    Batch 1200/3960: loss=2.2993, acc=0.5337
    Batch 1300/3960: loss=2.3091, acc=0.5312
    Batch 1400/3960: loss=2.3122, acc=0.5292
    Batch 1500/3960: loss=2.3130, acc=0.5308
    Batch 1600/3960: loss=2.3162, acc=0.5297
    Batch 1700/3960: loss=2.3151, acc=0.5307
    Batch 1800/3960: loss=2.3144, acc=0.5287
    Batch 1900/3960: loss=2.3086, acc=0.5307
    Batch 2000/3960: loss=2.3132, acc=0.5271
    Batch 2100/3960: loss=2.3128, acc=0.5266
    Batch 2200/3960: loss=2.3199, acc=0.5239
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8490


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1110
>>> Validation completed <<<
Epoch time: 23.67 mins

>>> Starting Epoch 95/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 95...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 95/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1870, acc=0.5919
    Batch 200/3960: loss=2.2138, acc=0.5558
    Batch 300/3960: loss=2.2470, acc=0.5364
    Batch 400/3960: loss=2.2420, acc=0.5394
    Batch 500/3960: loss=2.2380, acc=0.5487
    Batch 600/3960: loss=2.2447, acc=0.5470
    Batch 700/3960: loss=2.2506, acc=0.5518
    Batch 800/3960: loss=2.2409, acc=0.5510
    Batch 900/3960: loss=2.2514, acc=0.5474
    Batch 1000/3960: loss=2.2521, acc=0.5482
    Batch 1100/3960: loss=2.2566, acc=0.5448
    Batch 1200/3960: loss=2.2727, acc=0.5366
    Batch 1300/3960: loss=2.2819, acc=0.5350
    Batch 1400/3960: loss=2.2792, acc=0.5356
    Batch 1500/3960: loss=2.2783, acc=0.5343
    Batch 1600/3960: loss=2.2761, acc=0.5355
    Batch 1700/3960: loss=2.2720, acc=0.5364
    Batch 1800/3960: loss=2.2757, acc=0.5361
    Batch 1900/3960: loss=2.2719, acc=0.5346
    Batch 2000/3960: loss=2.2741, acc=0.5337
    Batch 2100/3960: loss=2.2736, acc=0.5333
    Batch 2200/3960: loss=2.2753, acc=0.5314
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8454


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1106
>>> Validation completed <<<
Epoch time: 23.69 mins
Saved checkpoint

>>> Starting Epoch 96/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 96...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 96/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2067, acc=0.5164
    Batch 200/3960: loss=2.2468, acc=0.5224
    Batch 300/3960: loss=2.2598, acc=0.4939
    Batch 400/3960: loss=2.2433, acc=0.5092
    Batch 500/3960: loss=2.2444, acc=0.5014
    Batch 600/3960: loss=2.2549, acc=0.5112
    Batch 700/3960: loss=2.2774, acc=0.5084
    Batch 800/3960: loss=2.2816, acc=0.5118
    Batch 900/3960: loss=2.2867, acc=0.5135
    Batch 1000/3960: loss=2.3129, acc=0.5097
    Batch 1100/3960: loss=2.3078, acc=0.5095
    Batch 1200/3960: loss=2.3126, acc=0.5111
    Batch 1300/3960: loss=2.3102, acc=0.5102
    Batch 1400/3960: loss=2.3058, acc=0.5113
    Batch 1500/3960: loss=2.3019, acc=0.5126
    Batch 1600/3960: loss=2.2980, acc=0.5112
    Batch 1700/3960: loss=2.2942, acc=0.5152
    Batch 1800/3960: loss=2.2914, acc=0.5180
    Batch 1900/3960: loss=2.2915, acc=0.5176
    Batch 2000/3960: loss=2.2918, acc=0.5185
    Batch 2100/3960: loss=2.2919, acc=0.5178
    Batch 2200/3960: loss=2.2931, acc=0.5174
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8488


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1104
>>> Validation completed <<<
Epoch time: 23.81 mins

>>> Starting Epoch 97/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 97...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 97/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3184, acc=0.5179
    Batch 200/3960: loss=2.2586, acc=0.5353
    Batch 300/3960: loss=2.3131, acc=0.5350
    Batch 400/3960: loss=2.2873, acc=0.5323
    Batch 500/3960: loss=2.3086, acc=0.5316
    Batch 600/3960: loss=2.3183, acc=0.5347
    Batch 700/3960: loss=2.3150, acc=0.5323
    Batch 800/3960: loss=2.3178, acc=0.5317
    Batch 900/3960: loss=2.3236, acc=0.5277
    Batch 1000/3960: loss=2.3147, acc=0.5333
    Batch 1100/3960: loss=2.3135, acc=0.5325
    Batch 1200/3960: loss=2.3073, acc=0.5313
    Batch 1300/3960: loss=2.3119, acc=0.5283
    Batch 1400/3960: loss=2.3045, acc=0.5281
    Batch 1500/3960: loss=2.3041, acc=0.5257
    Batch 1600/3960: loss=2.2977, acc=0.5243
    Batch 1700/3960: loss=2.2946, acc=0.5250
    Batch 1800/3960: loss=2.2958, acc=0.5231
    Batch 1900/3960: loss=2.2966, acc=0.5217
    Batch 2000/3960: loss=2.2920, acc=0.5215
    Batch 2100/3960: loss=2.2931, acc=0.5219
    Batch 2200/3960: loss=2.2877, acc=0.5226
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8486


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1106
>>> Validation completed <<<
Epoch time: 31.97 mins

>>> Starting Epoch 98/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 98...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 98/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2621, acc=0.5269
    Batch 200/3960: loss=2.2660, acc=0.5368
    Batch 300/3960: loss=2.2560, acc=0.5480
    Batch 400/3960: loss=2.2770, acc=0.5341
    Batch 500/3960: loss=2.2615, acc=0.5332
    Batch 600/3960: loss=2.2557, acc=0.5334
    Batch 700/3960: loss=2.2591, acc=0.5370
    Batch 800/3960: loss=2.2639, acc=0.5363
    Batch 900/3960: loss=2.2732, acc=0.5363
    Batch 1000/3960: loss=2.2697, acc=0.5332
    Batch 1100/3960: loss=2.2617, acc=0.5385
    Batch 1200/3960: loss=2.2711, acc=0.5354
    Batch 1300/3960: loss=2.2719, acc=0.5350
    Batch 1400/3960: loss=2.2690, acc=0.5350
    Batch 1500/3960: loss=2.2692, acc=0.5349
    Batch 1600/3960: loss=2.2692, acc=0.5338
    Batch 1700/3960: loss=2.2676, acc=0.5322
    Batch 1800/3960: loss=2.2718, acc=0.5308
    Batch 1900/3960: loss=2.2726, acc=0.5304
    Batch 2000/3960: loss=2.2707, acc=0.5304
    Batch 2100/3960: loss=2.2755, acc=0.5309
    Batch 2200/3960: loss=2.2733, acc=0.5302
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8480


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1116
>>> Validation completed <<<
Epoch time: 36.25 mins

>>> Starting Epoch 99/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 99...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 99/120:   0%|                                                        | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1747, acc=0.5845
    Batch 200/3960: loss=2.1776, acc=0.5726
    Batch 300/3960: loss=2.1979, acc=0.5808
    Batch 400/3960: loss=2.2189, acc=0.5666
    Batch 500/3960: loss=2.2151, acc=0.5587
    Batch 600/3960: loss=2.2476, acc=0.5530
    Batch 700/3960: loss=2.2600, acc=0.5500
    Batch 800/3960: loss=2.2603, acc=0.5516
    Batch 900/3960: loss=2.2671, acc=0.5494
    Batch 1000/3960: loss=2.2686, acc=0.5470
    Batch 1100/3960: loss=2.2722, acc=0.5466
    Batch 1200/3960: loss=2.2812, acc=0.5423
    Batch 1300/3960: loss=2.2769, acc=0.5416
    Batch 1400/3960: loss=2.2792, acc=0.5431
    Batch 1500/3960: loss=2.2832, acc=0.5419
    Batch 1600/3960: loss=2.2790, acc=0.5395
    Batch 1700/3960: loss=2.2878, acc=0.5346
    Batch 1800/3960: loss=2.2824, acc=0.5328
    Batch 1900/3960: loss=2.2818, acc=0.5341
    Batch 2000/3960: loss=2.2887, acc=0.5327
    Batch 2100/3960: loss=2.2841, acc=0.5332
    Batch 2200/3960: loss=2.2872, acc=0.5309
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8498


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1128
>>> Validation completed <<<
Epoch time: 23.64 mins

>>> Starting Epoch 100/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 100...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 100/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2922, acc=0.5671
    Batch 200/3960: loss=2.2871, acc=0.5575
    Batch 300/3960: loss=2.2829, acc=0.5601
    Batch 400/3960: loss=2.2736, acc=0.5567
    Batch 500/3960: loss=2.2704, acc=0.5537
    Batch 600/3960: loss=2.2519, acc=0.5552
    Batch 700/3960: loss=2.2641, acc=0.5491
    Batch 800/3960: loss=2.2636, acc=0.5459
    Batch 900/3960: loss=2.2610, acc=0.5463
    Batch 1000/3960: loss=2.2599, acc=0.5463
    Batch 1100/3960: loss=2.2543, acc=0.5460
    Batch 1200/3960: loss=2.2452, acc=0.5427
    Batch 1300/3960: loss=2.2519, acc=0.5395
    Batch 1400/3960: loss=2.2613, acc=0.5370
    Batch 1500/3960: loss=2.2619, acc=0.5344
    Batch 1600/3960: loss=2.2584, acc=0.5345
    Batch 1700/3960: loss=2.2490, acc=0.5368
    Batch 1800/3960: loss=2.2521, acc=0.5359
    Batch 1900/3960: loss=2.2553, acc=0.5376
    Batch 2000/3960: loss=2.2595, acc=0.5367
    Batch 2100/3960: loss=2.2548, acc=0.5367
    Batch 2200/3960: loss=2.2524, acc=0.5375
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8530


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1142
>>> Validation completed <<<
Epoch time: 29.11 mins
Saved checkpoint

>>> Starting Epoch 101/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 101...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 101/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2207, acc=0.5251
    Batch 200/3960: loss=2.1692, acc=0.5583
    Batch 300/3960: loss=2.2044, acc=0.5558
    Batch 400/3960: loss=2.2125, acc=0.5464
    Batch 500/3960: loss=2.2292, acc=0.5487
    Batch 600/3960: loss=2.2387, acc=0.5427
    Batch 700/3960: loss=2.2593, acc=0.5336
    Batch 800/3960: loss=2.2523, acc=0.5351
    Batch 900/3960: loss=2.2605, acc=0.5369
    Batch 1000/3960: loss=2.2509, acc=0.5377
    Batch 1100/3960: loss=2.2462, acc=0.5391
    Batch 1200/3960: loss=2.2429, acc=0.5370
    Batch 1300/3960: loss=2.2428, acc=0.5333
    Batch 1400/3960: loss=2.2474, acc=0.5310
    Batch 1500/3960: loss=2.2454, acc=0.5285
    Batch 1600/3960: loss=2.2488, acc=0.5257
    Batch 1700/3960: loss=2.2474, acc=0.5276
    Batch 1800/3960: loss=2.2559, acc=0.5238
    Batch 1900/3960: loss=2.2543, acc=0.5237
    Batch 2000/3960: loss=2.2477, acc=0.5252
    Batch 2100/3960: loss=2.2533, acc=0.5257
    Batch 2200/3960: loss=2.2542, acc=0.5253
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8496


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1156
>>> Validation completed <<<
Epoch time: 38.54 mins

>>> Starting Epoch 102/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 102...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 102/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2604, acc=0.5603
    Batch 200/3960: loss=2.2982, acc=0.5283
    Batch 300/3960: loss=2.3004, acc=0.5168
    Batch 400/3960: loss=2.2539, acc=0.5404
    Batch 500/3960: loss=2.2526, acc=0.5394
    Batch 600/3960: loss=2.2548, acc=0.5419
    Batch 700/3960: loss=2.2575, acc=0.5442
    Batch 800/3960: loss=2.2535, acc=0.5420
    Batch 900/3960: loss=2.2563, acc=0.5447
    Batch 1000/3960: loss=2.2595, acc=0.5416
    Batch 1100/3960: loss=2.2546, acc=0.5389
    Batch 1200/3960: loss=2.2579, acc=0.5396
    Batch 1300/3960: loss=2.2548, acc=0.5409
    Batch 1400/3960: loss=2.2523, acc=0.5400
    Batch 1500/3960: loss=2.2465, acc=0.5395
    Batch 1600/3960: loss=2.2541, acc=0.5355
    Batch 1700/3960: loss=2.2527, acc=0.5361
    Batch 1800/3960: loss=2.2465, acc=0.5396
    Batch 1900/3960: loss=2.2468, acc=0.5400
    Batch 2000/3960: loss=2.2457, acc=0.5374
    Batch 2100/3960: loss=2.2439, acc=0.5366
    Batch 2200/3960: loss=2.2437, acc=0.5360
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8526


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1198
>>> Validation completed <<<
Epoch time: 23.47 mins

>>> Starting Epoch 103/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 103...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 103/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3028, acc=0.5371
    Batch 200/3960: loss=2.2839, acc=0.5395
    Batch 300/3960: loss=2.2901, acc=0.5375
    Batch 400/3960: loss=2.2828, acc=0.5444
    Batch 500/3960: loss=2.2796, acc=0.5437
    Batch 600/3960: loss=2.2574, acc=0.5424
    Batch 700/3960: loss=2.2471, acc=0.5465
    Batch 800/3960: loss=2.2513, acc=0.5450
    Batch 900/3960: loss=2.2432, acc=0.5439
    Batch 1000/3960: loss=2.2396, acc=0.5410
    Batch 1100/3960: loss=2.2410, acc=0.5403
    Batch 1200/3960: loss=2.2447, acc=0.5420
    Batch 1300/3960: loss=2.2435, acc=0.5429
    Batch 1400/3960: loss=2.2442, acc=0.5436
    Batch 1500/3960: loss=2.2499, acc=0.5424
    Batch 1600/3960: loss=2.2528, acc=0.5422
    Batch 1700/3960: loss=2.2510, acc=0.5425
    Batch 1800/3960: loss=2.2499, acc=0.5405
    Batch 1900/3960: loss=2.2454, acc=0.5404
    Batch 2000/3960: loss=2.2499, acc=0.5394
    Batch 2100/3960: loss=2.2493, acc=0.5367
    Batch 2200/3960: loss=2.2502, acc=0.5366
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8494


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1238
>>> Validation completed <<<
Epoch time: 23.44 mins

>>> Starting Epoch 104/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 104...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 104/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2485, acc=0.4994
    Batch 200/3960: loss=2.1610, acc=0.5208
    Batch 300/3960: loss=2.1882, acc=0.5395
    Batch 400/3960: loss=2.1897, acc=0.5411
    Batch 500/3960: loss=2.1995, acc=0.5449
    Batch 600/3960: loss=2.1986, acc=0.5487
    Batch 700/3960: loss=2.2050, acc=0.5407
    Batch 800/3960: loss=2.2022, acc=0.5382
    Batch 900/3960: loss=2.1930, acc=0.5424
    Batch 1000/3960: loss=2.1893, acc=0.5452
    Batch 1100/3960: loss=2.1845, acc=0.5439
    Batch 1200/3960: loss=2.1888, acc=0.5424
    Batch 1300/3960: loss=2.1827, acc=0.5465
    Batch 1400/3960: loss=2.1862, acc=0.5470
    Batch 1500/3960: loss=2.1844, acc=0.5472
    Batch 1600/3960: loss=2.1917, acc=0.5444
    Batch 1700/3960: loss=2.2004, acc=0.5417
    Batch 1800/3960: loss=2.2096, acc=0.5401
    Batch 1900/3960: loss=2.2082, acc=0.5409
    Batch 2000/3960: loss=2.2130, acc=0.5401
    Batch 2100/3960: loss=2.2128, acc=0.5408
    Batch 2200/3960: loss=2.2182, acc=0.5396
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8556


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1294
>>> Validation completed <<<
Epoch time: 23.45 mins

>>> Starting Epoch 105/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 105...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 105/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2047, acc=0.5235
    Batch 200/3960: loss=2.2167, acc=0.5225
    Batch 300/3960: loss=2.2016, acc=0.5419
    Batch 400/3960: loss=2.2136, acc=0.5305
    Batch 500/3960: loss=2.2050, acc=0.5334
    Batch 600/3960: loss=2.2114, acc=0.5254
    Batch 700/3960: loss=2.2232, acc=0.5232
    Batch 800/3960: loss=2.2113, acc=0.5252
    Batch 900/3960: loss=2.2106, acc=0.5236
    Batch 1000/3960: loss=2.2189, acc=0.5223
    Batch 1100/3960: loss=2.2224, acc=0.5184
    Batch 1200/3960: loss=2.2237, acc=0.5175
    Batch 1300/3960: loss=2.2227, acc=0.5183
    Batch 1400/3960: loss=2.2132, acc=0.5217
    Batch 1500/3960: loss=2.2149, acc=0.5262
    Batch 1600/3960: loss=2.2125, acc=0.5274
    Batch 1700/3960: loss=2.2054, acc=0.5305
    Batch 1800/3960: loss=2.2071, acc=0.5291
    Batch 1900/3960: loss=2.2038, acc=0.5300
    Batch 2000/3960: loss=2.2017, acc=0.5306
    Batch 2100/3960: loss=2.2025, acc=0.5320
    Batch 2200/3960: loss=2.2074, acc=0.5320
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8528


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1368
>>> Validation completed <<<
Epoch time: 23.46 mins
Saved checkpoint

>>> Starting Epoch 106/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 106...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 106/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1320, acc=0.5114
    Batch 200/3960: loss=2.1969, acc=0.5292
    Batch 300/3960: loss=2.1898, acc=0.5298
    Batch 400/3960: loss=2.1992, acc=0.5338
    Batch 500/3960: loss=2.1841, acc=0.5373
    Batch 600/3960: loss=2.1952, acc=0.5387
    Batch 700/3960: loss=2.2144, acc=0.5399
    Batch 800/3960: loss=2.2059, acc=0.5437
    Batch 900/3960: loss=2.2167, acc=0.5428
    Batch 1000/3960: loss=2.2141, acc=0.5426
    Batch 1100/3960: loss=2.2074, acc=0.5449
    Batch 1200/3960: loss=2.2032, acc=0.5484
    Batch 1300/3960: loss=2.1992, acc=0.5495
    Batch 1400/3960: loss=2.2044, acc=0.5497
    Batch 1500/3960: loss=2.2024, acc=0.5489
    Batch 1600/3960: loss=2.2078, acc=0.5454
    Batch 1700/3960: loss=2.2141, acc=0.5436
    Batch 1800/3960: loss=2.2097, acc=0.5463
    Batch 1900/3960: loss=2.2140, acc=0.5443
    Batch 2000/3960: loss=2.2182, acc=0.5428
    Batch 2100/3960: loss=2.2212, acc=0.5425
    Batch 2200/3960: loss=2.2207, acc=0.5428
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8554


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1438
>>> Validation completed <<<
Epoch time: 23.57 mins

>>> Starting Epoch 107/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 107...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 107/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2750, acc=0.5145
    Batch 200/3960: loss=2.2701, acc=0.5211
    Batch 300/3960: loss=2.2807, acc=0.5277
    Batch 400/3960: loss=2.2813, acc=0.5337
    Batch 500/3960: loss=2.2644, acc=0.5293
    Batch 600/3960: loss=2.2501, acc=0.5313
    Batch 700/3960: loss=2.2461, acc=0.5341
    Batch 800/3960: loss=2.2359, acc=0.5374
    Batch 900/3960: loss=2.2303, acc=0.5308
    Batch 1000/3960: loss=2.2302, acc=0.5376
    Batch 1100/3960: loss=2.2268, acc=0.5407
    Batch 1200/3960: loss=2.2181, acc=0.5424
    Batch 1300/3960: loss=2.2229, acc=0.5405
    Batch 1400/3960: loss=2.2162, acc=0.5415
    Batch 1500/3960: loss=2.2153, acc=0.5421
    Batch 1600/3960: loss=2.2201, acc=0.5416
    Batch 1700/3960: loss=2.2204, acc=0.5426
    Batch 1800/3960: loss=2.2205, acc=0.5439
    Batch 1900/3960: loss=2.2264, acc=0.5425
    Batch 2000/3960: loss=2.2275, acc=0.5435
    Batch 2100/3960: loss=2.2312, acc=0.5436
    Batch 2200/3960: loss=2.2324, acc=0.5430
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8602


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1514
>>> Validation completed <<<
Epoch time: 23.75 mins

>>> Starting Epoch 108/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 108...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 108/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3394, acc=0.5059
    Batch 200/3960: loss=2.3078, acc=0.5300
    Batch 300/3960: loss=2.3244, acc=0.5307
    Batch 400/3960: loss=2.3043, acc=0.5254
    Batch 500/3960: loss=2.2684, acc=0.5334
    Batch 600/3960: loss=2.2556, acc=0.5357
    Batch 700/3960: loss=2.2430, acc=0.5308
    Batch 800/3960: loss=2.2325, acc=0.5344
    Batch 900/3960: loss=2.2380, acc=0.5325
    Batch 1000/3960: loss=2.2409, acc=0.5300
    Batch 1100/3960: loss=2.2399, acc=0.5329
    Batch 1200/3960: loss=2.2337, acc=0.5357
    Batch 1300/3960: loss=2.2311, acc=0.5392
    Batch 1400/3960: loss=2.2260, acc=0.5446
    Batch 1500/3960: loss=2.2262, acc=0.5443
    Batch 1600/3960: loss=2.2278, acc=0.5418
    Batch 1700/3960: loss=2.2305, acc=0.5414
    Batch 1800/3960: loss=2.2278, acc=0.5387
    Batch 1900/3960: loss=2.2233, acc=0.5395
    Batch 2000/3960: loss=2.2229, acc=0.5388
    Batch 2100/3960: loss=2.2207, acc=0.5399
    Batch 2200/3960: loss=2.2194, acc=0.5397
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8558


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1628
>>> Validation completed <<<
Epoch time: 23.60 mins

>>> Starting Epoch 109/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 109...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 109/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2812, acc=0.5257
    Batch 200/3960: loss=2.2365, acc=0.5462
    Batch 300/3960: loss=2.2696, acc=0.5466
    Batch 400/3960: loss=2.2390, acc=0.5439
    Batch 500/3960: loss=2.2299, acc=0.5360
    Batch 600/3960: loss=2.2240, acc=0.5449
    Batch 700/3960: loss=2.2068, acc=0.5455
    Batch 800/3960: loss=2.2061, acc=0.5411
    Batch 900/3960: loss=2.2199, acc=0.5364
    Batch 1000/3960: loss=2.2132, acc=0.5404
    Batch 1100/3960: loss=2.2119, acc=0.5433
    Batch 1200/3960: loss=2.2154, acc=0.5436
    Batch 1300/3960: loss=2.2043, acc=0.5467
    Batch 1400/3960: loss=2.2141, acc=0.5411
    Batch 1500/3960: loss=2.2142, acc=0.5386
    Batch 1600/3960: loss=2.2148, acc=0.5342
    Batch 1700/3960: loss=2.2168, acc=0.5351
    Batch 1800/3960: loss=2.2136, acc=0.5364
    Batch 1900/3960: loss=2.2108, acc=0.5345
    Batch 2000/3960: loss=2.2084, acc=0.5343
    Batch 2100/3960: loss=2.2034, acc=0.5370
    Batch 2200/3960: loss=2.2039, acc=0.5379
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8564


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1724
>>> Validation completed <<<
Epoch time: 23.56 mins

>>> Starting Epoch 110/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 110...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 110/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.3462, acc=0.5721
    Batch 200/3960: loss=2.2672, acc=0.5555
    Batch 300/3960: loss=2.2740, acc=0.5443
    Batch 400/3960: loss=2.2287, acc=0.5510
    Batch 500/3960: loss=2.2095, acc=0.5514
    Batch 600/3960: loss=2.2300, acc=0.5450
    Batch 700/3960: loss=2.2492, acc=0.5378
    Batch 800/3960: loss=2.2448, acc=0.5387
    Batch 900/3960: loss=2.2331, acc=0.5426
    Batch 1000/3960: loss=2.2372, acc=0.5406
    Batch 1100/3960: loss=2.2322, acc=0.5435
    Batch 1200/3960: loss=2.2404, acc=0.5398
    Batch 1300/3960: loss=2.2349, acc=0.5423
    Batch 1400/3960: loss=2.2267, acc=0.5393
    Batch 1500/3960: loss=2.2262, acc=0.5381
    Batch 1600/3960: loss=2.2262, acc=0.5361
    Batch 1700/3960: loss=2.2281, acc=0.5367
    Batch 1800/3960: loss=2.2268, acc=0.5382
    Batch 1900/3960: loss=2.2252, acc=0.5383
    Batch 2000/3960: loss=2.2293, acc=0.5364
    Batch 2100/3960: loss=2.2324, acc=0.5348
    Batch 2200/3960: loss=2.2319, acc=0.5337
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8610


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.1872
>>> Validation completed <<<
Epoch time: 23.56 mins
Saved checkpoint

>>> Starting Epoch 111/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 111...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 111/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1714, acc=0.5381
    Batch 200/3960: loss=2.1900, acc=0.5028
    Batch 300/3960: loss=2.2196, acc=0.5131
    Batch 400/3960: loss=2.2087, acc=0.5187
    Batch 500/3960: loss=2.2074, acc=0.5322
    Batch 600/3960: loss=2.1873, acc=0.5354
    Batch 700/3960: loss=2.1969, acc=0.5367
    Batch 800/3960: loss=2.2030, acc=0.5355
    Batch 900/3960: loss=2.1995, acc=0.5370
    Batch 1000/3960: loss=2.2115, acc=0.5416
    Batch 1100/3960: loss=2.2056, acc=0.5455
    Batch 1200/3960: loss=2.2099, acc=0.5446
    Batch 1300/3960: loss=2.2068, acc=0.5455
    Batch 1400/3960: loss=2.2123, acc=0.5472
    Batch 1500/3960: loss=2.2089, acc=0.5483
    Batch 1600/3960: loss=2.2052, acc=0.5482
    Batch 1700/3960: loss=2.2046, acc=0.5472
    Batch 1800/3960: loss=2.2015, acc=0.5484
    Batch 1900/3960: loss=2.2016, acc=0.5507
    Batch 2000/3960: loss=2.2005, acc=0.5520
    Batch 2100/3960: loss=2.1958, acc=0.5527
    Batch 2200/3960: loss=2.1942, acc=0.5507
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8566


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2008
>>> Validation completed <<<
Epoch time: 22.72 mins

>>> Starting Epoch 112/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 112...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 112/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2204, acc=0.4991
    Batch 200/3960: loss=2.1970, acc=0.5283
    Batch 300/3960: loss=2.2024, acc=0.5209
    Batch 400/3960: loss=2.1848, acc=0.5254
    Batch 500/3960: loss=2.1833, acc=0.5240
    Batch 600/3960: loss=2.2095, acc=0.5258
    Batch 700/3960: loss=2.1993, acc=0.5325
    Batch 800/3960: loss=2.2032, acc=0.5332
    Batch 900/3960: loss=2.2051, acc=0.5357
    Batch 1000/3960: loss=2.2114, acc=0.5370
    Batch 1100/3960: loss=2.2138, acc=0.5414
    Batch 1200/3960: loss=2.2147, acc=0.5395
    Batch 1300/3960: loss=2.2148, acc=0.5413
    Batch 1400/3960: loss=2.2156, acc=0.5409
    Batch 1500/3960: loss=2.2154, acc=0.5383
    Batch 1600/3960: loss=2.2130, acc=0.5385
    Batch 1700/3960: loss=2.2133, acc=0.5379
    Batch 1800/3960: loss=2.2222, acc=0.5369
    Batch 1900/3960: loss=2.2232, acc=0.5373
    Batch 2000/3960: loss=2.2234, acc=0.5360
    Batch 2100/3960: loss=2.2224, acc=0.5372
    Batch 2200/3960: loss=2.2186, acc=0.5389
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8582


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2180
>>> Validation completed <<<
Epoch time: 22.26 mins

>>> Starting Epoch 113/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 113...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 113/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2443, acc=0.5105
    Batch 200/3960: loss=2.2734, acc=0.5146
    Batch 300/3960: loss=2.2610, acc=0.5309
    Batch 400/3960: loss=2.2372, acc=0.5320
    Batch 500/3960: loss=2.2325, acc=0.5379
    Batch 600/3960: loss=2.2137, acc=0.5430
    Batch 700/3960: loss=2.2112, acc=0.5443
    Batch 800/3960: loss=2.1925, acc=0.5504
    Batch 900/3960: loss=2.1998, acc=0.5417
    Batch 1000/3960: loss=2.2084, acc=0.5412
    Batch 1100/3960: loss=2.2082, acc=0.5415
    Batch 1200/3960: loss=2.2067, acc=0.5435
    Batch 1300/3960: loss=2.1986, acc=0.5474
    Batch 1400/3960: loss=2.2023, acc=0.5479
    Batch 1500/3960: loss=2.2018, acc=0.5481
    Batch 1600/3960: loss=2.2033, acc=0.5462
    Batch 1700/3960: loss=2.2007, acc=0.5479
    Batch 1800/3960: loss=2.1983, acc=0.5474
    Batch 1900/3960: loss=2.2012, acc=0.5450
    Batch 2000/3960: loss=2.2017, acc=0.5408
    Batch 2100/3960: loss=2.1973, acc=0.5425
    Batch 2200/3960: loss=2.1981, acc=0.5401
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8592


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2366
>>> Validation completed <<<
Epoch time: 22.70 mins

>>> Starting Epoch 114/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 114...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 114/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.2322, acc=0.5486
    Batch 200/3960: loss=2.2251, acc=0.5700
    Batch 300/3960: loss=2.2305, acc=0.5542
    Batch 400/3960: loss=2.2259, acc=0.5560
    Batch 500/3960: loss=2.2250, acc=0.5529
    Batch 600/3960: loss=2.2394, acc=0.5448
    Batch 700/3960: loss=2.2412, acc=0.5421
    Batch 800/3960: loss=2.2383, acc=0.5384
    Batch 900/3960: loss=2.2424, acc=0.5361
    Batch 1000/3960: loss=2.2436, acc=0.5354
    Batch 1100/3960: loss=2.2443, acc=0.5340
    Batch 1200/3960: loss=2.2413, acc=0.5361
    Batch 1300/3960: loss=2.2320, acc=0.5377
    Batch 1400/3960: loss=2.2262, acc=0.5402
    Batch 1500/3960: loss=2.2189, acc=0.5445
    Batch 1600/3960: loss=2.2146, acc=0.5458
    Batch 1700/3960: loss=2.2155, acc=0.5467
    Batch 1800/3960: loss=2.2172, acc=0.5453
    Batch 1900/3960: loss=2.2122, acc=0.5471
    Batch 2000/3960: loss=2.2110, acc=0.5456
    Batch 2100/3960: loss=2.2108, acc=0.5451
    Batch 2200/3960: loss=2.2089, acc=0.5476
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8612


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2602
>>> Validation completed <<<
Epoch time: 23.81 mins

>>> Starting Epoch 115/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 115...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 115/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1461, acc=0.5545
    Batch 200/3960: loss=2.1756, acc=0.5389
    Batch 300/3960: loss=2.2212, acc=0.5417
    Batch 400/3960: loss=2.1932, acc=0.5513
    Batch 500/3960: loss=2.1696, acc=0.5529
    Batch 600/3960: loss=2.1935, acc=0.5445
    Batch 700/3960: loss=2.1967, acc=0.5466
    Batch 800/3960: loss=2.2004, acc=0.5476
    Batch 900/3960: loss=2.1931, acc=0.5476
    Batch 1000/3960: loss=2.2064, acc=0.5478
    Batch 1100/3960: loss=2.2104, acc=0.5448
    Batch 1200/3960: loss=2.2147, acc=0.5452
    Batch 1300/3960: loss=2.2095, acc=0.5469
    Batch 1400/3960: loss=2.2039, acc=0.5463
    Batch 1500/3960: loss=2.2017, acc=0.5466
    Batch 1600/3960: loss=2.2019, acc=0.5470
    Batch 1700/3960: loss=2.2005, acc=0.5475
    Batch 1800/3960: loss=2.2063, acc=0.5486
    Batch 1900/3960: loss=2.2009, acc=0.5492
    Batch 2000/3960: loss=2.2014, acc=0.5494
    Batch 2100/3960: loss=2.2018, acc=0.5476
    Batch 2200/3960: loss=2.2049, acc=0.5472
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8580


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.2812
>>> Validation completed <<<
Epoch time: 330.09 mins
Saved checkpoint

>>> Starting Epoch 116/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 116...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 116/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]

    Batch 100/3960: loss=2.1790, acc=0.5866
    Batch 200/3960: loss=2.2059, acc=0.5599
    Batch 300/3960: loss=2.2307, acc=0.5477
    Batch 400/3960: loss=2.2252, acc=0.5531
    Batch 500/3960: loss=2.2522, acc=0.5423
    Batch 600/3960: loss=2.2280, acc=0.5374
    Batch 700/3960: loss=2.2287, acc=0.5431
    Batch 800/3960: loss=2.2112, acc=0.5478
    Batch 900/3960: loss=2.2202, acc=0.5417
    Batch 1000/3960: loss=2.2208, acc=0.5410
    Batch 1100/3960: loss=2.2170, acc=0.5396
    Batch 1200/3960: loss=2.2224, acc=0.5364
    Batch 1300/3960: loss=2.2279, acc=0.5356
    Batch 1400/3960: loss=2.2374, acc=0.5331
    Batch 1500/3960: loss=2.2331, acc=0.5344
    Batch 1600/3960: loss=2.2354, acc=0.5349
    Batch 1700/3960: loss=2.2346, acc=0.5340
    Batch 1800/3960: loss=2.2344, acc=0.5347
    Batch 1900/3960: loss=2.2286, acc=0.5368
    Batch 2000/3960: loss=2.2276, acc=0.5351
    Batch 2100/3960: loss=2.2274, acc=0.5337
    Batch 2200/3960: loss=2.2281, acc=0.5329
    Batch 2300/3960

Validation (raw):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (raw): 0.8582


Validation (ema):   0%|          | 0/157 [00:00<?, ?it/s]

Validation Acc (ema): 0.3038
>>> Validation completed <<<
Epoch time: 51.98 mins

>>> Starting Epoch 117/120 <<<
  [DEBUG] Entered train_one_epoch function
  [DEBUG] Set model to train mode
  [DEBUG] Imported tqdm
  Training epoch 117...
  Loading first batch (this may take 10-30 seconds)...
  [DEBUG] About to create iterator from train_loader...
  [DEBUG] Calling iter(train_loader)...
  [DEBUG] Iterator created successfully!
  [DEBUG] Calling next(train_iter)...
  ✓ First batch loaded! Shape: torch.Size([32, 3, 224, 224]), Labels: torch.Size([32])
  Batch size actual: 32, Expected: 32


Epoch 117/120:   0%|                                                       | 0/3960 [00:00<?, ?it/s]