In [2]:
import sys 
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
from collections import deque
import random
import time
import wandb
import logging
from tqdm.notebook import tqdm # Use notebook version for Kaggle UI
import argparse # Keep argparse for structure, but values will be fixed/from wandb
from wandb.sdk.wandb_settings import Settings # For timeout setting

In [3]:
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("WANDB_API_KEY")
    wandb.login(key=wandb_api_key)
    print("W&B login successful using Kaggle Secret.")
except ImportError:
    print("kaggle_secrets not found. Ensure it's enabled or use interactive/env var login.")
    wandb.login() # Will use env var WANDB_API_KEY if set, otherwise prompt
except Exception as e:
     print(f"W&B login using Kaggle Secret failed: {e}. Trying other methods.")
     wandb.login() 

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mteja_sai[0m ([33mteja_sai-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


W&B login successful using Kaggle Secret.


In [4]:
SWEEP_ID = "teja_sai-indian-institute-of-technology-madras/CNN_FROM_SCRATCH_SWEEP/6hw3q9fq"

In [5]:
WANDB_PROJECT_NAME = "CNN_FROM_SCRATCH_SWEEP"
WANDB_ENTITY = "teja_sai-indian-institute-of-technology-madras"

In [6]:
DATA_DIR = "/kaggle/input/inaturalist-12k/inaturalist_12K" # Adjust if your dataset path is different
SEED = 42
IMG_SIZE = 224
NUM_WORKERS = 2 # Kaggle typically has 2 CPU cores available
VAL_SPLIT = 0.2
AGENT_RUN_COUNT = 20

In [7]:
# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stdout)

In [8]:
def set_seed(seed=42):
    """Sets seed for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    logging.debug(f"Seed set to {seed}")

In [9]:
def str2bool(v):
    """ Parses boolean command-line style arguments. """
    if isinstance(v, bool): return v
    if isinstance(v, str):
        if v.lower() in ('yes', 'true', 't', 'y', '1'): return True
        elif v.lower() in ('no', 'false', 'f', 'n', '0'): return False
    raise argparse.ArgumentTypeError(f'Boolean value expected, got {v} of type {type(v)}')

In [10]:
class INaturalistDataset(Dataset):
    """ Loads iNaturalist subset images. """
    def __init__(self, root_dir, transform=None):
        if not os.path.isdir(root_dir): raise FileNotFoundError(f"Root directory not found: {root_dir}")
        self.root_dir = root_dir
        self.transform = transform
        try:
            self.classes = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
            if not self.classes: raise ValueError(f"No class subdirectories found in {root_dir}")
        except OSError as e: raise OSError(f"Error reading directory {root_dir}: {e}")
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        self.idx_to_class = {i: cls for cls, i in self.class_to_idx.items()}
        self.images, self.labels = [], []
        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            try:
                for img_name in os.listdir(class_dir):
                    img_path = os.path.join(class_dir, img_name)
                    if os.path.isfile(img_path) and img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')): # Broader check
                        self.images.append(img_path); self.labels.append(self.class_to_idx[class_name])
            except OSError as e: logging.warning(f"Could not read directory {class_dir}: {e}")
        if not self.images: raise RuntimeError(f"No valid image files found under {root_dir}")
        logging.info(f"Found {len(self.classes)} classes, loaded {len(self.images)} images from {os.path.basename(root_dir)}.")

    def __len__(self): return len(self.images)
    def __getitem__(self, idx):
        img_path = self.images[idx]
        try:
            with Image.open(img_path) as img: image = img.convert("RGB")
        except Exception as e:
            logging.error(f"Error processing image {img_path}: {e}")
            # Return placeholder tensors on error to avoid crashing DataLoader worker
            return torch.zeros((3, IMG_SIZE, IMG_SIZE), dtype=torch.float32), -1 # Invalid label
        label = self.labels[idx]
        if self.transform: image = self.transform(image)
        return image, label

In [11]:
def get_data_loaders(data_dir, batch_size=32, val_split=0.2, augment=True, num_workers=2, img_size=224, seed=42):
    """ Creates DataLoaders without sklearn. """
    logging.info(f"Setting up data loaders: batch_size={batch_size}, augment={augment}, val_split={val_split}")
    random.seed(seed); np.random.seed(seed) # Seed for splitting consistency
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    target_size = (img_size, img_size)
    if augment:
        train_transform = transforms.Compose([transforms.Resize(target_size), transforms.RandomHorizontalFlip(),
                                             transforms.RandomRotation(15), transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0.1),
                                             transforms.ToTensor(), normalize])
    else:
        train_transform = transforms.Compose([transforms.Resize(target_size), transforms.ToTensor(), normalize])
    val_test_transform = transforms.Compose([transforms.Resize(target_size), transforms.ToTensor(), normalize])

    train_dir, test_dir = os.path.join(data_dir, 'train'), os.path.join(data_dir, 'val')
    if not os.path.isdir(train_dir): raise FileNotFoundError(f"Training directory not found: {train_dir}")
    if not os.path.isdir(test_dir): raise FileNotFoundError(f"Test directory not found: {test_dir}")

    try:
        full_dataset = INaturalistDataset(root_dir=train_dir, transform=train_transform)
        test_dataset = INaturalistDataset(root_dir=test_dir, transform=val_test_transform)
    except Exception as e:
        logging.error(f"Error loading dataset from {data_dir}: {e}", exc_info=True)
        raise e

    targets = np.array(full_dataset.labels); dataset_size = len(targets); train_indices = []; val_indices = []; val_loader = None
    num_classes = len(full_dataset.classes)

    if 0 < val_split < 1 and dataset_size >= 2 and num_classes > 0:
        indices_by_class = {lbl: [] for lbl in range(num_classes)}
        for idx, label in enumerate(targets):
             if label >= 0: indices_by_class[label].append(idx) # Ignore potential -1 from errors

        for label, indices in indices_by_class.items():
            n_cls = len(indices);
            if n_cls == 0: continue
            np.random.shuffle(indices); n_val = int(np.floor(val_split * n_cls))
            if n_cls > 1 and n_val == n_cls: n_val = n_cls - 1
            elif n_cls <= 1: n_val = 0
            val_indices.extend(indices[:n_val]); train_indices.extend(indices[n_val:])

        if not train_indices or not val_indices:
             logging.warning("Stratified split resulted in empty train or validation set. Using random split.")
             # Fallback to simple random split if stratification fails badly
             all_indices = list(range(dataset_size))
             random.shuffle(all_indices)
             split_point = int(dataset_size * (1 - val_split))
             train_indices = all_indices[:split_point]
             val_indices = all_indices[split_point:]

        logging.info(f"Split: {len(train_indices)} train, {len(val_indices)} validation samples.")
        random.shuffle(train_indices) # Shuffle combined training indices

        # Create validation subset with correct transform
        original_transform = full_dataset.transform; full_dataset.transform = val_test_transform
        val_subset = Subset(full_dataset, val_indices); full_dataset.transform = original_transform
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=torch.cuda.is_available())
    else:
        logging.warning("Validation split skipped (val_split not in (0,1) or dataset too small).")
        train_indices = list(range(dataset_size))

    train_subset = Subset(full_dataset, train_indices)
    # Use persistent_workers=True if num_workers > 0 and PyTorch version supports it
    use_persistent_workers = num_workers > 0 and hasattr(DataLoader, 'persistent_workers')
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
                              pin_memory=torch.cuda.is_available(), drop_last=True,
                              persistent_workers=use_persistent_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
                             pin_memory=torch.cuda.is_available(),
                             persistent_workers=use_persistent_workers)

    logging.info("Data loaders created.")
    return train_loader, val_loader, test_loader, full_dataset.classes

In [12]:
_ACTIVATIONS = {'relu': nn.ReLU, 'gelu': nn.GELU, 'silu': nn.SiLU, 'mish': nn.Mish}
class CustomCNN(nn.Module):
    """ Customizable CNN for Part A. """
    def __init__(self, input_channels=3, num_classes=10, filter_sizes=None, num_filters=None, filter_organization="same", activation_name='relu', dense_neurons=128, dropout_rate=0.3, batch_norm=True, img_size=224):
        super(CustomCNN, self).__init__()
        if filter_sizes is None: filter_sizes = [3] * 5
        elif len(filter_sizes) != 5: raise ValueError("filter_sizes must be a list of length 5")
        if num_filters is None: num_filters_list = [32] * 5
        elif isinstance(num_filters, int):
             base_filters = num_filters
             if filter_organization == "double": num_filters_list = [max(1, base_filters * (2**i)) for i in range(5)] # Ensure min 1 filter
             elif filter_organization == "half": num_filters_list = [max(1, base_filters // (2**i)) for i in range(5)] # Ensure min 1 filter
             else: num_filters_list = [max(1, base_filters)] * 5 # Ensure min 1 filter
        elif isinstance(num_filters, list) and len(num_filters) == 5: num_filters_list = [max(1, f) for f in num_filters] # Ensure min 1 filter
        else: raise ValueError("num_filters must be an int or a list of length 5")

        if activation_name not in _ACTIVATIONS: raise ValueError(f"Unsupported activation: {activation_name}")
        activation = _ACTIVATIONS[activation_name]

        self.layers = nn.ModuleList(); in_channels = input_channels; current_dim = img_size
        logging.debug(f"Building CNN: filters={num_filters_list}, kernel_sizes={filter_sizes}")
        for i in range(5):
            k_size = filter_sizes[i]; out_channels = num_filters_list[i]; padding = k_size // 2
            conv = nn.Conv2d(in_channels, out_channels, kernel_size=k_size, padding=padding, bias=not batch_norm)
            self.layers.append(conv)
            if batch_norm: self.layers.append(nn.BatchNorm2d(out_channels))
            self.layers.append(activation())
            self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2)); current_dim //= 2
            if dropout_rate > 0: self.layers.append(nn.Dropout(dropout_rate))
            in_channels = out_channels
            logging.debug(f"Layer {i+1}: Conv({conv.in_channels}->{conv.out_channels}, k={k_size}), Dim after pool: {current_dim}")

        self.flattened_size = num_filters_list[-1] * current_dim * current_dim
        if self.flattened_size <= 0: raise ValueError(f"Flattened size is non-positive ({self.flattened_size}) for img_size {img_size}.")
        logging.debug(f"Flattened size: {self.flattened_size}")

        self.fc1 = nn.Linear(self.flattened_size, dense_neurons)
        self.dropout_fc = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(dense_neurons, num_classes)
        self.config = {'num_filters': num_filters_list, 'filter_sizes': filter_sizes, 'activation_name': activation_name, 'dense_neurons': dense_neurons, 'dropout_rate': dropout_rate, 'batch_norm': batch_norm, 'img_size': img_size}
        logging.debug(f"Final layers: FC({self.flattened_size}->{dense_neurons}), FC({dense_neurons}->{num_classes})")

    def forward(self, x):
        for layer in self.layers: x = layer(x)
        x = x.view(x.size(0), -1); x = F.relu(self.fc1(x)); x = self.dropout_fc(x); x = self.fc2(x)
        return x
    def count_parameters(self): return sum(p.numel() for p in self.parameters() if p.requires_grad)
    def calculate_computations(self): # Simplified FLOPs estimate
        total_macs = 0; h = w = self.config['img_size']; cin = 3
        nf = self.config['num_filters']; fs = self.config['filter_sizes']
        current_h, current_w = h, w
        for i in range(5):
            k, cout = fs[i], nf[i];
            macs_conv = (k * k * cin * cout * current_h * current_w)
            total_macs += macs_conv
            if self.config['batch_norm']: total_macs += 2 * cout * current_h * current_w
            total_macs += cout * current_h * current_w # Activation approx
            current_h //= 2; current_w //= 2
            cin = cout
        flat_size = nf[-1] * current_h * current_w
        dense_neurons = self.config['dense_neurons']; num_classes = self.fc2.out_features
        total_macs += flat_size * dense_neurons
        total_macs += dense_neurons # ReLU approx
        total_macs += dense_neurons * num_classes
        return 2 * total_macs


In [13]:
def train_sweep_trial():
    """ Trains one trial based on wandb.config, skipping undesired configs. """
    run = None
    config = None
    try:
        run = wandb.init(settings=wandb.Settings(init_timeout=300))
        config = wandb.config # Access hyperparameters for this run

        # --- Set Run Name (using more details) ---
        try:
            run_name = (f"f{config.num_filters}_k{config.filter_size}_d{config.dense_neurons}_bs{config.batch_size}"
                        f"_{config.activation[:3]}_{config.filter_organization[:3]}"
                        f"_lr{config.learning_rate:.1E}_do{config.dropout_rate:.1f}"
                        f"_bn{str(config.batch_norm)[0]}_aug{str(config.data_augmentation)[0]}")
            run_name = run_name.replace('.','p').replace('-','').replace('E','e') # Make W&B name cleaner
            wandb.run.name = run_name[:128] # Limit name length
        except Exception as name_e:
             logging.warning(f"Could not set run name: {name_e}")

        logging.info(f"--- Starting Trial: {wandb.run.name} (ID: {run.id}) ---")
        logging.info(f"Config: {dict(config)}") # Log the config dictionary

        # --- Setup ---
        set_seed(SEED)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if torch.cuda.is_available(): torch.cuda.empty_cache(); logging.info("Cleared CUDA cache.")
        logging.info(f"Using device: {device}")

        # --- Data ---
        train_loader, val_loader, _, classes = get_data_loaders(
            data_dir=DATA_DIR, batch_size=config.batch_size, augment=config.data_augmentation,
            num_workers=NUM_WORKERS, img_size=IMG_SIZE, val_split=VAL_SPLIT, seed=SEED
        )
        num_classes = len(classes)
        if val_loader is None: logging.warning("Validation loader is None. Skipping validation.")


        # --- Model, Loss, Optimizer, Scheduler ---
        model = CustomCNN(
            num_classes=num_classes, num_filters=config.num_filters,
            filter_organization=config.filter_organization, filter_sizes=[config.filter_size] * 5,
            activation_name=config.activation, dense_neurons=config.dense_neurons,
            dropout_rate=config.dropout_rate, batch_norm=config.batch_norm, img_size=IMG_SIZE
        ).to(device)

        wandb.watch(model, log="all", log_freq=100)
        total_params = model.count_parameters()
        total_flops = model.calculate_computations()
        # Log initial parameters and FLOPs - use commit=False to batch with first epoch log
        wandb.log({"total_parameters": total_params, "total_flops_estimate": total_flops}, commit=False)
        logging.info(f"Model Params: {total_params:,}, Est. FLOPs: {total_flops:,.0f}")

        criterion = nn.CrossEntropyLoss()
        # Safely access weight_decay, defaulting to 0 if not in config
        weight_decay = getattr(config, 'weight_decay', 0)
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.2, verbose=False)

        # --- Training Loop ---
        best_val_acc = 0.0; best_epoch = 0
        logging.info(f"Starting training for {config.epochs} epochs...")

        for epoch in range(5):
            epoch_start_time = time.time()
            model.train()
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if torch.cuda.is_available(): torch.cuda.empty_cache(); 
            running_loss = 0.0; correct_train = 0; total_train = 0
            train_pbar = tqdm(train_loader, desc=f"Ep {epoch+1}/{config.epochs} Tr", leave=False, file=sys.stdout) # Use sys.stdout
            for i, (inputs, labels) in enumerate(train_pbar):
                # Skip batch if label is invalid (e.g., from image loading error)
                if torch.any(labels < 0):
                    logging.warning(f"Skipping batch {i} due to invalid labels.")
                    continue

                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad(set_to_none=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total_train += labels.size(0)
                correct_train += predicted.eq(labels).sum().item()
                if i % 20 == 0: train_pbar.set_postfix({'L': f'{running_loss / (i+1):.3f}', 'Acc': f'{100. * correct_train / total_train:.1f}%'})
            train_pbar.close()
            train_loss = running_loss / len(train_loader) if len(train_loader) > 0 else 0
            train_accuracy = 100. * correct_train / total_train if total_train > 0 else 0

            # --- Validation ---
            val_loss = float('nan'); val_accuracy = float('nan')
            if val_loader:
                model.eval(); val_correct = 0; val_total = 0; running_val_loss = 0.0
                val_pbar = tqdm(val_loader, desc=f"Ep {epoch+1} Val", leave=False, file=sys.stdout) # Use sys.stdout
                with torch.no_grad():
                    for i_val, (inputs, labels) in enumerate(val_pbar):
                        if torch.any(labels < 0): continue # Skip invalid validation samples
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs); loss = criterion(outputs, labels)
                        running_val_loss += loss.item(); _, predicted = outputs.max(1)
                        val_total += labels.size(0); val_correct += predicted.eq(labels).sum().item()
                        if i_val % 10 == 0: val_pbar.set_postfix({'L': f'{running_val_loss / (i_val+1):.3f}', 'Acc': f'{100. * val_correct / val_total:.1f}%'})
                val_pbar.close()
                if len(val_loader) > 0 and val_total > 0:
                    val_loss = running_val_loss / len(val_loader)
                    val_accuracy = 100. * val_correct / val_total
                    scheduler.step(val_loss) # Step scheduler based on validation loss

            # --- Logging ---
            epoch_duration = time.time() - epoch_start_time
            log_dict = {'epoch': epoch + 1, 'train_loss': train_loss, 'train_accuracy': train_accuracy, 'lr': optimizer.param_groups[0]['lr'], 'epoch_sec': epoch_duration}
            if not np.isnan(val_loss): log_dict['val_loss'] = val_loss
            if not np.isnan(val_accuracy): log_dict['val_accuracy'] = val_accuracy
            wandb.log(log_dict) # Log all metrics for this epoch

            # --- Best Model Tracking ---
            # Ensure val_accuracy is a valid number before comparison
            if not np.isnan(val_accuracy) and val_accuracy > best_val_acc:
                best_val_acc = val_accuracy; best_epoch = epoch + 1
                logging.info(f"*** New best validation accuracy: {best_val_acc:.2f}% at epoch {best_epoch} ***")
                # Saving model checkpoint is optional if only tracking metrics in W&B for sweep
                # If needed: torch.save({'model_state_dict': model.state_dict(), ... }, 'best_model_temp.pth')

        # --- End of Trial Logging ---
        # Log summary metrics for the sweep to use
        wandb.run.summary["best_val_accuracy"] = best_val_acc if best_epoch > 0 else (val_accuracy if not np.isnan(val_accuracy) else 0)
        wandb.run.summary["best_epoch"] = best_epoch if best_epoch > 0 else config.epochs
        wandb.run.summary["final_train_accuracy"] = train_accuracy
        wandb.run.summary["final_val_loss"] = val_loss # Log last validation loss
        logging.info(f"--- Trial {wandb.run.name} Finished. Best Val Acc: {best_val_acc:.2f}% at Ep {best_epoch} ---")

    except Exception as e:
         # Log error to W&B if possible
         try:
             if wandb.run:
                 wandb.log({"error": str(e)})
         except: pass # Ignore logging errors if W&B failed
         logging.error(f"Error during training trial {run.id if run else 'unknown'}: {e}", exc_info=True)
         if run: wandb.finish(exit_code=1) # Mark run as failed

    finally:
        # Ensure W&B run finishes cleanly
        if run and wandb.run is not None and wandb.run.id == run.id:
             if hasattr(wandb.run, 'finished') and not wandb.run.finished:
                 try:
                     logging.debug(f"Finishing W&B run {run.id} in finally block.")
                     wandb.finish()
                 except Exception as final_finish_e:
                     logging.error(f"Error during wandb.finish in finally block: {final_finish_e}")


In [14]:
sweep_config_ref = {
    'method': 'bayes',
    'metric': {
        'name': 'val_accuracy', # Make sure this matches the metric logged and sweep goal
        'goal': 'maximize'
    },
    'parameters': {
        'num_filters': {'values': [32, 64, 128]},
        'filter_organization': {'values': ['same', 'double', 'half']},
        'filter_size': {'values': [3, 5]},
        'activation': {'values': ['relu', 'gelu', 'silu', 'mish']},
        'dense_neurons': {'values': [128, 256, 512]},
        'dropout_rate': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5},
        'batch_norm': {'values': [True, False]},
        'data_augmentation': {'values': [True, False]},
        'learning_rate': {'distribution': 'log_uniform_values', 'min': 1e-4, 'max': 1e-2},
        'batch_size': {'values': [32, 64, 128]}, 
        'weight_decay': {'values': [0, 0.0005]},  
        'epochs': {'value': 10},
        'optimizer': {'values': ['adam']}, 
        
    }
}
print("Sweep Configuration Reference (Agent uses config from W&B):")
# print(sweep_config_ref) # Can optionally print this


# =============================================================================
# Start the W&B Agent
# =============================================================================
print(f"\n--- Starting W&B Agent for Sweep: {SWEEP_ID} ---")
print(f"--- Agent will run max {AGENT_RUN_COUNT if AGENT_RUN_COUNT else 'unlimited'} trials ---")
# print(f"--- Constraints: filter_org!=double, num_filters<={MAX_NUM_FILTERS}, batch_size<={MAX_BATCH_SIZE}, filter_size<={MAX_FILTER_SIZE} ---") # Constraints removed message

try:
    # Run the agent, calling train_sweep_trial for each run
    wandb.agent(SWEEP_ID, function=train_sweep_trial, count=AGENT_RUN_COUNT)
except Exception as e:
    logging.error(f"W&B Agent execution stopped due to error: {e}", exc_info=True)

print("--- W&B Agent Finished ---")

Sweep Configuration Reference (Agent uses config from W&B):

--- Starting W&B Agent for Sweep: teja_sai-indian-institute-of-technology-madras/CNN_FROM_SCRATCH_SWEEP/6hw3q9fq ---
--- Agent will run max 20 trials ---


[34m[1mwandb[0m: Agent Starting Run: c8g7ml9m with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3786039267066405
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.002943028996875744
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.




Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▂▂▁▁
lr,████▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,█▆▅▁▃
train_loss,█▁▁▁▁
val_accuracy,█▂▂▁▁
val_loss,▁█▁▃▃

0,1
best_epoch,1.0
best_val_accuracy,10.45523
epoch,5.0
epoch_sec,93.66966
final_train_accuracy,9.4125
final_val_loss,2.30265
lr,0.00059
total_flops_estimate,5280377344.0
total_parameters,2202378.0
train_accuracy,9.4125


[34m[1mwandb[0m: Agent Starting Run: kong2ueo with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.459033076113484
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.00011385007301074842
[34m[1mwandb[0m: 	num_filters: 128




Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▁▃█▅▁
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▃▄▇█
train_loss,█▆▄▂▁
val_accuracy,▁▃▄▅█
val_loss,█▆▆▅▁

0,1
best_epoch,5.0
best_val_accuracy,14.8074
epoch,5.0
epoch_sec,96.12524
final_train_accuracy,20.4125
final_val_loss,2.24971
lr,0.00011
total_flops_estimate,5312988928.0
total_parameters,1398794.0
train_accuracy,20.4125


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sxni4f4p with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.2321188195914381
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.005338357276042174
[34m[1mwandb[0m: 	num_filters: 64


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▂▁▅█▇
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▁▃▅█
train_loss,█▅▅▃▁
val_accuracy,▂▁▄██
val_loss,▇█▆▂▁

0,1
best_epoch,5.0
best_val_accuracy,18.8094
epoch,5.0
epoch_sec,63.34766
final_train_accuracy,17.3625
final_val_loss,2.21474
lr,0.00534
total_flops_estimate,674043680.0
total_parameters,52838.0
train_accuracy,17.3625


[34m[1mwandb[0m: Agent Starting Run: 4s15bjgo with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3627115777347426
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.00044657156601584015
[34m[1mwandb[0m: 	num_filters: 32


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▃▆▁▃
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆██
train_loss,█▅▄▂▁
val_accuracy,▁▃▄▅█
val_loss,█▇▄▄▁

0,1
best_epoch,5.0
best_val_accuracy,20.36018
epoch,5.0
epoch_sec,85.88101
final_train_accuracy,19.375
final_val_loss,2.20554
lr,0.00045
total_flops_estimate,213712592.0
total_parameters,21008.0
train_accuracy,19.375


[34m[1mwandb[0m: Agent Starting Run: g7nkutzm with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.24363585459173676
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.0005873170549806452
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▇▃▁█▄
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅▄█▇
val_loss,█▄▃▁▁

0,1
best_epoch,4.0
best_val_accuracy,35.21761
epoch,5.0
epoch_sec,69.45614
final_train_accuracy,36.1625
final_val_loss,1.87166
lr,0.00059
total_flops_estimate,5317814272.0
total_parameters,3811466.0
train_accuracy,36.1625


[34m[1mwandb[0m: Agent Starting Run: qlq60t3d with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_rate: 0.24693583756603635
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.007519264764227329
[34m[1mwandb[0m: 	num_filters: 64


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,██▆▄▁
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▅▂▁▄█
train_loss,█▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▄█▅▁▅

0,1
best_epoch,1.0
best_val_accuracy,10.005
epoch,5.0
epoch_sec,88.96961
final_train_accuracy,10.175
final_val_loss,2.30312
lr,0.00752
total_flops_estimate,3903018240.0
total_parameters,817546.0
train_accuracy,10.175


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lpzdjhrp with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.25342779850433206
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0004805007599143429
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▃▁█▂
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▆▆▅█
val_loss,█▄▃▃▁

0,1
best_epoch,5.0
best_val_accuracy,31.06553
epoch,5.0
epoch_sec,83.98024
final_train_accuracy,33.1625
final_val_loss,1.94615
lr,0.00048
total_flops_estimate,14669015040.0
total_parameters,4866186.0
train_accuracy,33.1625


[34m[1mwandb[0m: Agent Starting Run: aokjyvf7 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.3746709298462543
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.002078962956563375
[34m[1mwandb[0m: 	num_filters: 32


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▁▆▄█▅
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▆▇██
train_loss,█▁▁▁▁
val_accuracy,▁▄▅▇█
val_loss,█▃▃▁▃

0,1
best_epoch,5.0
best_val_accuracy,21.91096
epoch,5.0
epoch_sec,64.86855
final_train_accuracy,19.2
final_val_loss,2.19394
lr,0.00208
total_flops_estimate,5423234048.0
total_parameters,17207082.0
train_accuracy,19.2


[34m[1mwandb[0m: Agent Starting Run: kla1xpo6 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3775180913046584
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.009794489656255392
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▆▁▄▁
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▅▅█▁▄
train_loss,█▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▄▇█▁▁

0,1
best_epoch,1.0
best_val_accuracy,10.005
epoch,5.0
epoch_sec,223.96211
final_train_accuracy,9.4375
final_val_loss,2.30246
lr,0.00979
total_flops_estimate,83248010752.0
total_parameters,95338506.0
train_accuracy,9.4375


[34m[1mwandb[0m: Agent Starting Run: 7onqaf76 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.281869434624041
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001666486482130785
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▄▁▄▃
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▅▆█
train_loss,█▆▄▃▁
val_accuracy,▁▆█▇█
val_loss,█▃▁▁▁

0,1
best_epoch,5.0
best_val_accuracy,33.86693
epoch,5.0
epoch_sec,77.65186
final_train_accuracy,40.825
final_val_loss,1.87231
lr,0.00017
total_flops_estimate,14634795008.0
total_parameters,4865546.0
train_accuracy,40.825


[34m[1mwandb[0m: Agent Starting Run: xwivk7wj with config:
[34m[1mwandb[0m: 	activation: gelu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2277129040733822
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00028528895319583284
[34m[1mwandb[0m: 	num_filters: 32


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▁▅█▅▂
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▁▁
val_accuracy,▁▃▇▇█
val_loss,█▆▃▁▁

0,1
best_epoch,5.0
best_val_accuracy,25.21261
epoch,5.0
epoch_sec,84.75639
final_train_accuracy,23.5625
final_val_loss,2.13135
lr,0.00029
total_flops_estimate,594495728.0
total_parameters,75342.0
train_accuracy,23.5625


[34m[1mwandb[0m: Agent Starting Run: 8khh7ml6 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.3029432579322354
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00024534036728489
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▅▂▅▁
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▅▇█
train_loss,█▆▄▃▁
val_accuracy,▁▅▇██
val_loss,█▄▂▂▁

0,1
best_epoch,4.0
best_val_accuracy,34.51726
epoch,5.0
epoch_sec,78.13398
final_train_accuracy,37.575
final_val_loss,1.86198
lr,0.00025
total_flops_estimate,14631578112.0
total_parameters,3257098.0
train_accuracy,37.575


[34m[1mwandb[0m: Agent Starting Run: nvtqz8qo with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.23446883001882543
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.000237386407631334
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,█▃▂▁▁
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▄▂▁
val_accuracy,▁▅▇██
val_loss,█▄▂▁▁

0,1
best_epoch,5.0
best_val_accuracy,34.91746
epoch,5.0
epoch_sec,85.11842
final_train_accuracy,36.4125
final_val_loss,1.86037
lr,0.00024
total_flops_estimate,14669015040.0
total_parameters,4866186.0
train_accuracy,36.4125


[34m[1mwandb[0m: Agent Starting Run: atd4u033 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.1969101302276676
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001278432326004258
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▁▇▃█▄
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▃▂▂▁
val_accuracy,▁▅███
val_loss,█▄▂▁▂

0,1
best_epoch,3.0
best_val_accuracy,27.81391
epoch,5.0
epoch_sec,578.40257
final_train_accuracy,29.7625
final_val_loss,2.04043
lr,0.00013
total_flops_estimate,83349171200.0
total_parameters,121035402.0
train_accuracy,29.7625


[34m[1mwandb[0m: Agent Starting Run: srmqd9kd with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_rate: 0.308419731097464
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00010707616389749902
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▂█▁▆▂
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▄▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▄▄▁

0,1
best_epoch,5.0
best_val_accuracy,29.11456
epoch,5.0
epoch_sec,467.48486
final_train_accuracy,34.0875
final_val_loss,1.99891
lr,0.00011
total_flops_estimate,83248010752.0
total_parameters,95338506.0
train_accuracy,34.0875


[34m[1mwandb[0m: Agent Starting Run: yc9h9c9r with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.23996166176976988
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002049966515125114
[34m[1mwandb[0m: 	num_filters: 32


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▁▄▂▇█
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▇▆█
val_loss,█▆▃▃▁

0,1
best_epoch,5.0
best_val_accuracy,26.56328
epoch,5.0
epoch_sec,90.70485
final_train_accuracy,26.6625
final_val_loss,2.02556
lr,0.0002
total_flops_estimate,5423234048.0
total_parameters,17207082.0
train_accuracy,26.6625


[34m[1mwandb[0m: Agent Starting Run: 6hrl27h3 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2499696202275422
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00016869291859997356
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▄▃▁█▅
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▅▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▅███
val_loss,█▄▂▁▁

0,1
best_epoch,3.0
best_val_accuracy,31.31566
epoch,5.0
epoch_sec,100.05613
final_train_accuracy,32.2
final_val_loss,1.94009
lr,0.00017
total_flops_estimate,14669015040.0
total_parameters,4866186.0
train_accuracy,32.2


[34m[1mwandb[0m: Agent Starting Run: t825m0lq with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.27032220784301486
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00038513633228152777
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/63 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▃▃▁█▅
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▅▇█
train_loss,█▆▄▂▁
val_accuracy,▁▆▆▆█
val_loss,█▄▂▃▁

0,1
best_epoch,5.0
best_val_accuracy,28.96448
epoch,5.0
epoch_sec,97.9342
final_train_accuracy,28.175
final_val_loss,2.0055
lr,0.00039
total_flops_estimate,14634795008.0
total_parameters,4865546.0
train_accuracy,28.175


[34m[1mwandb[0m: Agent Starting Run: 5z3vqakl with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.1917319118331544
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00010868857816714909
[34m[1mwandb[0m: 	num_filters: 128


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 3 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 4/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 4 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 5/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 5 Val:   0%|          | 0/32 [00:00<?, ?it/s]

0,1
epoch,▁▃▅▆█
epoch_sec,▃▂▁▃█
lr,▁▁▁▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁▄▆▇█
train_loss,█▅▃▂▁
val_accuracy,▁▃▄▇█
val_loss,█▅▅▃▁

0,1
best_epoch,5.0
best_val_accuracy,33.11656
epoch,5.0
epoch_sec,100.66447
final_train_accuracy,32.7
final_val_loss,1.90715
lr,0.00011
total_flops_estimate,14634795008.0
total_parameters,4865546.0
train_accuracy,32.7


[34m[1mwandb[0m: Agent Starting Run: zrtg2a76 with config:
[34m[1mwandb[0m: 	activation: mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2835777990204119
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.000755339080892768
[34m[1mwandb[0m: 	num_filters: 32


Ep 1/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 1 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 2/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

Ep 2 Val:   0%|          | 0/32 [00:00<?, ?it/s]

Ep 3/10 Tr:   0%|          | 0/125 [00:00<?, ?it/s]

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


--- W&B Agent Finished ---


0,1
epoch,▁█
epoch_sec,█▁
lr,▁▁
total_flops_estimate,▁
total_parameters,▁
train_accuracy,▁█
train_loss,█▁
val_accuracy,▁█
val_loss,█▁

0,1
epoch,2
epoch_sec,83.95169
error,DataLoader worker (p...
lr,0.00076
total_flops_estimate,587155920
total_parameters,75280
train_accuracy,19.15
train_loss,2.20263
val_accuracy,20.41021
val_loss,2.1879


In [1]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
from collections import deque
import random
import time
import wandb
import logging 
from tqdm.notebook import tqdm # Use notebook version for Kaggle UI
import argparse # Keep for str2bool if used by helpers
import copy # Needed to save best model state
from wandb.sdk.wandb_settings import Settings

In [2]:
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("WANDB_API_KEY")
    wandb.login(key=wandb_api_key)
    print("W&B login successful using Kaggle Secret.")
except ImportError:
    print("kaggle_secrets not found. Ensure it's enabled or use interactive/env var login.")
    wandb.login() # Will use env var WANDB_API_KEY if set, otherwise prompt
except Exception as e:
     print(f"W&B login using Kaggle Secret failed: {e}. Trying other methods.")
     wandb.login() # Fallback attempt

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mteja_sai[0m ([33mteja_sai-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


W&B login successful using Kaggle Secret.


In [3]:
DATA_DIR = "/kaggle/input/inaturalist-12k/inaturalist_12K"
SEED = 42
IMG_SIZE = 224
NUM_WORKERS = 2 # Kaggle typical limit
VAL_SPLIT = 0.2
WANDB_PROJECT_NAME = "CNN_FROM_SCRATCH_SWEEP" # Project for this specific run
WANDB_ENTITY = None # Optional: Let wandb infer or set "user_or_team_name"
OUTPUT_DIR = "/kaggle/working/output_best_run_q4" # Save outputs here in Kaggle
MODEL_SAVE_NAME = "best_cnn_q4_model.pth" # Filename for the saved model

# --- <<< EDIT THESE HYPERPARAMETERS FOR YOUR BEST RUN >>> ---
BEST_CONFIG = {
    'num_filters': 64,
    'filter_size': 3,
    'filter_organization': 'same',
    'activation': 'mish',
    'dense_neurons': 256,
    'dropout_rate': 0.35,
    'batch_norm': True,
    'data_augmentation': True,
    'learning_rate': 0.0005,
    'weight_decay': 0.0005,
    'batch_size': 32,
    'epochs': 15, # Number of epochs for THIS run
}
# --- End of Hyperparameters ---

# --- Grid/Visualization Settings ---
GRID_ROWS = 10
GRID_COLS = 3

# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stdout)

In [4]:
def set_seed(seed=42):
    """Sets seed for reproducibility."""
    random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed); torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True; torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    logging.debug(f"Seed set to {seed}")

def seed_worker(worker_id):
    """Seeds DataLoader workers."""
    worker_seed = (torch.initial_seed()) % 2**32
    np.random.seed(worker_seed); random.seed(worker_seed)
    logging.debug(f"Worker {worker_id} seeded with {worker_seed}")

g_dataloader_seed = torch.Generator()

In [5]:
class INaturalistDataset(Dataset):
    """ Loads iNaturalist subset images. """
    def __init__(self, root_dir, transform=None):
        if not os.path.isdir(root_dir): raise FileNotFoundError(f"Dir not found: {root_dir}")
        self.root_dir=root_dir; self.transform=transform
        try: self.classes=sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir,d))]); assert self.classes
        except: raise ValueError(f"No class subdirs found: {root_dir}")
        self.class_to_idx={c:i for i,c in enumerate(self.classes)}; self.idx_to_class={i:c for c,i in self.class_to_idx.items()}
        self.images, self.labels = [], []
        img_count = 0
        for cn in self.classes:
            cd=os.path.join(root_dir,cn)
            try:
                for imn in os.listdir(cd):
                    imp=os.path.join(cd,imn)
                    if os.path.isfile(imp) and imn.lower().endswith(('.png','.jpg','.jpeg','.gif','.bmp')): self.images.append(imp); self.labels.append(self.class_to_idx[cn]); img_count+=1
            except OSError as e: logging.warning(f"Read err {cd}:{e}")
        if not self.images: raise RuntimeError(f"No images: {root_dir}")
        logging.info(f"Dataset({os.path.basename(root_dir)}): {len(self.classes)} classes, {img_count} images.")
    def __len__(self): return len(self.images)
    def __getitem__(self, idx):
        imp=self.images[idx]; lbl=self.labels[idx]
        try:
            with Image.open(imp) as img: image=img.convert("RGB")
        except Exception as e: logging.error(f"Err IMG {imp}: {e}"); return torch.zeros((3,IMG_SIZE,IMG_SIZE),dtype=torch.float32),torch.tensor(-1,dtype=torch.long)
        if self.transform: image=self.transform(image)
        return image, torch.tensor(lbl, dtype=torch.long)

In [12]:
def get_data_loaders(data_dir, batch_size=32, val_split=0.2, augment=True, num_workers=2, img_size=224, seed=42, generator=None):
    """ Creates DataLoaders without sklearn, with deterministic worker seeding. """
    logging.info(f"DataLoaders: batch={batch_size}, augment={augment}, val_split={val_split}, workers={num_workers}")
    if generator is None: generator = torch.Generator().manual_seed(seed)
    normalize=transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]); target_size=(img_size,img_size)
    if augment: train_transform=transforms.Compose([transforms.Resize(target_size),transforms.RandomHorizontalFlip(),transforms.RandomRotation(15),transforms.ColorJitter(brightness=0.2,contrast=0.2,saturation=0.1,hue=0.1),transforms.ToTensor(),normalize])
    else: train_transform=transforms.Compose([transforms.Resize(target_size),transforms.ToTensor(),normalize])
    val_test_transform=transforms.Compose([transforms.Resize(target_size),transforms.ToTensor(),normalize])
    train_dir,test_dir=os.path.join(data_dir,'train'),os.path.join(data_dir,'val'); assert os.path.isdir(train_dir),f"No train dir: {train_dir}"; assert os.path.isdir(test_dir),f"No test dir: {test_dir}"
    try: full_dataset=INaturalistDataset(root_dir=train_dir,transform=train_transform); test_dataset=INaturalistDataset(root_dir=test_dir,transform=val_test_transform)
    except Exception as e: logging.error(f"Dataset load error: {e}"); raise e
    targets=np.array(full_dataset.labels); dataset_size=len(targets); train_indices=[]; val_indices=[]; val_loader=None; num_classes=len(full_dataset.classes); valid_indices=[idx for idx,l in enumerate(targets) if l>=0]
    if 0<val_split<1 and len(valid_indices)>=2 and num_classes>0:
        local_rng=np.random.RandomState(seed); indices_by_class={lbl:[] for lbl in range(num_classes)}
        for idx in valid_indices: indices_by_class[targets[idx]].append(idx)
        for label,indices in indices_by_class.items():
            n_cls=len(indices);
            if n_cls==0: continue
            local_rng.shuffle(indices); n_val=int(np.floor(val_split*n_cls))
            if n_cls>1 and n_val==n_cls: n_val=n_cls-1
            elif n_cls<=1 and val_split>0: n_val=0
            val_indices.extend(indices[:n_val]); train_indices.extend(indices[n_val:])
        if not train_indices or not val_indices:
            logging.warning("Strat split failed. Random split."); local_rng.shuffle(valid_indices); split_point=int(len(valid_indices)*(1-val_split)); train_indices=valid_indices[:split_point]; val_indices=valid_indices[split_point:]
        logging.info(f"Split (seed {seed}): {len(train_indices)} train, {len(val_indices)} val.")
        local_rng.shuffle(train_indices)
        original_transform=full_dataset.transform; full_dataset.transform=val_test_transform
        val_subset=Subset(full_dataset,val_indices); full_dataset.transform=original_transform
        val_loader=DataLoader(val_subset,batch_size=batch_size,shuffle=False,num_workers=num_workers,pin_memory=torch.cuda.is_available(),worker_init_fn=seed_worker if num_workers>0 else None,generator=generator if num_workers>0 else None)
    else: logging.warning("Val split skipped."); train_indices=valid_indices
    train_subset=Subset(full_dataset,train_indices)
    use_persistent_workers=num_workers>0 and sys.version_info>=(3,8); loader_kwargs={'persistent_workers':True,'prefetch_factor':2} if use_persistent_workers else {}
    train_loader=DataLoader(train_subset,batch_size=batch_size,shuffle=True,num_workers=num_workers,pin_memory=torch.cuda.is_available(),drop_last=True,worker_init_fn=seed_worker if num_workers>0 else None,generator=generator if num_workers>0 else None,**loader_kwargs)
    test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=num_workers,pin_memory=torch.cuda.is_available(),worker_init_fn=seed_worker if num_workers>0 else None,generator=generator if num_workers>0 else None,**loader_kwargs)
    return train_loader, val_loader, test_loader, full_dataset.classes

In [8]:
_ACTIVATIONS = {'relu': nn.ReLU, 'gelu': nn.GELU, 'silu': nn.SiLU, 'mish': nn.Mish}
class CustomCNN(nn.Module):
    # (Same implementation as previous versions)
    def __init__(self, input_channels=3, num_classes=10, filter_sizes=None, num_filters=None, filter_organization="same", activation_name='relu', dense_neurons=128, dropout_rate=0.3, batch_norm=True, img_size=224):
        super(CustomCNN, self).__init__(); # ... (rest of __init__ - same as before) ...
        if filter_sizes is None: filter_sizes=[3]*5
        elif len(filter_sizes)!=5: raise ValueError("filter_sizes must be list[5]")
        if num_filters is None: num_filters_list=[32]*5
        elif isinstance(num_filters,int): base=num_filters; org=filter_organization; nl=[max(1,base*(2**i) if org=="double" else base//(2**i) if org=="half" else base) for i in range(5)]; num_filters_list=nl
        elif isinstance(num_filters,list) and len(num_filters)==5: num_filters_list=[max(1,f) for f in num_filters]
        else: raise ValueError("num_filters must be int or list[5]")
        if activation_name not in _ACTIVATIONS: raise ValueError(f"Unsupported activation: {activation_name}")
        activation=_ACTIVATIONS[activation_name]; self.layers=nn.ModuleList(); cin=input_channels; cdim=img_size
        for i in range(5):
            ks,cout,p=filter_sizes[i],num_filters_list[i],filter_sizes[i]//2; conv=nn.Conv2d(cin,cout,ks,padding=p,bias=not batch_norm); self.layers.append(conv)
            if batch_norm: self.layers.append(nn.BatchNorm2d(cout))
            self.layers.append(activation()); self.layers.append(nn.MaxPool2d(2,2)); cdim//=2
            if dropout_rate>0: self.layers.append(nn.Dropout(dropout_rate))
            cin=cout
        self.flat_size=num_filters_list[-1]*cdim*cdim; assert self.flat_size>0, f"Flat size <=0 for img {img_size}"
        self.fc1=nn.Linear(self.flat_size,dense_neurons); self.do_fc=nn.Dropout(dropout_rate); self.fc2=nn.Linear(dense_neurons,num_classes)
        self.config={'nf':num_filters_list,'fs':filter_sizes,'act':activation_name,'dn':dense_neurons,'dr':dropout_rate,'bn':batch_norm,'isz':img_size,'nc':num_classes}
    def forward(self,x):
        for layer in self.layers: x=layer(x)
        x=x.view(x.size(0),-1); x=F.relu(self.fc1(x)); x=self.do_fc(x); x=self.fc2(x)
        return x
    def count_parameters(self): return sum(p.numel() for p in self.parameters() if p.requires_grad)
    def calculate_computations(self): # Simplified FLOPs estimate
        # (Same calculation logic as before)
        macs=0; h=w=self.config['isz']; cin=3; nf=self.config['nf']; fs=self.config['fs']; ch,cw=h,w; nc=self.config.get('nc', 10); dn=self.config.get('dn', 128)
        for i in range(5): k,cout=fs[i],nf[i]; macs+=k*k*cin*cout*ch*cw; macs+=2*cout*ch*cw if self.config['bn'] else 0; macs+=cout*ch*cw; ch//=2; cw//=2; cin=cout
        flat=nf[-1]*ch*cw; macs+=flat*dn; macs+=dn; macs+=dn*nc; return 2*macs

In [9]:
_MEAN = np.array([0.485, 0.456, 0.406])
_STD = np.array([0.229, 0.224, 0.225])
def denormalize(tensor):
    """Denormalizes a tensor image for display."""
    try:
        tensor=tensor.clone().cpu().numpy(); tensor=np.transpose(tensor,(1,2,0))
        tensor=_STD*tensor+_MEAN; tensor=np.clip(tensor,0,1)
        return tensor
    except Exception as e: logging.error(f"Dnorm err:{e}"); return np.zeros((IMG_SIZE,IMG_SIZE,3))

# =============================================================================
# Main Training & Evaluation Function
# =============================================================================
def train_and_evaluate(config, fixed_args):
    """ Trains, evaluates, generates grid using the provided config. """
    run = None
    best_model_state = None
    best_val_acc = 0.0
    best_epoch = 0
    final_test_acc = 0.0

    try:
        # --- W&B Init ---
        # Generate a descriptive run name
        run_name = (f"f{config['num_filters']}_k{config['filter_size']}_d{config['dense_neurons']}_bs{config['batch_size']}"
                    f"_{config['activation'][:3]}_{config['filter_organization'][:3]}"
                    f"_lr{config['learning_rate']:.1E}_do{config['dropout_rate']:.1f}"
                    f"_bn{str(config['batch_norm'])[0]}_aug{str(config['data_augmentation'])[0]}")
        run_name = run_name.replace('.','p').replace('-','').replace('E','e')

        run = wandb.init(
            project=fixed_args['wandb_project'],
            entity=fixed_args['wandb_entity'],
            config=config, # Log the hyperparameters used for this run
            job_type="train_evaluate_config", # Set job type
            name=run_name[:128], # Use generated name, limit length
            settings=Settings(init_timeout=300)
        )
        if not run: raise Exception("wandb.init failed")
        logging.info(f"--- Training & Evaluating Config ---")
        logging.info(f"W&B Run: {wandb.run.name} ({run.id})")
        logging.info(f"Config: {dict(wandb.config)}") # Log the config used

        # --- Setup ---
        set_seed(fixed_args['seed'])
        g_dataloader_seed.manual_seed(fixed_args['seed']) # Seed the DataLoader generator
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if torch.cuda.is_available(): torch.cuda.empty_cache()
        logging.info(f"Using device: {device}")

        # --- Data Loaders ---
        train_loader, val_loader, test_loader, classes = get_data_loaders(
            data_dir=fixed_args['data_dir'], batch_size=config['batch_size'], augment=config['data_augmentation'],
            num_workers=fixed_args['num_workers'], img_size=fixed_args['img_size'],
            val_split=fixed_args['val_split'], seed=fixed_args['seed'],
            generator=g_dataloader_seed
        )
        num_classes = len(classes)
        idx_to_class = {i: name for i, name in enumerate(classes)}

        # --- Model, Loss, Optimizer, Scheduler ---
        model = CustomCNN(
            num_classes=num_classes, num_filters=config['num_filters'],
            filter_organization=config['filter_organization'], filter_sizes=[config['filter_size']] * 5,
            activation_name=config['activation'], dense_neurons=config['dense_neurons'],
            dropout_rate=config['dropout_rate'], batch_norm=config['batch_norm'], img_size=fixed_args['img_size']
        ).to(device)

        wandb.watch(model, log="all", log_freq=100)
        total_params = model.count_parameters(); total_flops = model.calculate_computations()
        wandb.log({"total_parameters": total_params, "total_flops_estimate": total_flops}, commit=False)
        logging.info(f"Model Params: {total_params:,}, Est. FLOPs: {total_flops:,.0f}")

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config.get('weight_decay', 0))
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.2, verbose=False)

        # --- Training & Validation Loop ---
        epochs_to_run = config['epochs']
        logging.info(f"Starting training for {epochs_to_run} epochs...")

        for epoch in range(epochs_to_run):
            epoch_start_time = time.time()
            # (Training Epoch Logic)
            model.train(); running_loss = 0.0; correct_train = 0; total_train = 0
            train_pbar = tqdm(train_loader, desc=f"Ep {epoch+1} Train", leave=False, file=sys.stdout)
            for i, batch_data in enumerate(train_pbar):
                 try: inputs, labels = batch_data; assert not torch.any(labels < 0)
                 except: logging.warning(f"Skip bad train batch {i}"); continue
                 inputs, labels = inputs.to(device), labels.to(device)
                 try:
                    optimizer.zero_grad(set_to_none=True); outputs = model(inputs)
                    loss = criterion(outputs, labels); loss.backward(); optimizer.step()
                    running_loss += loss.item(); _, predicted = outputs.max(1)
                    total_train += labels.size(0); correct_train += predicted.eq(labels).sum().item()
                    if i % 20 == 0 and total_train > 0: train_pbar.set_postfix({'L': f'{running_loss/(i+1):.3f}', 'Acc': f'{100.*correct_train/total_train:.1f}%'})
                 except RuntimeError as e: logging.error(f"Runtime error: {e}"); torch.cuda.empty_cache(); raise e
            train_pbar.close()
            epoch_train_loss = running_loss/len(train_loader) if len(train_loader)>0 else 0
            epoch_train_acc = 100.*correct_train/total_train if total_train>0 else 0

            # (Validation Epoch Logic)
            epoch_val_loss = float('nan'); epoch_val_acc = float('nan')
            if val_loader and len(val_loader) > 0:
                model.eval(); val_correct = 0; val_total = 0; val_loss_accum = 0.0
                val_pbar = tqdm(val_loader, desc=f"Ep {epoch+1} Val ", leave=False, file=sys.stdout)
                with torch.no_grad():
                    for i_val, batch_data in enumerate(val_pbar):
                        try: inputs, labels = batch_data; assert not torch.any(labels < 0)
                        except: logging.warning(f"Skip bad val batch {i_val}"); continue
                        inputs, labels = inputs.to(device), labels.to(device)
                        try:
                            outputs = model(inputs); loss = criterion(outputs, labels)
                            val_loss_accum += loss.item() * inputs.size(0)
                            _, predicted = outputs.max(1)
                            val_total += labels.size(0); val_correct += predicted.eq(labels).sum().item()
                            if i_val % 10 == 0 and val_total > 0: val_pbar.set_postfix({'L': f'{val_loss_accum/val_total:.3f}', 'Acc': f'{100.*val_correct/val_total:.1f}%'})
                        except RuntimeError as e: logging.error(f"Val forward error: {e}")
                val_pbar.close()
                if val_total > 0:
                    epoch_val_loss = val_loss_accum / val_total
                    epoch_val_acc = 100. * val_correct / val_total
                    scheduler.step(epoch_val_loss)

            # --- Logging ---
            epoch_duration = time.time() - epoch_start_time
            log_dict = {'epoch': epoch+1, 'train_loss': epoch_train_loss, 'train_accuracy': epoch_train_acc, 'lr': optimizer.param_groups[0]['lr'], 'epoch_sec': epoch_duration}
            if not np.isnan(epoch_val_loss): log_dict['val_loss'] = epoch_val_loss
            if not np.isnan(epoch_val_acc): log_dict['val_accuracy'] = epoch_val_acc
            wandb.log(log_dict, commit=True)
            logging.info(f'E{epoch+1}/{epochs_to_run}|Tr L:{epoch_train_loss:.3f},Tr Acc:{epoch_train_acc:.2f}%|'+(f'Val L:{epoch_val_loss:.3f},Val Acc:{epoch_val_acc:.2f}%|' if not np.isnan(epoch_val_acc) else 'Val:N/A|')+f'LR:{optimizer.param_groups[0]["lr"]:.1E}|T:{epoch_duration:.1f}s')

            # --- Save Best Model State IN MEMORY ---
            if not np.isnan(epoch_val_acc) and epoch_val_acc > best_val_acc:
                best_val_acc = epoch_val_acc; best_epoch = epoch + 1
                best_model_state = copy.deepcopy(model.state_dict())
                logging.info(f"*** Best val acc: {best_val_acc:.2f}% at Ep {best_epoch} ***")

            if torch.cuda.is_available(): torch.cuda.empty_cache()

        logging.info(f"--- Training Finished. Best Val Acc: {best_val_acc:.2f}% at epoch {best_epoch} ---")

        # --- Load Best Model State for Evaluation ---
        if best_model_state:
            model.load_state_dict(best_model_state)
            logging.info(f"Loaded best model state (from epoch {best_epoch}) for evaluation.")
        else:
            logging.warning("No validation improvement or no validation done. Evaluating using final epoch state.")

        # --- Evaluation on Test Set ---
        model.eval(); correct_test = 0; total_test = 0; test_loss_accum = 0.0
        all_test_images = []; all_test_labels = []; all_test_preds = []
        logging.info("Starting evaluation on the test set...")
        test_pbar = tqdm(test_loader, desc="Testing", leave=False, file=sys.stdout)
        criterion_test = nn.CrossEntropyLoss()

        with torch.no_grad():
            for inputs, labels in test_pbar:
                 if torch.any(labels < 0): continue
                 inputs, labels = inputs.to(device), labels.to(device)
                 outputs = model(inputs); loss = criterion_test(outputs, labels)
                 test_loss_accum += loss.item() * inputs.size(0)
                 _, predicted = outputs.max(1)
                 total_test += labels.size(0); correct_test += (predicted == labels).sum().item()
                 if len(all_test_images) < fixed_args['grid_rows'] * fixed_args['grid_cols'] * 2: # Use fixed_args here
                      all_test_images.append(inputs.cpu()); all_test_labels.append(labels.cpu()); all_test_preds.append(predicted.cpu())
        test_pbar.close()

        final_test_loss = test_loss_accum / total_test if total_test > 0 else float('nan')
        final_test_acc = 100. * correct_test / total_test if total_test > 0 else 0
        logging.info(f'Final Test Loss: {final_test_loss:.4f}, Final Test Accuracy: {final_test_acc:.2f}%')

        # --- Log Final Test Metrics to W&B Summary ---
        wandb.run.summary["best_val_accuracy"] = best_val_acc
        wandb.run.summary["best_epoch"] = best_epoch
        wandb.run.summary["final_test_loss"] = final_test_loss
        wandb.run.summary["final_test_accuracy"] = final_test_acc # Key Q4 metric

        # --- Generate and Log Prediction Grid ---
        # ... (Prediction grid generation logic - needs fixed_args) ...
        logging.info("Generating prediction grid...")
        try:
            if all_test_images:
                all_test_images=torch.cat(all_test_images); all_test_labels=torch.cat(all_test_labels); all_test_preds=torch.cat(all_test_preds)
                num_classes_plot=min(num_classes, fixed_args['grid_rows']); samples_per_class=fixed_args['grid_cols']
                plt.figure(figsize=(samples_per_class*3, num_classes_plot*3.5)); plotted_count=0
                plot_indices_master = torch.randperm(len(all_test_images), generator=torch.Generator().manual_seed(fixed_args['seed'])) # Seeded shuffle
                class_plotted_counts = {i: 0 for i in range(num_classes)}
                for idx in plot_indices_master:
                    if plotted_count >= num_classes_plot*samples_per_class: break
                    true_lbl_idx = all_test_labels[idx].item();
                    if true_lbl_idx < 0 or true_lbl_idx >= num_classes: continue # Skip invalid labels
                    if class_plotted_counts[true_lbl_idx] < samples_per_class:
                        img=all_test_images[idx]; pred_lbl_idx=all_test_preds[idx].item()
                        img_denorm=denormalize(img)
                        ax_row = true_lbl_idx % num_classes_plot; ax_col = class_plotted_counts[true_lbl_idx]
                        plot_idx = ax_row * samples_per_class + ax_col + 1
                        if plot_idx <= num_classes_plot * samples_per_class:
                            ax=plt.subplot(num_classes_plot, samples_per_class, plot_idx)
                            ax.imshow(img_denorm); title_color = 'green' if true_lbl_idx==pred_lbl_idx else 'red'
                            ax.set_title(f'T:{idx_to_class.get(true_lbl_idx,"?")}\nP:{idx_to_class.get(pred_lbl_idx,"?")}',color=title_color, fontsize=9); ax.axis('off')
                            class_plotted_counts[true_lbl_idx] += 1; plotted_count += 1
                plt.tight_layout(); grid_save_path = os.path.join(fixed_args['output_dir'], 'prediction_grid_best.png')
                os.makedirs(fixed_args['output_dir'], exist_ok=True); plt.savefig(grid_save_path, dpi=150); plt.close()
                wandb.log({"prediction_grid": wandb.Image(grid_save_path)}, commit=True)
                logging.info(f"Prediction grid saved to {grid_save_path} and logged.")
            else: logging.warning("No images collected for grid.")
        except Exception as e: logging.error(f"Failed grid gen: {e}", exc_info=True); plt.close()


        # --- Save Final Best Model ---
        if best_model_state:
            save_path = os.path.join(fixed_args['output_dir'], fixed_args['model_save_name'])
            os.makedirs(fixed_args['output_dir'], exist_ok=True)
            try:
                save_dict = {'epoch': best_epoch, 'model_state_dict': best_model_state, 'best_val_accuracy': best_val_acc, 'config': config}
                torch.save(save_dict, save_path)
                logging.info(f"Best model state saved to {save_path}")
                model_artifact = wandb.Artifact(f"best-model-{run.id}", type="model", description=f"Best model from run {run.name}", metadata=dict(config)) # Use dict(config)
                model_artifact.add_file(save_path)
                wandb.log_artifact(model_artifact)
                logging.info("Best model logged as W&B artifact.")
            except Exception as e: logging.error(f"Failed to save best model state: {e}", exc_info=True)
        else: logging.warning("No best model state found to save.")

    except Exception as e:
         logging.error(f"Unhandled error in train_and_evaluate: {e}", exc_info=True)
         if run and wandb.run: wandb.finish(exit_code=1)

    finally:
        # Final check to ensure W&B run finishes
        if run and wandb.run is not None and wandb.run.id == run.id:
             if hasattr(wandb.run, 'finished') and not wandb.run.finished:
                 try: wandb.finish()
                 except Exception as fe: logging.error(f"Error finishing W&B run: {fe}")


In [13]:
if __name__ == "__main__":
    # --- Define fixed arguments (not hyperparameters) ---
    # Using defaults or could use a simple argparse for these if needed
    fixed_args = {
        "data_dir": DATA_DIR,
        "output_dir": OUTPUT_DIR,
        "model_save_name": MODEL_SAVE_NAME,
        "wandb_project": WANDB_PROJECT_NAME,
        "wandb_entity": WANDB_ENTITY,
        "seed": SEED,
        "img_size": IMG_SIZE,
        "num_workers": NUM_WORKERS,
        "val_split": VAL_SPLIT,
        "grid_rows": GRID_ROWS,
        "grid_cols": GRID_COLS,
    }

    # --- Basic Validation ---
    if not os.path.isdir(fixed_args['data_dir']):
        logging.error(f"Data directory not found: {fixed_args['data_dir']}")
        sys.exit(1)

    logging.info(f"Starting run with BEST_CONFIG defined in script.")
    if 'BEST_CONFIG' not in globals():
        logging.error("BEST_CONFIG dictionary not defined!")
        sys.exit(1)

    # --- Run Training & Evaluation using BEST_CONFIG ---
    train_and_evaluate(BEST_CONFIG, fixed_args) # Pass config and fixed args

    print("--- Script Finished ---")



Ep 1 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 1 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 2 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 2 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 3 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 3 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 4 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 4 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 5 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 5 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 6 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 6 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 7 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 7 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 8 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 8 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 9 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 9 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 10 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 10 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 11 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 11 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 12 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 12 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 13 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 13 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 14 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 14 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Ep 15 Train:   0%|          | 0/250 [00:00<?, ?it/s]

Ep 15 Val :   0%|          | 0/63 [00:00<?, ?it/s]

Testing:   0%|          | 0/63 [00:00<?, ?it/s]

--- Script Finished ---
