In [1]:
import torch 
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))


True
Tesla P100-PCIE-16GB


In [2]:
data_path = '/kaggle/input/inaturalist/inaturalist_12K/train'
data_path_test='/kaggle/input/inaturalist/inaturalist_12K/val'

In [3]:
import wandb
wandb.login(key='9f6e625b6a4825fa64c9ba29384c657072eb3b12')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs24m049[0m ([33mcs24m049-iit-m[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
from torch import nn
from torch.nn import functional as F
#from PP_data import preprocess_data, show_images


class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10, input_channels=3, kernel_size=[], no_kernels=[], fc1_size=512, conv_activation='ReLU', use_batch_norm=True,dropout=0.5):
        
        super(SimpleCNN, self).__init__()
        self.conv_activation = conv_activation
        self.fc1_size = fc1_size
        self.kernel_size = kernel_size
        self.no_kernels = no_kernels
        self.input_channels = input_channels
        self.use_batch_norm = use_batch_norm  
        self.dropout=dropout  # Dropout probability
        # Flag to enable/disable Batch Normalization
        # Define convolutional layers with optional Batch Normalization
        
        self.conv1 = nn.Conv2d(input_channels, no_kernels[0], kernel_size=kernel_size[0], stride=1, padding=kernel_size[0] // 2)
        self.bn1 = nn.BatchNorm2d(no_kernels[0]) if use_batch_norm else None  # Optional Batch Norm
        self.conv2 = nn.Conv2d(no_kernels[0], no_kernels[1], kernel_size=kernel_size[1], stride=1, padding=kernel_size[1] // 2)
        self.bn2 = nn.BatchNorm2d(no_kernels[1]) if use_batch_norm else None
        self.conv3 = nn.Conv2d(no_kernels[1], no_kernels[2], kernel_size=kernel_size[2], stride=1, padding=kernel_size[2] // 2)
        self.bn3 = nn.BatchNorm2d(no_kernels[2]) if use_batch_norm else None
        self.conv4 = nn.Conv2d(no_kernels[2], no_kernels[3], kernel_size=kernel_size[3], stride=1, padding=kernel_size[3] // 2)
        self.bn4 = nn.BatchNorm2d(no_kernels[3]) if use_batch_norm else None
        self.conv5 = nn.Conv2d(no_kernels[3], no_kernels[4], kernel_size=kernel_size[4], stride=1, padding=kernel_size[4] // 2)
        self.bn5 = nn.BatchNorm2d(no_kernels[4]) if use_batch_norm else None

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(no_kernels[4] * 7 * 7, fc1_size)
        self.fc2 = nn.Linear(fc1_size, num_classes)
        
        self.dropout_layer=nn.Dropout(p=self.dropout) if self.dropout>0 else None# Optional Dropout

    def forward(self, x):
        x = self.pool(self.apply_batch_norm(self.conv1(x), self.bn1))  # Apply Batch Norm if enabled
        x = self.pool(self.apply_batch_norm(self.conv2(x), self.bn2))
        x = self.pool(self.apply_batch_norm(self.conv3(x), self.bn3))
        x = self.pool(self.apply_batch_norm(self.conv4(x), self.bn4))
        x = self.pool(self.apply_batch_norm(self.conv5(x), self.bn5))
        x = x.view(-1, self.no_kernels[4] * 7 * 7)  # Flatten the tensor
        x = self.activation(self.fc1(x))
        if self.dropout_layer is not None:
            x=self.dropout_layer(x)  # Apply Dropout
        x = self.fc2(x)
        return x

    def apply_batch_norm(self, x, bn_layer):
        if self.use_batch_norm and bn_layer is not None:
            return self.activation(bn_layer(x))  # Apply Batch Norm and activation
        else:
            return self.activation(x)  # Skip Batch Norm and apply activation

    def activation(self, x):
        if self.conv_activation == 'ReLU':
            return F.relu(x)
        elif self.conv_activation == 'Sigmoid':
            return F.sigmoid(x)
        elif self.conv_activation == 'Tanh':
            return F.tanh(x)
        elif self.conv_activation == 'GELU':
            return F.gelu(x)
        elif self.conv_activation == 'SiLU':
            return F.silu(x)
        elif self.conv_activation == 'Mish':
            return F.mish(x)
        elif self.conv_activation == 'ELU':
            return F.elu(x)
        elif self.conv_activation == 'SELU':
            return F.selu(x)
        elif self.conv_activation == 'LeakyReLU':
            return F.leaky_relu(x)
        else:
            raise ValueError("Invalid activation function specified")

In [6]:
import torch
import torch.nn as nn
import wandb
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch import nn
#from cnn import SimpleCNN
import torch.amp as amp
import os
wandb._service_wait = 60


def train():
    # Initialize wandb
    wandb.init()
    config = wandb.config  # Access sweep parameters
    
    # Set device (use GPU if available)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scaler = amp.GradScaler(enabled=device.type == 'cuda')
    
    # Set a meaningful name for the run
    wandb.run.name = (
    f"base_filter_size_{config.base_filter}_"
    f"filter_strategy_{config.filter_strategy}_"
    f"kernel_{'_'.join(map(str, config.kernel_sizes))}_"
    f"activation_{config.activation}_batchnorm_{config.batch_norm}_"
    f"dropout_{config.dropout}_fcsize_{config.fc_size}_epochs_{config.epochs}_"
    f"augmentation_{config.data_augmentation}_lr_{config.learning_rate:.1e}_"
    f"batchsize_{config.batch_size}"
    )
    base_filter = config.base_filter  # Base filter size for the first layer
    filters=[]
    if config['filter_strategy'] == 'same':
        filters = [base_filter] * 5
    elif config['filter_strategy'] == 'doubling':
       filters = [base_filter * (2 ** i) for i in range(5)]
    elif config['filter_strategy'] == 'halving':
        filters = [base_filter * (2 ** i) for i in reversed(range(5))]

    # Extract parameters from wandb config
    no_kernels =filters
    kernel_size = config.kernel_sizes
    activation = config.activation
    batch_norm = config.batch_norm
    dropout = config.dropout
    fc_size = config.fc_size
    learning_rate = config.learning_rate
    batch_size = config.batch_size
    data_augmentation = config.data_augmentation
    epochs = config.epochs
    val_split = 0.2  # 20% of data for validation

    # Initialize the model
    model = SimpleCNN(
        num_classes=10,
        kernel_size=kernel_size,
        no_kernels=no_kernels,
        fc1_size=fc_size,
        conv_activation=activation,
        use_batch_norm=batch_norm,
        dropout=dropout
    )
    
    #model=nn.DataParallel(model)  # Use DataParallel for multi-GPU training if available
    model=model.to(device)  # Move model to the appropriate device
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()

    if config.optimizer_type == 'Adam':
        optimizer =optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    elif config.optimizer_type == 'Nadam':
        optimizer = optim.NAdam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

    # Load and split the dataset into training and validation sets
    dataset = datasets.ImageFolder(
        root=data_path,
        transform=get_transforms(data_augmentation, is_training=True)
    )
    
    # Calculate split sizes
    dataset_size = len(dataset)
    val_size = int(val_split * dataset_size)
    train_size = dataset_size - val_size
    
    # Split the dataset
    train_dataset, val_dataset = random_split(
        dataset, [train_size, val_size], 
        generator=torch.Generator().manual_seed(42)  # For reproducibility
    )
    
    # Update validation set transforms (no augmentation for validation)
    val_dataset.dataset.transform = get_transforms(False, is_training=False)
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        pin_memory=True, 
        num_workers=4
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False,  # No need to shuffle validation data
        pin_memory=True, 
        num_workers=4
    )

    # Training loop
    best_val_accuracy = 0
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss, train_accuracy = run_epoch(model, train_loader, criterion, optimizer, device,scaler, is_training=True)
        
        # Validation phase
        model.eval()  # Set model to evaluation mode
        with torch.no_grad():  # Disable gradient calculation for validation
            val_loss, val_accuracy = run_epoch(model, val_loader, criterion, optimizer, device,scaler,is_training=False)
        
        # Save best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), f"best_model_{wandb.run.id}.pth")

        # Log metrics to wandb
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_acc': train_accuracy,
            'val_loss': val_loss,
            'val_acc': val_accuracy
        })
        
        print(f"Epoch [{epoch + 1}/{epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")
    
    # Log final best validation accuracy and hyperparameters
    wandb.log({
        'best_val_acc': best_val_accuracy,
        'batch_size': batch_size,
        'activation': config.activation,
        'batch_norm': config.batch_norm,
        'dropout': config.dropout,
        'fc_size': config.fc_size,
        'learning_rate': config.learning_rate,
        'data_augmentation': config.data_augmentation,
        'epochs': config.epochs,
        'base_filter': config.base_filter,
        'filter_strategy': config.filter_strategy,
        'kernel_sizes': config.kernel_sizes
    })
    
    # Update the sweep metric
    wandb.run.summary["val_acc"] = best_val_accuracy


def run_epoch(model, dataloader, criterion, optimizer, device, scaler,is_training=True):
    """Run one epoch of training or validation."""
    total_loss = 0
    total = 0
    correct = 0
    
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
         # Forward pass with autocast for mixed precision
        with amp.autocast(device_type=device.type,enabled= scaler.is_enabled() and is_training):
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        # Backward pass and optimization (only during training)
        if is_training:
            optimizer.zero_grad()
            # Use scaler for mixed precision gradient scaling
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        # Accumulate loss
        total_loss += loss.item() * images.size(0)
    
    # Calculate average loss and accuracy
    avg_loss = total_loss / total
    accuracy = 100 * correct / total
    
    return avg_loss, accuracy


def get_transforms(data_augmentation, is_training=True):
    """Get the appropriate transforms based on whether we're training and using augmentation."""
    if is_training and data_augmentation:
        return transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    else:
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])


sweep_config = {
    'method': 'bayes',  # Optimization method: 'grid', 'random', or 'bayes'
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        # strategy for filter size
        'filter_strategy': {'values': ['same', 'doubling', 'halving']},
        #base filter size
        'base_filter': {'values': [32,64]},
        
        # Kernel sizes for each layer
        'kernel_sizes': {'values': [[3, 3, 5, 5, 7], [5, 5, 7, 7, 3], [3, 5, 7, 5, 3],[3,3,3,3,3],[5,5,5,5,5]]},
        'optimizer_type': {'values': ['Adam', 'Nadam']},
        # Activation function
        'activation': {'values': ['ReLU', 'GELU', 'SiLU', 'Mish']},

        # Batch normalization
        'batch_norm': {'values': [True, False]},

        # Dropout
        'dropout': {'values': [0.0, 0.2, 0.3]},

        # Fully connected layer size
        'fc_size': {'values': [256, 512]},

        # Learning rate
        'learning_rate': {"distribution": "log_uniform_values", "min": 1e-5, "max": 1e-3},

        # Batch size
        'batch_size': {'values': [32, 64]},

        # Data augmentation
        'data_augmentation': {'values': [True, False]},

        # Number of epochs
        'epochs': {'values': [10,15,20]}
    }
}

if __name__ == "__main__":
    wandb.login()
    sweep_id = wandb.sweep(sweep_config, project="da6401 a2")
    wandb.agent(sweep_id, train, count=1)

Create sweep with ID: abnvkgmf
Sweep URL: https://wandb.ai/cs24m049-iit-m/da6401%20a2%20/sweeps/abnvkgmf


[34m[1mwandb[0m: Agent Starting Run: dx5no7bq with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	fc_size: 256
[34m[1mwandb[0m: 	filter_strategy: halving
[34m[1mwandb[0m: 	kernel_sizes: [5, 5, 7, 7, 3]
[34m[1mwandb[0m: 	learning_rate: 0.0007713339180941757
[34m[1mwandb[0m: 	optimizer_type: Nadam
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import wandb

# Init W&B
#wandb.init(project="finetune_inaturalist", name="efficientnetv2_finetune")

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transforms
def get_transforms(is_train=True):
    if is_train:
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.3, 0.3, 0.3, 0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    else:
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

# Load dataset
dataset = datasets.ImageFolder(data_path, transform=get_transforms(is_train=True))
val_size = int(0.2 * len(dataset))
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
val_ds.dataset.transform = get_transforms(is_train=False)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=4)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=4)

# Load EfficientNetV2 pre-trained
model = models.efficientnet_v2_s(weights="EfficientNet_V2_S_Weights.DEFAULT")

# Modify final layer
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 10)

# Freeze all layers first
for param in model.parameters():
    param.requires_grad = False

# Strategy: unfreeze last N layers
N = 20
for param in list(model.parameters())[-N:]:
    param.requires_grad = True

model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)

# Training + Validation loop
def run_epoch(model, dataloader, train=False):
    model.train() if train else model.eval()
    running_loss, correct, total = 0.0, 0, 0
    
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
        if train:
            optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        if train:
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        
    return running_loss / total, 100 * correct / total

# Training loop
best_val_acc = 0
for epoch in range(10):
    train_loss, train_acc = run_epoch(model, train_loader, train=True)
    val_loss, val_acc = run_epoch(model, val_loader, train=False)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_efficientnetv2.pth")
    
    print(f"Epoch {epoch+1}: Train Acc={train_acc:.2f}%, Val Acc={val_acc:.2f}%")

#wandb.run.summary["best_val_acc"] = best_val_acc


Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 107MB/s] 


Epoch 1: Train Acc=51.60%, Val Acc=69.53%


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.amp as amp
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import wandb

# Set your data paths
data_path = data_path  # Replace with your actual path
test_data_path = data_path_test  # Replace with your actual path

# Best configuration from the sweep
best_config = {
    'base_filter': 64,
    'filter_strategy': 'doubling',
    'kernel_sizes': [3, 5, 7, 5, 3],
    'activation': 'ReLU',
    'batch_norm': True,
    'dropout': 0.2,
    'fc_size': 512,
    'learning_rate': 3.3e-5,
    'batch_size': 32,
    'data_augmentation': False,
    'epochs': 15,
    'optimizer_type': 'Adam'
}

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
scaler = amp.GradScaler(enabled=device.type == 'cuda')

# Define transforms based on config
def get_transforms(data_augmentation, is_training=True):
    """Get the appropriate transforms based on whether we're training and using augmentation."""
    if is_training and data_augmentation:
        return transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    else:
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

# Determine filter sizes based on strategy
base_filter = best_config['base_filter']
if best_config['filter_strategy'] == 'same':
    filters = [base_filter] * 5
elif best_config['filter_strategy'] == 'doubling':
    filters = [base_filter * (2 ** i) for i in range(5)]
elif best_config['filter_strategy'] == 'halving':
    filters = [base_filter * (2 ** i) for i in reversed(range(5))]

# Initialize model with best config
#from cnn import SimpleCNN  # Make sure this import works
model = SimpleCNN(
    num_classes=10,
    kernel_size=best_config['kernel_sizes'],
    no_kernels=filters,
    fc1_size=best_config['fc_size'],
    conv_activation=best_config['activation'],
    use_batch_norm=best_config['batch_norm'],
    dropout=best_config['dropout']
)
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
if best_config['optimizer_type'] == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=best_config['learning_rate'], weight_decay=1e-4)
elif best_config['optimizer_type'] == 'Nadam':
    optimizer = optim.NAdam(model.parameters(), lr=best_config['learning_rate'], weight_decay=1e-4)

# Load and split dataset
dataset = datasets.ImageFolder(
    root=data_path,
    transform=get_transforms(best_config['data_augmentation'], is_training=True)
)

# Calculate split sizes (80% train, 20% validation)
val_split = 0.2
dataset_size = len(dataset)
val_size = int(val_split * dataset_size)
train_size = dataset_size - val_size

# Split dataset
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(42)  # For reproducibility
)

# Update validation transform
val_dataset.dataset.transform = get_transforms(False, is_training=False)

# Create data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=best_config['batch_size'],
    shuffle=True,
    pin_memory=True,
    num_workers=4
)

val_loader = DataLoader(
    val_dataset,
    batch_size=best_config['batch_size'],
    shuffle=False,
    pin_memory=True,
    num_workers=4
)

# Define function to run an epoch
def run_epoch(model, dataloader, criterion, optimizer, device, scaler, is_training=True):
    """Run one epoch of training or validation."""
    total_loss = 0
    total = 0
    correct = 0
    
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
        # Forward pass with autocast for mixed precision
        with amp.autocast(device_type=device.type, enabled=scaler.is_enabled() and is_training):
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        # Backward pass and optimization (only during training)
        if is_training:
            optimizer.zero_grad()
            # Use scaler for mixed precision gradient scaling
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        # Accumulate loss
        total_loss += loss.item() * images.size(0)
    
    # Calculate average loss and accuracy
    avg_loss = total_loss / total
    accuracy = 100 * correct / total
    
    return avg_loss, accuracy

# Train the model
print("Starting training with best configuration...")
best_val_accuracy = 0
model_save_path = "best_model.pth"

for epoch in range(best_config['epochs']):
    # Training phase
    model.train()
    train_loss, train_accuracy = run_epoch(model, train_loader, criterion, optimizer, device, scaler, is_training=True)
    
    # Validation phase
    model.eval()
    with torch.no_grad():
        val_loss, val_accuracy = run_epoch(model, val_loader, criterion, optimizer, device, scaler, is_training=False)
    
    # Save best model
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), model_save_path)
        print(f"New best model saved with validation accuracy: {val_accuracy:.2f}%")
    
    print(f"Epoch [{epoch + 1}/{best_config['epochs']}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

print(f"Training completed. Best validation accuracy: {best_val_accuracy:.2f}%")

Using device: cuda
Starting training with best configuration...
New best model saved with validation accuracy: 29.56%
Epoch [1/15], Train Loss: 2.0945, Train Accuracy: 25.73%, Val Loss: 1.9876, Val Accuracy: 29.56%
New best model saved with validation accuracy: 32.72%
Epoch [2/15], Train Loss: 1.9206, Train Accuracy: 32.55%, Val Loss: 1.8898, Val Accuracy: 32.72%
New best model saved with validation accuracy: 34.72%
Epoch [3/15], Train Loss: 1.8234, Train Accuracy: 35.94%, Val Loss: 1.8348, Val Accuracy: 34.72%
Epoch [4/15], Train Loss: 1.7435, Train Accuracy: 38.69%, Val Loss: 1.8599, Val Accuracy: 34.67%
New best model saved with validation accuracy: 36.02%
Epoch [5/15], Train Loss: 1.6676, Train Accuracy: 41.73%, Val Loss: 1.8514, Val Accuracy: 36.02%
New best model saved with validation accuracy: 37.57%
Epoch [6/15], Train Loss: 1.5788, Train Accuracy: 44.84%, Val Loss: 1.7799, Val Accuracy: 37.57%
Epoch [7/15], Train Loss: 1.4928, Train Accuracy: 47.76%, Val Loss: 1.8051, Val Accu

In [8]:
# Load the test dataset
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = datasets.ImageFolder(
    root=test_data_path,
    transform=test_transform
)

test_loader = DataLoader(
    test_dataset,
    batch_size=best_config['batch_size'],
    shuffle=False,
    pin_memory=True,
    num_workers=4
)

# Get class names
class_names = test_dataset.classes
print(f"Classes: {class_names}")

# Load the best model
model_save_path="/kaggle/working/best_model.pth"
model.load_state_dict(torch.load(model_save_path))
model.eval()

# Evaluate on test set
def evaluate_model(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    all_images = []
    
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            
            # Store predictions, labels, and images for visualization
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            # Convert images to numpy for visualization (only store a subset if needed)
            for img in images.cpu().numpy():
                all_images.append(img)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    return accuracy, all_preds, all_labels, all_images

# Run the evaluation
test_accuracy, predictions, true_labels, test_images = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.2f}%")

Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']


  model.load_state_dict(torch.load(model_save_path))


Test Accuracy: 43.20%


In [12]:
def create_creative_prediction_grid(images, preds, labels, class_names, rows=10, cols=3):
    import numpy as np
    from PIL import Image, ImageDraw, ImageFont

    # Normalize stats
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    num_samples = rows * cols

    # Select correct/incorrect
    correct = [i for i, (p, l) in enumerate(zip(preds, labels)) if p == l]
    incorrect = [i for i, (p, l) in enumerate(zip(preds, labels)) if p != l]
    n_incorrect = min(num_samples // 3, len(incorrect))
    n_correct = num_samples - n_incorrect

    selected = list(np.random.choice(correct, n_correct, replace=False)) + \
               list(np.random.choice(incorrect, n_incorrect, replace=False))
    np.random.shuffle(selected)

    if len(selected) < num_samples:
        remaining = num_samples - len(selected)
        pool = [i for i in range(len(images)) if i not in selected]
        selected += list(np.random.choice(pool, remaining, replace=False))

    # Layout settings
    image_size = 180
    padding_x = 40
    padding_y = 30
    margin = 60
    text_height = 45
    cell_width = image_size + padding_x
    cell_height = image_size + text_height + padding_y

    grid_width = cols * cell_width + 2 * margin
    grid_height = rows * cell_height + 2 * margin + 80  # + title

    grid_image = Image.new('RGB', (grid_width, grid_height), color=(255, 255, 255))
    draw = ImageDraw.Draw(grid_image)

    try:
        font = ImageFont.truetype("Arial.ttf", 18)
        small_font = ImageFont.truetype("Arial.ttf", 14)
    except IOError:
        font = ImageFont.load_default()
        small_font = ImageFont.load_default()

    # Title
    title = "Test Set Predictions (Best CNN)"
    title_x = (grid_width - draw.textlength(title, font=font)) // 2
    draw.text((title_x, 20), title, fill=(0, 0, 0), font=font)

    model_info = f"{best_config['filter_strategy'].capitalize()} filters | {best_config['activation']} | Dropout={best_config['dropout']}"
    draw.text((margin, 50), model_info, fill=(60, 60, 60), font=small_font)
    draw.text((margin, 70), f"Test Accuracy: {test_accuracy:.2f}%", fill=(60, 60, 60), font=small_font)

    # Draw each image block
    for idx, img_idx in enumerate(selected):
        img = images[img_idx].transpose(1, 2, 0) * std + mean
        img = np.clip(img, 0, 1)
        img = (img * 255).astype(np.uint8)
        img_pil = Image.fromarray(img).resize((image_size, image_size), Image.BICUBIC)

        row, col = divmod(idx, cols)
        x = margin + col * cell_width
        y = margin + row * cell_height + 80

        pred = preds[img_idx]
        label = labels[img_idx]
        is_correct = pred == label

        bg_color = (235, 255, 235) if is_correct else (255, 235, 235)
        draw.rectangle([x - 8, y - 8, x + image_size + 8, y + image_size + text_height + 8], fill=bg_color, outline=(200, 200, 200))

        grid_image.paste(img_pil, (x, y))

        # Text
        draw.text((x, y + image_size + 5), f"True: {class_names[label]}", fill=(0, 100, 0) if is_correct else (150, 0, 0), font=small_font)
        draw.text((x, y + image_size + 22), f"Pred: {class_names[pred]}", fill=(0, 0, 100), font=small_font)

    return grid_image
grid_image = create_creative_prediction_grid(test_images, predictions, true_labels, class_names)
grid_image.save("prediction_grid.png")
print("🎨 Balanced 10×3 prediction grid saved!")

🎨 Balanced 10×3 prediction grid saved!
