In [None]:
!pip install -U gdown

# Download the zip file
!gdown --id 1DB-r714_Wr2J4boXFuvOHn6hUwUO3fgR -O datasets.zip
!unzip /content/datasets.zip -d /content/datasets

Downloading...
From (original): https://drive.google.com/uc?id=1DB-r714_Wr2J4boXFuvOHn6hUwUO3fgR
From (redirected): https://drive.google.com/uc?id=1DB-r714_Wr2J4boXFuvOHn6hUwUO3fgR&confirm=t&uuid=0fe41eb4-04dd-436c-954c-54ba7b5a9f72
To: /content/datasets.zip
100% 107M/107M [00:04<00:00, 22.3MB/s]
Archive:  /content/datasets.zip
replace /content/datasets/ants/ants (1).jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
!pip install torch torchvision
!pip install tqdm
!pip install matplotlib seaborn scikit-learn pillow
!pip install optuna




Optuna Hyperparameter Tuning

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from torchvision import transforms, models
from torchvision.datasets import ImageFolder

from PIL import Image
import numpy as np
import optuna

from sklearn.model_selection import StratifiedShuffleSplit


def get_classifier(num_layers, in_features, out_features, dropout_rate, hidden_units, activation_name):
    """
    Dynamically build a classifier head with the given hyperparameters.

    The head consists of ``num_layers`` repetitions of
    ``Linear -> activation -> Dropout`` followed by a final ``Linear`` layer.

    Args:
        num_layers: Number of hidden blocks. With 0 the head is a single
            ``Linear(in_features, out_features)``.
        in_features: Size of the input feature vector.
        out_features: Size of the output (number of classes).
        dropout_rate: Probability passed to every ``nn.Dropout``.
        hidden_units: Width of every hidden ``Linear`` layer.
        activation_name: ``'relu'`` or ``'leaky_relu'``.

    Returns:
        An ``nn.Sequential`` containing the layers in order.

    Raises:
        KeyError: If ``activation_name`` is not one of the supported names.
    """
    # Factories rather than instances: every hidden block gets its own
    # activation module instead of one object registered at several positions
    # of the Sequential.
    activation_factories = {
        'relu': lambda: nn.ReLU(inplace=True),
        'leaky_relu': lambda: nn.LeakyReLU(negative_slope=0.01, inplace=True),
    }
    make_activation = activation_factories[activation_name]

    layers = []
    input_dim = in_features
    for _ in range(num_layers):
        layers.append(nn.Linear(input_dim, hidden_units))
        layers.append(make_activation())
        layers.append(nn.Dropout(dropout_rate))
        input_dim = hidden_units
    # Final projection to the output size
    layers.append(nn.Linear(input_dim, out_features))
    return nn.Sequential(*layers)


def get_scheduler(optimizer, scheduler_name, patience):
    """
    Create the learning-rate scheduler selected by ``scheduler_name``.

    Args:
        optimizer: The optimizer whose learning rate is scheduled.
        scheduler_name: ``'ReduceLROnPlateau'``, ``'StepLR'`` or
            ``'CosineAnnealingLR'``. Any other value (including the string
            ``'None'``) disables scheduling.
        patience: Used as ``patience`` for ReduceLROnPlateau, as ``step_size``
            for StepLR, and multiplied by 10 to give ``T_max`` for
            CosineAnnealingLR.

    Returns:
        The scheduler instance, or ``None`` when no scheduler is selected.
    """
    if scheduler_name == 'ReduceLROnPlateau':
        # ``verbose`` is intentionally not passed: the argument is deprecated in
        # PyTorch and newer releases reject it as an unexpected keyword.
        # The current learning rate can be read from ``optimizer.param_groups``.
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=patience)
    elif scheduler_name == 'StepLR':
        return torch.optim.lr_scheduler.StepLR(optimizer, step_size=patience, gamma=0.5)
    elif scheduler_name == 'CosineAnnealingLR':
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=patience*10)
    else:
        return None


def objective(trial):
    """
    Optuna objective: fine-tune a pretrained EfficientNet-B0 and score it by validation accuracy.

    Samples optimizer, classifier-head, scheduler and augmentation
    hyperparameters from ``trial``, builds train/validation loaders from the
    image folders under ``/content/datasets`` (stratified 80/20 split with a
    fixed ``random_state``), trains for the sampled number of epochs and
    reports the validation accuracy to Optuna after each epoch.

    Args:
        trial: The Optuna trial used for ``suggest_*``, ``report`` and
            ``should_prune``.

    Returns:
        The highest validation accuracy (in percent) observed over the epochs
        that were run.

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch has been reported.
    """
    # Hyperparameters to tune
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'SGD'])
    num_layers = trial.suggest_int('num_classifier_layers', 1, 3)
    hidden_units = trial.suggest_categorical('hidden_units', [128, 256, 512])
    activation_name = trial.suggest_categorical('activation', ['relu', 'leaky_relu'])
    scheduler_name = trial.suggest_categorical('scheduler', ['ReduceLROnPlateau', 'StepLR', 'CosineAnnealingLR', 'None'])
    scheduler_patience = trial.suggest_int('scheduler_patience', 3, 10)
    rotation_degree = trial.suggest_float('rotation_degree', 10.0, 40.0)
    brightness = trial.suggest_float('brightness', 0.1, 0.5)
    contrast = trial.suggest_float('contrast', 0.1, 0.5)
    saturation = trial.suggest_float('saturation', 0.1, 0.5)
    hue = trial.suggest_float('hue', 0.01, 0.15)
    num_epochs = trial.suggest_int('num_epochs', 2, 6)
    # For early stopping/pruning.
    # NOTE(review): with num_epochs in [2, 6] and patience in [3, 7] the early
    # stopping branch below can only fire in the longest trials.
    early_stopping_patience = trial.suggest_int('early_stopping_patience', 3, 7)

    # Training-time augmentations driven by the sampled hyperparameters
    augmentations = transforms.Compose([
        transforms.Resize((224 + 32, 224 + 32)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.3),
        transforms.RandomRotation(degrees=rotation_degree, fill=0),
        transforms.ColorJitter(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue),
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0))], p=0.3),
        transforms.RandomApply([transforms.RandomPerspective(distortion_scale=0.2, p=0.5)], p=0.3),
        transforms.Resize((224, 224)),          # guarantees a fixed 224x224 output size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225]),
    ])

    # Validation images are only resized and normalized (no augmentation)
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225]),
    ])

    # Dataset loading/splitting.
    # ImageFolder is built without a transform: it is only used to enumerate
    # (path, class_index) pairs; transforms are applied by CustomDataset.
    data_dir = "/content/datasets"
    full_dataset = ImageFolder(data_dir)
    num_classes = len(full_dataset.classes)

    # Stratify on the class index so both splits keep the class proportions.
    # Named `targets` so it is not clobbered by the per-batch `labels` below.
    targets = [sample[1] for sample in full_dataset.samples]
    indices = list(range(len(full_dataset)))
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_idx, val_idx = next(splitter.split(indices, targets))

    train_samples = [full_dataset.samples[i] for i in train_idx]
    val_samples = [full_dataset.samples[i] for i in val_idx]

    class_to_idx = full_dataset.class_to_idx

    # Custom Dataset class to pair a list of samples with its own transform
    class CustomDataset(torch.utils.data.Dataset):
        def __init__(self, samples, transform, class_to_idx):
            self.samples = samples
            self.transform = transform
            self.class_to_idx = class_to_idx

        def __len__(self):
            return len(self.samples)

        def __getitem__(self, idx):
            path, label = self.samples[idx]
            img = Image.open(path).convert('RGB')
            if self.transform:
                img = self.transform(img)
            return img, label

    train_dataset = CustomDataset(train_samples, transform=augmentations, class_to_idx=class_to_idx)
    val_dataset = CustomDataset(val_samples, transform=val_transform, class_to_idx=class_to_idx)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only runs
    use_pinned_memory = device.type == "cuda"

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=2, pin_memory=use_pinned_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=2, pin_memory=use_pinned_memory)

    # Build model: pretrained backbone with its classifier replaced by the tuned head
    backbone = models.efficientnet_b0(pretrained=True)
    in_features = backbone.classifier[1].in_features
    classifier_head = get_classifier(num_layers, in_features, num_classes, dropout_rate, hidden_units, activation_name)
    backbone.classifier = classifier_head

    model = backbone.to(device)

    # Define criterion
    criterion = nn.CrossEntropyLoss()

    # Define optimizer
    if optimizer_name == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == 'AdamW':
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=0.9)

    # Define scheduler (None when the sampled name is 'None')
    scheduler = get_scheduler(optimizer, scheduler_name, scheduler_patience)

    # Early stopping variables
    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0
        running_corrects = 0
        total = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch loss is a per-sample mean
            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            running_corrects += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = running_corrects / total * 100

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        val_corrects = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                preds = outputs.argmax(dim=1)
                val_corrects += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_loss /= val_total
        val_acc = val_corrects / val_total * 100

        if scheduler:
            # ReduceLROnPlateau needs the monitored metric; the others step per epoch
            if scheduler_name == 'ReduceLROnPlateau':
                scheduler.step(val_loss)
            else:
                scheduler.step()

        print(f"Epoch {epoch+1}/{num_epochs} "
              f"Train Loss: {train_loss:.4f} Train Acc: {train_acc:.2f}% "
              f"Val Loss: {val_loss:.4f} Val Acc: {val_acc:.2f}%")

        # Early stopping check
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= early_stopping_patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        # Report to Optuna for pruning
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return best_val_acc

# Launch the hyperparameter search when this cell is executed as the main module
if __name__ == "__main__":
    # The objective returns validation accuracy, so larger is better
    study = optuna.create_study(direction='maximize')
    # Budget: 15 trials, bounded by a 6-hour timeout (given in seconds)
    study.optimize(objective, timeout=6*60*60, n_trials=15)

    print("Best hyperparameters found:")
    print(study.best_trial.params)


[I 2025-08-02 15:50:54,480] A new study created in memory with name: no-name-d768cb94-5913-443b-934f-65b301fe2adc


Epoch 1/5 Train Loss: 1.9591 Train Acc: 47.85% Val Loss: 1.0059 Val Acc: 76.71%
Epoch 2/5 Train Loss: 0.7685 Train Acc: 79.77% Val Loss: 0.4790 Val Acc: 86.17%
Epoch 3/5 Train Loss: 0.4781 Train Acc: 85.78% Val Loss: 0.3759 Val Acc: 88.26%
Epoch 4/5 Train Loss: 0.3804 Train Acc: 88.37% Val Loss: 0.3509 Val Acc: 89.26%


[I 2025-08-02 17:32:15,394] Trial 0 finished with value: 89.80891719745223 and parameters: {'lr': 7.089036358695856e-05, 'weight_decay': 0.00016006913322157032, 'dropout_rate': 0.12255201954274884, 'batch_size': 32, 'optimizer': 'AdamW', 'num_classifier_layers': 1, 'hidden_units': 128, 'activation': 'relu', 'scheduler': 'CosineAnnealingLR', 'scheduler_patience': 10, 'rotation_degree': 20.50914829759194, 'brightness': 0.29905584790856476, 'contrast': 0.36051251719438027, 'saturation': 0.35152073948456297, 'hue': 0.08182412259570633, 'num_epochs': 5, 'early_stopping_patience': 3}. Best is trial 0 with value: 89.80891719745223.


Epoch 5/5 Train Loss: 0.3321 Train Acc: 89.78% Val Loss: 0.3248 Val Acc: 89.81%




Epoch 1/2 Train Loss: 1.3810 Train Acc: 56.95% Val Loss: 0.8944 Val Acc: 71.61%


[I 2025-08-02 18:08:53,703] Trial 1 finished with value: 76.7970882620564 and parameters: {'lr': 0.0006434829794286229, 'weight_decay': 0.0019676077185097856, 'dropout_rate': 0.32979602305510325, 'batch_size': 16, 'optimizer': 'Adam', 'num_classifier_layers': 2, 'hidden_units': 256, 'activation': 'leaky_relu', 'scheduler': 'None', 'scheduler_patience': 5, 'rotation_degree': 11.397607967282626, 'brightness': 0.128944893594884, 'contrast': 0.267874650763847, 'saturation': 0.4314453514425377, 'hue': 0.12949254315477438, 'num_epochs': 2, 'early_stopping_patience': 5}. Best is trial 0 with value: 89.80891719745223.


Epoch 2/2 Train Loss: 0.9672 Train Acc: 71.01% Val Loss: 0.7598 Val Acc: 76.80%




Epoch 1/2 Train Loss: 2.0541 Train Acc: 35.40% Val Loss: 1.0593 Val Acc: 72.70%


[I 2025-08-02 18:45:32,512] Trial 2 finished with value: 86.07825295723384 and parameters: {'lr': 9.4807291742241e-05, 'weight_decay': 0.0001146416505961318, 'dropout_rate': 0.3450917067440256, 'batch_size': 16, 'optimizer': 'Adam', 'num_classifier_layers': 2, 'hidden_units': 128, 'activation': 'leaky_relu', 'scheduler': 'StepLR', 'scheduler_patience': 3, 'rotation_degree': 37.13763183893976, 'brightness': 0.15294835877011093, 'contrast': 0.36740312939820685, 'saturation': 0.3582879572587506, 'hue': 0.02075664158047861, 'num_epochs': 2, 'early_stopping_patience': 4}. Best is trial 0 with value: 89.80891719745223.


Epoch 2/2 Train Loss: 0.9489 Train Acc: 71.95% Val Loss: 0.4821 Val Acc: 86.08%




Epoch 1/6 Train Loss: 2.3399 Train Acc: 27.92% Val Loss: 2.0136 Val Acc: 50.77%
Epoch 2/6 Train Loss: 1.6456 Train Acc: 55.09% Val Loss: 1.0300 Val Acc: 74.52%
Epoch 3/6 Train Loss: 0.9966 Train Acc: 72.56% Val Loss: 0.6283 Val Acc: 81.26%
Epoch 4/6 Train Loss: 0.7155 Train Acc: 78.32% Val Loss: 0.5091 Val Acc: 84.35%
Epoch 5/6 Train Loss: 0.5910 Train Acc: 81.41% Val Loss: 0.4315 Val Acc: 85.90%


[I 2025-08-02 20:34:01,827] Trial 3 finished with value: 86.16924476797088 and parameters: {'lr': 0.0009701245298630597, 'weight_decay': 2.34932786859851e-06, 'dropout_rate': 0.326726679674586, 'batch_size': 16, 'optimizer': 'SGD', 'num_classifier_layers': 1, 'hidden_units': 256, 'activation': 'relu', 'scheduler': 'CosineAnnealingLR', 'scheduler_patience': 7, 'rotation_degree': 36.06641185994404, 'brightness': 0.1616361362602513, 'contrast': 0.2992367670321473, 'saturation': 0.15740398361932106, 'hue': 0.11013469125013171, 'num_epochs': 6, 'early_stopping_patience': 5}. Best is trial 0 with value: 89.80891719745223.


Epoch 6/6 Train Loss: 0.5412 Train Acc: 82.55% Val Loss: 0.4186 Val Acc: 86.17%




Epoch 1/4 Train Loss: 2.4533 Train Acc: 16.93% Val Loss: 2.3631 Val Acc: 39.40%
Epoch 2/4 Train Loss: 2.1676 Train Acc: 32.70% Val Loss: 1.7816 Val Acc: 43.68%
Epoch 3/4 Train Loss: 1.7894 Train Acc: 44.44% Val Loss: 1.4456 Val Acc: 57.96%


[I 2025-08-02 21:46:41,704] Trial 4 finished with value: 66.2420382165605 and parameters: {'lr': 2.31990490669854e-05, 'weight_decay': 7.338254609394071e-06, 'dropout_rate': 0.3362305246587246, 'batch_size': 16, 'optimizer': 'Adam', 'num_classifier_layers': 2, 'hidden_units': 128, 'activation': 'leaky_relu', 'scheduler': 'CosineAnnealingLR', 'scheduler_patience': 8, 'rotation_degree': 25.621364751158055, 'brightness': 0.44324969595344654, 'contrast': 0.46806360105119194, 'saturation': 0.2519100190394554, 'hue': 0.08096694093026992, 'num_epochs': 4, 'early_stopping_patience': 7}. Best is trial 0 with value: 89.80891719745223.


Epoch 4/4 Train Loss: 1.4806 Train Acc: 54.65% Val Loss: 1.1321 Val Acc: 66.24%


[I 2025-08-02 22:11:55,386] Trial 5 pruned. 


Epoch 1/4 Train Loss: 2.4428 Train Acc: 20.23% Val Loss: 2.1862 Val Acc: 33.21%
Best hyperparameters found:
{'lr': 7.089036358695856e-05, 'weight_decay': 0.00016006913322157032, 'dropout_rate': 0.12255201954274884, 'batch_size': 32, 'optimizer': 'AdamW', 'num_classifier_layers': 1, 'hidden_units': 128, 'activation': 'relu', 'scheduler': 'CosineAnnealingLR', 'scheduler_patience': 10, 'rotation_degree': 20.50914829759194, 'brightness': 0.29905584790856476, 'contrast': 0.36051251719438027, 'saturation': 0.35152073948456297, 'hue': 0.08182412259570633, 'num_epochs': 5, 'early_stopping_patience': 3}


# Step 1

Baseline Model

* Architecture: Pretrained ResNet18 (final layer adjusted for number of classes)
* Input: Images resized to 224×224, no augmentation
* Optimizer: SGD, learning rate 0.01
* Training: 3 epochs, single train/val split (80/20)
* Loss Function: CrossEntropyLoss
* No tuning: No learning rate schedules, no early stopping, no regularization
* Progress visualization: Uses tqdm for per-batch progress bars



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm   # << NEW: tqdm import
import os

# --------- PARAMETERS -----------
data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 3
learning_rate = 0.01
split_seed = 42  # fixes the train/val partition so re-runs evaluate on the same images

# --------- DATA LOADERS ----------
# Baseline preprocessing: resize only, no augmentation
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

full_dataset = datasets.ImageFolder(data_dir, transform=transform)
num_classes = len(full_dataset.classes)

# 80/20 split; the remainder goes to train so the two sizes always sum to len(full_dataset)
val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size
# A seeded generator makes the random split reproducible across kernel restarts
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --------- MODEL ----------
# Pretrained ResNet18 with the final layer resized to the dataset's class count
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# --------- OPTIMIZER, LOSS -------
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# --------- TRAIN LOOP WITH TQDM -----------
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # tqdm wraps the loader to show a per-batch progress bar
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # argmax over the class dimension gives the predicted class index
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    # Mean of the per-batch mean losses
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    # ------ Validate ------
    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 81.0MB/s]
Epoch 1/3 - Training: 100%|██████████| 138/138 [18:35<00:00,  8.08s/it]


Epoch [1/3] Train Loss: 1.0579 Train Acc: 71.72%


Epoch 1/3 - Validation: 100%|██████████| 35/35 [01:33<00:00,  2.67s/it]


         Val Loss: 0.5064 Val Acc: 85.79%


Epoch 2/3 - Training: 100%|██████████| 138/138 [18:50<00:00,  8.19s/it]


Epoch [2/3] Train Loss: 0.4098 Train Acc: 88.79%


Epoch 2/3 - Validation: 100%|██████████| 35/35 [01:32<00:00,  2.65s/it]


         Val Loss: 0.4027 Val Acc: 87.98%


Epoch 3/3 - Training: 100%|██████████| 138/138 [18:34<00:00,  8.08s/it]


Epoch [3/3] Train Loss: 0.2576 Train Acc: 93.40%


Epoch 3/3 - Validation: 100%|██████████| 35/35 [01:34<00:00,  2.70s/it]

         Val Loss: 0.3746 Val Acc: 87.98%





# Step 2

Basic Data Augmentation

* Architecture: Same as Step 1 (ResNet18, last layer adjusted).
* Data Augmentation:
  * Training set: RandomHorizontalFlip and RandomCrop to increase dataset variability.
  * Validation set: Only resizing (no augmentation).
* Other settings:
  * SGD optimizer, learning rate 0.01, 3 epochs, 80/20 split.
  * No scheduler, no early stopping, no model saving.




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import os

# --------- PARAMETERS -----------
data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 3
learning_rate = 0.01
split_seed = 42  # fixes the train/val partition so re-runs evaluate on the same images

# --------- DATA LOADERS WITH AUGMENTATION ----------
# Training: resize slightly larger, then random crop + horizontal flip
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
# Validation: resize only, no augmentation
transform_val = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# Transform-less ImageFolder used only for the class list and the dataset length
full_dataset = datasets.ImageFolder(data_dir)
num_classes = len(full_dataset.classes)
val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# Shuffle the indices with a seeded generator so the split is reproducible
indices = torch.randperm(
    len(full_dataset),
    generator=torch.Generator().manual_seed(split_seed),
).tolist()
train_indices = indices[:train_size]
val_indices = indices[train_size:]

# Two views of the same folder, one per transform. Both are built from the same
# directory, so their class-to-index mappings already agree and need no copying.
train_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(data_dir, transform=transform_train), train_indices)
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(data_dir, transform=transform_val), val_indices)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --------- MODEL ----------
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# --------- OPTIMIZER, LOSS -------
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# --------- TRAIN LOOP WITH TQDM -----------
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # argmax over the class dimension gives the predicted class index
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    # Mean of the per-batch mean losses
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")


Epoch 1/3 - Training: 100%|██████████| 138/138 [17:48<00:00,  7.75s/it]


Epoch [1/3] Train Loss: 1.0388 Train Acc: 72.52%


Epoch 1/3 - Validation: 100%|██████████| 35/35 [01:28<00:00,  2.52s/it]


         Val Loss: 0.5226 Val Acc: 84.79%


Epoch 2/3 - Training:  98%|█████████▊| 135/138 [17:45<00:23,  7.89s/it]


KeyboardInterrupt: 

# Step 3

Optimizer & Learning Rate Tuning

* Architecture: Still using ResNet18.
* Data Augmentation: Same as Step 2 (random crop, horizontal flip).
* Optimizer: Switched from SGD to Adam, which usually converges faster.
* Learning Rate: Typically start with a lower learning rate for Adam (e.g., 0.001 or 0.0001).
* Optional: Add a simple scheduler like StepLR for basic learning rate decay.
* Other: 3 epochs, 80/20 train/val split, no other tuning yet.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import os

# --------- PARAMETERS -----------
data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 3
learning_rate = 1e-4     # Lowered for Adam
split_seed = 42          # fixes the train/val partition so re-runs evaluate on the same images

# --------- DATA LOADERS WITH AUGMENTATION ----------
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_val = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

full_dataset = datasets.ImageFolder(data_dir, transform=transform_train)
num_classes = len(full_dataset.classes)
val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# Split on SHUFFLED indices. ImageFolder lists its samples grouped by class, so
# a contiguous range (first 80% / last 20%) puts only the trailing classes into
# the validation set and leaves them largely unseen during training, which
# makes the validation accuracy meaningless.
shuffled_indices = torch.randperm(
    len(full_dataset),
    generator=torch.Generator().manual_seed(split_seed),
).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:]

# Same folder viewed through two transforms: augmented for train, plain for val
train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset_raw = datasets.ImageFolder(data_dir, transform=transform_val)
val_dataset = torch.utils.data.Subset(val_dataset_raw, val_indices)

# Expose the class mapping on the Subset wrappers for convenience
train_dataset.classes = full_dataset.classes
train_dataset.class_to_idx = full_dataset.class_to_idx
val_dataset.classes = full_dataset.classes
val_dataset.class_to_idx = full_dataset.class_to_idx

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --------- MODEL ----------
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# --------- OPTIMIZER, SCHEDULER, LOSS -------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Halves the learning rate every 5 epochs (no effect while num_epochs < 6)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# --------- TRAIN LOOP WITH TQDM -----------
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # argmax over the class dimension gives the predicted class index
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    # Mean of the per-batch mean losses
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")

    scheduler.step()  # ADVANCES THE LR SCHEDULE



Epoch 1/3 - Training: 100%|██████████| 138/138 [18:57<00:00,  8.25s/it]


Epoch [1/3] Train Loss: 0.7209 Train Acc: 78.94%


Epoch 1/3 - Validation: 100%|██████████| 35/35 [01:31<00:00,  2.60s/it]


         Val Loss: 5.6382 Val Acc: 10.29%


Epoch 2/3 - Training: 100%|██████████| 138/138 [18:38<00:00,  8.11s/it]


Epoch [2/3] Train Loss: 0.2724 Train Acc: 91.79%


Epoch 2/3 - Validation: 100%|██████████| 35/35 [01:31<00:00,  2.63s/it]


         Val Loss: 6.1990 Val Acc: 10.20%


Epoch 3/3 - Training: 100%|██████████| 138/138 [18:31<00:00,  8.05s/it]


Epoch [3/3] Train Loss: 0.1573 Train Acc: 95.43%


Epoch 3/3 - Validation: 100%|██████████| 35/35 [01:31<00:00,  2.62s/it]

         Val Loss: 7.0780 Val Acc: 10.38%





# Step 4

 Increase Model Complexity
* Instead of ResNet18, use a deeper/bigger model like ResNet50 or EfficientNet-B0.
*  Keep the previous data augmentation from Step 3.
* Keep optimizer and learning rate tuning unchanged (Adam with lr=1e-4).
* Optionally, add a little dropout in the classifier head to help regularization, but this is optional here (will be covered more in Step 5).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm

data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 3
learning_rate = 1e-4
split_seed = 42  # fixes the train/val partition so re-runs evaluate on the same images

transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

transform_val = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

full_dataset = datasets.ImageFolder(data_dir, transform=transform_train)
num_classes = len(full_dataset.classes)

val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# Split on SHUFFLED indices. ImageFolder lists its samples grouped by class, so
# a contiguous range (first 80% / last 20%) puts only the trailing classes into
# the validation set and leaves them largely unseen during training.
shuffled_indices = torch.randperm(
    len(full_dataset),
    generator=torch.Generator().manual_seed(split_seed),
).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:]

# Same folder viewed through two transforms: augmented for train, plain for val
train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset_raw = datasets.ImageFolder(data_dir, transform=transform_val)
val_dataset = torch.utils.data.Subset(val_dataset_raw, val_indices)

# Expose the class mapping on the Subset wrappers for convenience
train_dataset.classes = full_dataset.classes
train_dataset.class_to_idx = full_dataset.class_to_idx
val_dataset.classes = full_dataset.classes
val_dataset.class_to_idx = full_dataset.class_to_idx

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# -- Model change: EfficientNet-B0 or ResNet50 --
# Only the last Linear of the stock classifier is swapped, so the built-in
# dropout layer at classifier[0] is kept.
model = models.efficientnet_b0(pretrained=True)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
# Alternative:
# model = models.resnet50(pretrained=True)
# model.fc = nn.Linear(model.fc.in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # argmax over the class dimension gives the predicted class index
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    # Mean of the per-batch mean losses
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 153MB/s]
Epoch 1/3 - Training: 100%|██████████| 138/138 [17:26<00:00,  7.58s/it]


Epoch [1/3] Train Loss: 1.3315 Train Acc: 65.42%


Epoch 1/3 - Validation: 100%|██████████| 35/35 [00:58<00:00,  1.66s/it]


         Val Loss: 4.9680 Val Acc: 10.29%


Epoch 2/3 - Training: 100%|██████████| 138/138 [16:55<00:00,  7.36s/it]


Epoch [2/3] Train Loss: 0.4008 Train Acc: 88.74%


Epoch 2/3 - Validation: 100%|██████████| 35/35 [00:52<00:00,  1.51s/it]


         Val Loss: 5.9329 Val Acc: 10.29%


Epoch 3/3 - Training: 100%|██████████| 138/138 [17:04<00:00,  7.43s/it]


Epoch [3/3] Train Loss: 0.2506 Train Acc: 92.33%


Epoch 3/3 - Validation: 100%|██████████| 35/35 [00:55<00:00,  1.60s/it]

         Val Loss: 6.8139 Val Acc: 10.29%





# Step 5

 Better Augmentation & Regularization

* Advanced Data Augmentation: Add stronger and more varied image augmentations (e.g. ColorJitter, RandomRotation).
* Regularization: Use weight decay (L2 regularization) in the optimizer and add dropout in the classifier head if your model supports it.
* Architecture: Same as Step 4 (EfficientNet-B0 or ResNet50).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm

# --- Configuration ------------------------------------------------------------
data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 3
learning_rate = 1e-4
weight_decay = 1e-4  # weight-decay strength handed to AdamW
split_seed = 42      # fixed so the train/val split is identical on every run

# --- Transforms ---------------------------------------------------------------
# Training: resize a little larger than the target, then random crop / flip /
# rotation / colour jitter as augmentation.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    transforms.ToTensor(),
])

# Validation: deterministic resize only, so the metric is comparable across epochs.
transform_val = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# --- Datasets -----------------------------------------------------------------
# Two ImageFolder views of the same directory: same samples in the same order,
# but one applies the training transforms and the other the validation ones.
full_dataset = datasets.ImageFolder(data_dir, transform=transform_train)
val_dataset_raw = datasets.ImageFolder(data_dir, transform=transform_val)
num_classes = len(full_dataset.classes)

val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# ImageFolder lists its samples class by class. Slicing the first 80% of the
# indices for training and the last 20% for validation therefore puts the last
# classes almost exclusively in the validation set, where the model has never
# seen them (validation accuracy stayed near 10% while training accuracy
# exceeded 90%). Shuffle the indices with a fixed seed before splitting so that
# both subsets contain every class.
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:]

train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset = torch.utils.data.Subset(val_dataset_raw, val_indices)

# Subset does not expose the class mapping, so attach it for later cells.
train_dataset.classes = full_dataset.classes
train_dataset.class_to_idx = full_dataset.class_to_idx
val_dataset.classes = full_dataset.classes
val_dataset.class_to_idx = full_dataset.class_to_idx

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --- Model --------------------------------------------------------------------
# Pre-trained EfficientNet-B0 whose classifier is replaced by dropout + a linear
# layer sized for this dataset.
model = models.efficientnet_b0(pretrained=True)
in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(in_features, num_classes)
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# --- Training / validation loop ------------------------------------------------
for epoch in range(num_epochs):
    # Training pass: dropout active, gradients enabled.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    avg_loss = running_loss / len(train_loader)  # mean of per-batch losses
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    # Validation pass: dropout disabled, no gradient tracking.
    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")


Epoch 1/3 - Training: 100%|██████████| 138/138 [17:40<00:00,  7.68s/it]


Epoch [1/3] Train Loss: 1.4235 Train Acc: 61.60%


Epoch 1/3 - Validation: 100%|██████████| 35/35 [00:56<00:00,  1.62s/it]


         Val Loss: 4.9919 Val Acc: 10.29%


Epoch 2/3 - Training: 100%|██████████| 138/138 [17:50<00:00,  7.75s/it]


Epoch [2/3] Train Loss: 0.4827 Train Acc: 86.31%


Epoch 2/3 - Validation: 100%|██████████| 35/35 [00:57<00:00,  1.65s/it]


         Val Loss: 5.7971 Val Acc: 10.38%


Epoch 3/3 - Training: 100%|██████████| 138/138 [17:46<00:00,  7.73s/it]


Epoch [3/3] Train Loss: 0.3065 Train Acc: 90.65%


Epoch 3/3 - Validation: 100%|██████████| 35/35 [00:54<00:00,  1.55s/it]

         Val Loss: 6.6921 Val Acc: 10.38%





# Step 6

Adaptive Learning Rate, Early Stopping & Model Saving

* Adaptive Learning Rate: Uses ReduceLROnPlateau, which automatically lowers the learning rate if the validation loss plateaus.
* Early Stopping: Stops training if validation accuracy hasn’t improved for a set number of epochs (patience), preventing overfitting.
* Model Saving: Stores the weights of the best model (with highest validation accuracy).
* Regularization & Augmentation: Retains advanced transformations, dropout, and weight decay from Step 5.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm

# --- Configuration ------------------------------------------------------------
data_dir = "/content/datasets"
image_size = 224
batch_size = 32
num_epochs = 50             # Upper bound; early stopping normally ends the run sooner
learning_rate = 1e-4
weight_decay = 1e-4
patience = 10               # Epochs without a validation-accuracy gain before stopping
split_seed = 42             # Fixed so the train/val split is identical on every run

# --- Transforms ---------------------------------------------------------------
# Training: resize a little larger than the target, then random crop / flip /
# rotation / colour jitter as augmentation.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    transforms.ToTensor(),
])

# Validation: deterministic resize only, so the metric is comparable across epochs.
transform_val = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# --- Datasets -----------------------------------------------------------------
# Two ImageFolder views of the same directory: same samples in the same order,
# but one applies the training transforms and the other the validation ones.
full_dataset = datasets.ImageFolder(data_dir, transform=transform_train)
val_dataset_raw = datasets.ImageFolder(data_dir, transform=transform_val)
num_classes = len(full_dataset.classes)

val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# ImageFolder lists its samples class by class. Slicing the first 80% of the
# indices for training and the last 20% for validation therefore puts the last
# classes almost exclusively in the validation set, where the model has never
# seen them (validation accuracy stayed near 10% while training accuracy
# exceeded 90%). Shuffle the indices with a fixed seed before splitting so that
# both subsets contain every class.
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:]

train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset = torch.utils.data.Subset(val_dataset_raw, val_indices)

# Subset does not expose the class mapping, so attach it for later cells.
train_dataset.classes = full_dataset.classes
train_dataset.class_to_idx = full_dataset.class_to_idx
val_dataset.classes = full_dataset.classes
val_dataset.class_to_idx = full_dataset.class_to_idx

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --- Model, loss, optimizer, scheduler -----------------------------------------
model = models.efficientnet_b0(pretrained=True)
in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(in_features, num_classes)
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# Halve the learning rate when the validation loss has not improved for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

# --- Early-stopping / checkpoint state -----------------------------------------
best_val_acc = 0.0
epochs_no_improve = 0
best_model_path = "best_model.pth"

# --- Training / validation loop ------------------------------------------------
for epoch in range(num_epochs):
    # Training pass: dropout active, gradients enabled.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_acc = 100.0 * correct / total
    avg_loss = running_loss / len(train_loader)  # mean of per-batch losses
    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_loss:.4f} Train Acc: {train_acc:.2f}%")

    # Validation pass: dropout disabled, no gradient tracking.
    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100.0 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    print(f"         Val Loss: {avg_val_loss:.4f} Val Acc: {val_acc:.2f}%")

    scheduler.step(avg_val_loss)  # Adjust learning rate based on val loss

    # Checkpoint on a new best validation accuracy; otherwise count the epoch
    # towards early stopping.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        epochs_no_improve = 0
        torch.save(model.state_dict(), best_model_path)
        print("New best model saved!")
    else:
        epochs_no_improve += 1
        # `>=` so training stops after exactly `patience` stale epochs
        # (a strict `>` would wait for patience + 1).
        if epochs_no_improve >= patience:
            print("Early stopping triggered!")
            break


Epoch 1/50 - Training: 100%|██████████| 138/138 [17:17<00:00,  7.52s/it]


Epoch [1/50] Train Loss: 1.4658 Train Acc: 60.83%


Epoch 1/50 - Validation: 100%|██████████| 35/35 [00:54<00:00,  1.55s/it]


         Val Loss: 4.7976 Val Acc: 10.38%
New best model saved!


Epoch 2/50 - Training: 100%|██████████| 138/138 [17:31<00:00,  7.62s/it]


Epoch [2/50] Train Loss: 0.4928 Train Acc: 85.85%


Epoch 2/50 - Validation: 100%|██████████| 35/35 [00:54<00:00,  1.57s/it]


         Val Loss: 6.0092 Val Acc: 10.38%


Epoch 3/50 - Training: 100%|██████████| 138/138 [17:28<00:00,  7.60s/it]


Epoch [3/50] Train Loss: 0.3211 Train Acc: 90.40%


Epoch 3/50 - Validation: 100%|██████████| 35/35 [00:56<00:00,  1.61s/it]


         Val Loss: 6.6526 Val Acc: 10.38%


Epoch 4/50 - Training: 100%|██████████| 138/138 [17:41<00:00,  7.69s/it]


Epoch [4/50] Train Loss: 0.2385 Train Acc: 92.68%


Epoch 4/50 - Validation: 100%|██████████| 35/35 [00:53<00:00,  1.54s/it]


         Val Loss: 7.3696 Val Acc: 10.38%


Epoch 5/50 - Training: 100%|██████████| 138/138 [17:37<00:00,  7.67s/it]


Epoch [5/50] Train Loss: 0.1826 Train Acc: 94.54%


Epoch 5/50 - Validation: 100%|██████████| 35/35 [00:53<00:00,  1.53s/it]


         Val Loss: 7.6812 Val Acc: 10.38%


Epoch 6/50 - Training: 100%|██████████| 138/138 [17:43<00:00,  7.71s/it]


Epoch [6/50] Train Loss: 0.1405 Train Acc: 95.38%


Epoch 6/50 - Validation: 100%|██████████| 35/35 [00:55<00:00,  1.58s/it]


         Val Loss: 7.3781 Val Acc: 10.38%


Epoch 7/50 - Training: 100%|██████████| 138/138 [17:42<00:00,  7.70s/it]


Epoch [7/50] Train Loss: 0.1214 Train Acc: 96.25%


Epoch 7/50 - Validation: 100%|██████████| 35/35 [00:54<00:00,  1.57s/it]


         Val Loss: 7.7385 Val Acc: 10.38%


Epoch 8/50 - Training: 100%|██████████| 138/138 [17:34<00:00,  7.64s/it]


Epoch [8/50] Train Loss: 0.0878 Train Acc: 97.43%


Epoch 8/50 - Validation: 100%|██████████| 35/35 [00:53<00:00,  1.54s/it]


         Val Loss: 7.9762 Val Acc: 10.38%


Epoch 9/50 - Training: 100%|██████████| 138/138 [17:35<00:00,  7.65s/it]


Epoch [9/50] Train Loss: 0.0737 Train Acc: 97.82%


Epoch 9/50 - Validation: 100%|██████████| 35/35 [00:55<00:00,  1.59s/it]


         Val Loss: 8.2179 Val Acc: 10.38%


Epoch 10/50 - Training: 100%|██████████| 138/138 [17:35<00:00,  7.65s/it]


Epoch [10/50] Train Loss: 0.0664 Train Acc: 98.25%


Epoch 10/50 - Validation: 100%|██████████| 35/35 [00:55<00:00,  1.57s/it]


         Val Loss: 8.6251 Val Acc: 10.38%


Epoch 11/50 - Training:  56%|█████▌    | 77/138 [09:56<07:34,  7.45s/it]

# Step 7

In [None]:
"""
Improved Pest Classification Training Pipeline
===========================================

Modern training pipeline using EfficientNet-B0 with comprehensive validation,
agricultural-specific augmentations, and uncertainty quantification.
"""

import os
import sys
import json
import time
import random
import logging
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Union
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

# Setup logging: INFO-level root configuration plus a module-level logger that
# the classes and functions below use for progress and warning messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Try to import ML dependencies. ML_AVAILABLE records the outcome; when any of
# these imports fails the except branch below logs the error, prints install
# instructions and exits with status 1.
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
    from torchvision import transforms, models
    from torchvision.datasets import ImageFolder
    import torchvision.transforms.functional as TF
    from PIL import Image
    ML_AVAILABLE = True
    logger.info(f"Full ML dependencies loaded successfully - PyTorch {torch.__version__}")

    # Check PyTorch version for compatibility: only the first two dot-separated
    # components (major, minor) are parsed, and anything below 1.8 gets a warning.
    torch_version = tuple(map(int, torch.__version__.split('.')[:2]))
    if torch_version < (1, 8):
        logger.warning(f"PyTorch version {torch.__version__} is quite old. Consider upgrading for best compatibility.")

except ImportError as e:
    # A missing dependency is treated as fatal for this pipeline.
    ML_AVAILABLE = False
    logger.error(f"ML dependencies not available: {e}")
    print("Please install: pip install torch torchvision matplotlib seaborn scikit-learn")
    sys.exit(1)


class AgriculturalAugmentations:
    """Factory for the training and validation transform pipelines.

    Both pipelines end with ImageNet normalization so their output matches the
    statistics of the pre-trained backbone. The training pipeline additionally
    applies geometric, colour, blur, perspective and "dirt spot" augmentations.
    """

    # Per-channel statistics used by both pipelines' Normalize step.
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD = [0.229, 0.224, 0.225]

    def __init__(self, image_size: int = 224):
        """Store the side length (in pixels) of the square images to produce."""
        self.image_size = image_size

    def get_train_transforms(self):
        """Return the augmenting pipeline used for training images.

        The image is resized 32 px larger than ``image_size`` on each side so
        that the following random crop has room to move.
        """
        return transforms.Compose([
            transforms.Resize((self.image_size + 32, self.image_size + 32)),
            transforms.RandomCrop(self.image_size),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.3),  # Pests can be in any orientation
            transforms.RandomRotation(degrees=30, fill=0),
            transforms.ColorJitter(
                brightness=0.3,    # Agricultural lighting varies
                contrast=0.3,      # Different background contrasts
                saturation=0.2,    # Natural color variations
                hue=0.1            # Slight hue shifts
            ),
            transforms.RandomApply([
                transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0))
            ], p=0.3),
            # RandomPerspective has its own p=0.5, so the effective rate is 0.15.
            transforms.RandomApply([
                transforms.RandomPerspective(distortion_scale=0.2, p=0.5)
            ], p=0.3),
            transforms.ToTensor(),
            # Dirt spots are applied BEFORE normalization, while pixel values are
            # still in [0, 1]: scaling by a factor < 1 then really darkens the
            # patch. On an already-normalized tensor the same scaling would pull
            # values toward 0 (the channel mean) and brighten dark pixels.
            transforms.RandomApply([
                self._add_dirt_spots
            ], p=0.2),
            transforms.Normalize(
                mean=self.IMAGENET_MEAN,
                std=self.IMAGENET_STD
            ),
        ])

    def get_val_transforms(self):
        """Return the deterministic pipeline: resize, to-tensor, normalize."""
        return transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=self.IMAGENET_MEAN,
                std=self.IMAGENET_STD
            )
        ])

    def _add_dirt_spots(self, tensor):
        """Darken 1-3 small square patches of ``tensor`` with probability 0.5.

        Args:
            tensor: image tensor indexed as (channel, row, column); it is
                modified in place.

        Returns:
            The same tensor object, possibly with darkened patches.
        """
        if random.random() < 0.5:
            num_spots = random.randint(1, 3)
            # Clamp at 0 so images smaller than 5 px do not make randint fail.
            max_row = max(0, tensor.shape[1] - 5)
            max_col = max(0, tensor.shape[2] - 5)
            for _ in range(num_spots):
                row = random.randint(0, max_row)
                col = random.randint(0, max_col)
                spot_size = random.randint(2, 4)
                tensor[:, row:row + spot_size, col:col + spot_size] *= random.uniform(0.3, 0.7)
        return tensor


class ImprovedPestDataset(Dataset):
    """Image dataset read from a directory with one sub-directory per class.

    Attributes:
        samples: list of ``(image_path, class_index)`` tuples, ordered by class
            directory name and then by file name so the order is reproducible.
        classes: list of class names.
        class_to_idx: mapping from class name to integer label.
    """

    # Accepted image suffixes; compared against the lower-cased file suffix.
    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_dir: str, transform=None, class_mapping: Optional[Dict] = None):
        """Scan ``data_dir`` and build the sample list.

        Args:
            data_dir: directory containing one sub-directory per class.
            transform: optional callable applied to each loaded PIL image.
            class_mapping: optional mapping, either
                ``{'classes': [...], 'class_to_idx': {...}}`` or a direct
                ``{class_name: index}`` dict. Any other value falls back to
                auto-detecting classes from the sorted directory names.
        """
        self.data_dir = Path(data_dir)
        self.transform = transform
        self.samples = []
        self.classes = []
        self.class_to_idx = {}

        self._build_dataset(class_mapping)

    def _build_dataset(self, class_mapping: Optional[Dict] = None):
        """Resolve the class mapping, then collect every image path with its label."""
        # Sorted so that sample order does not depend on filesystem listing
        # order; seeded splits over the samples are then reproducible.
        pest_dirs = sorted(d for d in self.data_dir.iterdir() if d.is_dir())

        detected_classes = [d.name for d in pest_dirs]  # already sorted by path
        use_detected = True

        if class_mapping and isinstance(class_mapping, dict):
            if 'classes' in class_mapping and 'class_to_idx' in class_mapping:
                # Full mapping format, as written to class_mapping.json.
                self.classes = class_mapping['classes']
                self.class_to_idx = class_mapping['class_to_idx']
                use_detected = False
            elif all(isinstance(v, int) for v in class_mapping.values()):
                # Direct {class_name: index} format. Order the names by index so
                # that classes[i] is the name of label i.
                self.class_to_idx = class_mapping
                self.classes = sorted(class_mapping, key=class_mapping.get)
                use_detected = False

        if use_detected:
            self.classes = sorted(detected_classes)
            self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        for pest_dir in pest_dirs:
            # Directories that are not part of the mapping are ignored.
            if pest_dir.name not in self.class_to_idx:
                continue
            class_idx = self.class_to_idx[pest_dir.name]

            # Case-insensitive suffix check: matches e.g. ".Jpg" as well and
            # cannot list a file twice on case-insensitive filesystems.
            image_files = sorted(
                p for p in pest_dir.iterdir()
                if p.is_file() and p.suffix.lower() in self.IMAGE_SUFFIXES
            )
            self.samples.extend((str(p), class_idx) for p in image_files)

        logger.info(f"Dataset built: {len(self.samples)} samples, {len(self.classes)} classes")
        logger.info(f"Classes: {self.classes}")

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``.

        An image that cannot be loaded is replaced by a black 224x224 RGB image
        (with a logged warning) so one bad file does not abort training.
        """
        img_path, class_idx = self.samples[idx]

        try:
            # Context manager closes the underlying file once pixels are copied.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
        except Exception as e:
            logger.warning(f"Failed to load image {img_path}: {e}")
            image = Image.new('RGB', (224, 224), (0, 0, 0))

        if self.transform:
            image = self.transform(image)

        return image, class_idx

    def get_class_distribution(self):
        """Return ``{class_index: sample_count}`` for classes that have samples."""
        class_counts = defaultdict(int)
        for _, class_idx in self.samples:
            class_counts[class_idx] += 1

        return dict(class_counts)


class EfficientNetPestClassifier(nn.Module):
    """Pre-trained EfficientNet-B0 with a two-layer classification head.

    A learnable scalar ``temperature`` (initialised to 1) is kept alongside the
    backbone; it is only used by ``forward_with_temperature``.
    """

    def __init__(self, num_classes: int, dropout_rate: float = 0.3):
        """Build the backbone and swap its classifier for a pest-specific head.

        Args:
            num_classes: number of output logits.
            dropout_rate: dropout before the hidden layer; half of it is used
                before the output layer.
        """
        super().__init__()

        # Prefer the `weights=` API; fall back to the legacy `pretrained=True`
        # flag when the installed torchvision does not provide it.
        try:
            pretrained_weights = models.EfficientNet_B0_Weights.DEFAULT
            self.backbone = models.efficientnet_b0(weights=pretrained_weights)
        except (AttributeError, TypeError):
            self.backbone = models.efficientnet_b0(pretrained=True)

        # Input width of the stock classifier's linear layer.
        feature_dim = self.backbone.classifier[1].in_features
        head_layers = [
            nn.Dropout(dropout_rate),
            nn.Linear(feature_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate / 2),
            nn.Linear(512, num_classes),
        ]
        self.backbone.classifier = nn.Sequential(*head_layers)

        # Scalar divisor for the logits, used for calibration.
        self.temperature = nn.Parameter(torch.ones(1))

    def forward(self, x):
        """Return the raw class logits for batch ``x``."""
        return self.backbone(x)

    def forward_with_temperature(self, x):
        """Return the logits divided by the learned temperature."""
        raw_logits = self.backbone(x)
        calibrated = raw_logits / self.temperature
        return calibrated


class ImprovedTrainer:
    """Modern training pipeline with comprehensive validation."""

    def __init__(self, data_dir: str, output_dir: str = "models/improved"):
        """Set up paths, hyper-parameters, augmentations and the target device.

        Args:
            data_dir: root directory of the image dataset.
            output_dir: where checkpoints and JSON results are written; it is
                created (including parents) if it does not exist.
        """
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Use the GPU when one is visible, otherwise fall back to the CPU.
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Hyper-parameters read by the other methods of this class.
        self.config = dict(
            image_size=224,
            batch_size=32,
            num_epochs=100,
            learning_rate=1e-4,
            weight_decay=1e-4,
            patience=15,
            min_delta=1e-4,
            num_folds=5,
            device=device_name,
        )

        self.augmentations = AgriculturalAugmentations(self.config['image_size'])
        self.device = torch.device(self.config['device'])

        logger.info(f"Training on device: {self.device}")

    def prepare_data(self):
        """Load the full dataset, record its class info and persist the mapping.

        Sets ``num_classes``, ``class_names`` and ``class_to_idx`` on the trainer
        and writes them to ``class_mapping.json`` in the output directory.

        Returns:
            Tuple ``(dataset, class_dist)``: the full dataset (with validation
            transforms) and its ``{class_index: count}`` distribution.

        Raises:
            ValueError: if no images are found under ``data_dir``.
        """
        logger.info("Preparing dataset...")

        # The full dataset is only used for counting and splitting, so it gets
        # the non-augmenting validation transforms.
        eval_transforms = self.augmentations.get_val_transforms()
        dataset = ImprovedPestDataset(self.data_dir, transform=eval_transforms)

        if not len(dataset):
            raise ValueError(f"No images found in {self.data_dir}")

        class_dist = dataset.get_class_distribution()
        logger.info(f"Class distribution: {class_dist}")

        # Ratio between the largest and smallest class; above 10 is reported.
        counts = class_dist.values()
        smallest, largest = min(counts), max(counts)
        imbalance_ratio = largest / smallest
        if imbalance_ratio > 10:
            logger.warning(f"Severe class imbalance detected (ratio: {imbalance_ratio:.1f})")

        self.num_classes = len(dataset.classes)
        self.class_names = dataset.classes
        self.class_to_idx = dataset.class_to_idx

        # Persist the mapping so saved models can be interpreted later.
        mapping_path = self.output_dir / 'class_mapping.json'
        with open(mapping_path, 'w') as f:
            json.dump(
                {
                    'classes': self.class_names,
                    'class_to_idx': self.class_to_idx,
                    'num_classes': self.num_classes,
                },
                f,
                indent=2,
            )

        return dataset, class_dist

    def create_weighted_sampler(self, dataset, class_dist):
        """Build a sampler that draws each class with inverse-frequency weight.

        Args:
            dataset: object with ``__len__`` and a ``samples`` list of
                ``(path, class_index)`` pairs.
            class_dist: ``{class_index: count}`` mapping.

        Returns:
            A ``WeightedRandomSampler`` (with replacement) that draws as many
            samples per epoch as ``dataset.samples`` contains.
        """
        dataset_size = len(dataset)
        num_classes = len(class_dist)

        # weight(class) = N / (num_classes * count(class))
        weight_for_class = {
            class_idx: dataset_size / (num_classes * count)
            for class_idx, count in class_dist.items()
        }

        # Every sample inherits the weight of its class.
        per_sample_weights = [weight_for_class[label] for _, label in dataset.samples]

        return WeightedRandomSampler(
            weights=per_sample_weights,
            num_samples=len(per_sample_weights),
            replacement=True
        )

    def train_fold(self, train_dataset, val_dataset, fold_num: int):
        """Train and validate a fresh model on one cross-validation fold.

        Each epoch runs a full training pass and a full validation pass. The
        learning rate is halved when the validation loss plateaus, the model
        with the best validation accuracy is checkpointed to
        ``best_model_fold_{fold_num}.pth`` in the output directory, and training
        stops early after ``config['patience']`` epochs without improvement.

        Args:
            train_dataset: dataset used for optimisation (shuffled each epoch).
            val_dataset: dataset used for validation.
            fold_num: zero-based fold index, used in log messages and file names.

        Returns:
            Dict with ``best_val_acc`` (percent), the per-epoch ``train_losses``,
            ``val_losses`` and ``val_accuracies`` lists, and ``final_model`` -
            the model as of the last epoch run, which is not necessarily the
            checkpointed best one.
        """
        logger.info(f"Training fold {fold_num + 1}/{self.config['num_folds']}")

        # Worker processes and pinned memory are only used when training on GPU.
        use_cuda = self.device.type == 'cuda'
        num_workers = 2 if use_cuda else 0

        train_loader = DataLoader(
            train_dataset,
            batch_size=self.config['batch_size'],
            shuffle=True,
            num_workers=num_workers,
            pin_memory=use_cuda
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=self.config['batch_size'],
            shuffle=False,
            num_workers=num_workers,
            pin_memory=use_cuda
        )

        # Fresh model per fold so folds do not share learned weights.
        model = EfficientNetPestClassifier(
            num_classes=self.num_classes,
            dropout_rate=0.3
        ).to(self.device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW(
            model.parameters(),
            lr=self.config['learning_rate'],
            weight_decay=self.config['weight_decay']
        )

        # Halve the learning rate after 5 epochs without validation-loss
        # improvement. Not every PyTorch release accepts the `verbose` keyword,
        # so retry without it when it is rejected with a TypeError.
        try:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='min',
                factor=0.5,
                patience=5,
                verbose=True
            )
        except TypeError:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='min',
                factor=0.5,
                patience=5
            )

        best_val_acc = 0.0
        patience_counter = 0
        train_losses = []
        val_losses = []
        val_accuracies = []

        for epoch in range(self.config['num_epochs']):
            # ---- Training phase ----
            model.train()
            running_loss = 0.0
            correct_predictions = 0
            total_predictions = 0

            for batch_idx, (images, labels) in enumerate(train_loader):
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_predictions += labels.size(0)
                correct_predictions += (predicted == labels).sum().item()

                if batch_idx % 50 == 0:
                    logger.info(f'Fold {fold_num+1}, Epoch {epoch+1}, Batch {batch_idx}, '
                              f'Loss: {loss.item():.4f}')

            # ---- Validation phase ----
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(self.device), labels.to(self.device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                    val_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()

            # Accuracies are percentages; losses are means over batches.
            train_acc = 100.0 * correct_predictions / total_predictions
            val_acc = 100.0 * val_correct / val_total
            avg_train_loss = running_loss / len(train_loader)
            avg_val_loss = val_loss / len(val_loader)

            train_losses.append(avg_train_loss)
            val_losses.append(avg_val_loss)
            val_accuracies.append(val_acc)

            logger.info(f'Fold {fold_num+1}, Epoch {epoch+1}: '
                       f'Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%, '
                       f'Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%')

            scheduler.step(avg_val_loss)

            # An epoch counts as an improvement only when validation accuracy
            # beats the best so far by more than min_delta.
            if val_acc > best_val_acc + self.config['min_delta']:
                best_val_acc = val_acc
                patience_counter = 0

                # Checkpoint carries the class mapping so it is self-describing.
                torch.save({
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch,
                    'val_acc': val_acc,
                    'fold': fold_num,
                    'class_mapping': {
                        'classes': self.class_names,
                        'class_to_idx': self.class_to_idx,
                        'num_classes': self.num_classes
                    }
                }, self.output_dir / f'best_model_fold_{fold_num}.pth')
            else:
                patience_counter += 1

            # `>=` so training stops after exactly `patience` stale epochs
            # (a strict `>` would wait for patience + 1).
            if patience_counter >= self.config['patience']:
                logger.info(f'Early stopping at epoch {epoch+1}')
                break

        return {
            'best_val_acc': best_val_acc,
            'train_losses': train_losses,
            'val_losses': val_losses,
            'val_accuracies': val_accuracies,
            'final_model': model
        }

    def cross_validate(self):
        """Run stratified k-fold training and summarise the results.

        The full dataset is split into ``config['num_folds']`` stratified,
        shuffled folds (seed 42). For every fold a training dataset (augmenting
        transforms) and a validation dataset (plain transforms) are built and
        handed to ``train_fold``. The summary is written to ``cv_results.json``
        in the output directory.

        Returns:
            Tuple ``(cv_results, fold_results)``: the summary dict and the list
            of per-fold dicts returned by ``train_fold``.
        """
        logger.info("Starting cross-validation training...")

        full_dataset, class_dist = self.prepare_data()

        # One label per sample, in sample order, for stratification.
        labels = [class_idx for _, class_idx in full_dataset.samples]

        skf = StratifiedKFold(n_splits=self.config['num_folds'], shuffle=True, random_state=42)
        fold_splits = skf.split(range(len(full_dataset)), labels)

        fold_results = []
        for fold_num, (train_idx, val_idx) in enumerate(fold_splits):
            # Training view of this fold: augmenting transforms, then the sample
            # list is narrowed to the fold's training indices.
            train_dataset = ImprovedPestDataset(
                self.data_dir,
                transform=self.augmentations.get_train_transforms(),
                class_mapping=dict(self.class_to_idx)
            )
            train_dataset.samples = [full_dataset.samples[i] for i in train_idx]

            # Validation view of this fold: plain transforms, held-out indices.
            val_dataset = ImprovedPestDataset(
                self.data_dir,
                transform=self.augmentations.get_val_transforms(),
                class_mapping=dict(self.class_to_idx)
            )
            val_dataset.samples = [full_dataset.samples[i] for i in val_idx]

            fold_results.append(self.train_fold(train_dataset, val_dataset, fold_num))

        # Aggregate the best validation accuracy of every fold.
        cv_accuracies = [outcome['best_val_acc'] for outcome in fold_results]
        mean_cv_acc = np.mean(cv_accuracies)
        std_cv_acc = np.std(cv_accuracies)

        logger.info("Cross-validation complete!")
        logger.info(f"Mean CV Accuracy: {mean_cv_acc:.2f}% ± {std_cv_acc:.2f}%")
        logger.info(f"Individual fold accuracies: {cv_accuracies}")

        mapping_summary = {
            'classes': self.class_names,
            'class_to_idx': self.class_to_idx,
            'num_classes': self.num_classes
        }
        cv_results = {
            'mean_accuracy': mean_cv_acc,
            'std_accuracy': std_cv_acc,
            'fold_accuracies': cv_accuracies,
            'config': self.config,
            'class_mapping': mapping_summary
        }

        results_path = self.output_dir / 'cv_results.json'
        with open(results_path, 'w') as f:
            json.dump(cv_results, f, indent=2)

        return cv_results, fold_results


def main():
    """Seed all RNGs, run cross-validated training and print a summary.

    Returns:
        ``(cv_results, fold_results)`` from ``ImprovedTrainer.cross_validate``,
        or ``None`` when the dataset directory does not exist.
    """
    # Same seed for every random number source, for reproducibility.
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    print("Improved Pest Classification Training Pipeline")
    print("=" * 60)

    # Bail out early when the dataset has not been downloaded/unzipped.
    data_dir = "/content/datasets"
    dataset_present = Path(data_dir).exists()
    if not dataset_present:
        print(f"Dataset directory '{data_dir}' not found!")
        print("Please ensure the Agricultural Pests Image Dataset is available.")
        return

    trainer = ImprovedTrainer(data_dir)

    # Time the whole cross-validation run.
    started_at = time.time()
    cv_results, fold_results = trainer.cross_validate()
    training_time = time.time() - started_at

    mean_acc = cv_results['mean_accuracy']
    std_acc = cv_results['std_accuracy']
    best_fold_acc = max(cv_results['fold_accuracies'])

    print(f"\nTraining completed in {training_time:.2f} seconds!")
    print("Final Results:")
    print(f"   Mean CV Accuracy: {mean_acc:.2f}% ± {std_acc:.2f}%")
    print(f"   Best Single Fold: {best_fold_acc:.2f}%")
    print(f"   Models saved in: {trainer.output_dir}")

    return cv_results, fold_results


if __name__ == "__main__":
    # Run the pipeline only when the ML stack imported successfully; otherwise
    # report the problem and exit with a non-zero status.
    if ML_AVAILABLE:
        main()
    else:
        print("ML dependencies not available. Please install required packages.")
        sys.exit(1)

Improved Pest Classification Training Pipeline




KeyboardInterrupt: 