# Setup

In [1]:
import os
import torch
import torch.nn as nn
import numpy as np
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from torch.cuda.amp import GradScaler, autocast
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.nn.functional import softmax
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  warn(


# Data Loader

In [2]:
# Transforms
transform_inception = transforms.Compose([
    transforms.Resize(320),
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_vit = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
# Paths
data_dir = './rose-leaf-disease-dataset/Rose'
train_dir = os.path.join(data_dir, 'train')

# Full datasets
full_dataset_incep = datasets.ImageFolder(train_dir, transform=transform_inception)
full_dataset_vit = datasets.ImageFolder(train_dir, transform=transform_vit)

# Shared split
dataset_size = len(full_dataset_incep)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size
indices = torch.randperm(dataset_size)
train_indices, val_indices = indices[:train_size], indices[train_size:]

# Subsets
train_dataset_incep = Subset(full_dataset_incep, train_indices)
val_dataset_incep = Subset(full_dataset_incep, val_indices)

train_dataset_vit = Subset(full_dataset_vit, train_indices)
val_dataset_vit = Subset(full_dataset_vit, val_indices)

# Dataloaders
train_loader_incep = DataLoader(train_dataset_incep, batch_size=32, shuffle=True)
val_loader_incep = DataLoader(val_dataset_incep, batch_size=32, shuffle=False)

train_loader_vit = DataLoader(train_dataset_vit, batch_size=32, shuffle=True)
val_loader_vit = DataLoader(val_dataset_vit, batch_size=32, shuffle=False)

In [6]:
# Class weights
labels = [y for _, y in full_dataset_incep]
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# Number of classes
num_classes = len(full_dataset_incep.classes)

# Inception V3

In [7]:
# InceptionV3
model_incep = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)
model_incep.fc = nn.Linear(model_incep.fc.in_features, num_classes)
model_incep.to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer_incep = torch.optim.Adam(model_incep.parameters(), lr=0.001)
scheduler_incep = torch.optim.lr_scheduler.StepLR(optimizer_incep, step_size=10, gamma=0.1)
scaler_incep = GradScaler()

best_val_acc_incep = 0

for epoch in range(30):
    model_incep.train()
    train_loss = 0
    for images, labels in train_loader_incep:
        images, labels = images.to(device), labels.to(device)
        optimizer_incep.zero_grad()
        with autocast():
            outputs = model_incep(images)
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            loss = criterion(outputs, labels)
        scaler_incep.scale(loss).backward()
        scaler_incep.step(optimizer_incep)
        scaler_incep.update()
        train_loss += loss.item() * images.size(0)
    train_loss /= len(train_loader_incep.dataset)

    # Validation
    model_incep.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader_incep:
            images, labels = images.to(device), labels.to(device)
            outputs = model_incep(images)
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            preds = torch.argmax(outputs, dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    val_acc = accuracy_score(val_labels, val_preds)

    if val_acc > best_val_acc_incep:
        best_val_acc_incep = val_acc
        torch.save(model_incep.state_dict(), 'best_model_inception.pth')

    scheduler_incep.step()

    print(f"[Inception] Epoch {epoch+1}/30 | Train Loss: {train_loss:.4f} | Val Acc: {val_acc:.4f}")

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /home/hice1/slee3593/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


100%|██████████| 104M/104M [00:03<00:00, 34.5MB/s] 
  scaler_incep = GradScaler()
  with autocast():


[Inception] Epoch 1/30 | Train Loss: 0.4161 | Val Acc: 0.8265


  with autocast():


[Inception] Epoch 2/30 | Train Loss: 0.3005 | Val Acc: 0.8755


  with autocast():


[Inception] Epoch 3/30 | Train Loss: 0.2106 | Val Acc: 0.8850


  with autocast():


[Inception] Epoch 4/30 | Train Loss: 0.1918 | Val Acc: 0.9456


  with autocast():


[Inception] Epoch 5/30 | Train Loss: 0.1664 | Val Acc: 0.9272


  with autocast():


[Inception] Epoch 6/30 | Train Loss: 0.1580 | Val Acc: 0.9456


  with autocast():


[Inception] Epoch 7/30 | Train Loss: 0.1486 | Val Acc: 0.9633


  with autocast():


[Inception] Epoch 8/30 | Train Loss: 0.1220 | Val Acc: 0.9633


  with autocast():


[Inception] Epoch 9/30 | Train Loss: 0.1288 | Val Acc: 0.9707


  with autocast():


[Inception] Epoch 10/30 | Train Loss: 0.1177 | Val Acc: 0.9687


  with autocast():


[Inception] Epoch 11/30 | Train Loss: 0.0836 | Val Acc: 0.9810


  with autocast():


[Inception] Epoch 12/30 | Train Loss: 0.0670 | Val Acc: 0.9803


  with autocast():


[Inception] Epoch 13/30 | Train Loss: 0.0579 | Val Acc: 0.9782


  with autocast():


[Inception] Epoch 14/30 | Train Loss: 0.0562 | Val Acc: 0.9782


  with autocast():


[Inception] Epoch 15/30 | Train Loss: 0.0565 | Val Acc: 0.9884


  with autocast():


[Inception] Epoch 16/30 | Train Loss: 0.0556 | Val Acc: 0.9857


  with autocast():


[Inception] Epoch 17/30 | Train Loss: 0.0523 | Val Acc: 0.9864


  with autocast():


[Inception] Epoch 18/30 | Train Loss: 0.0485 | Val Acc: 0.9864


  with autocast():


[Inception] Epoch 19/30 | Train Loss: 0.0473 | Val Acc: 0.9871


  with autocast():


[Inception] Epoch 20/30 | Train Loss: 0.0521 | Val Acc: 0.9864


  with autocast():


[Inception] Epoch 21/30 | Train Loss: 0.0464 | Val Acc: 0.9864


  with autocast():


[Inception] Epoch 22/30 | Train Loss: 0.0464 | Val Acc: 0.9837


  with autocast():


[Inception] Epoch 23/30 | Train Loss: 0.0394 | Val Acc: 0.9891


  with autocast():


[Inception] Epoch 24/30 | Train Loss: 0.0409 | Val Acc: 0.9823


  with autocast():


[Inception] Epoch 25/30 | Train Loss: 0.0435 | Val Acc: 0.9857


  with autocast():


[Inception] Epoch 26/30 | Train Loss: 0.0405 | Val Acc: 0.9857


  with autocast():


[Inception] Epoch 27/30 | Train Loss: 0.0361 | Val Acc: 0.9878


  with autocast():


[Inception] Epoch 28/30 | Train Loss: 0.0453 | Val Acc: 0.9837


  with autocast():


[Inception] Epoch 29/30 | Train Loss: 0.0457 | Val Acc: 0.9891


  with autocast():


[Inception] Epoch 30/30 | Train Loss: 0.0376 | Val Acc: 0.9857


In [8]:
model_incep.load_state_dict(torch.load('best_model_inception.pth'))
model_incep.eval()

val_preds, val_labels = [], []
with torch.no_grad():
    for images, labels in val_loader_incep:
        images, labels = images.to(device), labels.to(device)
        outputs = model_incep(images)
        if isinstance(outputs, tuple):
            outputs = outputs[0]
        preds = torch.argmax(outputs, dim=1)
        val_preds.extend(preds.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

print("InceptionV3 Final Accuracy:", accuracy_score(val_labels, val_preds))
print(classification_report(val_labels, val_preds))
print(confusion_matrix(val_labels, val_preds))

InceptionV3 Final Accuracy: 0.9877551020408163
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       496
           1       0.99      0.99      0.99       489
           2       0.98      0.99      0.98       485

    accuracy                           0.99      1470
   macro avg       0.99      0.99      0.99      1470
weighted avg       0.99      0.99      0.99      1470

[[487   0   9]
 [  2 484   3]
 [  0   4 481]]


# ViT

In [9]:
model_vit = models.vit_b_16(weights=models.ViT_B_16_Weights.DEFAULT)
model_vit.heads.head = nn.Linear(model_vit.heads.head.in_features, num_classes)
model_vit.to(device)

# Freeze backbone initially
for param in model_vit.parameters():
    param.requires_grad = False
for param in model_vit.heads.parameters():
    param.requires_grad = True

optimizer_vit = AdamW(model_vit.parameters(), lr=5e-5, weight_decay=0.01)
scheduler_vit = CosineAnnealingWarmRestarts(optimizer_vit, T_0=10, T_mult=2)
scaler_vit = GradScaler()
criterion_vit = nn.CrossEntropyLoss(weight=class_weights)

best_val_acc_vit = 0
freeze_epochs = 5

for epoch in range(30):
    if epoch == freeze_epochs:
        # Unfreeze backbone
        for param in model_vit.parameters():
            param.requires_grad = True
        optimizer_vit = AdamW(model_vit.parameters(), lr=5e-5, weight_decay=0.01)
        scheduler_vit = CosineAnnealingWarmRestarts(optimizer_vit, T_0=10, T_mult=2)
        print("Unfroze ViT backbone.")

    model_vit.train()
    train_loss = 0
    for images, labels in train_loader_vit:
        images, labels = images.to(device), labels.to(device)
        optimizer_vit.zero_grad()
        with autocast():
            outputs = model_vit(images)
            loss = criterion_vit(outputs, labels)
        scaler_vit.scale(loss).backward()
        scaler_vit.step(optimizer_vit)
        scaler_vit.update()
        train_loss += loss.item() * images.size(0)
    train_loss /= len(train_loader_vit.dataset)

    # Validation
    model_vit.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader_vit:
            images, labels = images.to(device), labels.to(device)
            outputs = model_vit(images)
            preds = torch.argmax(outputs, dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

    val_acc = accuracy_score(val_labels, val_preds)

    if val_acc > best_val_acc_vit:
        best_val_acc_vit = val_acc
        torch.save(model_vit.state_dict(), 'best_model_vit.pth')

    scheduler_vit.step()

    print(f"[ViT] Epoch {epoch+1}/30 | Train Loss: {train_loss:.4f} | Val Acc: {val_acc:.4f}")

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /home/hice1/slee3593/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth


100%|██████████| 330M/330M [00:00<00:00, 449MB/s] 
  scaler_vit = GradScaler()
  with autocast():


[ViT] Epoch 1/30 | Train Loss: 0.9006 | Val Acc: 0.8143


  with autocast():


[ViT] Epoch 2/30 | Train Loss: 0.5860 | Val Acc: 0.8878


  with autocast():


[ViT] Epoch 3/30 | Train Loss: 0.4456 | Val Acc: 0.9211


  with autocast():


[ViT] Epoch 4/30 | Train Loss: 0.3713 | Val Acc: 0.9272


  with autocast():


[ViT] Epoch 5/30 | Train Loss: 0.3267 | Val Acc: 0.9272
Unfroze ViT backbone.


  with autocast():


[ViT] Epoch 6/30 | Train Loss: 0.1009 | Val Acc: 0.9735


  with autocast():


[ViT] Epoch 7/30 | Train Loss: 0.0629 | Val Acc: 0.9796


  with autocast():


[ViT] Epoch 8/30 | Train Loss: 0.0516 | Val Acc: 0.9918


  with autocast():


[ViT] Epoch 9/30 | Train Loss: 0.0303 | Val Acc: 0.9925


  with autocast():


[ViT] Epoch 10/30 | Train Loss: 0.0285 | Val Acc: 0.9796


  with autocast():


[ViT] Epoch 11/30 | Train Loss: 0.0279 | Val Acc: 0.9905


  with autocast():


[ViT] Epoch 12/30 | Train Loss: 0.0242 | Val Acc: 0.9925


  with autocast():


[ViT] Epoch 13/30 | Train Loss: 0.0181 | Val Acc: 0.9973


  with autocast():


[ViT] Epoch 14/30 | Train Loss: 0.0149 | Val Acc: 0.9952


  with autocast():


[ViT] Epoch 15/30 | Train Loss: 0.0161 | Val Acc: 0.9918


  with autocast():


[ViT] Epoch 16/30 | Train Loss: 0.0560 | Val Acc: 0.9762


  with autocast():


[ViT] Epoch 17/30 | Train Loss: 0.0566 | Val Acc: 0.9782


  with autocast():


[ViT] Epoch 18/30 | Train Loss: 0.0352 | Val Acc: 0.9884


  with autocast():


[ViT] Epoch 19/30 | Train Loss: 0.0373 | Val Acc: 0.9660


  with autocast():


[ViT] Epoch 20/30 | Train Loss: 0.0389 | Val Acc: 0.9646


  with autocast():


[ViT] Epoch 21/30 | Train Loss: 0.0295 | Val Acc: 0.9905


  with autocast():


[ViT] Epoch 22/30 | Train Loss: 0.0295 | Val Acc: 0.9871


  with autocast():


[ViT] Epoch 23/30 | Train Loss: 0.0266 | Val Acc: 0.9959


  with autocast():


[ViT] Epoch 24/30 | Train Loss: 0.0201 | Val Acc: 0.9918


  with autocast():


[ViT] Epoch 25/30 | Train Loss: 0.0155 | Val Acc: 0.9925


  with autocast():


[ViT] Epoch 26/30 | Train Loss: 0.0229 | Val Acc: 0.9939


  with autocast():


[ViT] Epoch 27/30 | Train Loss: 0.0155 | Val Acc: 0.9946


  with autocast():


[ViT] Epoch 28/30 | Train Loss: 0.0142 | Val Acc: 0.9946


  with autocast():


[ViT] Epoch 29/30 | Train Loss: 0.0093 | Val Acc: 0.9918


  with autocast():


[ViT] Epoch 30/30 | Train Loss: 0.0099 | Val Acc: 0.9932


In [10]:
model_vit.load_state_dict(torch.load('best_model_vit.pth'))
model_vit.eval()

val_preds_vit, val_labels_vit = [], []
with torch.no_grad():
    for images, labels in val_loader_vit:
        images, labels = images.to(device), labels.to(device)
        outputs = model_vit(images)
        preds = torch.argmax(outputs, dim=1)
        val_preds_vit.extend(preds.cpu().numpy())
        val_labels_vit.extend(labels.cpu().numpy())

print("ViT Final Accuracy:", accuracy_score(val_labels_vit, val_preds_vit))
print(classification_report(val_labels_vit, val_preds_vit))
print(confusion_matrix(val_labels_vit, val_preds_vit))

ViT Final Accuracy: 0.9931972789115646
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       496
           1       1.00      0.98      0.99       489
           2       0.99      1.00      0.99       485

    accuracy                           0.99      1470
   macro avg       0.99      0.99      0.99      1470
weighted avg       0.99      0.99      0.99      1470

[[496   0   0]
 [  5 480   4]
 [  1   0 484]]


# Ensemble

In [11]:
# Load best models
model_incep.load_state_dict(torch.load('best_model_inception.pth'))
model_incep.eval()
model_vit.load_state_dict(torch.load('best_model_vit.pth'))
model_vit.eval()

# Get softmax logits
inception_logits = []
vit_logits = []
true_labels = []

# Inception predictions
with torch.no_grad():
    for images, labels in val_loader_incep:
        images = images.to(device)
        outputs = model_incep(images)
        if isinstance(outputs, tuple):
            outputs = outputs[0]
        inception_logits.append(softmax(outputs, dim=1).cpu())
        true_labels.extend(labels.cpu().numpy())

# ViT predictions
with torch.no_grad():
    for images, _ in val_loader_vit:
        images = images.to(device)
        outputs = model_vit(images)
        vit_logits.append(softmax(outputs, dim=1).cpu())

# Stack
inception_logits = torch.cat(inception_logits, dim=0)
vit_logits = torch.cat(vit_logits, dim=0)

# Weighted ensemble
ensemble_logits = 0.6 * inception_logits + 0.4 * vit_logits
ensemble_preds = torch.argmax(ensemble_logits, dim=1).numpy()

# Evaluation
print("Final Ensemble Accuracy:", accuracy_score(true_labels, ensemble_preds))
print(classification_report(true_labels, ensemble_preds))
print(confusion_matrix(true_labels, ensemble_preds))

Final Ensemble Accuracy: 0.9952380952380953
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       496
           1       0.99      1.00      1.00       489
           2       1.00      0.99      0.99       485

    accuracy                           1.00      1470
   macro avg       1.00      1.00      1.00      1470
weighted avg       1.00      1.00      1.00      1470

[[495   0   1]
 [  0 488   1]
 [  2   3 480]]
