In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

# ---------- CONFIG ----------
# Folder that contains one sub-folder per class (the layout ImageFolder expects).
# This is a raw string, so each path separator is written as a SINGLE backslash;
# doubling them inside r"..." would put two literal backslashes into the path.
DATA_DIR = r"C:\Users\New User\OneDrive\Desktop\plant_disease\data\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\train"
BATCH_SIZE = 32          # samples per mini-batch for both loaders
IMAGE_SIZE = 64          # images are resized to IMAGE_SIZE x IMAGE_SIZE
NUM_EPOCHS = 10          # full passes over the training split
LEARNING_RATE = 1e-3     # initial learning rate handed to the optimizer
VALID_SPLIT = 0.2        # fraction of the dataset held out for validation
SEED = 42                # seed shared by every random number generator
MODEL_SAVE_PATH = "cnn_plant_classifier.pth"  # where the best checkpoint is written
# ----------------------------

# reproducibility: push the same seed into Python, NumPy and PyTorch
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)

# query CUDA once and reuse the answer for seeding and device selection
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.manual_seed_all(SEED)

# device
device = torch.device("cuda") if cuda_ready else torch.device("cpu")
print("Using device:", device)

# check data path: fail early with an actionable message
if not os.path.exists(DATA_DIR):
    raise FileNotFoundError(
        f"Data directory not found: {DATA_DIR}\n"
        "Please update DATA_DIR to the folder containing class subfolders."
    )

# transforms: both pipelines resize, convert to tensor and normalize;
# only the training pipeline adds light augmentation (flip + small rotation)
target_hw = (IMAGE_SIZE, IMAGE_SIZE)
# per-channel statistics (these match the commonly used ImageNet values)
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

train_steps = [
    transforms.Resize(target_hw),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
train_transform = transforms.Compose(train_steps)

test_steps = [
    transforms.Resize(target_hw),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
test_transform = transforms.Compose(test_steps)

# load dataset (this view applies the training transform)
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=train_transform)
class_names = full_dataset.classes
num_classes = len(class_names)
print(f"Found {len(full_dataset)} images belonging to {num_classes} classes.")

# split: validation gets int(VALID_SPLIT * N) samples, training gets the rest
total_images = len(full_dataset)
num_val = int(VALID_SPLIT * total_images)
num_train = total_images - num_val
split_generator = torch.Generator().manual_seed(SEED)  # dedicated RNG -> repeatable split
train_dataset, val_dataset = random_split(
    full_dataset, [num_train, num_val], generator=split_generator
)

# Point the validation indices at a second ImageFolder that uses test_transform,
# so validation images are not augmented. This assumes both scans of DATA_DIR
# enumerate the files in the same order.
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=DATA_DIR, transform=test_transform),
    val_dataset.indices,
)

# DataLoader settings: num_workers=0 avoids multiprocessing issues on Windows;
# try a value > 0 (e.g. 4) on Linux if it works there
num_workers = 0
pin_memory = torch.cuda.is_available()  # only pin host memory when a GPU is present

# options shared by both loaders
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=num_workers, pin_memory=pin_memory)

# training batches are reshuffled every epoch; validation keeps a fixed order
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# ---------- Model ----------
class CNN_Classification(nn.Module):
    """Small three-stage convolutional image classifier.

    Every stage is Conv3x3(padding=1) -> BatchNorm -> ReLU -> 2x2 max-pool.
    Channels widen 3 -> 32 -> 64 -> 128 while each pool halves the spatial
    size. An adaptive average pool then fixes the feature map at 4x4, so the
    linear head always receives 128 * 4 * 4 features whatever the input
    resolution.

    Args:
        num_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Layers are appended in conv/bn/relu/pool order per stage, so the
        # Sequential indices (and hence the state_dict keys) run 0..11.
        stage_layers = []
        in_channels = 3
        for out_channels in (32, 64, 128):
            stage_layers.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
            in_channels = out_channels
        # for a 64x64 input the three stages yield 32x32, 16x16 and 8x8 maps
        self.features = nn.Sequential(*stage_layers)

        # adaptive pooling keeps the head independent of the input size
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))  # -> 128 * 4 * 4

        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for image batch ``x``."""
        pooled = self.adaptive_pool(self.features(x))
        return self.classifier(pooled.flatten(1))

# Build the network on the selected device, then its loss, optimizer and
# learning-rate scheduler.
model = CNN_Classification(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# scheduler: stepped once per epoch with the validation loss (hence mode='min')
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)

# ---------- Helper functions ----------
def evaluate(model, dataloader, device, loss_fn=None):
    """Score ``model`` on every batch of ``dataloader`` without tracking gradients.

    Args:
        model: the ``nn.Module`` to evaluate.
        dataloader: iterable yielding ``(images, labels)`` batches.
        device: device the batches are moved to before the forward pass.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor. When omitted, the module-level ``criterion`` is used, so
            existing three-argument calls keep working.

    Returns:
        ``(avg_loss, acc, labels, preds)`` where ``avg_loss`` and ``acc`` are
        plain Python floats averaged over the samples actually seen, and
        ``labels`` / ``preds`` are flat lists of per-sample class indices.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    if loss_fn is None:
        loss_fn = criterion  # fall back to the notebook-level loss

    preds = []
    labels = []
    running_loss = 0.0
    num_correct = 0
    num_seen = 0

    # Remember the caller's mode so evaluation does not silently flip a model
    # that was deliberately left in eval mode back into training mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for imgs, labs in dataloader:
                imgs = imgs.to(device)
                labs = labs.to(device)
                outputs = model(imgs)
                batch_count = imgs.size(0)
                # weight the batch loss by its size so a smaller final batch
                # contributes proportionally to the average
                running_loss += loss_fn(outputs, labs).item() * batch_count
                p = outputs.argmax(dim=1)
                num_correct += (p == labs).sum().item()
                num_seen += batch_count
                preds.extend(p.cpu().numpy())
                labels.extend(labs.cpu().numpy())
    finally:
        model.train(was_training)

    if num_seen == 0:
        raise ValueError("evaluate() received a dataloader that yielded no samples")

    # Divide by the samples actually processed rather than len(dataset), which
    # can differ when the loader uses a sampler or drops batches.
    avg_loss = running_loss / num_seen
    acc = num_correct / num_seen
    return avg_loss, acc, labels, preds

# ---------- Training loop ----------
# For each epoch: train on every batch, score the validation split, step the
# LR scheduler on the validation loss, and write a checkpoint whenever the
# validation accuracy beats the best value seen so far.
best_val_acc = 0.0
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}", unit="batch")
    for imgs, labs in loop:
        imgs = imgs.to(device)
        labs = labs.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labs)
        loss.backward()
        optimizer.step()

        # accumulate batch loss weighted by batch size so the epoch figure is
        # a per-sample average even when the last batch is smaller
        running_loss += loss.item() * imgs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labs).item()

        loop.set_postfix(loss=loss.item())

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = running_corrects / len(train_loader.dataset)

    # validation
    val_loss, val_acc, val_labels, val_preds = evaluate(model, val_loader, device)
    # The accuracy may come back as a NumPy scalar (depends on how evaluate
    # computes it). Storing a plain float keeps pickled NumPy objects out of
    # the checkpoint, which restricted (weights-only) torch.load can reject.
    val_acc = float(val_acc)
    scheduler.step(val_loss)

    print(f"\nEpoch {epoch} summary:")
    print(f"  Train loss: {epoch_loss:.4f}  Train acc: {epoch_acc:.4f}")
    print(f"  Val   loss: {val_loss:.4f}  Val   acc: {val_acc:.4f}")

    # save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
            'class_names': class_names
        }, MODEL_SAVE_PATH)
        print(f"  Saved best model (val_acc={val_acc:.4f}) -> {MODEL_SAVE_PATH}")

# ---------- Final evaluation ----------
# Reload the best checkpoint written during training and re-score it on the
# validation split.
print("\nLoading best model and running final evaluation on validation set...")
checkpoint = torch.load(MODEL_SAVE_PATH, map_location=device)
best_state = checkpoint['model_state_dict']
model.load_state_dict(best_state)

final_metrics = evaluate(model, val_loader, device)
val_loss, val_acc, val_labels, val_preds = final_metrics
print(f"Final Val loss: {val_loss:.4f}    Final Val acc: {val_acc:.4f}\n")

# classification report (per-class precision / recall / F1)
print("Classification report on Validation set:")
report = classification_report(
    val_labels,
    val_preds,
    target_names=class_names,
    digits=4,
    zero_division=0,
)
print(report)


Using device: cpu
Found 70295 images belonging to 37 classes.


Epoch 1/10: 100%|██████████| 1758/1758 [40:05<00:00,  1.37s/batch, loss=1.39] 



Epoch 1 summary:
  Train loss: 1.6387  Train acc: 0.5034
  Val   loss: 0.6973  Val   acc: 0.7882
  Saved best model (val_acc=0.7882) -> cnn_plant_classifier.pth


Epoch 2/10: 100%|██████████| 1758/1758 [40:41<00:00,  1.39s/batch, loss=1.51] 



Epoch 2 summary:
  Train loss: 0.9736  Train acc: 0.6924
  Val   loss: 0.5115  Val   acc: 0.8364
  Saved best model (val_acc=0.8364) -> cnn_plant_classifier.pth


Epoch 3/10: 100%|██████████| 1758/1758 [33:52<00:00,  1.16s/batch, loss=0.578]



Epoch 3 summary:
  Train loss: 0.7642  Train acc: 0.7525
  Val   loss: 0.2994  Val   acc: 0.9038
  Saved best model (val_acc=0.9038) -> cnn_plant_classifier.pth


Epoch 4/10: 100%|██████████| 1758/1758 [34:38<00:00,  1.18s/batch, loss=2.85] 



Epoch 4 summary:
  Train loss: 0.6382  Train acc: 0.7938
  Val   loss: 0.2542  Val   acc: 0.9185
  Saved best model (val_acc=0.9185) -> cnn_plant_classifier.pth


Epoch 5/10: 100%|██████████| 1758/1758 [34:54<00:00,  1.19s/batch, loss=0.64] 



Epoch 5 summary:
  Train loss: 0.5499  Train acc: 0.8227
  Val   loss: 0.1942  Val   acc: 0.9365
  Saved best model (val_acc=0.9365) -> cnn_plant_classifier.pth


Epoch 6/10: 100%|██████████| 1758/1758 [31:59<00:00,  1.09s/batch, loss=1.15] 



Epoch 6 summary:
  Train loss: 0.4892  Train acc: 0.8407
  Val   loss: 0.1691  Val   acc: 0.9453
  Saved best model (val_acc=0.9453) -> cnn_plant_classifier.pth


Epoch 7/10: 100%|██████████| 1758/1758 [33:20<00:00,  1.14s/batch, loss=0.107] 



Epoch 7 summary:
  Train loss: 0.4417  Train acc: 0.8591
  Val   loss: 0.1539  Val   acc: 0.9491
  Saved best model (val_acc=0.9491) -> cnn_plant_classifier.pth


Epoch 8/10: 100%|██████████| 1758/1758 [32:55<00:00,  1.12s/batch, loss=0.342] 



Epoch 8 summary:
  Train loss: 0.3963  Train acc: 0.8714
  Val   loss: 0.1289  Val   acc: 0.9562
  Saved best model (val_acc=0.9562) -> cnn_plant_classifier.pth


Epoch 9/10: 100%|██████████| 1758/1758 [32:15<00:00,  1.10s/batch, loss=0.504] 



Epoch 9 summary:
  Train loss: 0.3630  Train acc: 0.8822
  Val   loss: 0.1195  Val   acc: 0.9631
  Saved best model (val_acc=0.9631) -> cnn_plant_classifier.pth


Epoch 10/10: 100%|██████████| 1758/1758 [31:03<00:00,  1.06s/batch, loss=0.185] 



Epoch 10 summary:
  Train loss: 0.3388  Train acc: 0.8898
  Val   loss: 0.1087  Val   acc: 0.9637
  Saved best model (val_acc=0.9637) -> cnn_plant_classifier.pth

Loading best model and running final evaluation on validation set...
Final Val loss: 0.1087    Final Val acc: 0.9637

Classification report on Validation set:
                                                    precision    recall  f1-score   support

                                 Apple___Black_rot     0.9707    0.9954    0.9829       432
                          Apple___Cedar_apple_rust     0.9476    0.9864    0.9666       733
                                   Apple___healthy     0.9724    0.9628    0.9676       403
                               Blueberry___healthy     0.9532    0.9802    0.9665       353
          Cherry_(including_sour)___Powdery_mildew     0.9970    0.9883    0.9926       341
                 Cherry_(including_sour)___healthy     0.9707    0.9785    0.9746       372
Corn_(maize)___Cercospora_leaf_s

In [2]:
from PIL import Image

# ---------- Single-image inference helper ----------
def predict_image(image_path, model, transform, class_names, device):
    """Classify one image file and return the predicted class name.

    Args:
        image_path: path of the image file to classify.
        model: trained network; it is switched to eval mode and left there.
        transform: preprocessing applied to the RGB image (use the same
            transform as validation/test).
        class_names: sequence mapping a class index to its name.
        device: device the single-image batch is moved to.

    Returns:
        The entry of ``class_names`` at the index of the largest logit.
    """
    # the context manager closes the file as soon as the RGB copy exists
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert("RGB")  # ensure RGB

    batch = transform(rgb_image).unsqueeze(0).to(device)  # add the batch dimension

    model.eval()
    with torch.no_grad():
        logits = model(batch)
    return class_names[logits.argmax(dim=1).item()]


# ---------- Load and preprocess a single test image ----------
test_image_path = r"C:\Users\New User\OneDrive\Desktop\plant_disease\data\test\test\CornCommonRust2.JPG"

# ---------- Make prediction ----------
predicted_class = predict_image(test_image_path, model, test_transform, class_names, device)
print(f"Predicted class: {predicted_class}")


Predicted class: Corn_(maize)___Common_rust_


In [3]:
from PIL import Image

# ---------- Load and preprocess a single test image ----------
test_image_path = r"C:\Users\New User\OneDrive\Desktop\plant_disease\data\test\test\PotatoEarlyBlight1.JPG"

rgb_image = Image.open(test_image_path).convert("RGB")  # ensure RGB
img = test_transform(rgb_image)  # apply same transform as validation/test
img = img.unsqueeze(0).to(device)  # single-image batch on the model's device

# ---------- Make prediction ----------
model.eval()
with torch.no_grad():
    output = model(img)
    predicted = output.argmax(dim=1)  # index of the largest logit

class_index = predicted.item()
predicted_class = class_names[class_index]
print(f"Predicted class: {predicted_class}")


Predicted class: Potato___Early_blight


In [4]:
# Export the bare weights to their OWN file. Writing a raw state_dict to
# "cnn_plant_classifier.pth" would overwrite the training checkpoint with a
# different layout, and code that reads checkpoint['model_state_dict'] from
# that file would then fail.
torch.save(model.state_dict(), "cnn_plant_classifier_weights.pth")


In [5]:
# Bundle the current weights, optimizer state and class list into one
# checkpoint dictionary, then write it to disk.
export_bundle = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'class_names': class_names,
}
torch.save(export_bundle, "cnn_plant_classifier.pth")
