Import the libraries

In [46]:
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path

# Seed NumPy and PyTorch so repeated runs draw the same random numbers
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


CUSTOM DATASET CLASS

In [40]:
class PlantDataset(Dataset):
    """
    Custom Dataset for loading plant leaf images.

    How it works:
    1. Scans one sub-folder per class and records (image_path, label) pairs
    2. __len__ returns the total number of images found
    3. __getitem__ loads one image from disk when requested

    Attributes:
        root_dir (Path): Folder that was scanned.
        transform: Callable applied to each loaded image, or None.
        images (list[str]): Image paths, grouped by class in class_map order.
        labels (list[int]): Integer label for the image at the same index.
        class_map (dict): Class folder name -> integer label.
    """

    # Suffixes accepted by the scan; compared case-insensitively so that
    # spellings such as ".Jpg" are not silently dropped.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    # Exact suffix spellings in the order files are grouped inside a class
    # folder. Any other spelling is placed after these groups.
    _SUFFIX_ORDER = ('.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG')

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (str): Path to Train or Test folder
            transform: PyTorch transforms to apply to images
        """
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.images = []
        self.labels = []

        # Map the class folder names to integer labels
        self.class_map = {
            'Potato_sick_late': 0,
            'Potato_sick_early': 1,
            'Potato_healthy': 2
        }

        # Scan every class folder and collect image paths; a missing folder
        # simply contributes zero images.
        class_counts = {}
        for class_name, label in self.class_map.items():
            class_folder = self.root_dir / class_name
            found = self._list_images(class_folder) if class_folder.is_dir() else []
            self.images.extend(str(img_path) for img_path in found)
            self.labels.extend([label] * len(found))
            class_counts[class_name] = len(found)

        print(f"Loaded {len(self.images)} images from {root_dir}")
        print("Class distribution: ", class_counts)

    @classmethod
    def _list_images(cls, folder):
        """Return the image files directly inside ``folder``, each exactly once.

        A single directory listing is filtered by lower-cased suffix, so a file
        can never be collected twice (which separate "*.jpg" / "*.JPG" globs can
        do on a case-insensitive filesystem). Files are ordered by suffix group
        (see ``_SUFFIX_ORDER``) and then by file name.
        """
        rank = {suffix: position for position, suffix in enumerate(cls._SUFFIX_ORDER)}
        candidates = [
            entry for entry in folder.iterdir()
            if entry.is_file() and entry.suffix.lower() in cls.IMAGE_EXTENSIONS
        ]
        candidates.sort(key=lambda entry: (rank.get(entry.suffix, len(rank)), entry.name))
        return candidates

    def __len__(self):
        """Return the number of images collected by the scan."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the image at ``idx`` as RGB and return ``(image, label)``.

        The transform, when one was given, is applied to the image first.
        """
        img_path = self.images[idx]
        label = self.labels[idx]

        # Image.open keeps the file handle open; convert() produces a new
        # in-memory image, so the handle can be released right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply transformations if specified
        if self.transform is not None:
            image = self.transform(image)

        return image, label


DATA AUGMENTATION & TRANSFORMS

In [19]:
# Settings shared by the training and evaluation pipelines
IMAGE_SIZE = (180, 180)
IMAGENET_MEAN = [0.485, 0.456, 0.406]   # per-channel ImageNet statistics
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentation, then tensor conversion
# and normalization
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),     # flip half of the images
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline: same resize and normalization, no random augmentation
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])


LOAD DATASETS

In [41]:
TRAIN_PATH = '/content/drive/MyDrive/plants/Train'
TEST_PATH = '/content/drive/MyDrive/plants/Test'

# Build the datasets: augmented pipeline for training, plain pipeline for testing
train_dataset = PlantDataset(TRAIN_PATH, transform=train_transforms)
test_dataset = PlantDataset(TEST_PATH, transform=test_transforms)

# Wrap each dataset in a DataLoader that batches samples using worker processes.
# Only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=20, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=20, shuffle=False, num_workers=2)

Loaded 2100 images from /content/drive/MyDrive/plants/Train
Class distribution:  {'Potato_sick_late': 700, 'Potato_sick_early': 700, 'Potato_healthy': 700}
Loaded 900 images from /content/drive/MyDrive/plants/Test
Class distribution:  {'Potato_sick_late': 300, 'Potato_sick_early': 300, 'Potato_healthy': 300}


DEFINE CNN ARCHITECTURE


In [43]:
class PlantCNN(nn.Module):
    """
    Small convolutional classifier for plant leaf images.

    Architecture: three Conv(3x3, padding=1) -> ReLU -> MaxPool(2x2) blocks
    (32, 64 and 128 channels), followed by a 256-unit fully connected layer
    with dropout and a final linear layer that produces one logit per class.

    Args:
        num_classes (int): Number of output logits. Defaults to 3.
        input_size (int): Height/width of the square input images. Must match
            the images actually fed to ``forward``. Defaults to 180.
    """

    def __init__(self, num_classes=3, input_size=180):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.relu = nn.ReLU()

        self.dropout = nn.Dropout(0.3)

        # The convolutions keep the spatial size (padding=1); each of the three
        # pooling layers halves it, rounding down: 180 -> 90 -> 45 -> 22.
        # For the default input that gives 22 * 22 * 128 = 61952 features.
        feature_size = input_size // 8
        self.fc1 = nn.Linear(128 * feature_size * feature_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images (batch, 3, H, W) to raw logits (batch, num_classes)."""
        # Block 1: Conv -> ReLU -> Pool
        x = self.pool(self.relu(self.conv1(x)))  # (32, 90, 90) for 180x180 input

        # Block 2
        x = self.pool(self.relu(self.conv2(x)))  # (64, 45, 45)

        # Block 3
        x = self.pool(self.relu(self.conv3(x)))  # (128, 22, 22)

        # Flatten everything except the batch dimension for the linear layers
        x = x.flatten(start_dim=1)  # (batch_size, 128*22*22)

        # Fully connected layers
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)  # Raw logits (no softmax needed for CrossEntropyLoss)

        return x

# Build the network and move its weights to the selected device
model = PlantCNN()
model = model.to(device)
print(f"\nModel architecture:\n{model}")

# Add up the element counts of all parameters that receive gradients
total_params = sum(
    weight.numel()
    for weight in filter(lambda tensor: tensor.requires_grad, model.parameters())
)
print(f"\nTotal trainable parameters: {total_params:,}")


Model architecture:
PlantCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=61952, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=3, bias=True)
)

Total trainable parameters: 15,953,987


LOSS FUNCTION & OPTIMIZER

In [50]:
# Multi-class loss that works on raw logits (LogSoftmax + NLLLoss in one step)
criterion = nn.CrossEntropyLoss()

# Adam with a base learning rate of 1e-3 over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Halve the learning rate once the monitored value (to be minimized, e.g. the
# validation loss) has stopped improving for more than 3 consecutive checks
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=3)

TRAINING FUNCTION