### Importing libraries
Necessary python libraries to be used in this project: Pytorch, OpenCV, Numpy.

In [4]:
# Importing PyTorch and torchvision for deep learning
import torch
import torch.nn as nn  
import torch.optim as optim  
import torchvision  
import torchvision.transforms as transforms  

# Importing PIL for image handling
from PIL import Image

# Importing OpenCV for image processing
import cv2

# Importing Numpy for numerical operations
import numpy as np  

# Importing OS and glob for file handling
import os  
import glob  
import shutil 

# Importing dataset utilities from torchvision
from torchvision import datasets, models  
from torch.utils.data import DataLoader, random_split  

### Severity classification
Classifying images in "Powdery Mildew" into two levels namely; "Mild" and "Severe", based on leaf infection percentages.

In [None]:
# Severity thresholds for classification based on infection percentage
SEVERITY_THRESHOLDS = {
    "Mild": (0, 50), # Infection percentage between 0% and 50% is classified as Mild
    "Severe": (50, 100) # Infection percentage between 50% and 100% is classified as Severe
}

def calculate_infection_percentage(image_path):
    """
    Calculates the percentage of infected pixels in an image.
    
    The function converts the image to HSV and applies a color threshold to 
    detect white fungal regions.
    
    Args:
        image_path (str): Path to the image file.
    
    Returns:
        float: Percentage of infected pixels.
    """
    image = cv2.imread(image_path) # Read an image
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # Convert to HSV 

    # Defining the HSV range for detecting infected regions
    lower_white = np.array([0, 0, 160], dtype=np.uint8)
    upper_white = np.array([255, 40, 255], dtype=np.uint8)

    # Binary mask where white regions are detected
    mask = cv2.inRange(hsv, lower_white, upper_white)

    # Calculating total and infected pixels
    total_pixels = mask.size
    infected_pixels = np.count_nonzero(mask)

    # Computing infection percentage
    return (infected_pixels / total_pixels) * 100

def classify_severity(image_path):
    """
    Classifies the severity of Powdery Mildew infection based on infection percentage.

    Args:
        image_path (str): Path to the image file.

    Returns:
        str or None: Returns "Mild" or "Severe" based on thresholds, or None if undefined.
    """
    percentage = calculate_infection_percentage(image_path)
    
    # Determining severity category based on infection percentage
    for severity, (lower, upper) in SEVERITY_THRESHOLDS.items():
        if lower <= percentage < upper:
            return severity
    return None

# Input and output directories
input_directory = r"D:\Computer Science Y4\CS Y4S2\Project II\Deep-Learning-for-Powdery-Mildew-Disease-Detection-in-mango-leaves\MangoLeaf Dataset\Powdery Mildew"
output_directory = r"D:\Computer Science Y4\CS Y4S2\Project II\Deep-Learning-for-Powdery-Mildew-Disease-Detection-in-mango-leaves\MangoLeaf Dataset\Severity_Dataset"

# Subdirectories for each severity category
for severity in SEVERITY_THRESHOLDS.keys():
    os.makedirs(os.path.join(output_directory, severity), exist_ok=True)

# Looping through images in the input directory and classifying them
for filename in os.listdir(input_directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        img_path = os.path.join(input_directory, filename)
        severity = classify_severity(img_path)
        if severity:
            # Moving each image to the corresponding severity folder
            shutil.copy(img_path, os.path.join(output_directory, severity, filename))

print("Severity classification complete")

### Augmentation
Oversampling images in under-represented classes using data augmentation, to ensures that each class in the dataset has 5000 images. 

In [3]:
# Defining dataset directory and target class sizes(5000 images)
dataset_dir = r"D:\Computer Science Y4\CS Y4S2\Project II\Deep-Learning-for-Powdery-Mildew-Disease-Detection-in-mango-leaves\MangoLeaf Dataset"
classes = ["Healthy", "Mild", "Severe"]
target_size = 5000  

# Augmentation transformations
augment = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.7, contrast=0.2),
    transforms.RandomHorizontalFlip()
])

# Data augmentation for undersampled classes
for class_name in classes:
    class_dir = os.path.join(dataset_dir, class_name)
    images = glob.glob(os.path.join(class_dir, "*.jpg"))
    current_size = len(images)

    # Augment images to the target sizes
    if current_size < target_size:
        extra_needed = target_size - current_size

        for i in range(extra_needed):
            img_path = images[i % current_size] 
            image = Image.open(img_path)
            
            augmented_image = augment(image)
            new_img_name = os.path.join(class_dir, f"aug_{i+1}.jpg")
            augmented_image.save(new_img_name)

        print(f"Oversampled {class_name} to 5000 images.")

print("Dataset balancing complete! All classes now have 5000 images")

Dataset balancing complete! All classes now have 5000 images.


### Dataset Splitting
Splitting Dataset into Training(70%), Validation(20%) and Testing(10%)

In [5]:
# Dataset directory and batch size
dataset_dir = r'D:\Computer Science Y4\CS Y4S2\Project II\Deep-Learning-for-Powdery-Mildew-Disease-Detection-in-mango-leaves\MangoLeaf Dataset'
batch_size = 32
num_classes = 3 # Number of classification categories

# Transformations: resize, convert to tensor, and normalize
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Loading dataset and applying transformations
dataset = datasets.ImageFolder(root=dataset_dir, transform=data_transforms)

# Splitting dataset into training (70%), validation (20%), and testing (10%)
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Dataset splitting
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Data loaders for batching and shuffling
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Dataset split complete! \nTrain: {train_size} \nValidation: {val_size} \nTest: {test_size}")

Dataset split complete! 
Train: 10500 
Validation: 3000 
Test: 1500


### Loading Pretrained ResNet model

In [6]:
device = torch.device("cpu")

# Loading pre-trained ResNet18 model with default weights
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freezing all layers to retain pre-trained features
for param in model.parameters():
    param.requires_grad = False  

# Modifying the fully connected layer to match the number of classes
num_features = model.fc.in_features # Get the number of input features in the FC layer
num_classes = 3  # Number of output classes
model.fc = nn.Linear(num_features, num_classes) # Replace FC layer with a new one

# Move model to the specified device
model = model.to(device)
print("ResNet18 model is ready for transfer learning!")

ResNet18 model is ready for transfer learning!


### Model Training

In [7]:
# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 10 # Number of training epochs

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0
    correct_train = 0
    total_train = 0

    # Training phase
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        preds = outputs.argmax(dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    train_loss = total_train_loss / len(train_loader) # Computing average training loss
    train_acc = correct_train / total_train # Computing training accuracy

    model.eval()
    total_val_loss = 0
    correct_val = 0
    total_val = 0
    tp = torch.zeros(3, device=device)
    fp = torch.zeros(3, device=device)
    fn = torch.zeros(3, device=device)

    # Validation phase
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            total_val_loss += loss.item()

            preds = outputs.argmax(dim=1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)

            # Compute TP, FP, FN for each class
            for i in range(3):  
                tp[i] += ((preds == i) & (labels == i)).sum()
                fp[i] += ((preds == i) & (labels != i)).sum()
                fn[i] += ((preds != i) & (labels == i)).sum()

    val_loss = total_val_loss / len(val_loader) # Computeing average validation loss
    val_acc = correct_val / total_val # Computing validation accuracy

    # Computing Precision, Recall, and F1-score for each class
    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-8)

    # Computing average metrics across all classes
    val_precision = precision.mean().item()
    val_recall = recall.mean().item()
    val_f1 = f1_score.mean().item()

    # Printing training and validation metrics
    print(f"Epoch {epoch+1}:")
    print(f"  Train -> Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")
    print(f"  Val   -> Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1-score: {val_f1:.4f}")

print("Training complete!")

Epoch 1:
  Train -> Loss: 0.7315, Accuracy: 0.7370
  Val   -> Loss: 0.5255, Accuracy: 0.8460, Precision: 0.8449, Recall: 0.8470, F1-score: 0.8441
Epoch 2:
  Train -> Loss: 0.4601, Accuracy: 0.8657
  Val   -> Loss: 0.3906, Accuracy: 0.8793, Precision: 0.8794, Recall: 0.8793, F1-score: 0.8792
Epoch 3:
  Train -> Loss: 0.3709, Accuracy: 0.8858
  Val   -> Loss: 0.3292, Accuracy: 0.9000, Precision: 0.9000, Recall: 0.9001, F1-score: 0.9000
Epoch 4:
  Train -> Loss: 0.3307, Accuracy: 0.8956
  Val   -> Loss: 0.2923, Accuracy: 0.9043, Precision: 0.9044, Recall: 0.9047, F1-score: 0.9044
Epoch 5:
  Train -> Loss: 0.2990, Accuracy: 0.9002
  Val   -> Loss: 0.2814, Accuracy: 0.9003, Precision: 0.9038, Recall: 0.9014, F1-score: 0.9001
Epoch 6:
  Train -> Loss: 0.2789, Accuracy: 0.9044
  Val   -> Loss: 0.2530, Accuracy: 0.9157, Precision: 0.9157, Recall: 0.9158, F1-score: 0.9157
Epoch 7:
  Train -> Loss: 0.2646, Accuracy: 0.9064
  Val   -> Loss: 0.2448, Accuracy: 0.9150, Precision: 0.9154, Recall: 0.9

### Model Evaluation

In [9]:
model.eval()
test_loss, correct, total = 0, 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        test_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

# Computing test accuracy
test_accuracy = 100 * correct / total
print(f"Test Loss: {test_loss/len(test_loader)}, Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.22663296917651563, Test Accuracy: 91.00%


In [10]:
# Saving the trained model's state dictionary
torch.save(model.state_dict(), "resnet18_model.pth")
print("Model saved successfully!")

Model saved successfully!
