<a href="https://colab.research.google.com/github/karandeep7/Image-Classification-of-Plant-Diseases-Using-CNN/blob/main/plant_leaf_disease_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fetching the data from source

### Download datasets from Kaggle

In [None]:
# Download the three dataset archives with the Kaggle CLI (one .zip per dataset,
# written to the current working directory).
# NOTE(review): presumably Kaggle API credentials must be configured before this
# cell runs; that setup is not shown in this notebook.
!kaggle datasets download -d rizwan123456789/potato-disease-leaf-datasetpld
!kaggle datasets download -d adilmubashirchaudhry/plant-village-dataset
!kaggle datasets download -d mexwell/crop-diseases-classification


Dataset URL: https://www.kaggle.com/datasets/rizwan123456789/potato-disease-leaf-datasetpld
License(s): DbCL-1.0
Downloading potato-disease-leaf-datasetpld.zip to /content
 56% 21.0M/37.4M [00:00<00:00, 95.9MB/s]
100% 37.4M/37.4M [00:00<00:00, 111MB/s] 
Dataset URL: https://www.kaggle.com/datasets/adilmubashirchaudhry/plant-village-dataset
License(s): CC-BY-NC-SA-4.0
Downloading plant-village-dataset.zip to /content
 99% 979M/989M [00:15<00:00, 78.5MB/s]
100% 989M/989M [00:15<00:00, 68.4MB/s]
Dataset URL: https://www.kaggle.com/datasets/mexwell/crop-diseases-classification
License(s): other
Downloading crop-diseases-classification.zip to /content
100% 1.99G/2.00G [00:29<00:00, 75.1MB/s]
100% 2.00G/2.00G [00:29<00:00, 72.1MB/s]


### Unzip the datasets

In [None]:
import zipfile
# Extract each downloaded archive into a folder under /content that carries the
# same name as the archive (minus the .zip suffix).
for archive_name in (
    'crop-diseases-classification',
    'plant-village-dataset',
    'potato-disease-leaf-datasetpld',
):
    with zipfile.ZipFile(f'/content/{archive_name}.zip', 'r') as zip_ref:
        zip_ref.extractall(f'/content/{archive_name}')




# Preprocessing


### Import required libraries

In [None]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import pandas as pd
import os
from PIL import Image
import json
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from torch import tensor
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import vgg16
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

### Preprocessing transforms for the dataloaders

In [None]:
# Preprocessing pipeline shared by every dataset built in this notebook.
transform = transforms.Compose(
    [
        # Bring every image to the same 224x224 spatial size
        transforms.Resize((224, 224)),
        # Turn the image into a tensor
        transforms.ToTensor(),
        # Standardise each of the three channels with a fixed mean / std
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

### Create a custom dataset class for the Crop Diseases Classification dataset


In [None]:
class CropDiseaseDataset(Dataset):
    """Image dataset described by a CSV of (image file name, numeric label) rows.

    Args:
        csv_file: Path to the annotations CSV. It must contain an ``image_id``
            column; ``__getitem__`` reads the file name from the first column
            and the numeric label from the second column.
        img_dir: Directory containing the image files named in the CSV.
        json_file: Path to a JSON object mapping label numbers (as strings) to
            disease names.
        transform: Optional callable applied to each PIL image before it is
            returned.

    Attributes:
        annotations: CSV rows whose image file exists in ``img_dir``.
        label_mapping: The parsed JSON mapping.
        classes: Disease names ordered by numeric label key, so that
            ``classes[i]`` is the name for label ``i`` when the keys are
            ``"0" .. "N-1"``. If a key is not an integer string the JSON file
            order is kept instead.
    """

    def __init__(self, csv_file, img_dir, json_file, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

        # Load the JSON file with label mappings
        with open(json_file, 'r') as f:
            self.label_mapping = json.load(f)

        # Keep only the rows whose image file is actually present on disk
        file_exists = self.annotations['image_id'].apply(
            lambda name: os.path.exists(os.path.join(self.img_dir, name))
        )
        self.annotations = self.annotations[file_exists].reset_index(drop=True)

        # Order class names by numeric label rather than by JSON key order, so
        # the position in `classes` lines up with the integer label returned
        # by __getitem__.
        try:
            ordered_keys = sorted(self.label_mapping, key=int)
        except ValueError:
            # Non-numeric keys: fall back to the order found in the file.
            ordered_keys = list(self.label_mapping)
        self.classes = [self.label_mapping[key] for key in ordered_keys]

    def __len__(self):
        """Return the number of annotation rows that have an image on disk."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is always converted to 3-channel RGB (the same thing
        torchvision's ``ImageFolder`` default loader does), so grayscale or
        RGBA files do not break a 3-channel ``Normalize`` transform.

        Raises:
            KeyError: If the row's label has no entry in ``label_mapping``.
        """
        img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])  # image_id

        # The context manager closes the file; convert() returns a fully
        # loaded copy that stays valid afterwards.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # The numeric label from the CSV is used directly as the class index;
        # it must be a known key of the label mapping.
        label_num = int(self.annotations.iloc[idx, 1])
        if str(label_num) not in self.label_mapping:
            raise KeyError(str(label_num))

        if self.transform:
            image = self.transform(image)

        return image, tensor(label_num)  # label as a 0-dim integer tensor


### Create dataloaders for the Crop Diseases Classification dataset

In [None]:
# Locations of the Crop Diseases Classification files extracted earlier
csv_file = '/content/crop-diseases-classification/Data/train.csv'
img_dir = '/content/crop-diseases-classification/Data/train_images'
json_file = '/content/crop-diseases-classification/Data/label_num_to_disease_map.json'

# Load the full dataset (rows whose image file is missing are dropped by the class)
dataset_2 = CropDiseaseDataset(csv_file=csv_file, img_dir=img_dir, json_file=json_file, transform=transform)

# 70% train / 15% validation / 15% test: hold out 30% first, then halve it.
# random_state is fixed so the split is the same on every run.
train_indices, temp_indices = train_test_split(range(len(dataset_2)), test_size=0.3, random_state=42)
val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=42)

# Views over the full dataset restricted to each index list
train_dataset_2 = Subset(dataset_2, train_indices)
val_dataset_2 = Subset(dataset_2, val_indices)
test_dataset_2 = Subset(dataset_2, test_indices)

# Create DataLoaders
batch_size = 32

# Only the training loader is shuffled; the evaluation loaders keep a fixed
# order so that per-sample results are reproducible between runs.
train_loader_2 = DataLoader(train_dataset_2, batch_size=batch_size, shuffle=True)
val_loader_2 = DataLoader(val_dataset_2, batch_size=batch_size, shuffle=False)
test_loader_2 = DataLoader(test_dataset_2, batch_size=batch_size, shuffle=False)



### Create dataloaders for the Potato Disease Leaf dataset


In [None]:
# The Potato Disease Leaf archive ships with its own train / test / validation folders
train_dir_1 = '/content/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Training'
test_dir_1 = '/content/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Testing'
val_dir_1 = '/content/potato-disease-leaf-datasetpld/PLD_3_Classes_256/Validation'

# One ImageFolder per split, all using the shared preprocessing pipeline
train_dataset_1 = ImageFolder(root=train_dir_1, transform=transform)
test_dataset_1 = ImageFolder(root=test_dir_1, transform=transform)
val_dataset_1 = ImageFolder(root=val_dir_1, transform=transform)

batch_size = 32

# Only the training loader is shuffled; the evaluation loaders keep a fixed
# order so that per-sample results are reproducible between runs.
train_loader_1 = DataLoader(train_dataset_1, batch_size=batch_size, shuffle=True)
test_loader_1 = DataLoader(test_dataset_1, batch_size=batch_size, shuffle=False)
val_loader_1 = DataLoader(val_dataset_1, batch_size=batch_size, shuffle=False)


### Create dataloaders for the Plant Village dataset

In [None]:

# The Plant Village archive ships with its own train / test / val folders
train_dir_3 = '/content/plant-village-dataset/PlantVillageDataset/train_val_test/train'
test_dir_3 = '/content/plant-village-dataset/PlantVillageDataset/train_val_test/test'
val_dir_3 = '/content/plant-village-dataset/PlantVillageDataset/train_val_test/val'

# One ImageFolder per split, all using the shared preprocessing pipeline
train_dataset_3 = ImageFolder(root=train_dir_3, transform=transform)
test_dataset_3 = ImageFolder(root=test_dir_3, transform=transform)
val_dataset_3 = ImageFolder(root=val_dir_3, transform=transform)

batch_size = 32

# Only the training loader is shuffled; the evaluation loaders keep a fixed
# order so that per-sample results are reproducible between runs.
train_loader_3 = DataLoader(train_dataset_3, batch_size=batch_size, shuffle=True)
test_loader_3 = DataLoader(test_dataset_3, batch_size=batch_size, shuffle=False)
val_loader_3 = DataLoader(val_dataset_3, batch_size=batch_size, shuffle=False)


### Testing the dataloaders

In [None]:

# Smoke test: pull one mini-batch out of the Plant Village training loader
data_iter = iter(train_loader_3)
images, labels = next(data_iter)

# Report the number of samples, the full tensor shape and the first five
# labels so the loader's output format can be checked by eye
print(
    f"Batch size: {images.size(0)}",
    f"Image shape: {images.shape}",
    f"Label sample: {labels[:5]}",
    sep="\n",
)

# Optional: Display a few images with their labels
def show_images(images, labels, num=5):
    """Plot the first `num` images of a batch in one row, titled with their labels.

    Args:
        images: Indexable batch of channel-first image tensors (each image is
            permuted to channel-last before plotting). Values are expected to
            be on the [0, 1] display scale; anything outside is clamped.
        labels: Indexable collection of labels aligned with `images`.
        num: Maximum number of images to draw; capped at the batch size.
    """
    # Never index past the end of a batch smaller than `num`
    num = min(num, len(images))

    plt.figure(figsize=(10, 4))
    for i in range(num):
        plt.subplot(1, num, i + 1)
        # detach/cpu so tensors that track gradients or live on the GPU can be
        # converted to NumPy; clamp removes the small overshoot left by
        # un-normalising, which imshow would otherwise clip with a warning.
        pixels = images[i].detach().cpu().permute(1, 2, 0).clamp(0, 1)
        plt.imshow(pixels.numpy())
        # Show the plain label value rather than the tensor repr ("tensor(3)")
        label = labels[i]
        plt.title(label.item() if hasattr(label, "item") else label)
        plt.axis('off')
    plt.show()

# Undo the Normalize step of the preprocessing pipeline before plotting.
# Normalize computes (x - mean) / std, so normalising again with
# mean' = -mean / std and std' = 1 / std gives back the original x.
inv_transform = transforms.Normalize(
    mean=[-m / s for m, s in zip([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])],
    std=[1 / s for s in [0.229, 0.224, 0.225]],
)
images_inv = inv_transform(images)

# Plot the un-normalised batch together with its labels
show_images(images_inv, labels)


# Convolutional Neural Networks

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

### Define the ResNet-18 model

In [None]:
def get_resnet_model(classes):
    """Create a ResNet-18 with one output per entry of `classes`, placed on `device`.

    No `weights` argument is passed to `resnet18`, so no pretrained
    checkpoint is requested here.

    Args:
        classes: Sized collection of class names; only its length is used.

    Returns:
        The model, already moved to the module-level `device`.
    """
    num_outputs = len(classes)
    network = resnet18(num_classes=num_outputs)
    return network.to(device)

### Define the VGG-16 model

In [None]:
def get_vgg16_model(classes):
    """Create a VGG-16 with one output per entry of `classes`, placed on `device`.

    No `weights` argument is passed to `vgg16`, so no pretrained checkpoint
    is requested here.

    Args:
        classes: Sized collection of class names; only its length is used.

    Returns:
        The model, already moved to the module-level `device`.
    """
    num_outputs = len(classes)
    network = vgg16(num_classes=num_outputs)
    return network.to(device)

### Define the MobileNetV2 model


In [None]:
def get_mobilenet_model(classes):
    """Create a MobileNet v2 with one output per entry of `classes`, placed on `device`.

    No `weights` argument is passed to `mobilenet_v2`, so no pretrained
    checkpoint is requested here.

    Args:
        classes: Sized collection of class names; only its length is used.

    Returns:
        The model, already moved to the module-level `device`.
    """
    num_outputs = len(classes)
    network = mobilenet_v2(num_classes=num_outputs)
    return network.to(device)

### Define the pretrained ResNet-18 model for transfer learning


In [None]:
def get_resnet_transfer_model(classes):
    """Build a ResNet-18 from `ResNet18_Weights.DEFAULT` set up for fine-tuning.

    Every parameter is frozen first; gradients are then re-enabled for
    `layer4` and for the classification head, and the head is replaced by a
    new linear layer with one output per entry of `classes`.

    Args:
        classes: Sized collection of class names; only its length is used.

    Returns:
        The model on the module-level `device`.
    """
    net = resnet18(weights=ResNet18_Weights.DEFAULT).to(device)

    # Freeze the whole network ...
    net.requires_grad_(False)

    # ... then make the existing head and the last residual stage trainable again
    net.fc.requires_grad_(True)
    net.layer4.requires_grad_(True)

    # Replace the head with a new layer sized to this dataset (a freshly built
    # nn.Linear has trainable parameters by default)
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, len(classes)).to(device)

    return net

## add your code here

### Function to train the models

In [None]:
def train_model(model, train_loader, val_loader, num_epochs=5, learning_rate = 0.01):
    """Train `model` with SGD (momentum 0.9) on a cross-entropy loss.

    After each epoch the per-sample mean training loss and the training
    accuracy are printed, then `validate_model` is run on `val_loader`.

    Args:
        model: Network to optimise in place; batches are moved to the
            module-level `device` before the forward pass.
        train_loader: Iterable of `(images, labels)` batches used for training.
        val_loader: Iterable of `(images, labels)` batches passed to
            `validate_model`.
        num_epochs: Number of passes over `train_loader`.
        learning_rate: SGD learning rate.

    Raises:
        ValueError: If `train_loader` yields no samples during an epoch.
    """
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        loss_sum = 0.0  # sum of per-sample losses seen this epoch
        correct = 0
        total = 0

        # Wrap the train_loader with tqdm to get a progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch [{epoch + 1}/{num_epochs}]")

            for images, labels in tepoch:
                images, labels = images.to(device), labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()

                # Statistics. The criterion returns the batch mean, so weight
                # it by the batch size: a smaller final batch must not count
                # as much as a full one in the epoch average.
                batch_samples = labels.size(0)
                loss_sum += loss.item() * batch_samples
                _, predicted = torch.max(outputs.detach(), 1)
                total += batch_samples
                correct += (predicted == labels).sum().item()

                # Update progress bar description
                tepoch.set_postfix(loss=loss.item(), accuracy=correct / total)

        if total == 0:
            # Fail with a clear message instead of a bare ZeroDivisionError
            raise ValueError("train_loader produced no samples; cannot compute epoch metrics")

        # Epoch metrics, both averaged per sample
        accuracy = correct / total
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss_sum / total:.4f}, Accuracy: {accuracy:.4f}')

        # Validate the model
        validate_model(model, val_loader)

def validate_model(model, val_loader):
    """Measure and print the classification accuracy of `model` on `val_loader`.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.

    Args:
        model: Network to evaluate; batches are moved to the module-level
            `device` before the forward pass.
        val_loader: Iterable of `(images, labels)` batches.

    Returns:
        The fraction of correctly classified samples as a float, or
        `float('nan')` when `val_loader` yields no samples.
    """
    model.eval()  # Set the model to evaluation mode
    total = 0
    correct = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # An empty validation set should not abort a training run with a
        # ZeroDivisionError; report it and return a sentinel instead.
        print('Validation Accuracy: n/a (no samples)')
        return float('nan')

    accuracy = correct / total
    print(f'Validation Accuracy: {accuracy:.4f}')
    return accuracy

### Define functions to save and reload the models

In [None]:
# Function to save the model
def save_model(model, path):
    """Write the model's `state_dict` to `path` and print a confirmation.

    Args:
        model: Module whose parameters and buffers are saved.
        path: Destination file for `torch.save`.
    """
    weights = model.state_dict()
    torch.save(weights, path)
    print(f"Model saved to {path}")

# Function to load the model
def load_model(model, path):
    """Load a saved `state_dict` from `path` into `model` and switch it to eval mode.

    Args:
        model: Module with the same architecture as the one that was saved.
        path: File previously written with `torch.save(model.state_dict(), path)`.

    Returns:
        The same `model` object, in evaluation mode.
    """
    # Deserialise onto the CPU so a checkpoint saved from a GPU run can still
    # be opened on a CPU-only runtime; load_state_dict then copies the values
    # into the model's parameters on whatever device they live on.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    print(f"Model loaded from {path}")
    return model

## Training the models on all three datasets

### Training the ResNet-18 model on the Potato Disease Leaf dataset

In [None]:
# ResNet-18 on the Potato Disease Leaf dataset: build, train for 10 epochs, save
print("Training Resnet-18 on Potato Disease Leaf Dataset")
resnet_model_1 = get_resnet_model(classes=train_dataset_1.classes)
train_model(
    resnet_model_1,
    train_loader=train_loader_1,
    val_loader=val_loader_1,
    num_epochs=10,
)
save_model(model=resnet_model_1, path="resnet_model_1.pth")

### Training the ResNet-18 model on the Crop Diseases Classification dataset

In [None]:
# ResNet-18 on the Crop Diseases Classification dataset: build, train for 10
# epochs, save. The class list lives on the full dataset behind the Subset.
print("\nTraining on Crop Diseases Classification Dataset")
resnet_model_2 = get_resnet_model(classes=train_dataset_2.dataset.classes)
train_model(
    resnet_model_2,
    train_loader=train_loader_2,
    val_loader=val_loader_2,
    num_epochs=10,
)
save_model(model=resnet_model_2, path="resnet_model_2.pth")

### Training the ResNet-18 model on the Plant Village dataset

In [None]:
# ResNet-18 on the Plant Village dataset: build, train for 10 epochs, save
print("\nTraining Resnet-18 model on Plant Village Dataset")
resnet_model_3 = get_resnet_model(classes=train_dataset_3.classes)
train_model(
    resnet_model_3,
    train_loader=train_loader_3,
    val_loader=val_loader_3,
    num_epochs=10,
)
save_model(model=resnet_model_3, path="resnet_model_3.pth")

### Training VGG-16 on the Potato Disease Leaf dataset

In [None]:
# VGG-16 on the Potato Disease Leaf dataset: build, train for 10 epochs, save
print("Training VGG-16 on Potato Disease Leaf Dataset")
vgg_model_1 = get_vgg16_model(classes=train_dataset_1.classes)
train_model(
    vgg_model_1,
    train_loader=train_loader_1,
    val_loader=val_loader_1,
    num_epochs=10,
)
save_model(model=vgg_model_1, path="vgg_model_1.pth")

Training VGG-16 on Potato Disease Leaf Dataset


Epoch [1/10]: 100%|██████████| 102/102 [00:54<00:00,  1.88batch/s, accuracy=0.411, loss=1.08]


Epoch [1/10], Loss: 1.0617, Accuracy: 0.4110
Validation Accuracy: 0.4087


Epoch [2/10]: 100%|██████████| 102/102 [00:53<00:00,  1.91batch/s, accuracy=0.413, loss=1.12]


Epoch [2/10], Loss: 1.0616, Accuracy: 0.4128
Validation Accuracy: 0.2452


Epoch [3/10]: 100%|██████████| 102/102 [00:53<00:00,  1.90batch/s, accuracy=0.441, loss=1.04]


Epoch [3/10], Loss: 1.0423, Accuracy: 0.4414
Validation Accuracy: 0.5024


Epoch [4/10]: 100%|██████████| 102/102 [00:53<00:00,  1.90batch/s, accuracy=0.426, loss=1.23]


Epoch [4/10], Loss: 1.0525, Accuracy: 0.4257
Validation Accuracy: 0.4447


Epoch [5/10]: 100%|██████████| 102/102 [00:53<00:00,  1.89batch/s, accuracy=0.489, loss=0.968]


Epoch [5/10], Loss: 1.0085, Accuracy: 0.4888
Validation Accuracy: 0.5168


Epoch [6/10]: 100%|██████████| 102/102 [00:54<00:00,  1.89batch/s, accuracy=0.504, loss=0.943]


Epoch [6/10], Loss: 0.9739, Accuracy: 0.5045
Validation Accuracy: 0.5409


Epoch [7/10]: 100%|██████████| 102/102 [00:54<00:00,  1.89batch/s, accuracy=0.549, loss=0.826]


Epoch [7/10], Loss: 0.9139, Accuracy: 0.5491
Validation Accuracy: 0.5601


Epoch [8/10]: 100%|██████████| 102/102 [00:54<00:00,  1.88batch/s, accuracy=0.59, loss=0.74]


Epoch [8/10], Loss: 0.8815, Accuracy: 0.5900
Validation Accuracy: 0.6058


Epoch [9/10]: 100%|██████████| 102/102 [00:53<00:00,  1.90batch/s, accuracy=0.687, loss=0.483]


Epoch [9/10], Loss: 0.7376, Accuracy: 0.6866
Validation Accuracy: 0.7812


Epoch [10/10]: 100%|██████████| 102/102 [00:53<00:00,  1.90batch/s, accuracy=0.686, loss=1.3]


Epoch [10/10], Loss: 0.7255, Accuracy: 0.6859
Validation Accuracy: 0.6130
Model saved to vgg_model_1.pth


### Training VGG-16 on the Crop Diseases Classification dataset

In [None]:
# VGG-16 on the Crop Diseases Classification dataset: build, train for 10
# epochs, save. The class list lives on the full dataset behind the Subset.
print("\nTraining VGG-16 on Crop Disease Dataset")
vgg_model_2 = get_vgg16_model(classes=train_dataset_2.dataset.classes)
train_model(
    vgg_model_2,
    train_loader=train_loader_2,
    val_loader=val_loader_2,
    num_epochs=10,
)
save_model(model=vgg_model_2, path="vgg_model_2.pth")


Training VGG-16 on Crop Disease Dataset


Epoch [1/10]:  86%|████████▌ | 337/393 [03:57<00:40,  1.38batch/s, accuracy=0.617, loss=1.01]

### Training VGG-16 on the Plant Village dataset

In [None]:
# VGG-16 on the Plant Village dataset: build, train for 10 epochs, save
print("\nTraining VGG-16 on Plant Village Dataset")
vgg_model_3 = get_vgg16_model(classes=train_dataset_3.classes)
train_model(
    vgg_model_3,
    train_loader=train_loader_3,
    val_loader=val_loader_3,
    num_epochs=10,
)
save_model(model=vgg_model_3, path="vgg_model_3.pth")