## Dependencies

In [None]:
!pip install torch==2.1.1

In [10]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt
import os                       # for working with files

## Reading Data

In [3]:
import os
from PIL import Image
import numpy as np

def load_dataset(directory):
    images = []
    labels = []

    for root, dirs, files in os.walk(directory):
        for filename in files:
            if filename.endswith(('.JPG', '.jpg', '.jpeg', '.png')):  # Assuming images are in common formats
                img_path = os.path.join(root, filename)
                img = Image.open(img_path).convert('RGB')
                img = img.resize((150, 150))  # Resize the image to a consistent size
                img_array = np.array(img) / 255.0  # Normalize pixel values to [0, 1]
                images.append(img_array)
                
                # Extract the class label from the subdirectory name
                class_name = os.path.basename(root)
                labels.append(class_name)

    return np.array(images), np.array(labels)

# Example usage
dataset_directory = './Dataset for Crop Pest and Disease Detection/CCMT Dataset-Augmented/'
images, labels = load_dataset(dataset_directory)

# Now 'images' is a NumPy array containing your image data, and 'labels' contains corresponding labels.

KeyboardInterrupt: 

#### Read images data

In [11]:
data_dir= "./PlantVillage-Dataset/data_distribution_for_SVM"
train_dir = data_dir + "/train"
valid_dir = data_dir + "/valid"
diseases = os.listdir(train_dir)
diseases

['0',
 '1',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '2',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '3',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9']

## Predictive Model for Crop Pest and Disease Detection    

In [12]:
print("Total disease classes are: {}".format(len(diseases)))

Total disease classes are: 38


In [14]:
plants = []
NumberOfDiseases = 0
for plant in diseases:
    if plant.split('___')[0] not in plants:
        plants.append(plant.split('___')[0])
    if plant.split('___')[1] != 'healthy':
        NumberOfDiseases += 1

IndexError: list index out of range

In [15]:
# unique plants in the dataset
print(f"Unique Plants are: \n{plants}")


Unique Plants are: 
['0']


In [16]:
# number of unique plants
print("Number of plants: {}".format(len(plants)))

Number of plants: 1


In [17]:
# number of unique diseases
print("Number of diseases: {}".format(NumberOfDiseases))

Number of diseases: 0


So we have images of leaves of 14 plants and while excluding healthy leaves, we have 26 types of images that show a particular disease in a particular plant.

In [18]:
# Number of images for each disease
nums = {}
for disease in diseases:
    nums[disease] = len(os.listdir(train_dir + '/' + disease))
    
# converting the nums dictionary to pandas dataframe passing index as plant name and number of images as column

img_per_class = pd.DataFrame(nums.values(), index=nums.keys(), columns=["no. of images"])
img_per_class 

Unnamed: 0,no. of images
0,111
1,114
10,212
11,187
12,233
13,159
14,67
15,847
16,386
17,74


In [9]:
import matplotlib.pyplot as plt
# plotting number of images available for each disease
index = [n for n in range(38)]
plt.figure(figsize=(20, 5))
plt.bar(index, [n for n in nums.values()], width=0.3)
plt.xlabel('Plants/Diseases', fontsize=10)
plt.ylabel('No of images available', fontsize=10)
plt.xticks(index, diseases, fontsize=5, rotation=90)
plt.title('Images per each class of plant disease')

NameError: name 'nums' is not defined

<Figure size 1440x360 with 0 Axes>

We can see that the dataset is almost balanced for all classes, so we are good to go forward

#### Images available for training

In [19]:
n_train = 0
for value in nums.values():
    n_train += value
print(f"There are {n_train} images for training")

There are 8751 images for training


In [21]:

import torch                    # Pytorch module 
import matplotlib.pyplot as plt # for plotting informations on graph and images using tensors
import torch.nn as nn           # for creating  neural networks
from torch.utils.data import DataLoader # for dataloaders 
from PIL import Image           # for checking images
# import torch.nn.functional as F # for functions for calculating loss
# import torchvision.transforms as transforms   # for transforming images into tensors 
from torchvision.utils import make_grid   
from torchvision.datasets import ImageFolder  # for working with classes and images
# datasets for validation and training
train = ImageFolder(train_dir, transform=transforms.ToTensor())
valid = ImageFolder(valid_dir, transform=transforms.ToTensor()) 

ModuleNotFoundError: No module named 'torchvision'

#### Image shape 

In [23]:
img, label = train[0]
print(img.shape, label)

NameError: name 'train' is not defined

In [24]:
# total number of classes in train set
len(train.classes)

NameError: name 'train' is not defined

In [25]:
# for checking some images from training dataset
def show_image(image, label):
    print("Label :" + train.classes[label] + "(" + str(label) + ")")
    plt.imshow(image.permute(1, 2, 0))

In [26]:
show_image(*train[0])

NameError: name 'train' is not defined

In [27]:
show_image(*train[70000])

NameError: name 'train' is not defined

In [28]:
train[30000]

NameError: name 'train' is not defined

In [29]:
show_image(*train[30000])

NameError: name 'train' is not defined

In [30]:
# Setting the seed value
random_seed = 7
torch.manual_seed(random_seed)

<torch._C.Generator at 0x1b0db9d9c30>

In [31]:
# setting the batch size
batch_size = 32

In [32]:
# DataLoaders for training and validation
train_dl = DataLoader(train, batch_size, shuffle=True, num_workers=2, pin_memory=True)
valid_dl = DataLoader(valid, batch_size, num_workers=2, pin_memory=True)

NameError: name 'train' is not defined

- `DataLoader` is a subclass which comes from `torch.utils.data`. It helps in loading large and memory consuming datasets. It takes in `batch_size` which denotes the number of samples contained in each generated batch. 

- Setting `shuffle=True` shuffles the dataset. It is heplful so that batches between epochs do not look alike. Doing so will eventually make our model more robust.

- `num_workers`, denotes the number of processes that generate batches in parallel. If you have more cores in your CPU, you can set it to number of cores in your CPU. Since, Kaggle provides a 2 core CPU, I have set it to 2


In [1]:
# helper function to show a batch of training instances
def show_batch(data):
    for images, labels in data:
        fig, ax = plt.subplots(figsize=(30, 30))
        ax.set_xticks([]); ax.set_yticks([])
        ax.imshow(make_grid(images, nrow=8).permute(1, 2, 0))
        break

In [2]:
# Images for first batch of training
show_batch(train_dl) 

NameError: name 'train_dl' is not defined

# Modelling

In [3]:
# for moving data into GPU (if available)
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available:
        return torch.device("cuda")
    else:
        return torch.device("cpu")

# for moving data to device (CPU or GPU)
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

# for loading in the device (GPU if available else CPU)
class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl:
            yield to_device(b, self.device)
        
    def __len__(self):
        """Number of batches"""
        return len(self.dl)

Checking the device we are working with

In [1]:
device = get_default_device()
device

NameError: name 'get_default_device' is not defined

Wrap up our training and validation data loaders using `DeviceDataLoader` for automatically transferring batches of data to the GPU (if available)

In [3]:
# Moving data into GPU
train_dl = DeviceDataLoader(train_dl, device)
valid_dl = DeviceDataLoader(valid_dl, device)

NameError: name 'DeviceDataLoader' is not defined

## Building the model architecture 

*We are going to use **ResNet**, which have been one of the major breakthrough in computer vision since they were introduced in 2015.*

#### Residual Block code implementation

In [4]:
class SimpleResidualBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        return self.relu2(out) + x # ReLU can be applied before or after adding the input

NameError: name 'nn' is not defined

In [5]:
# for calculating the accuracy
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))


# base class for the model
class ImageClassificationBase(nn.Module):
    
    def training_step(self, batch):
        images, labels = batch
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate prediction
        loss = F.cross_entropy(out, labels)  # Calculate loss
        acc = accuracy(out, labels)          # Calculate accuracy
        return {"val_loss": loss.detach(), "val_accuracy": acc}
    
    def validation_epoch_end(self, outputs):
        batch_losses = [x["val_loss"] for x in outputs]
        batch_accuracy = [x["val_accuracy"] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()       # Combine loss  
        epoch_accuracy = torch.stack(batch_accuracy).mean()
        return {"val_loss": epoch_loss, "val_accuracy": epoch_accuracy} # Combine accuracies
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_accuracy']))
        

NameError: name 'nn' is not defined