# **Description of the dataset 📝**
*This dataset was created by applying offline augmentation to the original dataset.
It consists of about 87K RGB images of healthy and diseased crop leaves, categorized into 38 different classes.
A new directory containing 33 test images was created later for prediction purposes.*

* Data link: [https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset/data](https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset/data)

# **Our goal 🎯**
*The goal is clear and simple: build a model that classifies crop leaves as healthy or diseased and, if a leaf is diseased, predicts which disease it has.*

# Let's import required modules..



In [None]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Import necessary libraries
import tensorflow as tf  # TensorFlow for machine learning tasks
import seaborn as sns    # Seaborn for statistical visualization
import matplotlib.pyplot as plt  # Matplotlib for plotting
from tensorflow import keras     # Keras for building neural networks
import numpy as np      # NumPy for numerical computations
import os              # OS module for interacting with the operating system
import pandas as pd    # Pandas for data manipulation and analysis
import itertools       # Itertools for creating iterators
from tensorflow.keras.utils import image_dataset_from_directory  # Utility for loading image datasets
from tensorflow.keras.layers import BatchNormalization, Dropout   # Layers for neural networks
from sklearn.metrics import precision_score, accuracy_score, recall_score, confusion_matrix, ConfusionMatrixDisplay  # Metrics for model evaluation
from tensorflow.keras.preprocessing import image   # Utility for image preprocessing

# **Exploring the data 🧭**
*Note: the dataset is already hosted on Kaggle and ready to use.*


In [None]:
# Load the data

# Locations of the training and validation splits
train_data = "../input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train"
valid_data = "../input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid"


def _scale_pixels(pixels, target):
    """Divide the image batch by 255; the label batch passes through untouched."""
    return pixels / 255.0, target


# Build one dataset per split (images are resized to 256x256 on load) and
# apply the pixel rescaling to every batch
train_gen = image_dataset_from_directory(directory=train_data, image_size=(256, 256)).map(_scale_pixels)
valid_gen = image_dataset_from_directory(directory=valid_data, image_size=(256, 256)).map(_scale_pixels)

In [None]:
# Our classes

# Derive the class names from the sub-directories of the training set.
# Reuse `train_data` instead of repeating the path, and keep directories only:
# Keras infers classes from sub-directories in alphanumerical order, so a stray
# file in the folder must not shift the index -> name mapping used for predictions.
class_names = sorted(
    entry
    for entry in os.listdir(train_data)
    if os.path.isdir(os.path.join(train_data, entry))
)

# Print the total number of classes
print(f'The total number of classes is: {len(class_names)}')

# Print each class name on its own line
for class_name in class_names:
    print(class_name)

In [None]:
# Identify unique plants and count the number of diseases

# Every entry in the training directory is one class folder
diseases = os.listdir(train_data)

# Unique plant names (in first-seen order) and the number of non-healthy classes
plants = []
NumberOfDiseases = 0

# Class folders are named "<plant>___<condition>" with a TRIPLE underscore
# (e.g. "Apple___Apple_scab"). Splitting on a single '_' leaves an empty string
# at index 1, so every class, healthy ones included, was counted as a disease
# and multi-word plant names were cut short.
for class_dir in diseases:
    plant_name, _, condition = class_dir.partition('___')
    # Record each plant only once
    if plant_name not in plants:
        plants.append(plant_name)
    # Everything that is not the plant's 'healthy' class is a disease
    if condition != 'healthy':
        NumberOfDiseases += 1

# Print the number of unique plants in the dataset
print("Number of plants: {}".format(len(plants)))
print('-'*50)
print(f"Unique Plants are: \n{plants}")
print()

# Print the number of disease classes (excluding 'healthy') and all class folders
print("Number of unique diseases (without healthy): {}".format(NumberOfDiseases))
print('-'*50)
print(f"Classes are: \n{diseases}")


In [None]:
# Look at the shape of one photo

import cv2

# Path to a single sample image from the training set
img_path = "/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train/Apple___Apple_scab/00075aa8-d81a-4184-8541-b692b78d398a___FREC_Scab 3335.JPG"

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface as an opaque AttributeError on `.shape`
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Print the shape of the image array
print("Image shape:", img.shape)


# **Data visualization 📊**
*some calculations of the data samples*

In [None]:
# Count the images of every class in the training and validation sets

# class folder name -> number of files in that folder
nums_train = {}
nums_valid = {}

for disease in diseases:
    nums_train[disease] = len(os.listdir(os.path.join(train_data, disease)))
    # The variable is `valid_data`; the previous spelling `vaild_data` raised a NameError
    nums_valid[disease] = len(os.listdir(os.path.join(valid_data, disease)))

# One-column tables (indexed by class folder) used to display the counts
image_class_count_train = pd.DataFrame(list(nums_train.values()), index=list(nums_train.keys()), columns=['No. of images'])
image_class_count_valid = pd.DataFrame(list(nums_valid.values()), index=list(nums_valid.keys()), columns=['No. of images'])


In [None]:
# Report how many training images each class has
print('Training data images count per class : ')
print(image_class_count_train)

# Horizontal bar chart: one bar per class, bar length = number of images
plt.figure(figsize=(15, 15))
plt.title("Training data images count per class", fontsize=38)
plt.xlabel('Number of images', fontsize=35)
plt.ylabel('Classes', fontsize=35)

# Class names go on the y axis, their image counts on the x axis
keys, vals = list(nums_train), list(nums_train.values())
sns.barplot(y=keys, x=vals)


In [None]:
# Report how many validation images each class has
print('Validation data images count per class : ')
print(image_class_count_valid)

# Horizontal bar chart: one bar per class, bar length = number of images
plt.figure(figsize=(15, 15))
plt.title("Validation data images count per class", fontsize=38)
plt.xlabel('Number of images', fontsize=35)
plt.ylabel('Classes', fontsize=35)

# Class names go on the y axis, their image counts on the x axis
keys, vals = list(nums_valid), list(nums_valid.values())
sns.barplot(y=keys, x=vals)


In [None]:
# Visualize a random sample of training images

plt.figure(figsize=(40, 30))  # 40 x 30 inch canvas
plt.subplots_adjust(wspace=0.1, hspace=0.1)  # spacing between the subplots

# List the class folders once. Each title is taken from the very folder the image
# is read from: the previous code indexed the *sorted* class_names list with an
# index drawn from the *unsorted* os.listdir() result, so titles could be wrong.
class_dirs = sorted(os.listdir(train_data))

# Show 24 images on a 4 x 6 grid
for i in range(24):
    # Pick a random class folder, then a random file inside it
    folder_name = class_dirs[np.random.randint(0, len(class_dirs))]
    random_folder_path = os.path.join(train_data, folder_name)
    file_names = os.listdir(random_folder_path)
    random_image_path = os.path.join(random_folder_path, file_names[np.random.randint(0, len(file_names))])

    # OpenCV loads images as BGR; convert to RGB for matplotlib.
    # `sample_img` (not `image`) avoids shadowing the imported Keras `image` module.
    sample_img = cv2.cvtColor(cv2.imread(random_image_path), cv2.COLOR_BGR2RGB)

    plt.subplot(4, 6, i + 1)
    plt.imshow(sample_img)
    plt.axis("off")
    plt.title(folder_name)

# Display the entire plot
plt.show()


# **Modelling 🏗️**
* *It is advisable to use a GPU instead of a CPU when working with image datasets: CPUs are designed for general-purpose workloads, whereas GPUs are optimized for training deep learning models because they can process many computations simultaneously.*
* *They have a large number of cores, which allows for better computation of multiple parallel processes.*
* *Additionally, computations in deep learning need to handle huge amounts of data, this makes a GPU’s memory bandwidth most suitable.*

# **Building the model architecture 👷**
*We are going to use CNN..*

In [None]:
# CNN classifier: stacked 3x3 / 5x5 convolution pairs with 3x3 max pooling,
# followed by a dense head that outputs one probability per class (38 units).
model = keras.Sequential([
    # Block 1: 32 filters, takes 256x256 RGB input
    keras.layers.Conv2D(32, (3, 3), activation="relu", padding="same", input_shape=(256, 256, 3)),
    keras.layers.Conv2D(32, (3, 3), activation="relu", padding="same"),
    keras.layers.MaxPooling2D(3, 3),
    BatchNormalization(),

    # Block 2: 64 filters (no batch normalization after this block)
    keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
    keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
    keras.layers.MaxPooling2D(3, 3),

    # Block 3: 128 filters
    keras.layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
    keras.layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
    keras.layers.MaxPooling2D(3, 3),
    BatchNormalization(),

    # Block 4: 256 filters, no pooling
    keras.layers.Conv2D(256, (3, 3), activation="relu", padding="same"),
    keras.layers.Conv2D(256, (3, 3), activation="relu", padding="same"),

    # Block 5: 512 filters with larger 5x5 kernels, no pooling
    keras.layers.Conv2D(512, (5, 5), activation="relu", padding="same"),
    keras.layers.Conv2D(512, (5, 5), activation="relu", padding="same"),

    # Classification head
    keras.layers.Flatten(),
    keras.layers.Dense(1568, activation="relu"),
    keras.layers.Dropout(0.5),
    BatchNormalization(),
    keras.layers.Dense(38, activation="softmax"),
])

# Integer class labels -> sparse categorical cross-entropy; Adam with a small learning rate
opt = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=['accuracy'])
model.summary()

# **Training the model.. 🏋️**

In [None]:
# Train the model on the training data and validate it on the validation data.
# The validation dataset is named `valid_gen`; the previous spelling `vaild_gen`
# raised a NameError before training could start.
history = model.fit(
    train_gen,  # Training dataset (batches of images and integer labels)
    validation_data=valid_gen,  # Validation dataset evaluated after every epoch
    epochs=10  # Number of epochs for training
)


# **Plotting 📈**
*These plots make the model's training behaviour easier to understand.*

In [None]:
# Plot training vs. validation curves for loss and accuracy side by side
plt.figure(figsize=(20, 5))

# One entry per panel: (metric name, history key, x-axis range, y-axis range)
panels = [
    ("Loss", "loss", (0, 10), (0.0, 1.0)),
    ("Accuracy", "accuracy", (0, 9.25), (0.75, 1.0)),
]

for position, (metric_name, key, x_range, y_range) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(f"Train and Validation {metric_name}")
    plt.xlabel("Epoch")
    plt.ylabel(metric_name)
    # Training curve and its validation counterpart (stored under the "val_" prefix)
    plt.plot(history.history[key], label=f"Train {metric_name}")
    plt.plot(history.history[f"val_{key}"], label=f"Validation {metric_name}")
    plt.xlim(*x_range)
    plt.ylim(*y_range)
    plt.legend()

plt.tight_layout()


In [None]:
# Obtain and flatten the true labels and predicted labels for the validation data
import tensorflow as tf
import itertools

# One entry per batch; flattened below
labels = []
predictions = []

# Labels and predictions are collected from the SAME batch, so they stay aligned
# even if the dataset is shuffled. The dataset is named `valid_gen`; the previous
# spelling `vaild_gen` raised a NameError.
for x, y in valid_gen:
    labels.append(list(y.numpy()))  # true labels of this batch
    # verbose=0 avoids printing one progress bar per batch
    predictions.append(tf.argmax(model.predict(x, verbose=0), 1).numpy())  # predicted labels of this batch

# Flatten the per-batch lists into one flat list each
predictions = list(itertools.chain.from_iterable(predictions))
labels = list(itertools.chain.from_iterable(labels))
   


# **Display model performance :) 📋**

In [None]:
# Print evaluation metrics based on the model's performance.
# All values are multiplied by 100 and formatted to two decimal places.

# Train accuracy: the last training accuracy recorded in the training history
print("Train Accuracy: {:.2f} %".format(history.history['accuracy'][-1]*100))

# Validation accuracy: `labels`/`predictions` are collected from the validation
# set, so the figure is labelled accordingly (it was previously called "Test")
print("Validation Accuracy: {:.2f} %".format(accuracy_score(labels, predictions) * 100))

# Precision / recall: macro averaging (unweighted mean over the classes).
# With micro averaging on single-label multiclass data both scores are
# mathematically identical to accuracy, so the three lines printed the same number.
print("Precision Score: {:.2f} %".format(precision_score(labels, predictions, average='macro') * 100))

print("Recall Score: {:.2f} %".format(recall_score(labels, predictions, average='macro') * 100))

# confusion matrix ..🧮

In [None]:
# Plot a confusion matrix based on the true labels and predicted labels

# Pin the matrix to every class index (0-based, the same indices that are printed
# as "Predicted Class" elsewhere), so a class that never shows up in `labels` or
# `predictions` cannot shrink the matrix and shift the tick labels.
class_ids = list(range(len(class_names)))
cm = confusion_matrix(labels, predictions, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_ids)

# Draw on a single explicit figure; the extra plt.figure() call that used to
# precede this only produced an empty figure
fig, ax = plt.subplots(figsize=(12, 12))
disp.plot(ax=ax, colorbar=False, cmap='YlGnBu')
ax.set_title("Confusion Matrix")
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
plt.show()

# visualize and predict one image :)

In [None]:
# Get a single test image ready to be predicted by the model

from tensorflow.keras.preprocessing import image

img_path = '/kaggle/input/new-plant-diseases-dataset/test/test/AppleCedarRust1.JPG'

# Load the file resized to 256x256
img = image.load_img(img_path, target_size=(256,256))

# Convert to a float32 array, divide by 255, then add a leading batch axis
img_array = tf.expand_dims(image.img_to_array(img).astype("float32") / 255.0, 0)


In [None]:
# Helper used to load an image for display
def load_prep(img_path, img_size=(224, 224)):
    """Read an image file and resize it for display.

    Args:
        img_path: Path of the image file to read.
        img_size: Target (height, width) passed to ``tf.image.resize``.
            Defaults to (224, 224).

    Returns:
        The resized image as returned by ``tf.image.resize``; pixel values
        are not rescaled here.
    """
    raw_bytes = tf.io.read_file(img_path)
    # channels=3 forces an RGB result and expand_animations=False guarantees a
    # 3-D tensor (a GIF would otherwise decode to a 4-D stack of frames)
    img = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    return tf.image.resize(img, size=img_size)

# Load the image for display.
# Stored as `display_img` rather than `image`: the name `image` is the Keras
# preprocessing module imported earlier, and rebinding it to a tensor breaks any
# later `image.load_img(...)` call that runs without re-importing the module.
display_img = load_prep(img_path)

# Show the image (divided by 255 for imshow) with its file name and shape
plt.imshow(display_img/255.)
plt.title('AppleCedarRust1.JPG')
plt.suptitle(str(display_img.shape))


In [None]:
# Run the model on the single-image batch `img_array`; the bare name on the
# last line makes the notebook display the prediction result.
# NOTE(review): this rebinds `predictions`, which also holds the flattened
# validation predictions used by the metrics cells; confirm cell execution order.
predictions = model.predict(img_array)
predictions

In [None]:
# Index of the highest score in the prediction, and the class name stored at that index
predicted_class = np.argmax(predictions)
predicted_label = class_names[predicted_class]
print(f"this image : Predicted Class: {predicted_class}, Class name: {predicted_label}")

# **Testing model on test data 🧪**
*Here we go through the test directory, prepare each image, and apply our model to it. Let's see :)*


In [None]:
# Run the model on every image found in the test directory
from tensorflow.keras.preprocessing import image
images_dir ="/kaggle/input/new-plant-diseases-dataset/test/test"

# Visit the files in the order the directory listing returns them
image_files = os.listdir(images_dir)
for img_file in image_files:
    img_path = os.path.join(images_dir, img_file)

    # Same preparation as for the single image: resize to 256x256, convert to a
    # float32 array divided by 255, then add a leading batch axis
    img = image.load_img(img_path, target_size=(256, 256))
    img_array = image.img_to_array(img).astype("float32") / 255.0
    img_array_batch = img_array[np.newaxis, ...]

    # Predict and take the index of the highest score
    predictions = model.predict(img_array_batch)
    predicted_class = np.argmax(predictions)
    label_name = class_names[predicted_class]

    # Report file name, predicted index and the matching class name
    print(f"Image: {img_file}, Predicted Class: {predicted_class}, Class name: {label_name}")

# Visualize random images with their predicted classes..🍀
 Note that you can increase the number of images to be predicted and change the display style.

In [None]:
# Show a 3 x 3 grid of randomly chosen test images with their predicted classes
from tensorflow.keras.preprocessing import image
import random
plt.figure(figsize=(20,20))

# File names available in the test directory (listed once for all draws)
candidates = os.listdir(images_dir)

for i in range(9):
    # Draw one file at random (the same file may be drawn more than once)
    rn = random.choice(candidates)
    img_path = os.path.join(images_dir, rn)

    # Version of the image used only for display
    new_img = load_prep(img_path)

    # Version fed to the model: 256x256, float32 divided by 255, plus a batch axis
    photo = image.load_img(img_path, target_size=(256,256))
    photo_array = tf.expand_dims(image.img_to_array(photo).astype("float32") / 255.0, 0)
    predictions_photo_array = model.predict(photo_array)
    pred_class_ = np.argmax(predictions_photo_array)

    # Draw the image in its grid cell; the title shows the file name, the
    # predicted index and the matching class name
    plt.subplot(3,3,i+1)
    plt.imshow(new_img/255.)
    plt.title(f'true:{rn} \npred_class:{pred_class_} \nClassname: {class_names[pred_class_]}')
    plt.axis(False)


# **Finally..Saving our model 📂**

In [None]:
# Save the trained model to the file 'model.h5' in the current working directory.
# NOTE(review): `load_model` is imported but not used in this cell; presumably
# kept for reloading the saved file later. Confirm.
from tensorflow.keras.models import load_model
model.save('model.h5')