**Importing The Required Libraries**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# import system libs
import os
import time
import shutil
import pathlib
import itertools

# import data handling tools
import cv2
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the competition input folder and print the full path of every file in it
for current_dir, _subdirs, file_names in os.walk('/kaggle/input/plant-pathology-2021-fgvc8'):
    for file_name in file_names:
        full_path = os.path.join(current_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Loading the Data**

In [None]:
# Dataset Files Paths
# All four locations sit under the same competition folder, so they are
# assembled from one shared root.
DATASET_ROOT = '../input/plant-pathology-2021-fgvc8'
train_image_path = DATASET_ROOT + '/train_images'
test_image_path = DATASET_ROOT + '/test_images'
train_df_path = DATASET_ROOT + '/train.csv'
test_df_path = DATASET_ROOT + '/sample_submission.csv'

In [None]:
# Read the training table and the sample-submission table into DataFrames
train = pd.read_csv(filepath_or_buffer=train_df_path)
test = pd.read_csv(filepath_or_buffer=test_df_path)

**Analyzing and Reading the Data**

In [None]:
train.head(50)

In [None]:
# looking at basic infos
train.info()

In [None]:
# Checking the number of images
print("Number of images:",len(train))

In [None]:
train.labels.value_counts()

**Visualizing the Labels** 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Tally every label value and shape the result as a two-column frame
# ('Label', 'Count') ready for plotting
label_counts = (
    train['labels']
    .value_counts()
    .rename_axis('Label')
    .reset_index(name='Count')
)

# Horizontal bar chart: one bar per label, bar length = number of images
plt.figure(figsize=(12, 6))
bar_axes = sns.barplot(data=label_counts, x='Count', y='Label', palette="viridis")

# Axis labels and title
bar_axes.set_xlabel('Count')
bar_axes.set_ylabel('Label')
bar_axes.set_title('Distribution of Dataset Labels')

# Render the figure
plt.show()


In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Add a column holding the location of every training image
train["path"] = "../input/plant-pathology-2021-fgvc8/train_images/" + train["image"]

# Draw 25 random rows to display
data_sample = train.sample(n=25)

# Lay the sampled images out on a 5x5 grid, each titled with its label
plt.figure(figsize=(14, 9))
sampled_rows = zip(data_sample["path"], data_sample["labels"])
for grid_position, (image_location, image_label) in enumerate(sampled_rows, start=1):
    plt.subplot(5, 5, grid_position)
    plt.imshow(mpimg.imread(image_location))  # mpimg reads the file into an array
    plt.title(image_label)
    plt.axis("off")
_ = plt.suptitle("Images sample")


In [None]:
def batch_visualize_with_label(df, batch_size, path, label):
    """Show a grid of randomly sampled images that carry a given label.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``image`` column (file names) and a ``labels`` column.
    batch_size : int
        Maximum number of images to display. Fewer are shown when the label
        has fewer matching rows.
    path : str
        Directory that contains the image files named in ``df["image"]``.
    label : str
        Value of the ``labels`` column to filter on (exact match).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 or no row of ``df`` has ``label``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Filter the frame that was passed in, not a notebook-level global
    matching_rows = df[df["labels"] == label]
    if matching_rows.empty:
        raise ValueError(f"No rows with label {label!r}")

    # Never ask for more images than exist for this label
    n_images = min(batch_size, len(matching_rows))
    image_names = matching_rows.sample(n_images)["image"].values

    # Near-square grid just large enough for n_images (9 images -> 3x3)
    n_cols = int(np.ceil(np.sqrt(n_images)))
    n_rows = int(np.ceil(n_images / n_cols))

    plt.figure(figsize=(16, 12))
    for image_ind, image_name in enumerate(image_names):
        image = cv2.imread(os.path.join(path, image_name))
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            print(f"Could not read image: {os.path.join(path, image_name)}")
            continue
        plt.subplot(n_rows, n_cols, image_ind + 1)
        # OpenCV loads BGR; matplotlib expects RGB
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis("off")
    plt.suptitle(label)
    plt.show()

In [None]:
# One figure of nine sampled training images per single-disease label
for disease_label in ('healthy', 'scab', 'frog_eye_leaf_spot', 'rust', 'complex', 'powdery_mildew'):
    batch_visualize_with_label(train, 9, train_image_path, disease_label)

In [None]:
test.head()

In [None]:
test.info

In [None]:
# Checking the number of images
print("Number of images:",len(test))

In [None]:
test.labels.value_counts()

**Plotting Piechart of labels**

In [None]:
# Frequency of every label value in the training table
label_frequency = train['labels'].value_counts()

# Split into the label names and a plain array of their counts
label_list, label_counts = label_frequency.index, label_frequency.values

# Pie chart: one wedge per label, annotated with its share in percent
plt.figure(figsize=(10, 10))
plt.pie(x=label_counts, labels=label_list, autopct='%1.1f%%')
plt.title('Distribution of Dataset Labels')
plt.axis('equal')  # Equal aspect ratio keeps the pie circular.

# Render the figure
plt.show()

**CNN Model with Data Augmentation Technique**

In [None]:
# Image geometry, seed, batch size and hold-out fraction shared by the generators
HEIGHT = 128
WIDTH = 128
SEED = 45
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.2
INPUT_SIZE = (HEIGHT, WIDTH, 3)

# Training images: rescale to [0, 1] and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1/255.,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT,
    zoom_range=0.2,
    shear_range=0.2,
    vertical_flip=False,
)

# Validation images: rescale only. Scoring on randomly distorted images makes
# the validation metric noisy and not comparable between runs. The same
# validation_split is used so both generators carve up `train` the same way.
validation_datagen = ImageDataGenerator(
    rescale=1/255.,
    validation_split=VALIDATION_SPLIT,
)

train_dataset = train_datagen.flow_from_dataframe(
    train,
    directory=train_image_path,
    x_col="image",
    y_col="labels",
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    subset="training",
    shuffle=True,
    seed=SEED,
    validate_filenames=False,
)

# shuffle=False keeps batch order equal to the order of `.classes` /
# `.filenames`, so predictions can be compared with them directly
validation_dataset = validation_datagen.flow_from_dataframe(
    train,
    directory=train_image_path,
    x_col="image",
    y_col="labels",
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    subset="validation",
    shuffle=False,
    seed=SEED,
    validate_filenames=False,
)

# Test images: rescale only, and unshuffled so that row i of a prediction
# array belongs to row i of `test`
test_datagen = ImageDataGenerator(
    rescale=1./255
)
test_dataset = test_datagen.flow_from_dataframe(
    test,
    directory=test_image_path,
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=INPUT_SIZE[:2],
    shuffle=False,
)

In [None]:
# Small CNN: four convolution + max-pooling stages, then a flattened
# 12-way softmax output
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(HEIGHT, WIDTH, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(12, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer with learning rate 0.001, categorical
# cross-entropy loss (matches the softmax output and the generators'
# class_mode='categorical'), and accuracy as the reported metric
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# File the training checkpoints are written to
checkpoint_path = "training_1/cp.ckpt"
# Directory component of the checkpoint path ("training_1")
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
# (save_weights_only=True: weights only, not the full model; verbose=1: ask
# the callback to report when it saves)
# NOTE(review): newer Keras releases appear to require a ".weights.h5" suffix
# for weights-only checkpoints — confirm against the installed version
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
# Train the CNN on the augmented generator and keep the History object.
# - Model.fit accepts generators directly; fit_generator is deprecated.
# - The step counts come from the generators' own length (batches per pass).
#   The previous `samples // 128` did not match BATCH_SIZE (64), so every
#   epoch visited only half of the training and validation data.
cnn_model = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20,
    verbose=1,
    shuffle=True,
    steps_per_epoch=len(train_dataset),
    validation_steps=len(validation_dataset),
    callbacks=[cp_callback],
)

In [None]:
# Per-epoch metrics recorded while fitting the CNN
model_history = cnn_model.history

# Training vs. validation accuracy per epoch, saved to disk and displayed
accuracy_figure = plt.figure()
accuracy_axes = accuracy_figure.gca()
for metric_key in ('accuracy', 'val_accuracy'):
    accuracy_axes.plot(model_history[metric_key])
accuracy_axes.set_title('model accuracy')
accuracy_axes.set_ylabel('accuracy')
accuracy_axes.set_xlabel('epoch')
accuracy_axes.legend(['train', 'validation'])
accuracy_figure.savefig('accuracy')
plt.show()

**Classification Report of CNN Model Using Data Augmentation Technique**

In [None]:
from sklearn.metrics import classification_report

# Predict batch by batch so each prediction stays paired with the target that
# the generator returned alongside it. This holds even when the generator
# shuffles, which predicting in one call and reading `.labels` does not.
batch_probabilities = []
true_labels = []
for batch_index in range(len(validation_dataset)):
    batch_images, batch_targets = validation_dataset[batch_index]
    batch_probabilities.append(model.predict_on_batch(batch_images))
    # Targets are one-hot rows (class_mode='categorical'); argmax gives the class index
    true_labels.extend(np.argmax(batch_targets, axis=1))

# Class probabilities for every validation image, then the winning class index
validation_predictions = np.concatenate(batch_probabilities)
predicted_labels = np.argmax(validation_predictions, axis=1)

# Class names in class-index order
class_labels = list(validation_dataset.class_indices.keys())

# `labels` lists the integer class ids and `target_names` their display names.
# Passing both keeps the report well-formed when a class is absent from the
# validation split; zero_division=0 silences undefined-metric warnings.
report = classification_report(
    true_labels,
    predicted_labels,
    labels=list(range(len(class_labels))),
    target_names=class_labels,
    zero_division=0,
)

# Print the classification report
print(report)

**CNN Model without Using Data Augmentation Technique**

In [None]:
# Define the batch size and input size
BATCH_SIZE = 64
HEIGHT = 128
WIDTH = 128
SEED = 45

# Data generator without augmentation (rescale only).
# validation_split must equal the fraction used when validation_dataset was
# built (0.2). Together with subset="training" it keeps the validation images
# out of the training data; training on every row of `train` would leak them
# and inflate the validation accuracy.
datagen = ImageDataGenerator(rescale=1/255., validation_split=0.2)

train_dataset = datagen.flow_from_dataframe(
    train,
    directory=train_image_path,
    x_col="image",
    y_col="labels",
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    subset="training",
    shuffle=True,
    seed=SEED,
    validate_filenames=False
)

In [None]:
# Same architecture as the first CNN, built fresh for the run without data
# augmentation: four convolution + max-pooling stages and a 12-way softmax
plain_cnn_layers = [
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(HEIGHT, WIDTH, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(12, activation='softmax'),
]
model = Sequential(plain_cnn_layers)

# Adam (learning rate 0.001), categorical cross-entropy, accuracy metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train the model without data augmentation and keep the History object.
# - Model.fit accepts generators directly; fit_generator is deprecated.
# - The step counts come from the generators' own length (batches per pass).
#   The previous `samples // 128` did not match BATCH_SIZE (64), so every
#   epoch visited only half of the training and validation data.
cnn_model_without_data_augmentation = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=10,
    verbose=1,
    shuffle=True,
    steps_per_epoch=len(train_dataset),
    validation_steps=len(validation_dataset),
)


In [None]:
# Per-epoch metrics recorded while fitting the model without data augmentation
model_history = cnn_model_without_data_augmentation.history

# Training vs. validation accuracy per epoch
plt.figure()
plt.plot(model_history['accuracy'])
plt.plot(model_history['val_accuracy'])
# Title and file name identify this run; saving as 'accuracy' again would
# overwrite the figure written for the augmented-data model
plt.title('model accuracy (without data augmentation)')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])
plt.savefig('accuracy_without_augmentation')
plt.show()

**Classification Report of CNN Model without Using Data Augmentation Technique**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

def predict_with_targets(fitted_model, dataset):
    """Run ``fitted_model`` over every batch of ``dataset``.

    Predictions are collected batch by batch together with the targets the
    generator returned for that batch, so the two stay aligned even when the
    generator shuffles its samples.

    Returns
    -------
    (true_class_indices, class_probabilities) : tuple of numpy arrays
    """
    true_batches, probability_batches = [], []
    for batch_index in range(len(dataset)):
        batch_images, batch_targets = dataset[batch_index]
        probability_batches.append(fitted_model.predict_on_batch(batch_images))
        # Targets are one-hot rows (class_mode='categorical')
        true_batches.append(np.argmax(batch_targets, axis=1))
    return np.concatenate(true_batches), np.concatenate(probability_batches)


# Class names in class-index order. label_list is sorted by frequency and
# would attach the wrong name to each row of the report.
class_names = list(train_dataset.class_indices.keys())
class_ids = list(range(len(class_names)))

# NOTE: `model` is the network trained WITHOUT data augmentation; the earlier
# augmented-data model was overwritten when this one was built. Both reports
# below therefore describe this model, on training and on validation data.
# The variable names ending in `with_data_augmentation` / `classification_rep`
# are kept unchanged for any later cell that reads them.

# Evaluate on the training generator
y_true, y_pred_without_data_augmentation = predict_with_targets(model, train_dataset)
y_pred_labels_without_data_augmentation = np.argmax(y_pred_without_data_augmentation, axis=1)

classification_rep_without_data_augmentation = classification_report(
    y_true,
    y_pred_labels_without_data_augmentation,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,
)
confusion_matrix_without_data_augmentation = confusion_matrix(
    y_true, y_pred_labels_without_data_augmentation, labels=class_ids
)

# Evaluate on the validation generator
y_true_validation, y_pred = predict_with_targets(model, validation_dataset)
y_pred_labels = np.argmax(y_pred, axis=1)

classification_rep = classification_report(
    y_true_validation,
    y_pred_labels,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,
)
confusion_matrix_with_data_augmentation = confusion_matrix(
    y_true_validation, y_pred_labels, labels=class_ids
)

print("Classification Report for Model without Data Augmentation (training data):")
print(classification_rep_without_data_augmentation)
print("\nConfusion Matrix for Model without Data Augmentation (training data):")
print(confusion_matrix_without_data_augmentation)

print("\nClassification Report for Model without Data Augmentation (validation data):")
print(classification_rep)
print("\nConfusion Matrix for Model without Data Augmentation (validation data):")
print(confusion_matrix_with_data_augmentation)

**InceptionResNetV2 Model and Comparison with Other CNN Methods**

In [None]:
# Re-create the augmented training generator; train_dataset was replaced by a
# non-augmented generator in the previous section.
train_dataset = train_datagen.flow_from_dataframe(
    train,
    directory=train_image_path,
    x_col="image",
    y_col="labels",
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical',  # This automatically one-hot encodes the labels
    batch_size=BATCH_SIZE,
    subset="training",
    shuffle=True,
    seed=SEED,
    validate_filenames=False
)

# Validation images are only rescaled. Scoring on randomly augmented images
# makes the validation metric noisy. validation_split must equal the fraction
# configured on train_datagen (0.2) so the two subsets do not overlap.
validation_datagen = ImageDataGenerator(rescale=1/255., validation_split=0.2)

# shuffle=False keeps batch order equal to the order of `.classes`, so
# predictions made later can be compared with it directly
validation_dataset = validation_datagen.flow_from_dataframe(
    train,
    directory=train_image_path,
    x_col="image",
    y_col="labels",
    target_size=(HEIGHT, WIDTH),
    class_mode='categorical',  # This automatically one-hot encodes the labels
    batch_size=BATCH_SIZE,
    subset="validation",
    shuffle=False,
    seed=SEED,
    validate_filenames=False
)


In [None]:
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense

# Number of output classes, taken from the training generator so the output
# layer always matches the one-hot targets it is trained on
NUM_CLASSES = len(train_dataset.class_indices)

# InceptionResNetV2 backbone with randomly initialised weights (weights=None)
# and without its original classification head (include_top=False)
inception_model = InceptionResNetV2(weights=None, include_top=False, input_shape=(HEIGHT, WIDTH, 3))

# Custom head: flatten the backbone's feature maps and classify with softmax
flatten_layer = Flatten()(inception_model.output)
output_layer = Dense(NUM_CLASSES, activation='softmax')(flatten_layer)

# Full model from the backbone input to the custom output
custom_inception_model = Model(inputs=inception_model.input, outputs=output_layer)

# Same loss and metric as the plain CNNs, so the results are comparable
custom_inception_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model to verify the architecture
custom_inception_model.summary()

# Train the custom InceptionResNetV2 model and keep the per-epoch history so
# it can be plotted or compared with the other models
inception_history = custom_inception_model.fit(
    train_dataset,
    epochs=5,
    validation_data=validation_dataset,
)
