# Checking the working directory

In [None]:
# Show the current working directory of the notebook kernel.
%pwd

## Change into the dataset directory

In [None]:
# Switch the kernel's working directory to the dataset root.
%cd /kaggle/input/plant-disease-classifcation/Plant-Disease-v1

In [None]:
# Long listing of the current directory, oldest entries first, with
# human-readable sizes.
%ls -lrtgh

#Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import random
import cv2
import tensorflow as tf
from PIL import Image
from keras.preprocessing import image
from tensorflow.keras.utils import img_to_array, array_to_img
from keras.optimizers import AdamW, Adam
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout, Input
from sklearn.model_selection import train_test_split
from keras.models import model_from_json
from keras.utils import to_categorical
from keras.applications.vgg16 import VGG16
from tensorflow.keras.metrics import Accuracy, Precision, Recall, F1Score
import time
from keras.callbacks import Callback

In [None]:
# Display the TensorFlow version used by this kernel.
tf.__version__

##Checking the GPU specs

In [None]:
# Report the attached GPU, its driver version and current memory usage.
!nvidia-smi

#Resizing the images to (224, 224) as required by VGG16

## The images aren't resized yet

In [None]:
# Load one sample image and show its array shape and contents as stored on disk.
img_path = '/kaggle/input/plant-disease-classifcation/Plant-Disease-v1/Dataset/Bell_pepper leaf/Bell_pepper leaf (10).jpg'
img = mpimg.imread(img_path)
print(img.shape)

fig, ax = plt.subplots()
ax.imshow(img)
ax.axis('off')
plt.show()

## Resizing all the images and saving them in the same directory (overwriting the originals)

In [None]:
inp_dir = '/kaggle/input/plant-disease-classifcation/Plant-Disease-v1/Dataset'

# cv2.resize takes the target size as (width, height).
target_size = (224, 224)
image_extensions = ('.jpg', '.jpeg', '.png')

# NOTE(review): this cell overwrites the dataset files in place. Kaggle
# normally mounts /kaggle/input read-only, in which case the save below
# fails -- confirm the environment allows writing here.
print(os.listdir(inp_dir))

for folder in os.listdir(inp_dir):
  folder_path = os.path.join(inp_dir, folder)
  if not os.path.isdir(folder_path):
    continue

  print(f'Processing images in the folder: {folder}')
  for filename in os.listdir(folder_path):
    # Case-insensitive match so files such as 'leaf.JPG' are not skipped.
    if not filename.lower().endswith(image_extensions):
      continue

    # Load the image
    img_path = os.path.join(folder_path, filename)
    img = mpimg.imread(img_path)

    # Already at the target size: skip it, so re-running the cell does not
    # re-encode (and progressively degrade) images that were resized before.
    # img.shape is (height, width, ...), hence the reversed comparison.
    if img.shape[:2] == target_size[::-1]:
      continue

    # Resize and overwrite the input image with the resized version
    resized_img = cv2.resize(img, target_size)
    mpimg.imsave(img_path, resized_img)

print('Done with Resizing')

## Checking whether the images have been resized

In [None]:
# Reload the same sample image and show its shape again, to compare with the
# shape printed before the resizing step.
img_path = '/kaggle/input/plant-disease-classifcation/Plant-Disease-v1/Dataset/Bell_pepper leaf/Bell_pepper leaf (10).jpg'
img = mpimg.imread(img_path)
print(img.shape)

fig, ax = plt.subplots()
ax.imshow(img)
ax.axis('off')
plt.show()

# Converting the images to arrays and normalizing them

In [None]:
def convert_img_to_array(image_dir, target_size = (224, 224)):
  """Read an image file with OpenCV and return it as a Keras-style array.

  Args:
    image_dir: path of the image file to read (despite the name, this is a
      file path, not a directory).
    target_size: (width, height) the image is resized to when its size
      differs. Pass None to keep the size found on disk.

  Returns:
    The result of `img_to_array` for the loaded image. The channels are in
    the order OpenCV loads them (BGR).
    An empty array (`np.array([])`) when OpenCV cannot read the file.
    None when an exception is raised while loading; the error is printed.
  """
  try:
    img = cv2.imread(image_dir)
    if img is None:
      return np.array([])

    # Enforce the size the model expects here as well, so loading does not
    # depend on the files having been resized on disk beforehand.
    # img.shape is (height, width, channels); cv2.resize wants (width, height).
    if target_size is not None and (img.shape[1], img.shape[0]) != tuple(target_size):
      img = cv2.resize(img, tuple(target_size))

    return img_to_array(img)

  except Exception as e:
    print(f'Error: {e}')
    return None

Reading the images one by one and converting each of them into an array

In [None]:
# Root folder of the dataset: one sub-folder per class.
# (Named dataset_dir rather than `dir`, which would shadow the builtin.)
dataset_dir = '/kaggle/input/plant-disease-classifcation/Plant-Disease-v1/Dataset'
image_list, label_list = [], []

# Class names are the sub-folder names. They are sorted because os.listdir
# returns entries in arbitrary order, which would otherwise make the
# class-name -> integer-id mapping differ between runs. Plain files in the
# root are ignored.
all_labels = sorted(
    entry for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)

# Integer id for every class, derived from the folders actually present
# instead of a hard-coded count.
binary_labels = list(range(len(all_labels)))

# Read every image, convert it to a numpy array and record its class id.
for label_index, directory in enumerate(all_labels):
  class_dir = os.path.join(dataset_dir, directory)
  for files in os.listdir(class_dir):
    img_path = os.path.join(class_dir, files)
    img_array = convert_img_to_array(img_path)

    # convert_img_to_array signals failure with None or an empty array.
    # Such entries are skipped so images and labels stay aligned and the
    # list can later be stacked into a single array.
    if img_array is None or img_array.size == 0:
      print(f'Skipping unreadable file: {img_path}')
      continue

    image_list.append(img_array)
    label_list.append(binary_labels[label_index])

As the counts below show, the classes are imbalanced.

In [None]:
# Count the images per integer class id (value_counts lists the most
# frequent class first).
label_frame = pd.DataFrame(label_list)
labels_count = label_frame.value_counts()
labels_count

All the images are now resized to (224, 224) and converted to arrays

In [None]:
# Summarise the distinct array shapes and how many images have each one,
# instead of printing one line per image. After resizing, a single row with
# (224, 224, 3) is expected. np.shape also copes with a None entry (reported
# as an empty shape), should a failed load have been stored in the list.
image_shapes = pd.Series([np.shape(img) for img in image_list]).value_counts()
image_shapes

All the images are now stored in `image_list` and their ground-truth labels are stored in `label_list`

#Splitting the data into train and test

In [None]:
# Hold out 20% of the images as the test set; the fixed random_state makes
# the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    image_list,
    label_list,
    test_size=0.2,
    random_state=67,
)

Normalizing the data

In [None]:
# Stack each split into one float16 array, divide by 255.0 and make the
# (samples, 224, 224, 3) layout explicit.
batch_shape = (-1, 224, 224, 3)
x_train = (np.array(x_train, dtype=np.float16) / 255.0).reshape(batch_shape)
x_test = (np.array(x_test, dtype=np.float16) / 255.0).reshape(batch_shape)

#Performing One Hot Encoding

In [None]:
# Fix the one-hot width to the number of classes. Without num_classes,
# to_categorical infers the width from the largest id it sees, so the two
# splits could end up with different widths if the highest class id is
# missing from one of them.
num_classes = len(all_labels)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

#Building the VGG16 Architecture

<img src='https://drive.google.com/uc?id=1bKY5n1LhU4ojEzwwsb04zo0HbmVDFi_I'>

VGG16, by default, expects color input images to be resized to (224, 224).
Which is why the resizing was done earlier.

In [None]:
def VGG16custom(input_tensor = None, Classes = 27):
  """Build a VGG16-style classifier from individual Keras layers.

  No pretrained weights are loaded here; the layers use their default
  initialisation.

  Args:
    input_tensor: optional Keras symbolic tensor to use as the model input.
      It is expected to be the output of `Input(...)` with a shape compatible
      with (224, 224, 3). When None, a new `Input` of that shape is created.
    Classes: number of units in the final softmax layer.

  Returns:
    An uncompiled `Model` named 'vgg16custom'.
  """
  img_rows, img_cols = 224, 224
  img_channels = 3
  img_dim = (img_rows, img_cols, img_channels)

  # Honour a caller-supplied input instead of silently ignoring it.
  img_input = Input(shape = img_dim) if input_tensor is None else input_tensor

  # (filters, number of 3x3 conv layers) for each of the five conv blocks.
  conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

  x = img_input
  for block_no, (filters, conv_count) in enumerate(conv_blocks, start=1):
    for conv_no in range(1, conv_count + 1):
      x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                 name=f'block{block_no}_conv{conv_no}')(x)
    # Each block ends with a 2x2 max-pool that halves the spatial size.
    x = MaxPooling2D((2, 2), strides=(2, 2), name=f'block{block_no}_pool1')(x)

  # Classification Block
  x = Flatten(name='flatten')(x)
  x = Dense(4096, activation='relu', name='fc1')(x)
  x = Dense(4096, activation='relu', name='fc2')(x)
  # The layer name (including its spelling) is left unchanged so anything
  # that refers to the layer by name keeps working.
  x = Dense(Classes, activation='softmax', name='predicitons')(x)

  return Model(inputs=img_input, outputs=x, name='vgg16custom')

In [None]:
%mv /kaggle/input/vgg-16-wights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5 

In [None]:
# Convolutional base of VGG16 without its dense top, initialised from the
# local "notop" weights file.
IMAGE_SIZE = [224, 224]
weights_path = '/kaggle/input/vgg-16-wights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
vgg = VGG16(
    input_shape=[*IMAGE_SIZE, 3],
    weights=weights_path,
    include_top=False,
)

In [None]:
# Freeze every layer of the pretrained base so that only the layers added on
# top of it are trained.
for base_layer in vgg.layers:
    base_layer.trainable = False

In [None]:
# Size the softmax layer from the one-hot training labels instead of
# hard-coding the class count, so the model output always matches the labels
# it is trained on.
num_classes = y_train.shape[1]

x = Flatten()(vgg.output)
prediction = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

Compiling the model

In [None]:
# Multi-class classification on one-hot labels: categorical cross-entropy
# loss, Adam with a learning rate of 0.0001, accuracy as the tracked metric.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Splitting the training set into train and validation sets

In [None]:
# Carve 20% of the training data out as a validation set (fixed seed for a
# repeatable split).
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=67,
)

In [None]:
# Show the current working directory before training starts.
%pwd

#Training the Model

In [None]:
epochs = 100
batch_size = 4
model_path = '/kaggle/working/models/plant_disease_model_vgg16.h5'

# Create the output folder before training, so a problem with the save
# location surfaces immediately rather than after all epochs have run.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Train while measuring wall-clock time.
start_time = time.time()
history = model.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, validation_data = (x_val, y_val))
end_time = time.time()
total_time = end_time - start_time
print('Training time is: ', total_time)

model.save(model_path)

In [None]:
# Check the GPU status and memory usage again now that training has finished.
!nvidia-smi

# Plots for Loss and Accuracy

## Training loss and accuracy

In [None]:
# Plot accuracy and loss side by side for the training and validation sets.
# Previously only accuracy was drawn, although the section covers both.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

ax_acc.plot(history.history['accuracy'])
ax_acc.plot(history.history['val_accuracy'])
ax_acc.set(title='Model Accuracy', xlabel='Epochs', ylabel='Accuracy')
ax_acc.legend(['train', 'val'])

ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set(title='Model Loss', xlabel='Epochs', ylabel='Loss')
ax_loss.legend(['train', 'val'])

fig.tight_layout()
plt.show()

In [None]:
# Evaluate on the held-out test set. The model was compiled with a single
# 'accuracy' metric, so index 1 of the result is the accuracy (index 0 is
# the loss).
scores = model.evaluate(x_test, y_test)
test_accuracy = scores[1]
print(f'Test Accuracy: {test_accuracy * 100}')
scores

In [None]:
# Model outputs for every test image; with the softmax head each row holds
# one score per class.
y_pred = model.predict(x_test)

In [None]:
# The image arrays were loaded with cv2.imread, which stores channels as BGR.
# Reverse the channel axis so the sample is displayed with its true colours.
img = array_to_img(x_test[11][..., ::-1])
img

In [None]:
# Compare the ground truth and the prediction for the same test sample.
# The predicted label must come from y_pred (it was previously read from
# y_test, so it always matched), and the probabilities printed are now those
# of the same sample.
sample_index = 11
print('original label: ', all_labels[np.argmax(y_test[sample_index])])
print('predicted label: ', all_labels[np.argmax(y_pred[sample_index])])
print(y_pred[sample_index])
print((y_test).shape)

In [None]:
# Print "true label - predicted label" for every test sample.
true_ids = np.argmax(y_test, axis=1)
pred_ids = np.argmax(y_pred, axis=1)
for true_id, pred_id in zip(true_ids, pred_ids):
    print(all_labels[true_id], ' - ', all_labels[pred_id])