# Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import random
import cv2
import tensorflow as tf
from PIL import Image
from keras.preprocessing import image
from tensorflow.keras.utils import img_to_array, array_to_img
from keras.optimizers import AdamW, Adam
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout, Input
from sklearn.model_selection import train_test_split
from keras.models import model_from_json
from keras.utils import to_categorical
from keras.applications import MobileNet
from tensorflow.keras.metrics import Accuracy, Precision, Recall, F1Score
import time
from keras.callbacks import Callback
from concurrent.futures import ProcessPoolExecutor



In [None]:
# Display the installed TensorFlow version string as the cell output.
tf.__version__

In [None]:
# IPython shell escape: runs the `nvidia-smi` command and shows its output in the cell.
!nvidia-smi

# Resizing the images to (224, 224) as required by MobileNet

## Inspecting an image before resizing

In [None]:
# Load one sample image, report its array shape, and render it without axes.
# Colab alternative path (disabled):
#img_path = '/content/drive/MyDrive/Plant-Disease-v1/Dataset/Bell_pepper leaf/Bell_pepper leaf (10).jpg'
img_path = '/kaggle/input/plantvillage/Dataset-color/Apple___Apple_scab/00075aa8-d81a-4184-8541-b692b78d398a___frec_scab_3335.jpg'

img = mpimg.imread(img_path)
print(img.shape)

plt.imshow(img)
plt.axis('off')
plt.show()

## Resizing the images

In [None]:
# Resize every image under inp_dir (one sub-folder per class) to target_size,
# overwriting the original file in place.
# NOTE(review): overwriting requires inp_dir to be writable; Kaggle input
# mounts are typically read-only, so confirm this path can be modified.
inp_dir = '/kaggle/input/plant-disease-classifcation/Plant-Disease-v1/Dataset'

# cv2.resize takes dsize as (width, height).
target_size = (224, 224)
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# List the class folders once and reuse the result for both the print and the loop.
class_folders = os.listdir(inp_dir)
print(class_folders)

for folder in class_folders:
  folder_path = os.path.join(inp_dir, folder)
  if not os.path.isdir(folder_path):
    continue

  print(f'Processing images in the folder: {folder}')
  for filename in os.listdir(folder_path):
    # Compare case-insensitively so files such as 'leaf.JPG' are not skipped.
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
      continue

    # Load the image.
    img_path = os.path.join(folder_path, filename)
    img = mpimg.imread(img_path)

    # img.shape[:2] is (height, width). Skip images that already have the
    # target size so that re-running the cell does not re-encode them again.
    if img.shape[:2] == target_size[::-1]:
      continue

    # Resize and overwrite the input image with the resized version.
    resized_img = cv2.resize(img, target_size)
    mpimg.imsave(img_path, resized_img)

print('Done with Resizing')

## Checking whether the image has been resized

In [None]:
#img_path = '/content/drive/MyDrive/Plant-Disease-v1/Dataset/Bell_pepper leaf/Bell_pepper leaf (10).jpg'
img_path = '/kaggle/input/plantvillage/Dataset-color/Apple___Apple_scab/00075aa8-d81a-4184-8541-b692b78d398a___frec_scab_3335.jpg'
img = mpimg.imread(img_path)
print(img.shape)
plt.imshow(img)
plt.axis('off')
plt.show()

# Converting the images to array and normalizing them

In [2]:
def convert_img_to_array(image_dir):
  """Read an image file with OpenCV and convert it with ``img_to_array``.

  Args:
    image_dir: Path of the image file to read (despite the name, this is a
      file path, not a directory).

  Returns:
    The result of ``img_to_array`` applied to the decoded image. If the file
    cannot be decoded, or reading/conversion raises, an empty ``np.array([])``
    is returned, so callers have a single failure sentinel (``.size == 0``).

  Note:
    ``cv2.imread`` returns colour images in BGR channel order; no channel
    reordering is done here.
  """
  try:
    img = cv2.imread(image_dir)
    if img is None:
      # cv2.imread reports an unreadable or missing file by returning None.
      return np.array([])
    return img_to_array(img)

  except Exception as e:
    print(f'Error: {e}')
    # Use the same sentinel as the unreadable-file branch instead of None, so
    # every failure looks the same to the caller.
    return np.array([])

# Reading the images and converting them into an array

In [3]:
def process_directory(args):
    """Load every readable image in one class directory.

    Args:
        args: A ``(directory, temp)`` pair. ``directory`` is the path of the
            class folder; ``temp`` is the index into the module-level
            ``binary_labels`` list that selects the label for this folder.
            A single tuple is used so the function can be passed to
            ``executor.map``.

    Returns:
        ``(images, labels)``: two lists of equal length holding the converted
        image arrays and the label for each of them.
    """
    directory, temp = args
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort so the sample order
    # (and therefore any seeded split of it) is reproducible.
    for files in sorted(os.listdir(directory)):
        img_path = os.path.join(directory, files)
        img_array = convert_img_to_array(img_path)
        # convert_img_to_array signals failure with None or an empty array.
        # Skip those entries so images and labels stay aligned and the images
        # can later be stacked into one array.
        if img_array is None or img_array.size == 0:
            print(f'Skipping unreadable image: {img_path}')
            continue
        images.append(img_array)
        labels.append(binary_labels[temp])
    return images, labels

## Parallel Processing the image_list and label_list

In [4]:
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because other cells refer to it.
dir = '/kaggle/input/plantvillage/Dataset-color'
# os.listdir returns entries in arbitrary order. Sort them so that each class
# folder is mapped to the same label index on every run and every machine.
all_labels = sorted(os.listdir(dir))
binary_labels = list(range(38))

# One (directory_path, label_index) task per class folder.
# Only the first 19 class folders are loaded in this cell.
args_list = [(os.path.join(dir, directory), temp) for temp, directory in enumerate(all_labels[:19])]

# Use ProcessPoolExecutor for parallel processing: one task per class folder.
with ProcessPoolExecutor() as executor:
    results = list(executor.map(process_directory, args_list))

# Flatten the per-folder (images, labels) pairs into two flat lists.
# Iterating over `results` directly also works when it is empty, where
# unpacking `zip(*results)` into two names would raise.
image_list = [img for images, _ in results for img in images]
label_list = [lbl for _, labels in results for lbl in labels]

## Reading the images one by one and converting them into an array

In [None]:
# Disabled sequential (non-parallel) loader, kept for reference only.
# The whole cell is a single triple-quoted string, so none of the code inside
# it is executed.
# NOTE(review): the last loop inside the string does `temp += 1` while the
# `temp = -1` initialisation is commented out - fix before re-enabling.
'''
#dir = '/content/drive/MyDrive/Plant-Disease-v1/Dataset'
dir = '/kaggle/input/plantvillage/Dataset-color'
image_list, label_list = [], []
#all_labels = os.listdir('/content/drive/MyDrive/Plant-Disease-v1/Dataset')
all_labels = os.listdir('/kaggle/input/plantvillage/Dataset-color')
binary_labels = []
#temp = -1

binary_labels = list(range(38))

#Reading and Converting the image into to numpy array
#Since the datasize of PlantVillage is >50,000 images, the process is divided and processed in two batches
#as the cpu was unable to process this in a single go

#Processing the first 19 directories
for temp, directory in enumerate(all_labels[:19]):
    plant_img_list = os.listdir(f'{dir}/{directory}')
    for files in plant_img_list:
        img_path = f'{dir}/{directory}/{files}'
        image_list.append(convert_img_to_array(img_path))
        label_list.append(binary_labels[temp])

#Use the code below if you have high performing CPUs
for directory in all_labels:
  plant_img_list = os.listdir(f'{dir}/{directory}')
  temp += 1
  for files in plant_img_list:
    img_path = f'{dir}/{directory}/{files}'
    image_list.append(convert_img_to_array(img_path))
    label_list.append(binary_labels[temp])
    '''

In [None]:
# Disabled loader for the class folders from index 19 onwards, kept for
# reference only. The whole cell is a single triple-quoted string, so none of
# the code inside it is executed.
'''
#Processing the next 19 directories
for temp, directory in enumerate(all_labels[19:]):
    plant_img_list = os.listdir(f'{dir}/{directory}')
    for files in plant_img_list:
        img_path = f'{dir}/{directory}/{files}'
        image_list.append(convert_img_to_array(img_path))
        label_list.append(binary_labels[temp + 19])
        '''

Checking the frequency of images in each class

In [5]:
# Count how many loaded samples carry each label value.
label_frame = pd.DataFrame(label_list)
labels_count = label_frame.value_counts()

# Splitting the data into train test split

In [6]:
# Hold out 20% of the samples as the test set; the fixed seed keeps the split
# repeatable for a given input order.
x_train, x_test, y_train, y_test = train_test_split(
    image_list,
    label_list,
    test_size=0.2,
    random_state=67,
)

# Normalizing the data

In [None]:
# (height, width) expected by the model input.
MODEL_INPUT_SIZE = (224, 224)


def _prepare_images(images, size=MODEL_INPUT_SIZE):
    """Resize images to ``size`` where needed, scale to [0, 1], stack as float16.

    ``reshape`` only reinterprets the flat buffer; it does not resample.
    Reshaping images of any other size to (-1, 224, 224, 3) would either raise
    or silently scramble pixels, so each image is resized explicitly first.

    Args:
        images: Sequence of image arrays of shape (H, W, 3).
        size: Target ``(height, width)``.

    Returns:
        A float16 array of shape ``(len(images), height, width, 3)``.
    """
    height, width = size
    prepared = [
        # cv2.resize takes dsize as (width, height).
        img if tuple(img.shape[:2]) == (height, width) else cv2.resize(img, (width, height))
        for img in images
    ]
    batch = np.array(prepared, dtype=np.float16) / 255.0
    return batch.reshape(-1, height, width, 3)


x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)

# One Hot Encoding 

In [None]:
# The classifier head has 38 softmax outputs, so the one-hot targets must be
# 38 wide as well. Without num_classes, to_categorical infers the width from
# the largest label present, which is smaller when only some classes are
# loaded and can even differ between y_train and y_test.
NUM_CLASSES = 38
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

# Creating the model

In [None]:
# Build the MobileNet base without its classification top and load the
# pre-trained weights from a local file.
# NOTE(review): the weight file name mentions 192 while the input size here is
# 224 - confirm this is the intended weight file.
IMAGE_SIZE = [224, 224]
mobilenet = MobileNet(
    input_shape=IMAGE_SIZE + [3],
    weights='/kaggle/input/mobilenet-pre-trained-weights/mobilenet_1_0_192_tf_no_top.h5',
    include_top=False,
)

In [None]:
# Freeze every layer of the pre-trained base so training does not update them.
for base_layer in mobilenet.layers:
    base_layer.trainable = False

In [None]:
# Classification head: flatten the base output and map it to 38 softmax units.
flatten_layer = Flatten()
x = flatten_layer(mobilenet.output)

softmax_head = Dense(38, activation='softmax')
prediction = softmax_head(x)

# Full model: MobileNet input through to the new softmax head.
model = Model(inputs=mobilenet.input, outputs=prediction)
model.summary()

# Compiling the model

In [None]:
# Categorical cross-entropy on one-hot targets, Adam with a learning rate of
# 0.0001, tracking accuracy.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Splitting the training set into train and validation set

In [None]:
# Carve 20% of the training data off as a validation set (fixed seed).
# Note: this rebinds x_train / y_train, so re-running the cell shrinks them again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=67,
)

# Training the Model

In [None]:
# Training hyper-parameters.
epochs = 100
batch_size = 4

# Fit the model, measuring wall-clock time around the call.
start_time = time.time()
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
)
end_time = time.time()

total_time = end_time - start_time
print('Training time is: ', total_time)

# Persist the trained model in HDF5 format.
model.save('/kaggle/working/models/plant_disease_model_mobilemet.h5')