In [1]:
### fix the seed of every random number generator used here so results can be reproduced

import random
import numpy as np
import tensorflow as tf

SEED = 42

random.seed(SEED)         # Python's built-in generator
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow's global seed

KeyboardInterrupt: 

In [None]:
## importing dependencies

import os
import json
from zipfile import ZipFile
from PIL import Image

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

In [None]:
# Install the Kaggle command-line client that the download cell relies on
!pip install kaggle --quiet

In [None]:
# Read the Kaggle API credentials (a JSON object with 'username' and 'key').
# The context manager closes the file handle as soon as the JSON is parsed.
with open('/content/kaggle.json') as credential_file:
    kaggle_credential = json.load(credential_file)

In [None]:
# The Kaggle client only picks up credentials from these exact upper-case
# variable names; environment variables are case-sensitive on Linux/Colab,
# so any other spelling is silently ignored.
os.environ['KAGGLE_USERNAME'] = kaggle_credential['username']
os.environ['KAGGLE_KEY'] = kaggle_credential['key']

### Using **Copy API Command** to download the dataset

In [None]:
# Download the PlantVillage dataset archive with the Kaggle CLI
!kaggle datasets download -d abdallahalidev/plantvillage-dataset

In [None]:
# List the working directory to check what the download produced
!ls

In [None]:
# Unpack the downloaded archive; extractall() without arguments extracts
# into the current working directory
with ZipFile('plantvillage-dataset.zip', 'r') as zip_ref:
  zip_ref.extractall()

In [None]:
# List the top level of the extracted dataset folder
!ls plantvillage\ dataset

In [None]:
# List the contents of every sub-folder of the extracted dataset
!ls plantvillage\ dataset/*

In [None]:
dataset_root = 'plantvillage dataset'

# Top-level folders of the extracted dataset
print(os.listdir(dataset_root))

# For each image variant: a blank separator line, the number of entries
# in the folder, and the first five entry names
for variant in ('segmented', 'grayscale', 'color'):
    variant_entries = os.listdir(dataset_root + '/' + variant)
    print('')
    print(len(variant_entries))
    print(variant_entries[:5])


### So there are 38 classes and we will be working with the color dataset

In [None]:
# Number of files in one class folder, followed by the first five file names
apple_healthy_files = os.listdir('/content/plantvillage dataset/color/Apple___healthy')

print(len(apple_healthy_files))
print(apple_healthy_files[:5])

## Data preprocessing

In [None]:
# Root folder of the colour images; each sub-folder is one class
base_dir = '/content/plantvillage dataset/color/'

In [None]:
# Path of one sample image from the Apple___Apple_scab class folder
image_path = '/content/plantvillage dataset/color/Apple___Apple_scab/03eccb1a-0368-4ac7-9f48-7546037b775a___FREC_Scab 3334.JPG'

# Read the image file into an array
img = mpimg.imread(image_path)

# Array dimensions of the loaded image
print(img.shape)

# Display the image without axis ticks
plt.imshow(img)
plt.axis('off')
plt.show()


In [None]:
# Print the raw pixel values of the sample image loaded above
print(img)

## Train/validation split

In [None]:
# Image parameters shared by the data generators and the model's input layer

img_size = 224 # images are resized to img_size x img_size pixels
batch_size = 32 # number of images per batch produced by the generators

In [None]:
# One generator configuration shared by the training and validation subsets:
# pixel values are multiplied by 1/255 and 20% of the images are held out
# as the 'validation' subset
data_gen = ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.2
)


**flow_from_directory**

It assumes a directory structure where each subdirectory represents a class, and the images belonging to that class are stored within that subdirectory. This organization is convenient for large datasets as it helps keep the data organized and easily accessible.


In [None]:
## Training generator: batches drawn from the 'training' subset of base_dir

train_generator = data_gen.flow_from_directory(
    base_dir,
    class_mode='categorical',
    subset='training',
    batch_size=batch_size,
    target_size=(img_size, img_size),
)


In [None]:
## Validation generator: batches drawn from the 'validation' subset of base_dir

val_generator = data_gen.flow_from_directory(
    base_dir,
    class_mode='categorical',
    subset='validation',
    batch_size=batch_size,
    target_size=(img_size, img_size),
)

The command below shows the attributes and methods of a generator object.

In [None]:
# Concrete class of the generator object
print(type(train_generator))

# Names of all attributes and methods available on it
print(dir(train_generator))

## Convolutional neural network

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, MaxPooling2D, Conv2D, Dropout

from keras.callbacks import EarlyStopping

In [None]:
# First convolutional stage; also declares the input shape of the network
conv_stage_1 = [
    Conv2D(32, (3,3), activation='relu', input_shape=(img_size, img_size, 3)),
    MaxPooling2D((2,2)),
    Dropout(0.25),
]

# Second convolutional stage with twice as many filters
conv_stage_2 = [
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Dropout(0.25),
]

# Dense classifier head; the output layer has one softmax unit per class
classifier_head = [
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dense(train_generator.num_classes, activation='softmax'),
]

model = Sequential(conv_stage_1 + conv_stage_2 + classifier_head)

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once val_loss has not improved for 5 epochs and roll the model back
# to the weights of its best epoch
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

In [None]:
# Train for at most 25 epochs; early stopping may end the run sooner.
# steps_per_epoch / validation_steps are deliberately left unset: the
# generators are finite batch sequences, so Keras runs each of them to the
# end once per epoch. Forcing `samples // batch_size` steps skipped the last
# partial batch of both subsets and, on recent Keras versions, can produce
# "input ran out of data" epochs.
history = model.fit(
    train_generator,
    epochs = 25,
    validation_data = val_generator,
    callbacks = [early_stopping]
)

In [None]:
# Training and validation accuracy per epoch, drawn on the same axes
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name])

plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')

plt.legend(['training data', 'validation data'])

In [None]:
# Training and validation loss per epoch, drawn on the same axes
for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')

plt.legend(['training data', 'validation data'])

### Building a predictive system

In [None]:
def load_and_preprocess_image(image_path, target_size =(224, 224)):
  """Load one image file and turn it into a model-ready batch of size 1.

  Args:
    image_path: path of the image file to open.
    target_size: (width, height) the image is resized to.

  Returns:
    A float32 array of shape (1, height, width, 3) with values scaled
    to the range [0, 1].
  """
  # The context manager releases the file handle once the pixels are read
  with Image.open(image_path) as img:
    # Force exactly three channels: grayscale, palette or RGBA files would
    # otherwise yield an array whose shape does not match the model input
    img = img.convert('RGB').resize(target_size)

  img_array = np.array(img)

  # add batch dimension as the model needs input in terms of batch
  img_array = np.expand_dims(img_array, axis = 0)

  # same 1/255 scaling as the training generator applies
  img_array = img_array.astype('float32')/255

  return img_array


def predict_image_class(model, image_path, class_indices):
  """Predict the class name of a single image file.

  Args:
    model: object whose predict() returns one row of class scores per
      input image.
    image_path: path of the image to classify.
    class_indices: mapping from class index to class name. Keys may be
      ints, or strings as produced by a JSON round trip.

  Returns:
    The class name stored under the index with the highest score.

  Raises:
    KeyError: if the predicted index is missing from class_indices.
  """
  preprocessed_img = load_and_preprocess_image(image_path)
  predictions = model.predict(preprocessed_img)

  # plain int so that str() below gives '7' rather than a NumPy repr
  predicted_class_index = int(np.argmax(predictions, axis = 1)[0])

  try:
    return class_indices[predicted_class_index]
  except KeyError:
    # a mapping reloaded from JSON has string keys
    return class_indices[str(predicted_class_index)]

In [None]:
# Display the generator's class name -> class index mapping
train_generator.class_indices

In [None]:
# Invert the generator's {class name: index} mapping into {index: class name}

class_indices = dict(
    (class_idx, class_name)
    for class_name, class_idx in train_generator.class_indices.items()
)

In [None]:
# Display the inverted class index -> class name mapping
class_indices

#### One example usage


At the moment the model does not reach 100% accuracy, so some predictions may be wrong.

In [None]:
# Sample image from the 'segmented' folder; this is the one classified below
image_path = '/content/plantvillage dataset/segmented/Corn_(maize)___Common_rust_/007f6e89-3e7c-4e6a-8ef6-8058c7133799___RS_Rust 1933_final_masked.jpg'
# Sample image from the 'color' folder (assigned here but not used in this cell)
image_path2 = '/content/plantvillage dataset/color/Potato___Late_blight/0051e5e8-d1c4-4a84-bf3a-a426cdad6285___RS_LB 4640.JPG'

# Preprocess the image, run the model and map the winning index to its class name
predicted_class_name = predict_image_class(model, image_path, class_indices)
print(f'Predicted class name is {predicted_class_name}')


In [None]:
# One Blueberry___healthy sample from the 'segmented' folder and one from
# the 'color' folder, shown side by side
img2 = mpimg.imread('/content/plantvillage dataset/segmented/Blueberry___healthy/01ca7a9f-80a6-4693-a067-50684beda81d___RS_HL 2545_final_masked.jpg')

img3 = mpimg.imread('/content/plantvillage dataset/color/Blueberry___healthy/0137389a-9b78-4a94-afdb-c2b50861f89c___RS_HL 2347.JPG')


fig, axes = plt.subplots(1,2)

# left panel: image from the segmented folder
axes[0].imshow(img2)
axes[0].axis('off')
axes[0].set_title('Segmented folder')

# right panel: image from the color folder (img3, not img2 a second time)
axes[1].imshow(img3)
axes[1].axis('off')
axes[1].set_title('Colored folder')


plt.show()

### save the model and class indices

In [None]:
# Persist the index -> class name mapping next to the notebook. The context
# manager flushes and closes the file, so the JSON is fully on disk as soon
# as this cell finishes. JSON stores the integer keys as strings.
with open('class_indices.json', 'w') as class_indices_file:
    json.dump(class_indices, class_indices_file)

In [None]:
# Optional extra copy on the author's machine. The folder only exists there,
# so the save is skipped elsewhere (e.g. on Colab) instead of failing the run.
local_model_dir = '/Users/pragya/Desktop/LEARN/Kaggle_practice'
if os.path.isdir(local_model_dir):
    model.save(os.path.join(local_model_dir, 'plant_disease_prediction_model.h5'))

In [None]:
# Save the trained model to the current working directory (.h5 file)
model.save('plant_disease_prediction_model.h5')


## NOTE:
In future runs, use `drive.mount` to save the model to Google Drive.

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# # Save the model to Google Drive
# model.save('/content/drive/My Drive/plant_disease_prediction_model.h5')