# Description of the dataset 📝
This dataset was created using offline augmentation of the original dataset. It consists of about 87K RGB images of healthy and diseased crop leaves, categorized into 38 different classes. The whole dataset is split into training and validation sets in an 80/20 ratio, preserving the directory structure. A new directory containing 33 test images was created later for prediction purposes.

# Our goal 🎯
The goal is clear and simple: build a model that can distinguish healthy from diseased crop leaves and, if the crop has a disease, predict which disease it is.

# Step 1: Import the Required Modules

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
import warnings
# Install an "ignore" filter so that warnings issued through the `warnings`
# module are suppressed from here on (this also hides deprecation notices).
warnings.filterwarnings("ignore")

# Step 2: Define data generators for training and validation data

In [None]:
# image preprocessing
# Import from `tensorflow.keras`, the same namespace the rest of the notebook
# uses, instead of the standalone `keras` package, so the generators and the
# model are built from one Keras implementation.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images are scaled by 1/255 and randomly sheared, zoomed and shifted;
# pixels uncovered by a transform are filled with the nearest existing value.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   fill_mode='nearest')

# Validation images are only rescaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=1./255)

batch_size = 128
# Height and width every image is resized to; shared by both generators so the
# two cannot drift apart (the model below is built for 224x224x3 inputs).
image_size = (224, 224)
base_dir = "../input/new-plant-diseases-dataset/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)"

# One sub-directory per class; labels are produced as one-hot vectors.
training_set = train_datagen.flow_from_directory(base_dir+'/train',
                                                 target_size=image_size,
                                                 batch_size=batch_size,
                                                 class_mode='categorical')

valid_set = valid_datagen.flow_from_directory(base_dir+'/valid',
                                              target_size=image_size,
                                              batch_size=batch_size,
                                              class_mode='categorical')

# 🧭 Exploring the data 🧭

In [None]:
# `class_indices` is the generator's mapping of class names to label indices;
# keep its keys (the class names) as a list so they can be looked up by index.
class_dict = training_set.class_indices
total_classes = [*class_dict]
print(total_classes)

# Step 3: Load the pre-trained VGG16 model and add custom layers for classification task

In [None]:
# Load the pre-trained VGG16 model (excluding the top classification layers)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the weights of the pre-trained layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Size the output layer from the classes the training generator discovered
# rather than hard-coding the count, so the head always matches the labels.
num_classes = len(training_set.class_indices)

# Add custom classification layers on top of the pre-trained model
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(num_classes, activation='softmax')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=x)

# Step 4: Compile the Model

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# generators yield one-hot labels), and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Step 5: Train the model

In [None]:
# Number of training epochs; adjust as needed.
epochs = 30

# Train on the augmented training generator and evaluate on the validation
# generator; the returned object is kept for the plots below.
history = model.fit(
    training_set,
    epochs=epochs,
    validation_data=valid_set,
)

In [None]:
# Evaluate the metrics dictionary recorded by `model.fit` (shown as the cell
# output when run in a notebook).
history.history

In [None]:
#plotting training values
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Per-epoch metrics recorded during training
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis values so the integer `epochs`
# hyperparameter set in the training cell is not overwritten by a range.
epoch_axis = range(1, len(loss) + 1)

#accuracy plot
plt.plot(epoch_axis, acc, color='green', label='Training Accuracy')
plt.plot(epoch_axis, val_acc, color='blue', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

# Start a second figure so the loss curves do not share axes with accuracy
plt.figure()
#loss plot
plt.plot(epoch_axis, loss, color='pink', label='Training Loss')
plt.plot(epoch_axis, val_loss, color='red', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Define the path to the image you want to predict
image_path = "../input/new-plant-diseases-dataset/test/test/TomatoEarlyBlight1.JPG"

# Load and preprocess the image the same way the generators do:
# resize to 224x224 and scale by 1/255
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)
img_array = tf.expand_dims(img_array, axis=0)  # Add batch dimension
img_array /= 255.0  # Normalize the image

# Make predictions
predictions = model.predict(img_array)

# Get the index of the class with the highest predicted probability
predicted_class = int(tf.argmax(predictions, axis=-1).numpy()[0])

# Look the class name up from that single index instead of re-scanning the
# probabilities by hand, so the printed index and the plotted name always agree
class_name = total_classes[predicted_class]

# Print the predicted class index
print(f"Predicted Class: {predicted_class}")

# Plot the image with the predicted class name as its title
plt.figure(figsize = (4,4))
plt.imshow(img)
plt.axis('off')
plt.title(class_name)
plt.show()