# Plant Disease Detection using CNN
**SmartKisan - Plant Disease Classification Model**

This notebook trains a CNN model to classify plant diseases from leaf images.
- **38 classes** covering diseases across multiple crops
- Uses **Transfer Learning** with MobileNetV2 for better accuracy
- Dataset: Plant Disease Dataset (train/valid/test splits)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/karishma-devs-0/smart_kisan/blob/main/plantDetection/Plant_Disease_Detection.ipynb)

## 1. Setup & Mount Google Drive
Upload your `Plant_Disease_Dataset` folder to Google Drive first.

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used by
# this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Root folder of the dataset inside the mounted Google Drive.
# Edit this if the dataset was uploaded somewhere else.
DATASET_PATH = '/content/drive/MyDrive/Plant_Disease_Dataset'

# Split directories, all derived from the root above.
# Note that the test images sit one level deeper, in test/test.
TRAIN_DIR, VALID_DIR, TEST_DIR = (
    f'{DATASET_PATH}/{subdir}' for subdir in ('train', 'valid', 'test/test')
)

In [None]:
# Install the notebook's third-party dependencies; -q keeps pip's output quiet.
# NOTE(review): pandas is installed here but is not imported by any visible cell.
!pip install -q tensorflow matplotlib seaborn scikit-learn numpy pandas

## 2. Import Libraries

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
import json

# Report the TensorFlow version and whether TensorFlow can see at least one GPU.
print(f'TensorFlow version: {tf.__version__}')
gpu_devices = tf.config.list_physical_devices("GPU")
has_gpu = len(gpu_devices) > 0
print(f'GPU available: {has_gpu}')

## 3. Configuration

In [None]:
# Training hyperparameters
IMG_SIZE, BATCH_SIZE = 224, 32   # images are resized to IMG_SIZE x IMG_SIZE
EPOCHS = 25                      # epoch budget of the fine-tuning phase
LEARNING_RATE = 1e-3             # Adam learning rate for the first phase

# Output folder (on Google Drive) for checkpoints, plots and the final model;
# it is created on the spot if it does not exist yet.
MODEL_SAVE_PATH = '/content/drive/MyDrive/plant_disease_model'
os.makedirs(name=MODEL_SAVE_PATH, exist_ok=True)

## 4. Data Loading & Augmentation

In [None]:
# Random augmentations, applied to the training images only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    fill_mode='nearest',
)

# Both generators scale pixel values by 1/255; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the two directory iterators.
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Training batches are shuffled; validation order is kept fixed so that
# predictions can later be compared with valid_generator.classes.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, shuffle=True, **flow_options
)
valid_generator = val_datagen.flow_from_directory(
    VALID_DIR, shuffle=False, **flow_options
)

# Class metadata is taken from the training split.
NUM_CLASSES = train_generator.num_classes
CLASS_NAMES = list(train_generator.class_indices)

print(f'Number of classes: {NUM_CLASSES}')
print(f'Training samples: {train_generator.samples}')
print(f'Validation samples: {valid_generator.samples}')
print(f'\nClasses: {CLASS_NAMES}')

## 5. Visualize Sample Images

In [None]:
# Preview up to 12 (augmented) training images in a 3x4 grid.
batch_images, batch_labels = next(train_generator)

fig, axes = plt.subplots(3, 4, figsize=(15, 10))

# Hide every frame up front so that grid cells without an image stay blank
# when the batch holds fewer images than the grid has slots.
for ax in axes.flat:
    ax.axis('off')

# zip() stops at the shorter sequence, so a batch smaller than 12 images
# (e.g. BATCH_SIZE < 12) no longer raises IndexError.
for ax, sample_image, one_hot_label in zip(axes.flat, batch_images, batch_labels):
    ax.imshow(sample_image)
    label_idx = np.argmax(one_hot_label)  # one-hot vector -> class index
    ax.set_title(CLASS_NAMES[label_idx], fontsize=9)

plt.suptitle('Sample Training Images', fontsize=16)
plt.tight_layout()
plt.show()

## 6. Build the Model (Transfer Learning - MobileNetV2)

In [None]:
# ImageNet-pretrained MobileNetV2 backbone, without its classification top.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Phase 1 trains only the new head, so the backbone starts out frozen.
base_model.trainable = False

# Classification head: pooled backbone features -> 256-unit hidden layer
# -> softmax over the dataset's classes, with dropout before each dense layer.
classifier_head = [
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(NUM_CLASSES, activation='softmax'),
]
model = keras.Sequential([base_model, *classifier_head])

# Categorical cross-entropy matches the one-hot labels produced by the
# generators (class_mode='categorical').
phase1_optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    optimizer=phase1_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

## 7. Train the Model (Phase 1 - Feature Extraction)

In [None]:
# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6
)
# Keep only the checkpoint with the best validation accuracy.
checkpoint = ModelCheckpoint(
    filepath=f'{MODEL_SAVE_PATH}/best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
)

# The same callback list is reused by the fine-tuning phase.
callbacks = [early_stopping, lr_scheduler, checkpoint]

print('Phase 1: Training with frozen base model...')
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=10,
    callbacks=callbacks,
)

## 8. Fine-Tune the Model (Phase 2)

In [None]:
# Unfreeze the last 30 layers of the base model for fine-tuning;
# everything before them stays frozen.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen tail.
# A trainable BatchNormalization layer updates its moving mean/variance during
# fit(), which can wipe out the pretrained statistics; with trainable=False the
# layer runs in inference mode (see the Keras transfer-learning guide).
for layer in base_model.layers[-30:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Recompile with a lower learning rate so the pretrained weights are only
# nudged; recompiling is also what makes the new `trainable` flags take effect.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print('Phase 2: Fine-tuning with unfrozen layers...')
history_fine = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=valid_generator,
    callbacks=callbacks
)

## 9. Training History Visualization

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Stitch the per-epoch curves of phase 1 and phase 2 into one series per metric.
acc, val_acc, loss, val_loss = (
    history.history[metric_key] + history_fine.history[metric_key]
    for metric_key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

# One panel per metric; the dashed line sits on the last phase-1 epoch,
# i.e. where fine-tuning takes over.
panels = (
    (ax1, 'Accuracy', 'accuracy', acc, val_acc),
    (ax2, 'Loss', 'loss', loss, val_loss),
)
for panel, title, metric_key, train_curve, val_curve in panels:
    panel.plot(train_curve, label=f'Train {title}')
    panel.plot(val_curve, label=f'Val {title}')
    panel.axvline(
        x=len(history.history[metric_key])-1,
        color='gray',
        linestyle='--',
        label='Fine-tuning start',
    )
    panel.set_title(f'Model {title}')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(title)
    panel.legend()

plt.tight_layout()
# Save before show() so the figure is written to the output folder as rendered.
plt.savefig(f'{MODEL_SAVE_PATH}/training_history.png', dpi=150)
plt.show()

## 10. Evaluate on Validation Set

In [None]:
# Get predictions for the whole validation set. The generator was created with
# shuffle=False, so after reset() the prediction order lines up with
# valid_generator.classes.
valid_generator.reset()
predictions = model.predict(valid_generator)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = valid_generator.classes

# Classification report.
# `labels` is passed explicitly: without it scikit-learn infers the label set
# from the data and raises ValueError when a class is neither present nor
# predicted, because target_names would then have a different length.
print('Classification Report:\n')
print(classification_report(
    true_classes,
    predicted_classes,
    labels=np.arange(len(CLASS_NAMES)),
    target_names=CLASS_NAMES,
))

In [None]:
# Confusion matrix (rows: true class, columns: predicted class).
# `labels` pins the matrix to one row/column per entry of CLASS_NAMES, so the
# tick labels below always match its shape even if a class never shows up in
# the validation labels or in the predictions.
cm = confusion_matrix(
    true_classes,
    predicted_classes,
    labels=np.arange(len(CLASS_NAMES)),
)

plt.figure(figsize=(20, 16))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
# Small, rotated tick labels keep the many class names legible.
plt.xticks(rotation=90, fontsize=7)
plt.yticks(fontsize=7)
plt.tight_layout()
plt.savefig(f'{MODEL_SAVE_PATH}/confusion_matrix.png', dpi=150)
plt.show()

## 11. Save the Model & Class Labels

In [None]:
# Destination files inside the output folder.
model_file = f'{MODEL_SAVE_PATH}/plant_disease_model.keras'
labels_file = f'{MODEL_SAVE_PATH}/class_labels.json'

# Persist the trained model.
model.save(model_file)

# The generator maps class name -> index; invert it to index -> class name
# for use at inference time. (json writes the integer keys out as strings.)
class_indices = train_generator.class_indices
index_to_class = {index: name for name, index in class_indices.items()}

with open(labels_file, 'w') as labels_handle:
    json.dump(index_to_class, labels_handle, indent=2)

print(f'Model saved to: {model_file}')
print(f'Class labels saved to: {labels_file}')

## 12. Test Prediction on a Single Image

In [None]:
from tensorflow.keras.preprocessing import image

def predict_disease(img_path, model, class_names, top_k=3):
    """Predict the plant disease shown in a leaf image and display the result.

    The image is loaded at ``IMG_SIZE`` x ``IMG_SIZE``, scaled by 1/255 and fed
    to ``model`` as a batch of one. The original image is shown with the best
    prediction as its title, and the ``top_k`` highest-scoring classes are
    printed.

    Args:
        img_path: Path of the image file to classify.
        model: Object exposing ``predict``; its output row for the image is
            indexed by class index.
        class_names: Sequence mapping a class index to its label.
        top_k: Number of highest-scoring classes to print. Defaults to 3. If
            the model has fewer classes, all of them are listed.

    Returns:
        A ``(label, confidence)`` tuple, where ``confidence`` is the score of
        the predicted class expressed as a percentage.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A slice of [-0:] would silently select *every* class.
        raise ValueError(f'top_k must be at least 1, got {top_k}')

    img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    img_array = image.img_to_array(img) / 255.0
    # The model expects a batch dimension: (H, W, C) -> (1, H, W, C).
    img_array = np.expand_dims(img_array, axis=0)

    prediction = model.predict(img_array)
    scores = prediction[0]
    predicted_idx = np.argmax(scores)
    confidence = scores[predicted_idx] * 100

    # argsort is ascending: take the last top_k entries, best first.
    top_idx = np.argsort(scores)[-top_k:][::-1]

    plt.figure(figsize=(8, 6))
    # Show the image at its original resolution, not the resized model input.
    plt.imshow(image.load_img(img_path))
    plt.title(f'Prediction: {class_names[predicted_idx]}\nConfidence: {confidence:.1f}%', fontsize=14)
    plt.axis('off')
    plt.show()

    print(f'Top {top_k} Predictions:')
    for idx in top_idx:
        print(f'  {class_names[idx]}: {scores[idx]*100:.1f}%')

    return class_names[predicted_idx], confidence

In [None]:
# Smoke test: classify the first file listed in the validation folder of the
# first class. Point sample_img at another path to try your own image.
sample_class = CLASS_NAMES[0]
sample_dir = os.path.join(VALID_DIR, sample_class)
sample_files = os.listdir(sample_dir)
sample_img = os.path.join(sample_dir, sample_files[0])

predict_disease(img_path=sample_img, model=model, class_names=CLASS_NAMES)

## 13. Upload & Test Your Own Image

In [None]:
from google.colab import files

print('Upload a leaf image to test:')
uploaded = files.upload()

# Run the classifier on each uploaded file; the keys of `uploaded` are used
# as the image paths. NOTE(review): assumes files.upload() returns a dict
# keyed by file name and stores the files in the working directory - confirm.
for filename in uploaded:
    print(f'\nPredicting for: {filename}')
    predict_disease(filename, model, CLASS_NAMES)