In [None]:
# Install Kaggle (if not present) and import libraries
!pip install -q kaggle

# Standard imports
import os, shutil, random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

print("TensorFlow version:", tf.__version__)

# tf.test.is_gpu_available() is deprecated; query the visible GPU devices instead.
# The list is empty when TensorFlow cannot see any GPU.
print("GPU available:", len(tf.config.list_physical_devices('GPU')) > 0)


In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
# Download the emmarex/plantdisease dataset archive with the Kaggle CLI.
!kaggle datasets download -d emmarex/plantdisease

# Unzip into folder 'plant_disease', then list its contents (first 120 lines of
# the listing) to confirm the extraction.
# NOTE(review): the cells below read images from
# "/kaggle/input/plantdisease/PlantVillage", not from this 'plant_disease'
# folder — confirm whether this download step is still needed.
!unzip -q plantdisease.zip -d plant_disease
!ls -la plant_disease | sed -n '1,120p'

In [None]:
# Source folder holding the original images; the split cell below treats every
# sub-directory of it as one class.
data_dir = "/kaggle/input/plantdisease/PlantVillage"   # dataset folder
# Root under which the binary train/val/test folders are created.
base_dir = "/kaggle/working/dataset_binary"


In [None]:
# Build the binary dataset layout: <base_dir>/<split>/<Healthy|Diseased>

data_dir = "/kaggle/input/plantdisease/PlantVillage"
base_dir = "/kaggle/working/dataset_binary"

import shutil
from sklearn.model_selection import train_test_split

split_names = ("train", "val", "test")
binary_labels = ("Healthy", "Diseased")
image_suffixes = ('.png', '.jpg', '.jpeg')

# Make sure every <split>/<label> target folder exists.
target_folders = [
    os.path.join(base_dir, split_name, binary_label)
    for split_name in split_names
    for binary_label in binary_labels
]
for target_folder in target_folders:
    os.makedirs(target_folder, exist_ok=True)

# Walk the original class folders and copy each image into its binary folder.
for class_name in sorted(os.listdir(data_dir)):
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        continue

    # Any folder whose name mentions "healthy" is Healthy; everything else is Diseased.
    binary_label = "Diseased" if "healthy" not in class_name.lower() else "Healthy"

    image_paths = []
    for file_name in os.listdir(class_dir):
        if file_name.lower().endswith(image_suffixes):
            image_paths.append(os.path.join(class_dir, file_name))
    if not image_paths:
        continue

    # 80% train, then the remaining 20% is halved into validation and test.
    train_paths, holdout_paths = train_test_split(image_paths, test_size=0.2, random_state=42)
    val_paths, test_paths = train_test_split(holdout_paths, test_size=0.5, random_state=42)

    partitions = (("train", train_paths), ("val", val_paths), ("test", test_paths))
    for split_name, paths in partitions:
        destination = os.path.join(base_dir, split_name, binary_label)
        for source_path in paths:
            try:
                shutil.copy(source_path, destination)
            except Exception as copy_error:
                # Best effort: report the failure and carry on with the next file.
                print("Could not copy", source_path, "=>", copy_error)

# Report how many images ended up in each split/label folder.
for split_name in split_names:
    print("\nCounts in", split_name)
    for binary_label in binary_labels:
        folder = os.path.join(base_dir, split_name, binary_label)
        image_count = sum(1 for entry in os.listdir(folder) if entry.lower().endswith(image_suffixes))
        print(f"  {binary_label}: {image_count}")

In [None]:
# Image generators for the train / val / test folders
IMG_SIZE = (224,224)
BATCH_SIZE = 32

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
# Validation and test images are only rescaled to [0, 1].
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators.
shared_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

train_gen = train_datagen.flow_from_directory(os.path.join(base_dir, 'train'), **shared_flow_options)
val_gen = val_test_datagen.flow_from_directory(os.path.join(base_dir, 'val'), **shared_flow_options)
# shuffle=False keeps the batch order aligned with test_gen.classes.
test_gen = val_test_datagen.flow_from_directory(os.path.join(base_dir, 'test'), shuffle=False, **shared_flow_options)


In [None]:
import os, shutil
from sklearn.model_selection import train_test_split

# Kaggle dataset path
data_dir = "/kaggle/input/plantdisease/PlantVillage"   # source: every sub-directory is treated as one class
base_dir = "/kaggle/working/dataset_binary"            # destination of the binary train/val/test copy

SPLIT_NAMES = ("train", "val", "test")
BINARY_LABELS = ("Healthy", "Diseased")
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
# The 80/10/10 split needs at least 2 hold-out images (ceil(0.2 * n) >= 2, i.e. n >= 6);
# with fewer, train_test_split raises ValueError because one side would be empty.
MIN_IMAGES_FOR_SPLIT = 6

# Start from an empty destination so that files left behind by an earlier run
# (possibly split differently) cannot end up in two splits at once.
shutil.rmtree(base_dir, ignore_errors=True)

# Create binary directories
for split in SPLIT_NAMES:
    for cls in BINARY_LABELS:
        os.makedirs(os.path.join(base_dir, split, cls), exist_ok=True)

# Copy images into binary folders
for cls in sorted(os.listdir(data_dir)):
    cls_path = os.path.join(data_dir, cls)
    if not os.path.isdir(cls_path):
        continue
    # Folders whose name contains "healthy" are Healthy; everything else is Diseased.
    label = "Healthy" if "healthy" in cls.lower() else "Diseased"
    # Sorted on purpose: os.listdir order is arbitrary, and random_state only
    # reproduces the same split when the input order is the same.
    imgs = sorted(
        os.path.join(cls_path, f)
        for f in os.listdir(cls_path)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    if len(imgs) == 0:
        continue
    if len(imgs) < MIN_IMAGES_FOR_SPLIT:
        print(f"Only {len(imgs)} image(s) in {cls}; too few to split, using all of them for training")
        train, val, test = imgs, [], []
    else:
        # 80% train, remaining 20% halved into validation and test.
        train, test = train_test_split(imgs, test_size=0.2, random_state=42)
        val, test = train_test_split(test, test_size=0.5, random_state=42)
    for split, subset in zip(SPLIT_NAMES, (train, val, test)):
        subset_dir = os.path.join(base_dir, split, label)
        for img in subset:
            target = os.path.join(subset_dir, os.path.basename(img))
            if os.path.exists(target):
                # Another source class already contributed a file with this name;
                # prefix the class name instead of silently overwriting it.
                target = os.path.join(subset_dir, f"{cls}__{os.path.basename(img)}")
            try:
                shutil.copy(img, target)
            except OSError as e:
                # Best effort: report the failure and continue with the next file.
                print("Could not copy", img, "=>", e)

# Quick counts
for split in SPLIT_NAMES:
    print("\nCounts in", split)
    for cls in BINARY_LABELS:
        p = os.path.join(base_dir, split, cls)
        n = len([f for f in os.listdir(p) if f.lower().endswith(IMAGE_EXTENSIONS)])
        print(f"  {cls}: {n}")


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os


# Root of the binary dataset written by the split step
base_dir = "/kaggle/working/dataset_binary"

# Input resolution and batch size shared by all three generators
IMG_SIZE = (224,224)
BATCH_SIZE = 32


def _directory_flow(datagen, split_name, **extra_options):
    """Iterate over base_dir/<split_name> with the shared size, batch and label settings."""
    return datagen.flow_from_directory(
        os.path.join(base_dir, split_name),
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        **extra_options
    )


# Training images: rescale to [0, 1] plus random augmentation
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=20,
    rescale=1./255,
)
# Validation/test images: rescale only
val_test_datagen = ImageDataGenerator(rescale=1./255)

# One iterator per split; the test iterator is not shuffled so that its batch
# order stays aligned with test_gen.classes.
train_gen = _directory_flow(train_datagen, 'train')
val_gen = _directory_flow(val_test_datagen, 'val')
test_gen = _directory_flow(val_test_datagen, 'test', shuffle=False)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# ---------------- CNN MODEL ----------------
# Baseline network: two Conv/MaxPool stages, then a dense head ending in a single
# sigmoid unit. The input shape follows IMG_SIZE so it always matches the
# target_size used by the generators.
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=IMG_SIZE + (3,)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # binary classification
])

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train CNN
history_cnn = cnn_model.fit(train_gen, validation_data=val_gen, epochs=10)


# ---------------- Evaluate CNN ----------------
# Class names ordered by the integer label the generator assigned to them
# (flow_from_directory sorts the folder names, so index 0 is "Diseased" and
# index 1 is "Healthy" for this dataset layout).
class_names = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
label_ids = list(range(len(class_names)))

# test_gen was created with shuffle=False, so predictions line up with .classes.
y_true = test_gen.classes
y_prob_cnn = cnn_model.predict(test_gen)
# The sigmoid output has shape (N, 1); flatten the thresholded labels to (N,)
# so they have the same shape as y_true.
y_pred_cnn = (y_prob_cnn > 0.5).astype('int32').ravel()

# Passing labels explicitly keeps the report valid even if one class is never predicted.
print("✅ CNN Classification Report:\n",
      classification_report(y_true, y_pred_cnn, labels=label_ids, target_names=class_names))





In [None]:
# ---------------- MobileNetV2 MODEL ----------------
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input, Rescaling

# The generators deliver pixels in [0, 1] (rescale=1./255), while the ImageNet
# weights of MobileNetV2 expect inputs scaled to [-1, 1]. Map [0, 1] -> [-1, 1]
# inside the model so that every caller can keep feeding /255 images.
# The rescaled tensor is passed as `input_tensor` (instead of calling the backbone
# as a nested model) so the backbone layers stay at the top level of
# mobilenet_model.layers, where the Grad-CAM cell searches for the last Conv2D.
image_input = Input(shape=(224,224,3))
rescaled_input = Rescaling(scale=2.0, offset=-1.0)(image_input)

base_model = MobileNetV2(weights='imagenet', include_top=False,
                         input_shape=(224,224,3), input_tensor=rescaled_input)
base_model.trainable = False  # freeze base layers

# Classification head: global pooling -> dropout -> single sigmoid unit.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

mobilenet_model = Model(inputs=image_input, outputs=output)
mobilenet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train MobileNetV2
history_mobilenet = mobilenet_model.fit(train_gen, validation_data=val_gen, epochs=10)


# ---------------- Evaluate MobileNetV2 ----------------
# Class names ordered by the integer label assigned by the generator
# (folder names are sorted, so index 0 is "Diseased" and index 1 is "Healthy").
class_names = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
label_ids = list(range(len(class_names)))

y_prob_mnet = mobilenet_model.predict(test_gen)
# Flatten the thresholded (N, 1) sigmoid output to (N,) to match y_true.
y_pred_mnet = (y_prob_mnet > 0.5).astype('int32').ravel()

# y_true comes from the CNN evaluation cell (test_gen.classes).
print("✅ MobileNetV2 Classification Report:\n",
      classification_report(y_true, y_pred_mnet, labels=label_ids, target_names=class_names))


# ---------------- Confusion Matrix (MobileNetV2) ----------------
# Tick labels must follow the label order of the matrix rows/columns (0, 1, ...),
# so they are taken from the generator's class mapping instead of being hard-coded.
cm = confusion_matrix(y_true, y_pred_mnet, labels=label_ids)
plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix (MobileNetV2)")
plt.show()

In [None]:
# ---------------- Summary Table ----------------
# One row per model, one column per metric. The scorers are called with their
# defaults, so precision/recall/F1 are reported for the class labelled 1.
predictions_per_model = (y_pred_cnn, y_pred_mnet)
metric_functions = (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)

results = {"Model": ["CNN","MobileNetV2"]}
for metric_name, metric_function in metric_functions:
    results[metric_name] = [metric_function(y_true, predicted) for predicted in predictions_per_model]

df_results = pd.DataFrame(results)
print("\n✅ Summary Table:\n", df_results)

In [None]:
# Grad-CAM example for MobileNetV2 (choose a conv layer name if needed)
import tensorflow as tf
import cv2, numpy as np
def get_gradcam_heatmap(model, img_array, last_conv_layer_name):
    """Compute a Grad-CAM heatmap for the first image in ``img_array``.

    Args:
        model: Keras model whose first output column (``predictions[:, 0]``)
            is the score to explain.
        img_array: Batch fed to the model; it must be preprocessed the same way
            as the model's training inputs. Only the first item's activations
            are used for the heatmap.
        last_conv_layer_name: Name of the layer in ``model`` whose output
            activations are weighted to form the heatmap.

    Returns:
        A NumPy array with negative values clipped to 0 and the remaining
        values scaled by the maximum (plus a small epsilon), i.e. in [0, 1).
    """
    # Pass model.inputs as-is: wrapping it in another list creates a nested
    # input structure that does not match how the model is actually called.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output],
    )
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:, 0]
    # Gradient of the score w.r.t. the layer activations, averaged over
    # axes (0, 1, 2) to get one importance weight per channel.
    grads = tape.gradient(loss, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # Weighted sum of the first image's channels.
    weighted_map = tf.squeeze(conv_outputs[0] @ pooled_grads[..., tf.newaxis])
    # Clip first, then normalise by the max of the clipped map; the epsilon
    # keeps an all-zero map from dividing by zero.
    heatmap = np.maximum(weighted_map.numpy(), 0)
    return heatmap / (heatmap.max() + 1e-8)

# Use a test image
if len(test_gen.filepaths) > 0:
    img_path = test_gen.filepaths[0]
    img = tf.keras.utils.load_img(img_path, target_size=IMG_SIZE)
    # Same /255 scaling as the data generators apply.
    img_array = tf.keras.utils.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    # Last conv layer name in MobileNetV2 often 'Conv_1' but may vary, so search
    # the layers from the output side for the first Conv2D instance.
    layer_name = next(
        (layer.name for layer in reversed(mobilenet_model.layers)
         if isinstance(layer, tf.keras.layers.Conv2D)),
        None,
    )
    if layer_name is None:
        # Without a conv layer there is nothing to visualise; say so instead of
        # passing None on to get_layer().
        print("No Conv2D layer found in mobilenet_model; skipping Grad-CAM.")
    else:
        print("Using last conv layer:", layer_name)
        heatmap = get_gradcam_heatmap(mobilenet_model, img_array, layer_name)
        plt.matshow(heatmap)
        plt.title("Grad-CAM heatmap")
        plt.show()
else:
    print("No test images found.")

In [None]:
# Save trained models to Drive (optional)
# Only export when the Drive mount point actually exists; otherwise makedirs
# would just create a plain local folder under /content that is not Google Drive.
drive_root = '/content/drive/MyDrive'
if os.path.isdir(drive_root):
    os.makedirs('/content/drive/MyDrive/plant_models', exist_ok=True)
    cnn_model.save('/content/drive/MyDrive/plant_models/cnn_model.keras')
    mobilenet_model.save('/content/drive/MyDrive/plant_models/mobilenet_model.keras')
    print("Models saved to /content/drive/MyDrive/plant_models/")
else:
    print("Google Drive is not mounted at /content/drive/MyDrive; skipping Drive export.")

In [None]:
# ✅ Write both trained models to the Kaggle working directory
import os

save_dir = "/kaggle/working/plant_models"
os.makedirs(save_dir, exist_ok=True)

# (model, file name) pairs, saved in this order.
models_to_save = (
    (cnn_model, "cnn_model.keras"),
    (mobilenet_model, "mobilenet_model.keras"),
)
for trained_model, file_name in models_to_save:
    trained_model.save(os.path.join(save_dir, file_name))

print(f"✅ Models saved in {save_dir}")


In [None]:
import os

# Root folder of the attached Kaggle dataset.
# Note: this rebinds data_dir to the dataset root (one level above "PlantVillage").
data_dir = "/kaggle/input/plantdisease"

# Show what the dataset root contains
root_entries = os.listdir(data_dir)
print(root_entries)


In [None]:
# ---------------- Training Curves (CNN) ----------------
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12,5))

cnn_curves = history_cnn.history

# Accuracy
accuracy_ax.plot(cnn_curves['accuracy'], label='Train Accuracy')
accuracy_ax.plot(cnn_curves['val_accuracy'], label='Val Accuracy')
accuracy_ax.set_title("CNN Accuracy")
accuracy_ax.set_xlabel("Epochs")
accuracy_ax.set_ylabel("Accuracy")
accuracy_ax.legend()

# Loss
loss_ax.plot(cnn_curves['loss'], label='Train Loss')
loss_ax.plot(cnn_curves['val_loss'], label='Val Loss')
loss_ax.set_title("CNN Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

plt.show()




In [None]:
# ---------------- Training Curves (MobileNetV2) ----------------
# Left panel: accuracy per epoch; right panel: loss per epoch (train vs. validation).
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12,5))

mobilenet_curves = history_mobilenet.history

# Accuracy
accuracy_ax.plot(mobilenet_curves['accuracy'], label='Train Accuracy')
accuracy_ax.plot(mobilenet_curves['val_accuracy'], label='Val Accuracy')
accuracy_ax.set_title("MobileNetV2 Accuracy")
accuracy_ax.set_xlabel("Epochs")
accuracy_ax.set_ylabel("Accuracy")
accuracy_ax.legend()

# Loss
loss_ax.plot(mobilenet_curves['loss'], label='Train Loss')
loss_ax.plot(mobilenet_curves['val_loss'], label='Val Loss')
loss_ax.set_title("MobileNetV2 Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

plt.show()