In [1]:
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2

In [2]:
# Paths to Kaggle dataset folders
train_dir = "/kaggle/input/healthy-and-diseased-plants/Project Data/Project Data/Train/images"
val_dir = "/kaggle/input/healthy-and-diseased-plants/Project Data/Project Data/Val/images"

# Paths for organized dataset (9 plant types)
output_train_dir = "/kaggle/working/Train_Organized"
output_val_dir = "/kaggle/working/Val_Organized"

In [3]:
# Define mapping for 9 plant types
plant_type_mapping = {
    "Apple___Apple_scab": "Apple",
    "Apple___Black_rot": "Apple",
    "Apple___Cedar_apple_rust": "Apple",
    "Apple___healthy": "Apple",
    "Cherry_(including_sour)___Powdery_mildew": "Cherry",
    "Cherry_(including_sour)___healthy": "Cherry",
    "Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot": "Corn",
    "Corn_(maize)___Common_rust_": "Corn",
    "Corn_(maize)___Northern_Leaf_Blight": "Corn",
    "Corn_(maize)___healthy": "Corn",
    "Grape___Black_rot": "Grape",
    "Grape___Esca_(Black_Measles)": "Grape",
    "Grape___Leaf_blight_(Isariopsis_Leaf_Spot)": "Grape",
    "Grape___healthy": "Grape",
    "Peach___Bacterial_spot": "Peach",
    "Peach___healthy": "Peach",
    "Pepper,_bell___Bacterial_spot": "Pepper",
    "Pepper,_bell___healthy": "Pepper",
    "Potato___Early_blight": "Potato",
    "Potato___Late_blight": "Potato",
    "Potato___healthy": "Potato",
    "Strawberry___Leaf_scorch": "Strawberry",
    "Strawberry___healthy": "Strawberry",
    "Tomato___Bacterial_spot": "Tomato",
    "Tomato___Early_blight": "Tomato",
    "Tomato___Late_blight": "Tomato",
    "Tomato___Leaf_Mold": "Tomato",
    "Tomato___Septoria_leaf_spot": "Tomato",
    "Tomato___Spider_mites Two-spotted_spider_mite": "Tomato",
    "Tomato___Target_Spot": "Tomato",
    "Tomato___Tomato_Yellow_Leaf_Curl_Virus": "Tomato",
    "Tomato___Tomato_mosaic_virus": "Tomato",
    "Tomato___healthy": "Tomato",
}

In [4]:
# Create directories for 9 plant types
for plant_type in set(plant_type_mapping.values()):
    os.makedirs(os.path.join(output_train_dir, plant_type), exist_ok=True)
    os.makedirs(os.path.join(output_val_dir, plant_type), exist_ok=True)

def organize_images(source_dir, target_dir):
    for category in os.listdir(source_dir):
        category_path = os.path.join(source_dir, category)
        if not os.path.isdir(category_path):
            continue
        
        # Check if category belongs to the mapping
        plant_type = plant_type_mapping.get(category)
        if plant_type:
            for image in os.listdir(category_path):
                source_image_path = os.path.join(category_path, image)
                target_image_path = os.path.join(target_dir, plant_type, image)
                shutil.copy(source_image_path, target_image_path)

# Organize training and validation images
organize_images(train_dir, output_train_dir)
organize_images(val_dir, output_val_dir)

print("✅ Images successfully organized into 9 plant types!")

✅ Images successfully organized into 9 plant types!


In [5]:
img_size = (224, 224)  # Resize images

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    output_train_dir,
    target_size=img_size,
    batch_size=32,
    class_mode="categorical"
)

val_generator = val_datagen.flow_from_directory(
    output_val_dir,
    target_size=img_size,
    batch_size=32,
    class_mode="categorical"
)

Found 1320 images belonging to 9 classes.
Found 330 images belonging to 9 classes.


In [6]:
# Load MobileNetV2 as the base model
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # Freeze the pretrained layers

# Build the model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(9, activation='softmax')  # 9 plant types
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Reduce learning rate if validation loss stops improving
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)

# Stop training if validation loss doesn't improve for 5 epochs
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,  # Train longer, but with early stopping
    callbacks=[lr_reduction, early_stop]
)

Epoch 1/20


  self._warn_if_super_not_called()


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 495ms/step - accuracy: 0.4743 - loss: 1.6816 - val_accuracy: 0.9030 - val_loss: 0.3774 - learning_rate: 0.0010
Epoch 2/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 273ms/step - accuracy: 0.8231 - loss: 0.5499 - val_accuracy: 0.9152 - val_loss: 0.2504 - learning_rate: 0.0010
Epoch 3/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 276ms/step - accuracy: 0.8685 - loss: 0.3739 - val_accuracy: 0.9152 - val_loss: 0.2734 - learning_rate: 0.0010
Epoch 4/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 272ms/step - accuracy: 0.8863 - loss: 0.3648 - val_accuracy: 0.9515 - val_loss: 0.1795 - learning_rate: 0.0010
Epoch 5/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 282ms/step - accuracy: 0.9096 - loss: 0.2571 - val_accuracy: 0.9394 - val_loss: 0.1757 - learning_rate: 0.0010
Epoch 6/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 

In [8]:
model.save("/kaggle/working/plant_type_classifier.h5")
print("✅ Model saved successfully!")

✅ Model saved successfully!


In [9]:
import os
import random
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
data_dir = "/kaggle/working/Train_Organized"  # Organized dataset from Stage A
categories = os.listdir(data_dir)

# Create pairs of images (Positive & Negative)
pairs = []
labels = []  # 1 = same class, 0 = different class
img_size = (224, 224)

def load_image(image_path):
    """ Load image and convert to array """
    img = load_img(image_path, target_size=img_size)
    img = img_to_array(img) / 255.0  # Normalize
    return img

# Generate Positive and Negative Pairs
for category in categories:
    category_path = os.path.join(data_dir, category)
    images = os.listdir(category_path)

# Positive Pairs (Same Disease)
    for i in range(len(images) - 1):
        img1 = load_image(os.path.join(category_path, images[i]))
        img2 = load_image(os.path.join(category_path, images[i + 1]))
        pairs.append((img1, img2))
        labels.append(1)  # Same class

    # Negative Pairs (Different Diseases)
    other_category = random.choice([c for c in categories if c != category])
    other_images = os.listdir(os.path.join(data_dir, other_category))

    img1 = load_image(os.path.join(category_path, random.choice(images)))
    img2 = load_image(os.path.join(data_dir, other_category, random.choice(other_images)))
    
    pairs.append((img1, img2))
    labels.append(0)  # Different class

# Convert to NumPy Arrays
pairs = np.array(pairs)
labels = np.array(labels)

print("✅ Image pairs generated:", len(pairs))

✅ Image pairs generated: 1320


In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model

# Create CNN Model for Feature Extraction
def build_base_model():
    base_model = models.Sequential([
        layers.Conv2D(64, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Flatten(),
        layers.Dense(512, activation='relu')
    ])
    return base_model

# Define Input Layers
input_A = Input(shape=(224, 224, 3))
input_B = Input(shape=(224, 224, 3))

# Create Two CNN Branches
base_model = build_base_model()
encoded_A = base_model(input_A)
encoded_B = base_model(input_B)

# Compute Euclidean Distance Between Features
distance = layers.Lambda(lambda x: tf.math.abs(x[0] - x[1]))([encoded_A, encoded_B])

# Fully Connected Layer
output = layers.Dense(1, activation='sigmoid')(distance)

# Build Siamese Model
siamese_model = Model(inputs=[input_A, input_B], outputs=output)

# Compile Model
siamese_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
siamese_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
history = siamese_model.fit(
    [pairs[:, 0], pairs[:, 1]], labels,
    batch_size=32,
    epochs=20,
    validation_split=0.2
)

Epoch 1/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 376ms/step - accuracy: 0.9455 - loss: 0.2569 - val_accuracy: 0.9886 - val_loss: 0.5944
Epoch 2/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 227ms/step - accuracy: 0.9892 - loss: 0.3828 - val_accuracy: 0.9886 - val_loss: 0.0888
Epoch 3/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 226ms/step - accuracy: 0.9968 - loss: 0.0542 - val_accuracy: 0.9886 - val_loss: 0.0883
Epoch 4/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 227ms/step - accuracy: 0.9942 - loss: 0.0494 - val_accuracy: 0.9886 - val_loss: 0.1489
Epoch 5/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 226ms/step - accuracy: 0.9912 - loss: 0.0819 - val_accuracy: 0.9697 - val_loss: 0.1833
Epoch 6/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 225ms/step - accuracy: 0.9933 - loss: 0.0239 - val_accuracy: 0.9621 - val_loss: 0.1509
Epoch 7/20
[1m33/33[0m [

In [12]:
siamese_model.save("/kaggle/working/siamese_disease_recognition.h5")
print("✅ Siamese Model Saved Successfully!")

✅ Siamese Model Saved Successfully!
