In [17]:
import os
import shutil

MERGED_ALL = "/kaggle/working/merged_all"
os.makedirs(MERGED_ALL, exist_ok=True)

IMAGE_EXTS = (".jpg", ".jpeg", ".png")

DATASET_PATHS = [
    "/kaggle/input/new-plant-diseases-dataset",
    "/kaggle/input/plantdisease",
    "/kaggle/input/20k-multi-class-crop-disease-images"
]

CROPS = [
    "apple", "tomato", "potato", "corn", "maize", "rice",
    "cotton", "sugarcane", "pepper", "grape", "cherry",
    "peach", "soybean", "blueberry", "raspberry", "squash"
]

def get_crop(name):
    name = name.lower()
    for crop in CROPS:
        if crop in name:
            return crop.capitalize()
    return "UnknownCrop"

DISEASE_TYPE_MAP = {
    "black rot": "Fungal",
    "early blight": "Fungal",
    "late blight": "Fungal",
    "leaf blight": "Fungal",
    "leaf spot": "Fungal",
    "rust": "Fungal",
    "anthracnose": "Fungal",
    "powdery mildew": "Fungal",
    "downy mildew": "Fungal",
    "scab": "Fungal",
    "bacterial": "Bacterial",
    "mosaic": "Viral",
    "yellow leaf curl": "Viral",
    "tungro": "Viral",
    "armyworm": "Pest",
    "aphid": "Pest",
    "mite": "Pest"
}

def get_disease_type(name):
    name = name.lower()
    if "healthy" in name:
        return "Healthy"
    for k, v in DISEASE_TYPE_MAP.items():
        if k in name:
            return v
    return "Disease"

def process_class_folder(src_folder, folder_name):
    crop = get_crop(folder_name)
    disease = get_disease_type(folder_name)
    final_label = f"{crop}__{disease}"

    dst = os.path.join(MERGED_ALL, final_label)
    os.makedirs(dst, exist_ok=True)

    for img in os.listdir(src_folder):
        if img.lower().endswith(IMAGE_EXTS):
            shutil.copy2(
                os.path.join(src_folder, img),
                os.path.join(dst, img)
            )

def fast_merge_dataset(dataset_path):
    print("Processing:", dataset_path)
    for top in os.listdir(dataset_path):
        top_path = os.path.join(dataset_path, top)

        if not os.path.isdir(top_path):
            continue

        if top.lower() in ["train", "val", "valid", "validation", "test"]:
            for cls in os.listdir(top_path):
                cls_path = os.path.join(top_path, cls)
                if os.path.isdir(cls_path):
                    process_class_folder(cls_path, cls)
        else:
            process_class_folder(top_path, top)

for dataset in DATASET_PATHS:
    fast_merge_dataset(dataset)

print("✅ Merge completed.")

Processing: /kaggle/input/new-plant-diseases-dataset
Processing: /kaggle/input/plantdisease
Processing: /kaggle/input/20k-multi-class-crop-disease-images
✅ Merge completed.


In [18]:
import os
print("merged_all exists:", os.path.exists("/kaggle/working/merged_all"))
print("Classes:", os.listdir("/kaggle/working/merged_all")[:10])

merged_all exists: True
Classes: ['Cotton__Healthy', 'Sugarcane__Fungal', 'Sugarcane__Viral', 'Maize__Disease', 'Maize__Healthy', 'Sugarcane__Disease', 'UnknownCrop__Pest', 'UnknownCrop__Healthy', 'Cotton__Pest', 'UnknownCrop__Viral']


In [19]:
import os
import shutil
import random

MERGED_ALL = "/kaggle/working/merged_all"
FINAL_DATASET = "/kaggle/working/FinalDataset"

def split_dataset(src_dir, dest_dir, split=(0.7, 0.2, 0.1)):
    os.makedirs(dest_dir, exist_ok=True)

    for cls in os.listdir(src_dir):
        cls_path = os.path.join(src_dir, cls)
        if not os.path.isdir(cls_path):
            continue

        images = os.listdir(cls_path)
        random.shuffle(images)

        n = len(images)
        t = int(0.7 * n)
        v = int(0.9 * n)

        for phase, subset in zip(
            ["train", "val", "test"],
            [images[:t], images[t:v], images[v:]]
        ):
            out = os.path.join(dest_dir, phase, cls)
            os.makedirs(out, exist_ok=True)

            for img in subset:
                shutil.copy2(
                    os.path.join(cls_path, img),
                    os.path.join(out, img)
                )

split_dataset(MERGED_ALL, FINAL_DATASET)
print("✅ Dataset split completed.")

✅ Dataset split completed.


In [20]:
print(os.listdir("/kaggle/working/FinalDataset"))

['train', 'test', 'val']


In [21]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [22]:
DATASET_DIR = "/kaggle/working/FinalDataset"

TRAIN_DIR = os.path.join(DATASET_DIR, "train")
VAL_DIR   = os.path.join(DATASET_DIR, "val")
TEST_DIR  = os.path.join(DATASET_DIR, "test")

assert os.path.exists(TRAIN_DIR), "❌ Train directory not found"
assert os.path.exists(VAL_DIR),   "❌ Validation directory not found"
assert os.path.exists(TEST_DIR),  "❌ Test directory not found"

print("✅ Dataset paths verified")

✅ Dataset paths verified


In [23]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 3

In [24]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    zoom_range=0.2,
    horizontal_flip=True
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

In [25]:
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

val_generator = val_test_datagen.flow_from_directory(
    VAL_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

test_generator = val_test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

Found 14893 images belonging to 18 classes.
Found 5881 images belonging to 18 classes.
Found 3138 images belonging to 18 classes.


In [26]:
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3)
)

base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [27]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.3)(x)
output = Dense(train_generator.num_classes, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output)

In [28]:
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [29]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)

  self._warn_if_super_not_called()


Epoch 1/3
[1m225/466[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m2:07[0m 530ms/step - accuracy: 0.3078 - loss: 2.3679



[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 551ms/step - accuracy: 0.4084 - loss: 2.0241



[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 664ms/step - accuracy: 0.4087 - loss: 2.0230 - val_accuracy: 0.7613 - val_loss: 0.8397
Epoch 2/3
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 572ms/step - accuracy: 0.7127 - loss: 0.9535 - val_accuracy: 0.8291 - val_loss: 0.5947
Epoch 3/3
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 566ms/step - accuracy: 0.7762 - loss: 0.7296 - val_accuracy: 0.8568 - val_loss: 0.5020


In [30]:
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 282ms/step - accuracy: 0.8411 - loss: 0.5566
Test Accuracy: 85.02%


In [31]:
MODEL_PATH = "/kaggle/working/crop_disease_mobilenetv2.h5"
model.save(MODEL_PATH)
print(f"Model saved at {MODEL_PATH}")



Model saved at /kaggle/working/crop_disease_mobilenetv2.h5


In [32]:
def get_severity(confidence):
    if confidence > 0.85:
        return "High"
    elif confidence > 0.60:
        return "Medium"
    else:
        return "Low"

In [33]:
MAINTENANCE_MAP = {
    "Fungal": "Apply recommended fungicide and avoid excess moisture.",
    "Bacterial": "Remove infected parts and avoid overhead irrigation.",
    "Viral": "Control insect vectors and remove infected plants.",
    "Pest": "Use eco-friendly pesticide or neem-based solutions.",
    "Healthy": "No action required. Maintain monitoring.",
    "Disease": "Consult agricultural expert."
}

In [34]:
from tensorflow.keras.preprocessing import image

class_labels = list(train_generator.class_indices.keys())

def predict_crop_disease(img_path):
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img = image.img_to_array(img) / 255.0
    img = np.expand_dims(img, axis=0)

    preds = model.predict(img)[0]
    idx = np.argmax(preds)

    label = class_labels[idx]
    confidence = float(preds[idx])

    crop, disease_type = label.split("__")
    severity = get_severity(confidence)
    maintenance = MAINTENANCE_MAP.get(disease_type, "General care advised.")

    return {
        "crop": crop,
        "disease_type": disease_type,
        "confidence": confidence,
        "severity": severity,
        "maintenance": maintenance
    }

In [35]:
result = predict_crop_disease("/kaggle/input/new-plant-diseases-dataset/test/test/TomatoEarlyBlight6.JPG")
print(result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step
{'crop': 'UnknownCrop', 'disease_type': 'Disease', 'confidence': 0.7281001210212708, 'severity': 'Medium', 'maintenance': 'Consult agricultural expert.'}


In [36]:
model.save("plant_disease_model.h5")



In [37]:
import os
os.listdir("/content")

['.config', 'plant_disease_model.h5', 'merged_all', 'sample_data']

In [38]:
from google.colab import files
files.download("plant_disease_model.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>