In [None]:
import kagglehub

# Fetch the latest version of the dataset through kagglehub and show where it landed
DATASET_HANDLE = "soumiknafiul/plantvillage-dataset-labeled"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'plantvillage-dataset-labeled' dataset.
Path to dataset files: /kaggle/input/plantvillage-dataset-labeled


In [None]:
!pip install -q tensorflow tensorflow-addons scikit-learn


[31mERROR: Could not find a version that satisfies the requirement tensorflow-addons (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow-addons[0m[31m
[0m

In [None]:
import os
from pathlib import Path
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
from pathlib import Path

# Build the dataset root from the location returned by kagglehub (`path`, set in the
# download cell) instead of a hard-coded Kaggle mount point, so the notebook keeps
# working when the dataset is cached somewhere else.
ROOT = Path(path) / "PlantVillage Dataset (Labeled)"

# Image variants this notebook looks for, one sub-folder of ROOT each.
variants = [
    "Color Images",
    "Grayscale Images",
    "Segmented Images"
]

# Keep only the variants whose folder is actually present under ROOT.
available = [v for v in variants if (ROOT / v).is_dir()]
print("Found variants:", available)


Found variants: ['Color Images', 'Grayscale Images', 'Segmented Images']


In [None]:
import numpy as np

def gather_variant_paths(variant, root_dir=None):
    """Collect image paths and integer labels for one dataset variant.

    Every sub-directory of ``<root>/<variant>`` is treated as one class; class
    indices follow the sorted order of those directories.

    Args:
        variant: Name of the variant folder (e.g. "Color Images").
        root_dir: Directory that contains the variant folders. When omitted, the
            notebook-level ``ROOT`` is used.

    Returns:
        dict with keys ``variant``, ``root`` (str), ``class_dirs`` (list of Path),
        ``class_names`` (list of str), ``paths`` (list of str) and ``labels``
        (list of int, parallel to ``paths``).
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}

    # ROOT is only looked up when the caller did not supply an explicit root.
    root = Path(ROOT if root_dir is None else root_dir) / variant
    class_dirs = [p for p in sorted(root.iterdir()) if p.is_dir()]
    class_names = [p.name for p in class_dirs]
    paths, labels = [], []

    for idx, class_dir in enumerate(class_dirs):
        # Directory listing order depends on the filesystem; sorting keeps the file
        # order (and therefore any seeded split built from it) reproducible.
        for img in sorted(class_dir.iterdir()):
            # Skip sub-directories and anything that is not a recognised image type.
            if img.is_file() and img.suffix.lower() in image_suffixes:
                paths.append(str(img))
                labels.append(idx)

    return {
        "variant": variant,
        "root": str(root),
        "class_dirs": class_dirs,
        "class_names": class_names,
        "paths": paths,
        "labels": labels
    }

# Index every available variant and report its class and image counts.
data_info = {}
for v in available:
    info = gather_variant_paths(v)
    data_info[v] = info
    n_classes, n_images = len(info["class_names"]), len(info["paths"])
    print(f"{v}: classes={n_classes}, images={n_images}")


Color Images: classes=19, images=15915
Grayscale Images: classes=25, images=27023
Segmented Images: classes=18, images=17842


In [None]:
from sklearn.model_selection import train_test_split

# Per-variant stratified 80/10/10 train/val/test split, keyed by variant name.
split_info = {}
seed = 123

for v, info in data_info.items():
    paths, labels = np.array(info["paths"]), np.array(info["labels"])

    # Step 1: set aside 20% of the samples as a hold-out pool, stratified by label.
    train_paths, holdout_paths, train_labels, holdout_labels = train_test_split(
        paths,
        labels,
        test_size=0.2,
        random_state=seed,
        stratify=labels,
    )
    # Step 2: cut the hold-out pool in half to get the validation and test sets.
    val_paths, test_paths, val_labels, test_labels = train_test_split(
        holdout_paths,
        holdout_labels,
        test_size=0.5,
        random_state=seed,
        stratify=holdout_labels,
    )

    split_info[v] = dict(
        train_paths=train_paths,
        train_labels=train_labels,
        val_paths=val_paths,
        val_labels=val_labels,
        test_paths=test_paths,
        test_labels=test_labels,
        class_names=info["class_names"],
    )

    print(f"{v} → Train={len(train_paths)}, Val={len(val_paths)}, Test={len(test_paths)}")


Color Images → Train=12732, Val=1591, Test=1592
Grayscale Images → Train=21618, Val=2702, Test=2703
Segmented Images → Train=14273, Val=1784, Test=1785


In [None]:
import tensorflow as tf

# Input-pipeline settings shared by the dataset and model cells.
IMG_SIZE = (224, 224)        # target size handed to tf.image.resize and the model input
BATCH_SIZE = 32              # number of examples per batch
AUTOTUNE = tf.data.AUTOTUNE  # let tf.data choose parallelism / prefetch depth

def preprocess_path(path, label):
    """Read one image file and prepare it for the EfficientNet model.

    Args:
        path: Location of the image file to read.
        label: Label associated with the image; returned unchanged.

    Returns:
        ``(image, label)`` where ``image`` has been decoded with 3 channels,
        resized to ``IMG_SIZE`` and passed through EfficientNet's
        ``preprocess_input``.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False makes animated inputs decode to a single frame.
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(decoded, IMG_SIZE)
    return tf.keras.applications.efficientnet.preprocess_input(resized), label

def make_dataset(paths, labels, shuffle=False, augment=False):
    """Build a batched, prefetched ``tf.data`` pipeline from file paths and labels.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``paths``.
        shuffle: When true, shuffle the (path, label) pairs using a buffer that
            covers the whole dataset and the notebook-level ``seed``.
        augment: When true, apply random flip / rotation / zoom to every image.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, label_batch)`` pairs of
        ``BATCH_SIZE`` examples.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffling happens on the path strings, before any image is decoded.
        ds = ds.shuffle(buffer_size=len(paths), seed=seed)
    ds = ds.map(preprocess_path, num_parallel_calls=AUTOTUNE)

    if augment:
        aug = tf.keras.Sequential([
            tf.keras.layers.RandomFlip("horizontal_and_vertical"),
            tf.keras.layers.RandomRotation(0.2),
            tf.keras.layers.RandomZoom(0.15),
        ])
        # Random* layers only transform their input in training mode. Inside
        # Dataset.map there is no fit() context to provide that flag, so request it
        # explicitly instead of relying on a version-dependent default.
        ds = ds.map(lambda x, y: (aug(x, training=True), y), num_parallel_calls=AUTOTUNE)

    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)


In [None]:
from tensorflow.keras import layers, Model, Input

def build_model(num_classes):
    """Create and compile an EfficientNetB0 transfer-learning classifier.

    The ImageNet-pretrained backbone is frozen; only the pooling / dropout /
    softmax head on top of it is trainable.

    Args:
        num_classes: Number of output classes (units of the softmax layer).

    Returns:
        A compiled Keras ``Model`` using Adam (learning rate 1e-3), sparse
        categorical cross-entropy loss and an accuracy metric.
    """
    input_shape = IMG_SIZE + (3,)

    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
    )
    backbone.trainable = False

    inputs = Input(shape=input_shape)
    # training=False keeps the backbone in inference mode when it is called.
    features = backbone(inputs, training=False)
    pooled = layers.GlobalAveragePooling2D()(features)
    dropped = layers.Dropout(0.3)(pooled)
    outputs = layers.Dense(num_classes, activation="softmax")(dropped)

    model = Model(inputs, outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


In [None]:
# Train one classifier per dataset variant and keep it, with its class names, in `models`.
models = {}

for v, sp in split_info.items():
    print("\n==== Training:", v, "====")

    # Shuffle the training set so each epoch sees the examples in a new order;
    # without it every epoch replays identical batches. Validation stays unshuffled.
    train_ds = make_dataset(sp["train_paths"], sp["train_labels"], shuffle=True, augment=False)
    val_ds = make_dataset(sp["val_paths"], sp["val_labels"], shuffle=False)

    num_classes = len(sp["class_names"])
    model = build_model(num_classes)

    # Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    ]

    model.fit(train_ds, validation_data=val_ds, epochs=12, callbacks=callbacks)

    models[v] = {
        "model": model,
        "class_names": sp["class_names"]
    }



==== Training: Color Images ====
Epoch 1/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 180ms/step - accuracy: 0.6779 - loss: 1.2256 - val_accuracy: 0.9415 - val_loss: 0.2507
Epoch 2/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 52ms/step - accuracy: 0.9362 - loss: 0.2607 - val_accuracy: 0.9623 - val_loss: 0.1578
Epoch 3/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 52ms/step - accuracy: 0.9537 - loss: 0.1781 - val_accuracy: 0.9667 - val_loss: 0.1274
Epoch 4/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 52ms/step - accuracy: 0.9634 - loss: 0.1396 - val_accuracy: 0.9705 - val_loss: 0.1076
Epoch 5/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 53ms/step - accuracy: 0.9649 - loss: 0.1241 - val_accuracy: 0.9730 - val_loss: 0.0960
Epoch 6/12
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 56ms/step - accuracy: 0.9722 - loss: 0.1051 - val_accuracy: 0.9749

In [None]:
# Run `predict` on one sample image, requesting the "Color Images" variant.
# NOTE(review): `predict` is not defined in any cell visible in this part of the
# notebook — confirm the cell that defines it runs before this one.
sample_image = "/Himachal-Pradesh-apple-scab-photos.jpg"
res = predict(sample_image, variant="Color Images")
print(res)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
{'variant': 'Color Images', 'label': 'diseased', 'disease_name': 'Apple scab', 'probability': 0.813114583492279}


In [None]:
from pathlib import Path

# Output folder for the trained models
SAVE_DIR = Path("/content/saved_models")
SAVE_DIR.mkdir(exist_ok=True, parents=True)

# Save every trained model in its own sub-folder
for variant, entry in models.items():
    model = entry["model"]

    variant_dir = SAVE_DIR / variant.replace(" ", "_")
    variant_dir.mkdir(exist_ok=True)

    model_path = variant_dir / "model.h5"
    model.save(model_path)

    # The saved network only outputs class indices, and each variant has its own
    # class list. Store the names (one per line, in index order) next to the model
    # so predictions from a reloaded model can still be mapped back to labels.
    class_names_path = variant_dir / "class_names.txt"
    class_names_path.write_text("\n".join(entry["class_names"]), encoding="utf-8")

    print(f"[✓] Saved model for {variant} → {model_path}")




[✓] Saved model for Color Images → /content/saved_models/Color_Images/model.h5




[✓] Saved model for Grayscale Images → /content/saved_models/Grayscale_Images/model.h5
[✓] Saved model for Segmented Images → /content/saved_models/Segmented_Images/model.h5
