<a href="https://www.kaggle.com/code/logeswarig/notebookf81beea1c2?scriptVersionId=283165016" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, names_in_folder in os.walk('/kaggle/input'):
    for file_path in (os.path.join(folder, name) for name in names_in_folder):
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import cv2
import numpy as np
import pandas as pd

# Root folders of the two Kaggle datasets used in this notebook.
POTATO_PATH = "/kaggle/input/plantvillage-potato-disease-dataset"
MANGO_PATH = "/kaggle/input/mango-leaf-disease-dataset"

# Echo the configured locations so the cell output records them.
for caption, location in (("Potato path:", POTATO_PATH), ("Mango path:", MANGO_PATH)):
    print(caption, location)
def load_dataset(path, image_size=(128, 128)):
    """Load every decodable image under ``path`` into NumPy arrays.

    ``path`` is expected to contain one sub-folder per class; the folder name
    becomes the label of every image inside it.  Entries of ``path`` that are
    not directories are ignored.

    Loading is best-effort: a file that cannot be decoded or resized is
    reported and skipped instead of aborting the whole run.

    Args:
        path: Dataset root directory.
        image_size: Target size handed to ``cv2.resize`` (OpenCV interprets
            it as ``(width, height)``).  Defaults to ``(128, 128)``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays holding one entry per
        successfully loaded image.  Both arrays are empty when nothing could
        be loaded.

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    images = []
    labels = []
    skipped = 0

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and any seeded split derived from it) reproducible.
    for label in sorted(os.listdir(path)):
        class_dir = os.path.join(path, label)
        if not os.path.isdir(class_dir):
            continue  # only folders are treated as classes

        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)

            try:
                img = cv2.imread(img_path)
                if img is None:
                    # imread signals an unreadable file by returning None.
                    skipped += 1
                    print("Skipping (could not decode):", img_path)
                    continue
                img = cv2.resize(img, image_size)
            except Exception as exc:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt/SystemExit still stop a long load.
                skipped += 1
                print("Skipping", img_path, "-", exc)
                continue

            images.append(img)
            labels.append(label)

    if skipped:
        print("Skipped", skipped, "unreadable file(s) under", path)

    return np.array(images), np.array(labels)


In [None]:
# Read both datasets into memory with the load_dataset helper.
potato_images, potato_labels = load_dataset(POTATO_PATH)
mango_images, mango_labels = load_dataset(MANGO_PATH)

# Report the array shapes of each dataset.
for caption, image_shape, label_shape in (
    ("Potato dataset loaded:", potato_images.shape, potato_labels.shape),
    ("Mango dataset loaded:", mango_images.shape, mango_labels.shape),
):
    print(caption, image_shape, label_shape)


In [None]:
import os

print("Potato Dataset Structure:")
# For each directory, show its sub-folders and at most the first five files.
for current_dir, subfolders, file_names in os.walk("/kaggle/input/plantvillage-potato-disease-dataset"):
    sample_files = file_names[:5]
    print(current_dir, "| Dirs:", subfolders, "| Files:", sample_files)


In [None]:
import os
import numpy as np
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# ---------------------------------------------------
# Correct Dataset Paths
# ---------------------------------------------------
# The potato loader is pointed at the "PlantVillage" sub-directory of its dataset.
POTATO_PATH = "/kaggle/input/plantvillage-potato-disease-dataset/PlantVillage"
MANGO_PATH = "/kaggle/input/mango-leaf-disease-dataset"

# Target size (in pixels) every image is resized to on load.
IMG_HEIGHT, IMG_WIDTH = 224, 224

# ---------------------------------------------------
# Function to Load Dataset
# ---------------------------------------------------
def load_dataset(path):
    """Load, resize and rescale every readable image under ``path``.

    ``path`` is expected to contain one sub-folder per class; the folder name
    becomes the label of every image inside it.  Entries of ``path`` that are
    not directories are ignored.  Each image is resized to
    ``(IMG_HEIGHT, IMG_WIDTH)`` and divided by 255.0 (assumes 8-bit pixel
    values, giving a 0-1 range).

    Loading is best-effort: a file that cannot be loaded is reported and
    skipped instead of aborting the whole run.

    Args:
        path: Dataset root directory.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays holding one entry per
        successfully loaded image.  Both arrays are empty when nothing could
        be loaded.

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    images = []
    labels = []
    skipped = 0

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and any seeded split derived from it) reproducible.
    for label in sorted(os.listdir(path)):
        class_dir = os.path.join(path, label)
        if not os.path.isdir(class_dir):
            continue

        print("Loading:", label)

        for img_name in tqdm(sorted(os.listdir(class_dir))):
            img_path = os.path.join(class_dir, img_name)

            try:
                img = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                img = img_to_array(img)
            except Exception as exc:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt/SystemExit still stop a long load.
                skipped += 1
                # tqdm.write prints without breaking the active progress bar.
                tqdm.write(f"Skipping {img_path}: {exc}")
                continue

            # Normalization (0 to 1)
            images.append(img / 255.0)
            labels.append(label)

    if skipped:
        print("Skipped", skipped, "unreadable file(s) under", path)

    return np.array(images), np.array(labels)

# ---------------------------------------------------
# Load Both Datasets
# ---------------------------------------------------
# Read both datasets from disk.
potato_images, potato_labels = load_dataset(POTATO_PATH)
mango_images, mango_labels = load_dataset(MANGO_PATH)

for caption, image_shape, label_shape in (
    ("Potato dataset loaded:", potato_images.shape, potato_labels.shape),
    ("Mango dataset loaded:", mango_images.shape, mango_labels.shape),
):
    print(caption, image_shape, label_shape)

# ---------------------------------------------------
# Stack potato samples first, then mango samples, along the sample axis
# ---------------------------------------------------
X = np.concatenate((potato_images, mango_images))
y = np.concatenate((potato_labels, mango_labels))

print("\nFinal Combined Dataset:", X.shape, y.shape)


In [None]:
# ============================================
# 1. IMPORT LIBRARIES
# ============================================
import os
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator

# ============================================
# 2. DATASET PATHS
# ============================================
# The potato loader is pointed at the "PlantVillage" sub-directory of its dataset.
POTATO_PATH = "/kaggle/input/plantvillage-potato-disease-dataset/PlantVillage"
MANGO_PATH = "/kaggle/input/mango-leaf-disease-dataset"

# Target size (in pixels) every image is resized to on load.
IMG_HEIGHT, IMG_WIDTH = 224, 224

# ============================================
# 3. FUNCTION TO LOAD + RESIZE + NORMALIZE IMAGES
# ============================================
def load_dataset(path):
    """Load, resize and min-max normalize every readable image under ``path``.

    ``path`` is expected to contain one sub-folder per class; the folder name
    becomes the label of every image inside it.  Entries of ``path`` that are
    not directories are ignored.  Each image is resized to
    ``(IMG_HEIGHT, IMG_WIDTH)`` and rescaled per image with
    ``(img - min) / (max - min)``.

    Loading is best-effort: a file that cannot be loaded is reported and
    skipped instead of aborting the whole run.

    Args:
        path: Dataset root directory.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays holding one entry per
        successfully loaded image.  Both arrays are empty when nothing could
        be loaded.

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    images = []
    labels = []
    skipped = 0

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and the seeded splits derived from it) reproducible.
    for label in sorted(os.listdir(path)):
        class_dir = os.path.join(path, label)
        if not os.path.isdir(class_dir):
            continue

        print("Loading:", label)

        for img_name in tqdm(sorted(os.listdir(class_dir))):
            img_path = os.path.join(class_dir, img_name)

            try:
                # Load + Resize
                img = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                img = img_to_array(img)
            except Exception as exc:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt/SystemExit still stop a long load.
                skipped += 1
                # tqdm.write prints without breaking the active progress bar.
                tqdm.write(f"Skipping {img_path}: {exc}")
                continue

            # Pixel Normalization (Equation 1): per-image min-max scaling.
            pixel_min = img.min()
            pixel_range = img.max() - pixel_min
            if pixel_range > 0:
                img = (img - pixel_min) / pixel_range
            else:
                # A uniform image has max == min; dividing would produce an
                # all-NaN sample, so map it to zeros instead.
                img = np.zeros_like(img)

            images.append(img)
            labels.append(label)

    if skipped:
        print("Skipped", skipped, "unreadable file(s) under", path)

    return np.array(images), np.array(labels)

# ============================================
# 4. LOAD BOTH DATASETS
# ============================================
# Read both datasets from disk.
potato_images, potato_labels = load_dataset(POTATO_PATH)
mango_images, mango_labels = load_dataset(MANGO_PATH)

print("\nPotato dataset:", potato_images.shape, potato_labels.shape)
print("Mango dataset:", mango_images.shape, mango_labels.shape)

# --------------------------------------------
# 5. Merge: potato samples first, then mango, along the sample axis
# --------------------------------------------
X = np.concatenate((potato_images, mango_images))
y = np.concatenate((potato_labels, mango_labels))

print("\nCombined Dataset:", X.shape, y.shape)

# --------------------------------------------
# 6. Label encoding: folder-name strings -> integer class ids
# --------------------------------------------
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

print("\nClasses:", encoder.classes_)
print("Encoded labels example:", y_encoded[:10])

# --------------------------------------------
# 7. Data augmentation generator (only configured here)
# --------------------------------------------
augmentation_settings = {
    "rotation_range": 20,
    "horizontal_flip": True,
    "vertical_flip": True,
    "zoom_range": 0.2,
    "shear_range": 0.2,
    "brightness_range": (0.7, 1.3),
    "fill_mode": 'nearest',
}
augmentor = ImageDataGenerator(**augmentation_settings)

# --------------------------------------------
# 8. Hold out 20% as the test set, stratified by class
# --------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.20,
    stratify=y_encoded,
    random_state=42,
)

print("\nTraining samples:", len(X_train))
print("Testing samples:", len(X_test))

# --------------------------------------------
# 9. Carve 10% of the remaining training data off as validation
#    (roughly 8% of the full dataset), again stratified
# --------------------------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.10,
    stratify=y_train,
    random_state=42,
)

print("\nFinal Train:", X_train.shape)
print("Validation:", X_val.shape)
print("Test:", X_test.shape)


In [None]:
# ============================================
# 1. IMPORT LIBRARIES
# ============================================
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


# ============================================
# 2. BASELINE CNN MODEL (With Dropout = 0.4)
# ============================================
def build_baseline_cnn(input_shape=(224, 224, 3), num_classes=2, dropout_rate=0.4):
    """Assemble the baseline CNN (uncompiled).

    Architecture: three Conv2D(3x3, relu, same padding, he_normal) blocks with
    32, 64 and 128 filters, each followed by 2x2 max pooling; then Flatten,
    Dense(256, relu), Dropout and a softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Rate of the Dropout layer after the dense layer.

    Returns:
        The constructed ``Sequential`` model.
    """
    # Settings shared by all three convolution layers.
    conv_settings = dict(
        kernel_size=(3, 3),
        activation='relu',
        padding='same',
        kernel_initializer='he_normal',
    )

    return Sequential([
        # Convolution block 1 (also declares the input shape)
        Conv2D(32, input_shape=input_shape, **conv_settings),
        MaxPooling2D((2, 2)),
        # Convolution block 2
        Conv2D(64, **conv_settings),
        MaxPooling2D((2, 2)),
        # Convolution block 3
        Conv2D(128, **conv_settings),
        MaxPooling2D((2, 2)),
        # Classifier head
        Flatten(),
        Dense(256, activation='relu', kernel_initializer='he_normal'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax'),
    ])


# ============================================
# 3. COMPILE MODEL WITH ADAM (LR = 0.0001)
# ============================================
def compile_model(model, learning_rate=0.0001):
    """Compile ``model`` in place with Adam and sparse categorical cross-entropy.

    Args:
        model: Model to compile.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        The same ``model`` object, now compiled and tracking accuracy.
    """
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# ============================================
# 4. TRAINING FUNCTION WITH
#    • Early Stopping (patience=5)
#    • LR Scheduler (ReduceLROnPlateau)
# ============================================
def train_model(model, X_train, y_train, X_val, y_val,
                epochs=30, batch_size=32):
    """Fit ``model`` with early stopping and learning-rate reduction.

    Both callbacks watch ``val_loss``: training stops after 5 epochs without
    improvement (restoring the best weights), and the learning rate is
    multiplied by 0.2 after 2 such epochs, down to a floor of 1e-7.

    Args:
        model: Compiled model to train.
        X_train, y_train: Training inputs and labels.
        X_val, y_val: Validation inputs and labels.
        epochs: Maximum number of epochs.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        The value returned by ``model.fit``.
    """
    callbacks = [
        EarlyStopping(
            monitor="val_loss",
            patience=5,
            restore_best_weights=True,
        ),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.2,
            patience=2,
            min_lr=1e-7,
            verbose=1,
        ),
    ]

    return model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
    )


# ============================================
# 5. EVALUATION FUNCTION
# ============================================
def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on the test data and print accuracy and loss.

    Args:
        model: Object whose ``evaluate(X, y, verbose=...)`` yields a
            ``(loss, accuracy)`` pair.
        X_test: Test inputs.
        y_test: Test labels.

    Returns:
        The ``(loss, acc)`` tuple obtained from ``model.evaluate``.
    """
    results = model.evaluate(X_test, y_test, verbose=1)
    loss, acc = results

    print(f"\nTest Accuracy: {acc * 100:.2f}%")
    print(f"Test Loss: {loss:.4f}")

    return loss, acc


# ============================================
# 6. HOW TO USE
# ============================================
# num_classes = len(np.unique(y_train))

# model = build_baseline_cnn(input_shape=(224,224,3), num_classes=num_classes)
# model = compile_model(model, learning_rate=0.0001)
# history = train_model(model, X_train, y_train, X_val, y_val)
# evaluate_model(model, X_test, y_test)
# ============== RUN MODEL TRAINING ==============

# Size the output layer from the distinct label ids in the training split.
num_classes = np.unique(y_train).size

# Build the baseline network and compile it in one step.
model = compile_model(
    build_baseline_cnn(
        input_shape=(224, 224, 3),
        num_classes=num_classes,
        dropout_rate=0.4,
    ),
    learning_rate=0.0001,
)

print("\nTraining Started...\n")

history = train_model(model, X_train, y_train, X_val, y_val, epochs=30, batch_size=32)

print("\nTraining Completed!\n")

# ============== RUN MODEL EVALUATION ==============
# Kept as the final expression of the cell, as in the original.
evaluate_model(model, X_test, y_test)
