In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [3]:
DATASET_PATH = 'dataset/crop_diseases'
CACHE_DIR = 'dataset/cache'

In [4]:
def load_images(dataset_path, cache, batch_size=500):
    batch_images = []
    batch_labels = []
    label_map = {}
    all_batch_files = []    

    # Load from cache if exists
    batch_files = [f for f in os.listdir(cache) if f.startswith("images_batch_")]
    if batch_files:
        print("🔁 Loading cached batches...")
        # load all batches
        all_batch_files = []
        for batch_index in range(len(batch_files)):
            image_file = os.path.join(cache, f'images_batch_{batch_index}.npy')
            label_file = os.path.join(cache, f'labels_batch_{batch_index}.npy')
            all_batch_files.append((image_file, label_file))
            
        images = np.concatenate([np.load(file[0]) for file in all_batch_files], axis=0)
        labels = np.concatenate([np.load(file[1]) for file in all_batch_files], axis=0)
        label_map = np.load(os.path.join(cache, "label_map.npy"), allow_pickle=True).item()
        print("✅ Loaded cached batches.")
        print(f"🗺️ Label map loaded with {len(label_map)} classes.")
        
    else:
        print("🧪 Preprocessing and caching data...")
        if not os.path.exists(cache):
            os.makedirs(cache)

        class_id = 0
        batch_index = 0
        for folder_name in os.listdir(dataset_path):
            folder_path = os.path.join(dataset_path, folder_name)
            if os.path.isdir(folder_path):
                label_map[folder_name] = class_id
                for image_name in os.listdir(folder_path):
                    image_path = os.path.join(folder_path, image_name)
                    image = cv2.imread(image_path)
                    if image is not None:
                        image = cv2.resize(image, (128, 128))
                        image = image.astype(np.float32)/ 255.0
                        batch_images.append(image)
                        batch_labels.append(class_id)
                        # Save batch
                    if len(batch_images) == batch_size:
                        image_file = os.path.join(CACHE_DIR, f'images_batch_{batch_index}.npy')
                        label_file = os.path.join(CACHE_DIR, f'labels_batch_{batch_index}.npy')

                        np.save(image_file, np.array(batch_images, dtype=np.float32))
                        np.save(label_file, np.array(batch_labels))

                        all_batch_files.append((image_file, label_file))
                        print(f"✅ Saved batch {batch_index} with {len(batch_images)} images.")

                        batch_images = []
                        batch_labels = []
                        batch_index += 1
                class_id += 1
        if batch_images:
            image_file = os.path.join(CACHE_DIR, f'images_batch_{batch_index}.npy')
            label_file = os.path.join(CACHE_DIR, f'labels_batch_{batch_index}.npy')

            np.save(image_file, np.array(batch_images, dtype=np.float32))
            np.save(label_file, np.array(batch_labels))

            all_batch_files.append((image_file, label_file))
            print(f"✅ Saved final batch {batch_index} with {len(batch_images)} images.")

        np.save(os.path.join(CACHE_DIR, "label_map.npy"), label_map)
        print("🗺️ Label map saved.")
        images = np.concatenate([np.load(file[0]) for file in all_batch_files], axis=0)
        labels = np.concatenate([np.load(file[1]) for file in all_batch_files], axis=0)
    return images, labels, label_map

In [5]:
X, y, label_map = load_images(DATASET_PATH, CACHE_DIR)

🔁 Loading cached batches...
✅ Loaded cached batches.
🗺️ Label map loaded with 71 classes.


In [6]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
y_train = to_categorical(y_train, num_classes=len(label_map))
y_val = to_categorical(y_val, num_classes=len(label_map))

In [8]:
print(f"Training samples: {len(X_train)}, Validation samples: {len(X_val)}")
print(f"Label map: {label_map}")
print(f"Number of classes: {len(label_map)}")

Training samples: 24086, Validation samples: 6022
Label map: {'Cauliflower___Bacterial_spot_rot': 0, 'Cauliflower___Black_Rot': 1, 'Cauliflower___Downy_Mildew': 2, 'Cauliflower___Healthy': 3, 'Coffee___Healthy': 4, 'Coffee___Leaf_Miner': 5, 'Coffee___Phoma': 6, 'Coffee___Red_Spider_Mite': 7, 'Coffee___Rust': 8, 'Corn_(maize)___Cercospora_Leaf_Spot': 9, 'Corn_(maize)___Common_Rust': 10, 'Corn_(maize)___Healthy': 11, 'Corn_(maize)___Northern_Leaf_Blight': 12, 'Cotton___Aphids': 13, 'Cotton___Army_Worm': 14, 'Cotton___Bacterial_Blight': 15, 'Cotton___Healthy': 16, 'Cotton___Powdery_Mildew': 17, 'Cotton___Target_Spot': 18, 'Eggplant___Healthy': 19, 'Eggplant___Insect_Pest': 20, 'Eggplant___Leaf_Spot': 21, 'Eggplant___Mosaic_Virus': 22, 'Eggplant___White_Mold': 23, 'Eggplant___Wilt': 24, 'Mango___Anthracnose': 25, 'Mango___Bacterial_Canker': 26, 'Mango___Die_Back': 27, 'Mango___Gall_Midge': 28, 'Mango___Healthy': 29, 'Mango___Powdery_Mildew': 30, 'Mango___Sooty_Mould': 31, 'Potato___Early_B

In [9]:
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator()
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32)

In [10]:
cnn = Sequential()

# Block 1
cnn.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 128, 3),
               kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(Conv2D(32, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.25))

# Block 2
cnn.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.3))

# Block 3
cnn.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(BatchNormalization())
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.4))

# Replace Flatten with GlobalAveragePooling2D
cnn.add(GlobalAveragePooling2D())

# Fully Connected Layer
cnn.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
cnn.add(Dropout(0.5))

# Output Layer
cnn.add(Dense(len(label_map), activation='softmax'))

# Compile
cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

cnn.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 128, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 128, 128, 32)     128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 32)      9248      
                                                                 
 batch_normalization_1 (Batc  (None, 128, 128, 32)     128       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 64, 32)       0         
 )                                                               
                                                        

In [11]:
early_stopping = EarlyStopping(
    monitor='val_loss',     # Watch validation loss
    patience=5,             # If val_loss doesn't improve for 5 epochs, stop
    restore_best_weights=True,  # Restore the best model weights automatically
    verbose=1
)

# Define ModelCheckpoint callback (to save best model during training)
checkpoint = ModelCheckpoint(
    'model/crop_disease_best_model.h5',  # Save the model with the best validation loss
    monitor='val_loss',   # Monitor validation loss
    save_best_only=True,  # Save only the model with the best validation loss
    verbose=1
)

In [12]:
# Now train your model with the generator and callbacks
cnn.fit(
    train_generator,
    steps_per_epoch=len(X_train) // 32,
    epochs=20,
    validation_data=val_generator,
    validation_steps=len(X_val) // 32,
    callbacks=[early_stopping, checkpoint]  # Add both callbacks
)

# Save the final model
cnn.save('model/crop_disease_model.h5')
print("✅ Model saved as crop_disease_model.h5")

Epoch 1/20
Epoch 1: val_loss improved from inf to 3.24849, saving model to model\crop_disease_best_model.h5
Epoch 2/20
Epoch 2: val_loss improved from 3.24849 to 2.04735, saving model to model\crop_disease_best_model.h5
Epoch 3/20
Epoch 3: val_loss improved from 2.04735 to 1.61491, saving model to model\crop_disease_best_model.h5
Epoch 4/20
Epoch 4: val_loss did not improve from 1.61491
Epoch 5/20
Epoch 5: val_loss improved from 1.61491 to 1.57300, saving model to model\crop_disease_best_model.h5
Epoch 6/20
Epoch 6: val_loss did not improve from 1.57300
Epoch 7/20
Epoch 7: val_loss improved from 1.57300 to 1.41738, saving model to model\crop_disease_best_model.h5
Epoch 8/20
Epoch 8: val_loss did not improve from 1.41738
Epoch 9/20
Epoch 9: val_loss did not improve from 1.41738
Epoch 10/20
Epoch 10: val_loss did not improve from 1.41738
Epoch 11/20
Epoch 11: val_loss did not improve from 1.41738
Epoch 12/20
Epoch 12: val_loss improved from 1.41738 to 1.19912, saving model to model\crop_