In [1]:
import warnings
# Silence all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings from the libraries used below.
warnings.filterwarnings("ignore")

In [2]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np
import os
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.utils.class_weight import compute_class_weight
import glob

In [3]:
# Seed NumPy and TensorFlow with one shared value so repeated runs are comparable
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [4]:
# Dataset locations: augmented train/valid splits live under base_dir, test images elsewhere
base_dir = '/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)'
train_dir, valid_dir = (os.path.join(base_dir, split) for split in ('train', 'valid'))
test_dir = '/kaggle/input/new-plant-diseases-dataset/test/test'

In [5]:
# Report, one line per directory, whether each expected dataset folder is present
for label, path in (
    ("Base", base_dir),
    ("Train", train_dir),
    ("Validation", valid_dir),
    ("Test", test_dir),
):
    print(f"{label} directory exists:", os.path.exists(path))

Base directory exists: False
Train directory exists: False
Validation directory exists: False
Test directory exists: False


In [7]:
# (height, width) every image is resized to; also used for the model's input_shape
IMG_SIZE = (224, 224)
# Number of images per batch produced by the directory generators
BATCH_SIZE = 32

In [8]:
# Verify and clean images
def verify_images(directory):
    """Walk ``directory`` and delete image files that Pillow cannot verify.

    Only files whose name ends in .jpg, .jpeg or .png (case-insensitive) are
    checked. Each one is opened and passed through ``verify()``; a failure is
    treated as corruption and the file is deleted.

    Args:
        directory: Root folder to scan recursively.

    Returns:
        None. One line is printed per corrupted file, followed by the total
        number of files that were actually removed.
    """
    valid_extensions = ('.jpg', '.jpeg', '.png')
    corrupted = 0
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(valid_extensions):
                continue
            file_path = os.path.join(root, file)
            try:
                # The context manager releases the file handle even when verify()
                # raises, so the file is closed before we try to delete it.
                with Image.open(file_path) as img:
                    img.verify()
            except (IOError, SyntaxError):
                is_corrupted = True
            else:
                is_corrupted = False
            if not is_corrupted:
                continue
            try:
                os.remove(file_path)
            except OSError as error:
                # e.g. a read-only dataset mount: report it and keep scanning
                # instead of aborting the whole walk from inside the handler.
                print(f"Corrupted image could not be removed: {file} ({error})")
            else:
                print(f"Corrupted image removed: {file}")
                corrupted += 1
    print(f"Removed {corrupted} corrupted images from {directory}")

In [None]:
# Scan the training split and delete images that fail verification
verify_images(train_dir)

In [28]:
# Scan the validation split and delete images that fail verification
verify_images(valid_dir)

Removed 0 corrupted images from /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid


In [30]:
# Get class names and check for imbalance
# Keep only sub-directories: each class is a folder, so a stray file in train_dir
# (e.g. a hidden metadata file) must not be counted as a class.
class_names = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
NUM_CLASSES = len(class_names)
print(f"Found {NUM_CLASSES} classes: {class_names}")

Found 38 classes: ['Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy', 'Blueberry___healthy', 'Cherry_(including_sour)___Powdery_mildew', 'Cherry_(including_sour)___healthy', 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot', 'Corn_(maize)___Common_rust_', 'Corn_(maize)___Northern_Leaf_Blight', 'Corn_(maize)___healthy', 'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Grape___healthy', 'Orange___Haunglongbing_(Citrus_greening)', 'Peach___Bacterial_spot', 'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Squash___Powdery_mildew', 'Strawberry___Leaf_scorch', 'Strawberry___healthy', 'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', '

In [31]:
# Compute class weights for imbalanced dataset
# One label per training file, so label frequencies mirror the per-class file counts
train_labels = []
for class_name in class_names:
    class_path = os.path.join(train_dir, class_name)
    train_labels.extend([class_name] * len(glob.glob(os.path.join(class_path, '*.*'))))
# Recent scikit-learn releases validate `classes` as a NumPy array and reject a
# plain list with InvalidParameterError, so convert explicitly.
class_weights = compute_class_weight('balanced', classes=np.array(class_names), y=train_labels)
# Keys are positions in class_names; values are plain floats for readable output
class_weight_dict = {index: float(weight) for index, weight in enumerate(class_weights)}
print(f"Class weights: {class_weight_dict}")

Class weights: {0: 0.917593462823726, 1: 0.9309856170370566, 2: 1.0510616028708133, 3: 0.9212492136716293, 4: 1.0186500115928587, 5: 1.0991493886230728, 6: 1.013071424453796, 7: 1.1265946535034297, 8: 0.9700411227334198, 9: 0.9695327154363897, 10: 0.9950879080433737, 11: 0.9798031891168599, 12: 0.9634731359649122, 13: 1.074255761354606, 14: 1.0933028493218864, 15: 0.9203325477873789, 16: 1.006457247580322, 17: 1.0705257066276803, 18: 0.9669986518832365, 19: 0.9305173144127925, 20: 0.9540321923943432, 21: 0.9540321923943432, 22: 1.0141822483841183, 23: 1.038668400366441, 24: 0.9148706335571868, 25: 1.065592408440456, 26: 1.0427668664332759, 27: 1.0141822483841183, 28: 1.0868792133094192, 29: 0.9634731359649122, 30: 0.9993886661548523, 31: 0.9829268974774875, 32: 1.0600965163625395, 33: 1.0625321200761813, 34: 1.012516924495146, 35: 0.9433291285327035, 36: 1.0334460452807999, 37: 0.9604716620210964}


In [34]:
# Data augmentation pipeline
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Random geometric and brightness transforms, used for training images only
augmentation_options = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.3,
    'horizontal_flip': True,
    'vertical_flip': True,
    'brightness_range': [0.8, 1.2],
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # EfficientNet-specific preprocessing
    **augmentation_options,
)

# Validation images get the preprocessing function only, no augmentation
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [35]:
# Load data generators
# Settings shared by both splits: resize target, batch size and one-hot labels
shared_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Training batches are shuffled
train_generator = train_datagen.flow_from_directory(train_dir, shuffle=True, **shared_flow_options)

# Validation batches are not shuffled
valid_generator = valid_datagen.flow_from_directory(valid_dir, shuffle=False, **shared_flow_options)

Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.


In [36]:
# Optimize data pipeline with tf.data
def create_dataset(generator):
    """Wrap a Keras batch iterator in a finite, prefetched ``tf.data.Dataset``.

    Args:
        generator: Batch iterator returned by ``flow_from_directory``. It must
            support ``len()``, integer indexing and ``on_epoch_end()``.

    Returns:
        A dataset yielding ``(images, one_hot_labels)`` float32 batches that
        ends after ``len(generator)`` batches, i.e. one pass over the data.
    """
    def one_pass():
        # Iterating the Keras iterator directly never stops (it cycles over the
        # data forever), which leaves fit()/evaluate() with an endless epoch.
        # Indexing each batch once makes every dataset iteration one epoch long.
        for batch_index in range(len(generator)):
            yield generator[batch_index]
        # Let the iterator rebuild (and reshuffle, if enabled) its index order
        # before the next pass.
        generator.on_epoch_end()

    dataset = tf.data.Dataset.from_generator(
        one_pass,
        output_signature=(
            tf.TensorSpec(shape=(None, *IMG_SIZE, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None, NUM_CLASSES), dtype=tf.float32),
        ),
    )
    # Deliberately no .cache(): it would replay the first pass's augmented images
    # on every later epoch and keep every decoded batch in memory.
    return dataset.prefetch(tf.data.AUTOTUNE)

In [37]:
# Wrap both directory generators as tf.data pipelines for training and evaluation
train_dataset = create_dataset(train_generator)
valid_dataset = create_dataset(valid_generator)

I0000 00:00:1749789253.986627      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1749789253.987297      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [38]:
# Build model
# EfficientNetB0 with ImageNet weights and no classification top (include_top=False),
# sized for IMG_SIZE RGB inputs
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))
base_model.trainable = False  # Freeze base model initially

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [39]:
# Classification head: global pooling -> 512-unit ReLU layer -> dropout -> softmax over classes
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(512, activation='relu')(pooled_features)
x = Dropout(0.5)(hidden)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

In [40]:
# Full network: from the backbone's input tensor to the softmax class probabilities
model = Model(inputs=base_model.input, outputs=predictions)

In [41]:
# Compile model
# Phase 1 settings: Adam at 1e-3, tracking accuracy, precision and recall
phase1_optimizer = Adam(learning_rate=0.001)
phase1_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    optimizer=phase1_optimizer,
    loss='categorical_crossentropy',
    metrics=phase1_metrics,
)

In [42]:
# Define callbacks
# Write best_model.keras only when validation accuracy reaches a new maximum
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max')
# Stop after 7 epochs without validation-loss improvement and restore the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-6
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

In [None]:
# Train model (Phase 1: Transfer learning)
# A single epoch with the class weights applied to the loss;
# the returned History is kept so the curves can be plotted later.
history = model.fit(
    train_dataset,
    epochs=1,
    validation_data=valid_dataset,
    callbacks=[checkpoint, early_stop, reduce_lr],
    class_weight=class_weight_dict
)

     83/Unknown [1m47s[0m 565ms/step - accuracy: 0.8987 - loss: 0.3209 - precision: 0.9266 - recall: 0.8681

In [None]:
# Fine-tuning
# Unfreeze the backbone, then re-freeze (a) its first 100 layers and (b) every
# BatchNormalization layer. Keras' fine-tuning guidance is to keep BatchNormalization
# frozen: letting its moving statistics update on the new data tends to wreck the
# pretrained features and drop accuracy right after unfreezing.
base_model.trainable = True
for layer_index, layer in enumerate(base_model.layers):
    if layer_index < 100 or isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

In [None]:
# Recompile so the changed trainable flags take effect, using a much smaller learning rate
fine_tune_optimizer = Adam(learning_rate=1e-5)
fine_tune_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=fine_tune_metrics,
)

In [None]:
# Train model (Phase 2: Fine-tuning)
# Up to 10 epochs with the same callbacks and class weights as phase 1;
# the History is stored separately so both phases can be plotted together.
history_fine = model.fit(
    train_dataset,
    epochs=10,
    validation_data=valid_dataset,
    callbacks=[checkpoint, early_stop, reduce_lr],
    class_weight=class_weight_dict
)

In [None]:
# Load the weights stored in best_model.keras and score them on the validation data
model.load_weights('best_model.keras')
# return_dict=True gives a {metric_name: value} mapping instead of a bare list
eval_metrics = model.evaluate(valid_dataset, return_dict=True)
print(f"Validation Metrics: {eval_metrics}")

In [None]:
# Plot training history
# Join the two training phases so every curve spans the whole run
combined_history = {
    key: history.history[key] + history_fine.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}
# (panel title, quantity name, training key, validation key), left to right
panels = (
    ('Model Accuracy', 'Accuracy', 'accuracy', 'val_accuracy'),
    ('Model Loss', 'Loss', 'loss', 'val_loss'),
)

plt.figure(figsize=(12, 6))
for position, (title, quantity, train_key, val_key) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(combined_history[train_key], label=f'Train {quantity}')
    plt.plot(combined_history[val_key], label=f'Val {quantity}')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(quantity)
    plt.legend()
plt.show()

In [None]:
# Save final model
# Writes the model in its current state to plant_disease_model_final.keras
model.save('plant_disease_model_final.keras')

In [None]:
# Prediction function
def predict_disease(image_path):
    """Classify a single image file with the trained model.

    Args:
        image_path: Path of the image to load and classify.

    Returns:
        ``(label, confidence)`` for the highest-scoring class, or
        ``(None, 0.0)`` if loading or inference raises (the error is printed).
    """
    try:
        image_utils = tf.keras.preprocessing.image
        pil_image = image_utils.load_img(image_path, target_size=IMG_SIZE)
        pixels = preprocess_input(image_utils.img_to_array(pil_image))  # Apply EfficientNet preprocessing
        batch = np.expand_dims(pixels, axis=0)  # add a leading batch axis of size 1
        scores = model.predict(batch)[0]
        best_index = np.argmax(scores)
        # Label order follows the training generator's class_indices mapping
        labels = list(train_generator.class_indices.keys())
        return labels[best_index], scores[best_index]
    except Exception as e:
        print(f"Prediction error: {e}")
        return None, 0.0

In [None]:
# Example usage: the path below is a placeholder and must point at a real image file
sample_image = 'path_to_test_image.jpg'
disease, confidence = predict_disease(sample_image)
# predict_disease returns (None, 0.0) on failure, so only report a successful prediction
if disease:
    print(f"Predicted disease: {disease} with confidence {confidence:.4f}")
