In [36]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [37]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("kushagra3204/wheat-plant-diseases")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/wheat-plant-diseases


In [38]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report

In [39]:
BASE_PATH = '/kaggle/input/wheat-plant-diseases/wheat-plant-diseases/'
DATA_DIR_TRAIN = '/kaggle/input/wheat-plant-diseases/data/train'
DATA_DIR_TEST = '/kaggle/input/wheat-plant-diseases/data/test'
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 30  
NUM_CLASSES = 5

In [40]:
TRAIN_CLASS_FOLDERS = ['Aphid', 'Blast', 'Mildew', 'Smut', 'Tan spot']
TEST_CLASS_FOLDERS = ['aphid_test', 'blast_test', 'mildew_test', 'smut_test', 'tan_spot_test']
REPORT_TARGET_CLASSES = ['Aphid', 'Wheat Blast', 'Powdery Mildew', 'Smut', 'Spot Blotch']

In [41]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,        
    zoom_range=0.2,         
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

In [42]:
print("--- Initializing Data Generators ---")

# TRAINING GENERATOR
train_generator = train_datagen.flow_from_directory(
    DATA_DIR_TRAIN, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    classes=TRAIN_CLASS_FOLDERS, class_mode='categorical', subset='training', shuffle=True
)

# VALIDATION GENERATOR
validation_generator = train_datagen.flow_from_directory(
    DATA_DIR_TRAIN, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    classes=TRAIN_CLASS_FOLDERS, class_mode='categorical', subset='validation', shuffle=False
)

# TEST GENERATOR
test_generator = test_datagen.flow_from_directory(
    DATA_DIR_TEST, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    classes=TEST_CLASS_FOLDERS, class_mode='categorical', shuffle=False
)

--- Initializing Data Generators ---
Found 3770 images belonging to 5 classes.
Found 941 images belonging to 5 classes.
Found 250 images belonging to 5 classes.


In [43]:
# --- MODEL BUILDING (VGG16 FINE TUNING) ---
print("\n--- Building VGG16 Model ---")

base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
)

for layer in base_model.layers:
    layer.trainable = False

for layer in base_model.layers[-4:]:
    layer.trainable = True

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization() (x)  
x = Dropout(0.5)(x)          
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)


--- Building VGG16 Model ---


In [44]:
# 3. Compile Model
model.compile(
    optimizer=Adam(learning_rate=0.00001), 
    loss='categorical_crossentropy', 
    metrics=['accuracy']
)

In [45]:
optimizer = Adam(learning_rate=0.0001)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

checkpoint = ModelCheckpoint(
    'best_wheat_model.keras',     
    monitor='val_accuracy', 
    save_best_only=True, 
    mode='max', 
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.2,       
    patience=3, 
    min_lr=1e-6,      
    verbose=1
)

early_stop = EarlyStopping(
    monitor='val_loss', 
    patience=10,      
    restore_best_weights=True,
    verbose=1
)

In [46]:
print("\n--- Starting Training ---")
start_time = time.time()

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    callbacks=[checkpoint, reduce_lr, early_stop], 
    verbose=1
)

training_time = time.time() - start_time


--- Starting Training ---


  self._warn_if_super_not_called()


Epoch 1/30
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568ms/step - accuracy: 0.5064 - loss: 1.3636
Epoch 1: val_accuracy improved from -inf to 0.51616, saving model to best_wheat_model.keras
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 731ms/step - accuracy: 0.5071 - loss: 1.3617 - val_accuracy: 0.5162 - val_loss: 1.2235 - learning_rate: 1.0000e-04
Epoch 2/30
[1m  1/117[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 200ms/step - accuracy: 0.6875 - loss: 1.0073




Epoch 2: val_accuracy did not improve from 0.51616
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 143ms/step - accuracy: 0.6875 - loss: 1.0073 - val_accuracy: 0.5119 - val_loss: 1.2097 - learning_rate: 1.0000e-04
Epoch 3/30
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 558ms/step - accuracy: 0.7260 - loss: 0.7724
Epoch 3: val_accuracy improved from 0.51616 to 0.59806, saving model to best_wheat_model.keras
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 707ms/step - accuracy: 0.7260 - loss: 0.7724 - val_accuracy: 0.5981 - val_loss: 0.9747 - learning_rate: 1.0000e-04
Epoch 4/30
[1m  1/117[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 204ms/step - accuracy: 0.8125 - loss: 0.5450
Epoch 4: val_accuracy improved from 0.59806 to 0.60129, saving model to best_wheat_model.keras
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 152ms/step - accuracy: 0.8125 - loss: 0.5450 - val_accuracy: 0.6013 - val_loss: 0.9745 - le

In [47]:
#EVALUATION 
print(f"\n--- FINAL EVALUATION ---")

loss, accuracy = model.evaluate(test_generator, steps=test_generator.n // BATCH_SIZE + 1)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Training Time: {training_time:.2f} seconds")


--- FINAL EVALUATION ---
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 322ms/step - accuracy: 0.8779 - loss: 0.3945
Test Accuracy: 0.9040
Training Time: 1509.59 seconds


In [48]:
# Classification Report
test_generator.reset()
Y_pred = model.predict(test_generator)
y_pred = np.argmax(Y_pred, axis=1)
y_true = test_generator.classes

report = classification_report(
    y_true, 
    y_pred, 
    target_names=REPORT_TARGET_CLASSES, 
    zero_division=0,
    output_dict=True 
)

print("\n--- CLASSIFICATION REPORT ---")
print(classification_report(y_true, y_pred, target_names=REPORT_TARGET_CLASSES, zero_division=0))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 288ms/step

--- CLASSIFICATION REPORT ---
                precision    recall  f1-score   support

         Aphid       0.93      0.82      0.87        50
   Wheat Blast       0.94      0.96      0.95        50
Powdery Mildew       0.91      0.86      0.89        50
          Smut       0.98      1.00      0.99        50
   Spot Blotch       0.77      0.88      0.82        50

      accuracy                           0.90       250
     macro avg       0.91      0.90      0.90       250
  weighted avg       0.91      0.90      0.90       250



In [49]:
model_name = "VGG19"

metrics_summary = {
    'Model': model_name,
    'Accuracy': report['accuracy'],
    'Precision': report['weighted avg']['precision'], 
    'Recall': report['weighted avg']['recall'],
    'F1-Score': report['weighted avg']['f1-score'],
    'Training Time (s)': training_time,
}

df_metrics = pd.DataFrame([metrics_summary])

In [50]:
results_file = 'model_performance_summary.csv'
if not os.path.exists(results_file):
    df_metrics.to_csv(results_file, index=False)
else:
    df_metrics.to_csv(results_file, mode='a', header=False, index=False)
    
print(f"\nMetrics saved to {results_file}")


Metrics saved to model_performance_summary.csv
