In [2]:
!pip install -U 'tensorflow[and-cuda]'

Collecting tensorflow[and-cuda]
  Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting protobuf>=5.28.0 (from tensorflow[and-cuda])
  Downloading protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting tensorboard~=2.20.0 (from tensorflow[and-cuda])
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting keras>=3.10.0 (from tensorflow[and-cuda])
  Downloading keras-3.12.0-py3-none-any.whl.metadata (5.9 kB)
Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow[and-cuda])
  Downloading ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.9 kB)
Collecting nvidia-nccl-cu12<3.0,>=2.25.1 (from tensorflow[and-cuda])
  Downloading nvidia_nccl_cu12-2.28.9-py3-none-manylinux_2_18_x86_64.whl.metadata (2.0 kB)
Downloading keras-3.12.0-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m29.9 MB/s[

In [3]:
# Import Required Libraries
import tensorflow as tf
from tensorflow.keras.applications import VGG16, VGG19, InceptionV3, Xception, ResNet50, DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
import os
import numpy as np

from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("vipoooool/new-plant-diseases-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/new-plant-diseases-dataset


In [5]:
train = '/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train'
valid = '/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid'

In [13]:
import tensorflow as tf

# Load and Preprocess Dataset
image_size = (128,128)
batch_size = 32

# Training dataset with augmentation
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train,
    validation_split=0.2,
    subset='training',
    seed=123,
    image_size=image_size,
    batch_size=batch_size,
    label_mode='categorical',
    shuffle=True
)

# Validation dataset (no augmentation needed)
val_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    valid,
    validation_split=0.2,
    subset='validation',
    seed=123,
    image_size=image_size,
    batch_size=batch_size,
    label_mode='categorical'
)

# SAVE class_names BEFORE applying transformations
class_names = train_dataset.class_names
num_classes = len(class_names)

# Data augmentation layer
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomZoom(0.2)
])

# Apply augmentation and prefetching to training data
train_dataset = train_dataset.map(
    lambda x, y: (data_augmentation(x, training=True), y),
    num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

# Apply normalization and prefetching to validation data
normalization = tf.keras.layers.Rescaling(1./255)
val_dataset = val_dataset.map(
    lambda x, y: (normalization(x), y),
    num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

Found 70295 files belonging to 38 classes.
Using 56236 files for training.
Found 17572 files belonging to 38 classes.
Using 3514 files for validation.


In [15]:
# Define Models
def create_model(base_model):
    base_model.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Initialize models
models = {
    'VGG16': create_model(VGG16(weights='imagenet', include_top=False, input_shape=image_size + (3,))),
    'VGG19': create_model(VGG19(weights='imagenet', include_top=False, input_shape=image_size + (3,))),
    'InceptionV3': create_model(InceptionV3(weights='imagenet', include_top=False, input_shape=image_size + (3,))),
    'Xception': create_model(Xception(weights='imagenet', include_top=False, input_shape=image_size + (3,))),
    'ResNet50': create_model(ResNet50(weights='imagenet', include_top=False, input_shape=image_size + (3,))),
    'DenseNet121': create_model(DenseNet121(weights='imagenet', include_top=False, input_shape=image_size + (3,)))
}

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
import numpy as np

# Train Models
history = {}
epochs = 10

for model_name, model in models.items():
    print(f"Training {model_name}...")
    history[model_name] = model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=epochs
    )
    model.save(f"{model_name}_plant_disease_model.h5")
    print(f"{model_name} training completed and model saved.")
    
    # Get predictions
    y_pred = model.predict(val_dataset)
    
    # Extract true labels from the dataset
    y_true = []
    for image_batch, label_batch in val_dataset:
        y_true.append(label_batch)
    
    # Concatenate all batches into a single array
    y_true = tf.concat(y_true, axis=0).numpy()
    
    # Save predictions with the model name
    np.save(f"{model_name}_y_pred.npy", y_pred)
    np.save(f"{model_name}_y_true.npy", y_true)
    print(f'{model_name} predictions saved')


Training VGG16...
Epoch 1/10
[1m   5/1758[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:06[0m 38ms/step - accuracy: 0.0254 - loss: 3.7533    

I0000 00:00:1764685674.898314     139 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 74ms/step - accuracy: 0.4615 - loss: 2.4577 - val_accuracy: 0.6357 - val_loss: 1.6177
Epoch 2/10
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 45ms/step - accuracy: 0.6774 - loss: 1.3318 - val_accuracy: 0.7057 - val_loss: 1.0996
Epoch 3/10
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 46ms/step - accuracy: 0.7331 - loss: 1.0025 - val_accuracy: 0.7439 - val_loss: 0.8972
Epoch 4/10
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 45ms/step - accuracy: 0.7622 - loss: 0.8494 - val_accuracy: 0.7752 - val_loss: 0.7825
Epoch 5/10
[1m1758/1758[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 45ms/step - accuracy: 0.7831 - loss: 0.7540 - val_accuracy: 0.7894 - val_loss: 0.7068
Epoch 6/10
[1m 229/1758[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1:05[0m 43ms/step - accuracy: 0.7903 - loss: 0.7345

In [None]:
# Generate Model Comparison Table
import numpy as np
import pandas as pd

models = ["VGG16", "VGG19", "InceptionV3", "Xception", "ResNet50", "DenseNet121"]
results = []

for name in models:
    y_pred = np.load(f"{name}_y_pred.npy")
    y_true = np.load(f"{name}_y_true.npy")
    
    # Convert predictions and true labels from one-hot to class indices
    y_pred_labels = y_pred.argmax(axis=1)
    y_true_labels = y_true.argmax(axis=1)  # Add this line!
    
    accuracy = np.mean(y_pred_labels == y_true_labels)
    results.append({
        'Model': name,
        'Accuracy': accuracy
    })

comparison_df = pd.DataFrame(results)
print(comparison_df)

# Generate Classification Reports
from sklearn.metrics import classification_report

for name in models:
    y_pred = np.load(f"{name}_y_pred.npy")
    y_true = np.load(f"{name}_y_true.npy")
    
    # Convert both predictions and true labels from one-hot to class indices
    y_pred_labels = y_pred.argmax(axis=1)
    y_true_labels = y_true.argmax(axis=1)  # Add this line!
    
    print(f"\n===== {name} =====")
    print(classification_report(y_true_labels, y_pred_labels, target_names=class_names))
