In [1]:
#Importing Libraries

import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorflow.keras.layers import Rescaling, RandomFlip, RandomRotation, RandomZoom
import os
# from PIL import Image
# import warnings

In [2]:
import os
from PIL import Image

def remove_corrupted_images(directory):
    """Delete every file under ``directory`` that Pillow fails to verify.

    Walks ``directory`` recursively, opens each file with ``Image.open`` and
    calls ``verify()`` on it. Any file for which that raises ``IOError`` or
    ``SyntaxError`` is reported on stdout and removed from disk.

    Args:
        directory: Root folder to scan; all nested subfolders are visited.

    Note:
        This is destructive: offending files are deleted, not moved aside.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            # Build the path outside the try-block so it is always bound in the handler.
            img_path = os.path.join(root, file)
            try:
                # The context manager releases the file handle before any removal;
                # a lingering handle leaks and blocks os.remove on Windows.
                with Image.open(img_path) as img:
                    img.verify()  # Verify the file
            except (IOError, SyntaxError):
                print(f"Removing corrupted image: {img_path}")
                os.remove(img_path)

# Purge unreadable files from both dataset folders (deletes files in place).
remove_corrupted_images('train')
remove_corrupted_images('valid')

In [3]:
# import os
# from PIL import Image

def reencode_images(directory):
    """Re-save every ``.jpg``/``.jpeg`` file under ``directory`` as an RGB JPEG.

    Walks ``directory`` recursively. Each matching file is opened with Pillow,
    converted to RGB and written back to the same path. A failure on one file
    is printed and does not stop the walk.

    Args:
        directory: Root folder to scan; all nested subfolders are visited.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg')):
                continue
            file_path = os.path.join(root, file)
            # Encode into a sibling temp file first: saving straight onto file_path
            # truncates the source, so a failed save would destroy the image.
            tmp_path = file_path + ".reencode.tmp"
            try:
                with Image.open(file_path) as img:
                    rgb_img = img.convert("RGB")
                rgb_img.save(tmp_path, "JPEG")
                # Swap the finished file into place in a single step.
                os.replace(tmp_path, file_path)
            except Exception as e:
                print(f"Error re-encoding {file_path}: {e}")
                # Drop a partially written temp file, if any; the original is untouched.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

# Re-encode the JPEG files of both the training and the validation folders
# (files are rewritten at their existing paths).
reencode_images("train")
reencode_images("valid")

In [2]:
# Quieten TensorFlow's native logging by setting its minimum log level to '2'.
# NOTE(review): `tensorflow` is already imported in the first cell; this variable is
# typically read when TensorFlow loads — confirm it still takes effect when set here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [3]:
# import tensorflow as tf
# Report how many GPU devices TensorFlow can see (0 means no GPU is visible).
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) # Checking for GPU connectivity

Num GPUs Available:  1


In [4]:
# Random image perturbations, later used as the first stage of the CNN:
# flips on both axes, then rotation, zoom and contrast jitter (each with factor 0.2).
augmentation_layers = [
    tf.keras.layers.RandomFlip(mode="horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(factor=0.2),
    tf.keras.layers.RandomZoom(height_factor=0.2),
    tf.keras.layers.RandomContrast(factor=0.2),
]
data_augmentation = tf.keras.Sequential(layers=augmentation_layers)

In [5]:
#Training Image preprocessing

# Load the raw dataset first: labels are inferred from the sub-folder names of
# 'train' and one-hot encoded; images are resized to 128x128 and batched by 32.
train_raw = tf.keras.utils.image_dataset_from_directory(
    'train',
    labels="inferred",
    label_mode="categorical",
    batch_size=32,
    image_size=(128, 128),
    shuffle=True,
)

# Scale pixel values by 1/255. `.map()` returns a new dataset object that does not
# carry the `class_names` attribute, so copy it over to keep it available downstream.
training_set = train_raw.map(lambda x, y: (tf.keras.layers.Rescaling(1./255)(x), y))
training_set.class_names = train_raw.class_names

Found 139119 files belonging to 109 classes.


In [6]:
#Validation Image Preprocessing

# Load the raw dataset first: labels are inferred from the sub-folder names of
# 'valid' and one-hot encoded; images are resized to 128x128 and batched by 32.
valid_raw = tf.keras.utils.image_dataset_from_directory(
    'valid',
    labels="inferred",
    label_mode="categorical",
    batch_size=32,
    image_size=(128, 128),
    shuffle=True,
)

# Scale pixel values by 1/255. `.map()` returns a new dataset object that does not
# carry the `class_names` attribute; later cells read `validation_set.class_names`,
# so copy it over explicitly instead of letting that lookup raise AttributeError.
validation_set = valid_raw.map(lambda x, y: (tf.keras.layers.Rescaling(1./255)(x), y))
validation_set.class_names = valid_raw.class_names

Found 43834 files belonging to 109 classes.


In [None]:
# CNN classifier: augmentation -> 5 x (Conv -> BatchNorm -> ReLU -> MaxPool -> Dropout)
# -> Flatten -> Dense(512) -> softmax over 109 classes.
cnn = tf.keras.models.Sequential([
    # Declare the input shape up front. An `input_shape` argument on a layer that is
    # not first in a Sequential model does not build the model, which would leave it
    # unbuilt until the first batch (so `cnn.summary()` has no shapes to report).
    tf.keras.Input(shape=(128, 128, 3)),

    # Data Augmentation Layer
    data_augmentation,

    # Block 1
    tf.keras.layers.Conv2D(32, 3, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Dropout(0.25),

    # Block 2
    tf.keras.layers.Conv2D(64, 3, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Dropout(0.3),

    # Block 3
    tf.keras.layers.Conv2D(128, 3, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Dropout(0.35),

    # Block 4
    tf.keras.layers.Conv2D(256, 3, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Dropout(0.4),

    # Block 5
    tf.keras.layers.Conv2D(512, 3, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Dropout(0.45),

    # Flattening and Fully Connected Layers
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # One output unit per class (109), matching the one-hot labels of the datasets.
    tf.keras.layers.Dense(109, activation='softmax')
])

In [8]:
# cnn.add(tf.keras.layers.Dropout(0.25))

In [9]:
# cnn.add(tf.keras.layers.Flatten())

In [10]:
# cnn.add(tf.keras.layers.Dense(units=512,activation='relu'))

In [11]:
# cnn.add(tf.keras.layers.Dropout(0.4)) #To avoid overfitting

In [12]:
#Output Layer
# cnn.add(tf.keras.layers.Dense(units=109,activation='softmax'))

In [13]:
#Compiling and Training Phase
# Adam with a 5e-4 learning rate and gradient-norm clipping at 1.0.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0005,
    clipnorm=1.0,
)
# Categorical cross-entropy pairs with the one-hot labels and the softmax output.
cnn.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

In [14]:
# Learning Rate Scheduler
# Watches val_loss and multiplies the learning rate by 0.5 after 2 epochs without improvement.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.5,
    verbose=1,
)

In [15]:
# Stop training after 5 epochs without val_loss improvement and roll the model
# back to the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

In [16]:
# TensorBoard logging callback; event files are written under ./logs.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs')

In [None]:
# Display the layer-by-layer summary of the model.
# NOTE(review): this runs before `fit`; confirm the model is already built
# (input shape known) at this point, otherwise shapes/param counts are unavailable.
cnn.summary()

In [18]:
# #suppress TF warnings
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# #suppress PIL warnings
# warnings.filterwarnings("ignore", category=UserWarning, module="PIL")

# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs')


In [None]:
# Train on the first GPU for up to 30 epochs; the callbacks adjust the learning
# rate, stop early on a val_loss plateau, and log to TensorBoard.
with tf.device('/GPU:0'):
    training_history = cnn.fit(
        training_set,
        epochs=30,
        validation_data=validation_set,
        callbacks=[lr_scheduler, early_stopping, tensorboard_callback],
    )

In [None]:
# cnn = tf.keras.models.load_model('best_model.h5')

In [None]:
#Evaluating Model
#Training set Accuracy
# evaluate() yields the loss followed by the compiled metric (accuracy), in that order.
train_loss, train_acc = cnn.evaluate(training_set)
print('Training accuracy:', train_acc)

In [None]:
#Validation set Accuracy
# Same unpacking as for the training set: loss first, then accuracy.
val_loss, val_acc = cnn.evaluate(validation_set)
print('Validation accuracy:', val_acc)


In [None]:
#Saving Model
# Persist the trained model to a single .keras file in the working directory.
cnn.save('trained_plant_disease_model.keras')
# Print the output shape as a quick sanity check on the saved model's class dimension.
print(f"Model output shape: {cnn.output_shape}")

In [None]:
# Show the per-epoch metrics dictionary recorded by fit() (metric name -> list of values).
training_history.history #Return Dictionary of history


In [None]:
#Recording History in json
import json

# Cast every logged value to a built-in float before dumping: callbacks may log
# NumPy scalars (e.g. the learning rate recorded when ReduceLROnPlateau is used),
# which `json.dump` rejects with "Object of type float32 is not JSON serializable".
serializable_history = {
    metric: [float(value) for value in values]
    for metric, values in training_history.history.items()
}
with open('training_hist.json', 'w') as f:
    json.dump(serializable_history, f)

In [None]:
# List the metric names available in the history (used as keys by the plotting cell).
print("Training history keys:", training_history.history.keys())

In [None]:
#Accuracy Visualization
# One x-tick per completed epoch (early stopping may end training before epoch 30).
epochs = range(1, len(training_history.history['accuracy']) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, training_history.history['accuracy'], color='red', label='Training Accuracy')
ax.plot(epochs, training_history.history['val_accuracy'], color='blue', label='Validation Accuracy')
ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy Visualization')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
#Some other metrics for model evaluation
# Class labels, used later as `target_names` for the classification report.
# NOTE(review): `validation_set` is the result of `.map(...)`; confirm that the
# mapped dataset still exposes `class_names`, otherwise this lookup fails.
class_name = validation_set.class_names


In [None]:
# Ordered (unshuffled) pass over the 'valid' folder so predictions and true labels
# line up index-by-index for the confusion matrix / classification report.
test_raw = tf.keras.utils.image_dataset_from_directory(
    'valid',
    labels="inferred",
    label_mode="categorical",
    # Batches of 32 (as for the other datasets) instead of 1: with shuffle=False the
    # sample order is unchanged, only the number of predict steps shrinks.
    batch_size=32,
    image_size=(128, 128),
    shuffle=False
)

# Apply the same 1/255 rescaling that training_set and validation_set receive.
# The model contains no rescaling layer, so feeding raw 0-255 pixels here would
# evaluate it on inputs it was never trained on.
test_set = test_raw.map(lambda x, y: (tf.keras.layers.Rescaling(1./255)(x), y))
# `.map()` returns a new dataset without `class_names`; keep it accessible.
test_set.class_names = test_raw.class_names

In [None]:
# Generating Predictions
# y_pred holds one row of class scores per image; argmax over axis 1 picks the
# index of the highest-scoring class for each row.
y_pred = cnn.predict(test_set)
predicted_categories = tf.argmax(y_pred, axis=1)

In [None]:
# Extracting True Class Labels
# Gather the one-hot label batches in dataset order, stack them along the batch
# axis, then reduce each one-hot row to its class index.
true_categories = tf.concat(
    values=[one_hot for _, one_hot in test_set],
    axis=0,
)
Y_true = tf.argmax(input=true_categories, axis=1)

In [None]:
# Y_true

In [None]:
# Classification Report and Confusion Matrix
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Precision Recall Fscore
# Confusion matrix (kept in `cm` for the heatmap) plus the per-class text report.
cm = confusion_matrix(y_true=Y_true, y_pred=predicted_categories)
print("Classification Report:")
print(
    classification_report(
        y_true=Y_true,
        y_pred=predicted_categories,
        target_names=class_name,
    )
)

In [None]:
# Saving the Classification Report to a File
# The report is recomputed here and written as plain text.
with open('classification_report.txt', 'w') as f:
    report_text = classification_report(
        Y_true,
        predicted_categories,
        target_names=class_name,
    )
    f.write(report_text)

In [None]:
# Confusion Matrix Visualization
# Large 40x40-inch canvas so the per-cell counts of the class grid stay legible.
fig, ax = plt.subplots(figsize=(40, 40))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", annot_kws={"size": 10}, ax=ax)
ax.set_xlabel('Predicted Class', fontsize=20)
ax.set_ylabel('Actual Class', fontsize=20)
ax.set_title('Plant Disease Prediction Confusion Matrix', fontsize=25)
fig.savefig('confusion_matrix.png')  # Save the confusion matrix as an image
plt.show()