<a href="https://colab.research.google.com/github/sivm205/plant-leaf-disease-detection-and-classification/blob/master/soya_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.utils import to_categorical

In [None]:
# Mount Google Drive inside the Colab VM so the dataset archive under
# /content/drive can be read by the extraction cell below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Global configuration shared by the data pipeline and the models.
BATCH_SIZE = 32    # images per batch produced by the dataset loader
IMAGE_SIZE = 224   # images are resized to IMAGE_SIZE x IMAGE_SIZE pixels
CHANNELS = 3       # colour channels per image
EPOCHS = 50        # upper bound on training epochs

In [None]:
import zipfile

# Archive on the mounted Drive that holds the soybean leaf dataset, and the
# directory it is unpacked into (the dataset-loading cell reads from there).
zip_file_name = "/content/drive/MyDrive/soybean.leaf.dataset.zip"
extract_dir = "/content/plant/"

# The context manager closes the archive even if extraction fails.
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)


In [None]:
# Build a batched tf.data.Dataset of (images, integer labels) from the
# extracted image folders; the fixed seed keeps the shuffle reproducible.
path = "/content/plant"
loader_options = dict(
    seed=123,
    shuffle=True,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)
dataset = tf.keras.preprocessing.image_dataset_from_directory(path, **loader_options)

In [None]:
# Class names exposed by the loaded dataset; integer labels index into this list.
classes= dataset.class_names
classes

In [None]:
# Inspect one batch: take(1) limits the iteration to a single (images, labels) pair.
# The loop variables stay defined afterwards and are displayed by the next cell.
for image_batch, labels_batch in dataset.take(1): # only the first batch
    print(image_batch.shape)
    print(labels_batch.numpy()) # labels of this batch as a NumPy array
    # each value is the label of one image in the batch

In [None]:
# Label tensor left over from the batch inspected in the previous cell.
labels_batch

### Visualize some of the sample images from the dataset

In [None]:
# Show up to 12 images from the first batch in a 3x4 grid, titled with their class name.
plt.figure(figsize=(15, 15))
for image_batch, labels_batch in dataset.take(1):
    # A batch may hold fewer than 12 images (small dataset or final partial batch).
    for i in range(min(12, image_batch.shape[0])):
        ax = plt.subplot(3, 4, i + 1)
        # Pixel values are floats after loading; cast to uint8 so imshow renders true colours.
        plt.imshow(image_batch[i].numpy().astype("uint8"))
        # `classes` is the list read from dataset.class_names; the label is its index.
        plt.title(classes[int(labels_batch[i])])
        plt.axis("off")
plt.show()

In [None]:
len(dataset) # total number of batches; the loader was configured with batch_size=BATCH_SIZE (32)

In [None]:
# Fraction of the batches reserved for training.
train_size = 0.8
len(dataset)*train_size # number of batches that 80 percent corresponds to (may be fractional)

In [None]:
# Derive the training batch count from train_size instead of hard-coding it
# (the original run used 524 of 655 batches), so the cell follows the dataset size.
train_ds = dataset.take(int(len(dataset) * train_size))
len(train_ds)

In [None]:
# Everything after the training batches is held out for validation and testing.
# skip(n) starts yielding after the first n batches; n mirrors the training count.
test_ds = dataset.skip(int(len(dataset) * train_size))
len(test_ds)

In [None]:
# Fraction of all batches reserved for validation (taken out of the held-out part).
val_size=0.1
len(dataset)*val_size  # number of batches that 10 percent corresponds to (may be fractional)

In [None]:
# Validation set: the first val_size share of batches from the held-out part,
# computed from the dataset size rather than the hard-coded 65.
val_ds = test_ds.take(int(len(dataset) * val_size))
len(val_ds)

In [None]:
# Test set: whatever follows the training and validation batches. Skipping from
# the full dataset (rather than re-assigning from test_ds itself) keeps this cell
# idempotent, so re-running it does not drop further batches.
test_ds = dataset.skip(int(len(dataset) * train_size) + int(len(dataset) * val_size))
len(test_ds)

In [None]:
def get_dataset_partitions_tf(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    """Split a dataset into train / validation / test partitions.

    Args:
        ds: Dataset-like object supporting ``len()``, ``shuffle``, ``take`` and ``skip``.
        train_split: Fraction of elements assigned to the training partition.
        val_split: Fraction of elements assigned to the validation partition.
        test_split: Fraction assigned to the test partition; only used to check
            that the three fractions sum to 1 (the test set is the remainder).
        shuffle: Whether to shuffle ``ds`` once before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)``.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: decimal fractions rarely sum to exactly 1.0
    # in binary floating point.
    assert abs((train_split + test_split + val_split) - 1) < 1e-9

    ds_size = len(ds)

    if shuffle:
        # Freeze the shuffled order. With the default reshuffle-per-iteration the
        # take/skip windows below would cover different elements on every pass,
        # letting validation/test samples leak into training.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    remainder = ds.skip(train_size)
    val_ds = remainder.take(val_size)
    test_ds = remainder.skip(val_size)

    return train_ds, val_ds, test_ds

In [None]:
# Replace the manual splits above with the helper's partitions (default 80/10/10 fractions).
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)


In [None]:
# Number of batches in the training partition.
len(train_ds)


In [None]:
# Number of batches in the validation partition.
len(val_ds)


In [None]:
# Number of batches in the test partition.
len(test_ds)


In [None]:
# Input-pipeline tuning to cut per-epoch time: cache() stores the batches after
# the first pass and prefetch() prepares upcoming batches while the model works
# on the current one.
# Only the training split is shuffled. Evaluation gains nothing from a random
# order, and a stable order keeps labels and predictions aligned when the
# validation/test sets are iterated more than once.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)


In [None]:
# Preprocessing sub-model: resize to IMAGE_SIZE x IMAGE_SIZE, then scale pixel
# values by 1/255.
# NOTE(review): `layers.experimental.preprocessing` is the older location of
# these layers; confirm it exists in the TensorFlow version this notebook targets.
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE))
resize_and_rescale.add(layers.experimental.preprocessing.Rescaling(1./255))

In [None]:
# Augmentation sub-model: random horizontal/vertical flips followed by a random
# rotation with factor 0.2.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"))
data_augmentation.add(layers.experimental.preprocessing.RandomRotation(0.2))

In [None]:
def _augment_batch(images, labels):
    """Apply the random augmentation layers to the images; labels pass through unchanged."""
    return data_augmentation(images, training=True), labels


# Augment every training batch on the fly and keep prefetching downstream.
train_ds = train_ds.map(_augment_batch).prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
#base model
# Full batched shape, as expected by model.build(). Individual layers take the
# per-sample shape, i.e. without the leading batch dimension.
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 9  # NOTE(review): hard-coded; presumably equals len(classes) — confirm

# Six Conv2D + MaxPooling stages followed by a small dense classifier head.
model = models.Sequential([
    resize_and_rescale,
    # Layer-level input_shape must exclude the batch dimension.
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape[1:]),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # Softmax output: one probability per class.
    layers.Dense(n_classes, activation='softmax'),
])

model.build(input_shape=input_shape)

In [None]:
#tuned hyperparameter

from tensorflow.keras import models, layers, optimizers, callbacks

# Define hyperparameters
BATCH_SIZE = 32
IMAGE_SIZE = 224
CHANNELS = 3
n_classes = 9
epochs = 50
initial_learning_rate = 0.001
dropout_rate = 0.5
weight_decay = 0.0001   # L2 regularization strength for conv/dense kernels
patience = 5            # epochs without improvement tolerated by the callbacks
factor = 0.5            # learning-rate multiplier applied by ReduceLROnPlateau

# Define preprocessing layers
resize_and_rescale = tf.keras.Sequential([
    layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    layers.experimental.preprocessing.Rescaling(1./255)
])


def _regularized_conv(filters, **kwargs):
    """Return a 3x3 ReLU Conv2D layer with L2 kernel regularization."""
    return layers.Conv2D(filters, kernel_size=(3, 3), activation='relu',
                         kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                         **kwargs)


# Convolutional feature extractor: one 32-filter stage, then five 64-filter
# stages, each followed by 2x2 max pooling.
conv_stack = [
    _regularized_conv(32, input_shape=(IMAGE_SIZE, IMAGE_SIZE, CHANNELS)),
    layers.MaxPooling2D((2, 2)),
]
for _ in range(5):
    conv_stack += [_regularized_conv(64), layers.MaxPooling2D((2, 2))]

# Define the model architecture with regularization
model = models.Sequential([
    resize_and_rescale,
    *conv_stack,
    layers.Flatten(),
    layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(weight_decay)),
    layers.Dropout(dropout_rate),
    layers.Dense(n_classes, activation='softmax')
])

# Compile the model with learning rate scheduling
opt = optimizers.Adam(learning_rate=initial_learning_rate)
lr_schedule = callbacks.ReduceLROnPlateau(factor=factor, patience=patience)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping callback
# NOTE(review): early stopping and the LR schedule share the same patience, so
# training may stop in the same epoch the learning rate is first reduced —
# confirm this is intended.
early_stopping = callbacks.EarlyStopping(patience=patience, restore_best_weights=True)

# Train the model with callbacks.
# train_ds is already batched by the dataset loader, so no batch_size is passed
# to fit(); Keras documents that it must not be specified for dataset inputs.
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=epochs,
                    callbacks=[early_stopping, lr_schedule],
                    verbose=1)


In [None]:

#using transfer learning
from tensorflow.keras import models, layers, optimizers, callbacks
import tensorflow_hub as hub

# Define hyperparameters
BATCH_SIZE = 32
IMAGE_SIZE = 224
CHANNELS = 3
n_classes = 9
epochs = 50
initial_learning_rate = 0.001
dropout_rate = 0.5
weight_decay = 0.0001   # L2 regularization strength for the dense head
patience = 5            # epochs without improvement tolerated by the callbacks
factor = 0.5            # learning-rate multiplier applied by ReduceLROnPlateau

# Define preprocessing layers
resize_and_rescale = tf.keras.Sequential([
    layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    layers.experimental.preprocessing.Rescaling(1./255)
])

# Load the pre-trained ResNet V2 50 feature extractor from TF Hub.
# trainable=False freezes its weights, so only the new head is trained.
base_model = hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5",
                            trainable=False)

# Add new classification layers on top
model = models.Sequential([
    resize_and_rescale,
    base_model,
    layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(weight_decay)),
    layers.Dropout(dropout_rate),
    layers.Dense(n_classes, activation='softmax')
])

# Compile the model with learning rate scheduling
opt = optimizers.Adam(learning_rate=initial_learning_rate)
lr_schedule = callbacks.ReduceLROnPlateau(factor=factor, patience=patience)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define early stopping callback
# NOTE(review): early stopping and the LR schedule share the same patience, so
# training may stop in the same epoch the learning rate is first reduced —
# confirm this is intended.
early_stopping = callbacks.EarlyStopping(patience=patience, restore_best_weights=True)

# Train the model with callbacks.
# train_ds is already batched by the dataset loader, so no batch_size is passed
# to fit(); Keras documents that it must not be specified for dataset inputs.
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=epochs,
                    callbacks=[early_stopping, lr_schedule],
                    verbose=1)


In [None]:
import pickle

# Persist only the per-epoch metrics dict (history.history), not the History
# callback object itself.
training_metrics = history.history
with open('history.pkl', 'wb') as history_file:
    pickle.dump(training_metrics, history_file)


In [None]:
# Print the layer-by-layer summary of the most recently defined model.
model.summary()

In [None]:
# Re-compile the current model with the default Adam settings. The model ends in
# a softmax layer, so the loss consumes probabilities (from_logits=False).
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer='adam', loss=sparse_ce_loss, metrics=['accuracy'])

In [None]:
# Train the re-compiled model; this overwrites `history` from the earlier run.
# train_ds is already batched, so batch_size is not passed (Keras documents that
# it must not be specified for dataset inputs). EPOCHS is the notebook-level
# constant (50) instead of a repeated literal.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    verbose=1,
    epochs=EPOCHS,
)

In [None]:
# Evaluate on the held-out test partition; the result is kept for display further down.
scores = model.evaluate(test_ds)


In [None]:
#testing on a test set

import numpy as np
import tensorflow as tf
import random

# Seed NumPy, Python and TensorFlow RNGs for reproducibility.
np.random.seed(12)
random.seed(12)
tf.random.set_seed(12)

# Evaluate the model.
# Model.evaluate consumes a tf.data.Dataset directly and runs until it is
# exhausted; it replaces the deprecated Model.evaluate_generator.
# With metrics=['accuracy'] the result is [loss, accuracy].
score = model.evaluate(test_ds, verbose=0)
print("Test accuracy: {:.2f}%".format(score[1]*100))
print("Test loss: {:.4f}".format(score[0]))



In [None]:
# Result of the model.evaluate(test_ds) call made earlier.
scores

In [None]:
# Object returned by the most recent model.fit call.
history

In [None]:
# Parameters recorded for the most recent fit run.
history.params

In [None]:
# Names of the metrics tracked per epoch.
history.history.keys()

In [None]:
# Container type used to store the per-epoch loss values.
type(history.history['loss'])

In [None]:
# Number of recorded loss values, i.e. epochs actually run.
len(history.history['loss'])


In [None]:
history.history['loss'][:5] # training loss for the first 5 epochs


In [None]:
# Pull the per-epoch training/validation curves out of the fit history.
metrics = history.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

In [None]:
# Plot accuracy and loss curves side by side.
# The x-axis length follows the number of epochs actually recorded; a fixed
# range fails with a shape mismatch whenever training ran a different number
# of epochs (e.g. after early stopping).
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
import numpy as np

# Predict one test batch and compare the first image's true and predicted class.
for images_batch, labels_batch in test_ds.take(1):

    first_image = images_batch[0].numpy().astype('uint8')
    first_label = labels_batch[0].numpy()

    print("first image to predict")
    plt.imshow(first_image)
    # `classes` is the list read from dataset.class_names; labels index into it.
    print("actual label:",classes[first_label])

    batch_prediction = model.predict(images_batch)
    # argmax over the class probabilities gives the predicted class index.
    print("predicted label:",classes[np.argmax(batch_prediction[0])])

In [None]:
def predict(model, img, class_names=None):
    """Classify a single image and report the winning class with its confidence.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of class
            probabilities per input image.
        img: Single image as an array-like of pixel values (no batch dimension).
        class_names: Sequence mapping class index to label. Defaults to the
            notebook-level ``classes`` list.

    Returns:
        Tuple ``(predicted_class, confidence)`` where ``confidence`` is the top
        probability expressed as a percentage rounded to two decimals.
    """
    if class_names is None:
        class_names = classes

    # Use the image that was passed in (not a notebook global) and add the
    # leading batch dimension that model.predict expects.
    img_array = np.expand_dims(np.asarray(img, dtype="float32"), axis=0)

    predictions = model.predict(img_array)

    predicted_class = class_names[int(np.argmax(predictions[0]))]
    confidence = round(100 * float(np.max(predictions[0])), 2)
    return predicted_class, confidence

In [None]:
# Show up to 9 test images with their actual class, predicted class and confidence.
plt.figure(figsize=(15, 15))
for images, labels in test_ds.take(1):
    # One forward pass for the whole batch instead of one model call per image.
    batch_probs = model.predict(images)
    for i in range(min(9, len(batch_probs))):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))

        # `classes` is the list read from dataset.class_names.
        predicted_class = classes[int(np.argmax(batch_probs[i]))]
        confidence = round(100 * float(np.max(batch_probs[i])), 2)
        actual_class = classes[int(labels[i])]

        plt.title(f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")

        plt.axis("off")

In [None]:
# Save the trained model to Soyabean_Model3.h5 in the current working directory.
model.save("Soyabean_Model3.h5")

In [None]:
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Collect labels and predictions in the SAME pass over the test set. Iterating
# test_ds twice (once for labels, once for predictions) pairs each prediction
# with the wrong label whenever the dataset order changes between passes.
y_true_batches, y_pred_batches = [], []
for batch_images, batch_labels in test_ds:
    y_true_batches.append(batch_labels.numpy())
    # Convert predicted probabilities to predicted class labels
    batch_probs = model.predict_on_batch(batch_images)
    y_pred_batches.append(np.argmax(batch_probs, axis=1))

y_true = np.concatenate(y_true_batches, axis=0)
y_pred = np.concatenate(y_pred_batches, axis=0)

# Calculate classification report and confusion matrix
print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Gather true and predicted labels in a single pass over the test set so every
# prediction stays paired with its own label even if the dataset order differs
# between iterations.
y_true_batches, y_pred_batches = [], []
for batch_images, batch_labels in test_ds:
    y_true_batches.append(batch_labels.numpy())
    y_pred_batches.append(np.argmax(model.predict_on_batch(batch_images), axis=1))

y_true = np.concatenate(y_true_batches, axis=0)
y_pred = np.concatenate(y_pred_batches, axis=0)

# Create the confusion matrix
conf_mat = confusion_matrix(y_true, y_pred)

# Plot the confusion matrix using Seaborn library; ticks use the class names
# read from the dataset.
plt.figure(figsize=(10, 8))
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g', xticklabels=classes, yticklabels=classes)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()
