In [None]:
# Mount Google Drive into the Colab runtime; the dataset used below is read
# from a folder under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Render matplotlib figures directly in the cell output.
%matplotlib inline
# Re-import modified modules automatically before each cell runs, so edits to
# the local `utils` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from utils.transformer import TransformerEncoder, PatchClassEmbedding, Patches
from tensorflow.keras.applications.efficientnet import preprocess_input
from utils.visualize import plotHistory, plot_misclassified_images
from utils.tools import CustomSchedule
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import cv2
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import os

In [None]:
# set some paths
# Directory (relative to the working directory) where the training cell
# checkpoints the model weights and from which the test cell loads them back.
model_dir = Path('bin')



In [None]:
# Restrict TensorFlow to the first GPU and let it allocate memory on demand.
# The configuration is guarded so that the cell is a no-op on CPU-only runtimes
# (indexing `gpus[0]` on an empty list raised IndexError) and so that re-running
# it after the GPU has been initialised prints the error instead of aborting.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as err:
        # Visibility / memory growth can only be changed before GPU initialisation.
        print(err)
else:
    print("No GPU detected; running on CPU.")

In [None]:
# GPU configuration: on-demand memory allocation, first device only.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # The memory-growth setting has to be identical on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        # Expose only the first GPU to TensorFlow.
        first_gpu = gpus[0]
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        print("Using GPU:", tf.test.gpu_device_name())
    except RuntimeError as err:
        # Raised when this runs after the GPUs have already been initialised.
        print(err)


Using GPU: /device:GPU:0


In [None]:

import shutil

# 1.0 Import the Dataset
dataset_path = '/content/drive/MyDrive/Datasets/Coconut Tree Disease Dataset'

# Remove every .ipynb_checkpoints directory below the dataset root, not only the
# top-level one: they also appear inside class folders, where they would later
# be listed alongside the images.
for root, dirs, _ in os.walk(dataset_path):
    if ".ipynb_checkpoints" in dirs:
        shutil.rmtree(os.path.join(root, ".ipynb_checkpoints"))
        # Do not descend into the directory that was just deleted.
        dirs.remove(".ipynb_checkpoints")

# One class per sub-directory. Sorting makes the label index of each class
# reproducible (os.listdir returns entries in arbitrary order); the filter keeps
# stray files and hidden folders from being treated as classes.
class_names = sorted(
    entry for entry in os.listdir(dataset_path)
    if not entry.startswith(".") and os.path.isdir(os.path.join(dataset_path, entry))
)

# Number of files anywhere below the dataset root; used as the upper bound when
# the image array is pre-allocated.
n_images = sum(len(files) for _, _, files in os.walk(dataset_path))

print("Class Names:", class_names)
print("Total Images:", n_images)


Class Names: ['Bud_Root_Dropping', 'Bud_Rot', 'Gray_Leaf_Spot', 'Leaf_Rot', 'Stem_Bleeding']
Total Images: 5807


In [None]:
# 2.0 Prepare the Dataset

# dataset configurations
# Shape every image is resized to, as (height, width, channels).
input_size = (224, 224, 3)
# Fraction of the samples held out by train_test_split.
test_size = 0.2

def build_dataset(dataset_path, class_names, n_images):
    """Load every image of every class into one resized, in-memory array.

    Args:
        dataset_path: Root directory containing one sub-directory per class.
        class_names: Sub-directory names; the position of a name in this
            sequence is the integer label given to its images.
        n_images: Upper bound on the number of images, used to pre-allocate
            the output arrays.

    Returns:
        Tuple ``(X, y)``. ``X`` is a float32 array of shape
        ``(n_loaded, input_size[0], input_size[1], input_size[2])`` holding the
        images in RGB channel order; ``y`` is a float32 array of shape
        ``(n_loaded,)`` holding the class indices. Entries that cannot be read
        are reported and skipped, so ``n_loaded`` may be smaller than
        ``n_images``.
    """
    X = np.empty((n_images, input_size[0], input_size[1], input_size[2]), dtype="float32")
    y = np.empty((n_images), dtype="float32")
    idx = 0
    for i, class_name in enumerate(class_names):
        class_path = os.path.join(dataset_path, class_name)
        # Sorted so the sample order, and therefore the seeded split, is reproducible.
        for image_name in tqdm(sorted(os.listdir(class_path)), desc=f"Processing {class_name}"):
            img_path = os.path.join(class_path, image_name)
            # Sub-directories (e.g. .ipynb_checkpoints) are not images.
            if not os.path.isfile(img_path):
                continue
            try:
                img = cv2.imread(img_path)
                # imread signals an unreadable file by returning None, not by raising.
                if img is None:
                    print(f"Error processing image {img_path}: file could not be decoded as an image")
                    continue
                # OpenCV decodes to BGR; the pretrained backbone and matplotlib expect RGB.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                # cv2.resize takes the target size as (width, height).
                img = cv2.resize(img, (input_size[1], input_size[0]))
                X[idx] = img
                y[idx] = i  # assigning class index as label
                idx += 1
            except Exception as e:
                # Best effort: report the failing file and keep loading the rest.
                print(f"Error processing image {img_path}: {str(e)}")
    # Drop the unused tail of the pre-allocated arrays.
    return X[:idx], y[:idx]

# Load the whole dataset into memory: X holds the resized images, y the class indices.
X, y = build_dataset(dataset_path, class_names, n_images)

Processing Bud_Root_Dropping:   0%|          | 0/514 [00:00<?, ?it/s]

Error processing image /content/drive/MyDrive/Datasets/Coconut Tree Disease Dataset/Bud_Root_Dropping/.ipynb_checkpoints: OpenCV(4.8.0) /io/opencv/modules/imgproc/src/resize.cpp:4062: error: (-215:Assertion failed) !ssize.empty() in function 'resize'



Processing Bud_Rot:   0%|          | 0/470 [00:00<?, ?it/s]

Processing Gray_Leaf_Spot:   0%|          | 0/2135 [00:00<?, ?it/s]

Processing Leaf_Rot:   0%|          | 0/1673 [00:00<?, ?it/s]

Processing Stem_Bleeding:   0%|          | 0/1016 [00:00<?, ?it/s]

In [None]:
## 2.1 Split the dataset

# Hold out `test_size` of the samples. Stratifying on the labels keeps the class
# proportions equal in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    stratify=y,
    random_state=42,
)

## 2.2 Build a pre-process pipeline with keras pre

# Backbone-specific input preprocessing followed by random augmentation
# (flip, contrast, rotation). The augmentation layers are taken from
# `tf.keras.layers` directly: the `experimental.preprocessing` namespace used
# before is a deprecated alias that newer Keras releases no longer provide.
pre_process_pipeline = tf.keras.Sequential([
        tf.keras.layers.Lambda(lambda x: preprocess_input(x)),
        tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.RandomContrast(0.4),
        tf.keras.layers.RandomRotation(factor=0.1)])


In [None]:
# 3.0 Build the Hybrid Vision Transformer (ViT)

## 3.1 Backbone

# EfficientNetB3 without its classification head, used as a feature extractor.
# The input shape comes from `input_size` so it cannot drift from the shape the
# dataset was resized to.
backbone = tf.keras.applications.EfficientNetB3(input_shape=input_size, include_top=False)

# Freeze the backbone; setting `trainable` on the model applies to all of its layers.
backbone.trainable = False

# Printed after freezing so the trainable / non-trainable parameter counts in
# the summary describe the state the backbone is actually used in.
backbone.summary()

Model: "efficientnetb3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 224, 224, 3)          0         ['input_1[0][0]']             
                                                                                                  
 normalization (Normalizati  (None, 224, 224, 3)          7         ['rescaling[0][0]']           
 on)                                                                                              
                                                                                                  
 rescaling_1 (Rescaling)     (None, 224, 224, 3)          0         ['normalization[0

In [None]:
## 3.2 Build the model

# model configurations
# Token width: number of units of the patch projection layer; also passed to
# PatchClassEmbedding and TransformerEncoder.
d_model = 128
# Passed to TransformerEncoder — presumably the width of its feed-forward
# sub-layer; confirm in utils.transformer.
d_ff = d_model * 2
# Passed to TransformerEncoder — presumably the number of attention heads.
n_heads = 8
# Units of the hidden Dense layer in the classification head.
mlp_head_size = 256
# Passed to TransformerEncoder — presumably its dropout rate.
dropout = 0.1
# Passed to TransformerEncoder — presumably the activation of its feed-forward
# part. The classification head hard-codes 'relu' independently of this value.
activation = 'relu'
# Passed to TransformerEncoder — presumably the number of encoder layers.
n_layers = 2
# Handed to Patches (presumably the patch side length) and used to derive the
# number of patches from the backbone feature-map size.
patch_size = 1

def build_vit(backbone, transformer):
    """Assemble the hybrid classifier: backbone -> patch tokens -> transformer -> MLP head.

    Args:
        backbone: Feature extractor called on the pre-processed images; axis 1
            of its output shape is used as the side length of the feature grid.
        transformer: Callable applied to the embedded token sequence.

    Returns:
        A ``tf.keras.models.Model`` mapping inputs of shape ``input_size`` to
        one un-activated score (logit) per entry of ``class_names``.
    """
    image_input = tf.keras.layers.Input(shape=input_size)

    # Pre-processing / augmentation pipeline, then the backbone feature map.
    augmented = pre_process_pipeline(image_input)
    feature_map = backbone(augmented)

    # Cut the feature map into patches and project each one linearly to d_model.
    patch_tokens = Patches(patch_size)(feature_map)
    projected = tf.keras.layers.Dense(d_model)(patch_tokens)

    # Patch count = (patches along one side of the feature grid) squared.
    grid_side = feature_map.shape[1] // patch_size
    n_patches = grid_side * grid_side

    # Position embedding plus the extra learnable class embedding.
    embedded = PatchClassEmbedding(d_model, n_patches)(projected)

    # Transformer encoder over the token sequence.
    encoded = transformer(embedded)

    # Keep only the token at position 0 (the extra learnable class token).
    class_token = tf.keras.layers.Lambda(lambda tokens: tokens[:, 0, :])(encoded)

    # MLP head: one hidden layer, then one output neuron per class.
    hidden = tf.keras.layers.Dense(mlp_head_size, activation='relu')(class_token)
    logits = tf.keras.layers.Dense(len(class_names))(hidden)

    return tf.keras.models.Model(image_input, logits)

# Encoder stack built from the model configuration above.
transformer = TransformerEncoder(d_model, n_heads, d_ff, dropout, activation, n_layers)

# Wire backbone and encoder together, then print the resulting layer table.
vit_model = build_vit(backbone, transformer)
vit_model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 224, 224, 3)       0         
                                                                 
 efficientnetb3 (Functional  (None, 7, 7, 1536)        10783535  
 )                                                               
                                                                 
 patches (Patches)           (None, None, 1536)        0         
                                                                 
 dense_12 (Dense)            (None, None, 128)         196736    
                                                                 
 patch_class_embedding (Pat  (None, 50, 128)           6528      
 chClassEmbedding)                                           

In [None]:
# 4.0 Train the Network

# some training configurations
batch_size = 32
epochs = 15

# Learning-rate schedule handed to Adam. (A constant `lr = 3e-4` used to be
# assigned here and was overwritten by this line before ever being used; that
# dead assignment has been removed so the cell shows the rate actually in effect.)
# NOTE(review): at batch_size=32 for 15 epochs, training takes far fewer than
# 20000 optimizer steps. If CustomSchedule is a linear warm-up schedule, the
# rate never leaves warm-up — confirm this is intended.
lr = CustomSchedule(d_model, warmup_steps=20000.0)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
vit_model.compile(
    optimizer=optimizer,
    # The model's last layer has no activation, so the loss receives logits.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])

name_model = 'vision_transformer.h5'
# Create the checkpoint directory up front so saving never depends on it
# already existing.
model_dir.mkdir(parents=True, exist_ok=True)
# Keep only the weights of the epoch with the best validation accuracy.
checkpointer = tf.keras.callbacks.ModelCheckpoint(
        model_dir.joinpath(name_model),
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True)

# NOTE(review): the test split doubles as validation data, so the checkpoint is
# selected on the same samples the model is later evaluated on; the reported
# test metrics are therefore optimistic. Consider a separate validation split.
history = vit_model.fit(x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer])

In [None]:
# 5.0 Test the Model

# Restore the checkpointed weights (best validation accuracy seen during training).
best_weights_path = model_dir.joinpath(name_model)
vit_model.load_weights(best_weights_path)

# Loss and accuracy on the held-out split; kept as the last expression so the
# notebook displays the returned values.
vit_model.evaluate(X_test, y_test)



In [None]:
# Plot the History object returned by `fit` with the helper imported from
# utils.visualize (presumably the per-epoch training / validation curves).
plotHistory(history)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Get model predictions
y_pred = np.argmax(vit_model.predict(X_test), axis=1)

# Generate confusion matrix, normalised per true class (each row sums to 1).
# Labels are stored as floats, so cast them to integer class indices, and pin
# the label set so the matrix always has one row/column per class name even if
# a class happens to be absent from the test labels or the predictions.
y_true_idx = np.asarray(y_test).astype(int)
cm = confusion_matrix(y_true_idx, y_pred,
                      labels=np.arange(len(class_names)), normalize='true')

# Display confusion matrix. The display draws onto the axes passed in; without
# `ax=` it creates its own figure, which previously left the explicitly sized
# figure blank and its figsize unused.
fig, ax = plt.subplots(figsize=(10, 8))
ConfusionMatrixDisplay(cm, display_labels=class_names).plot(
    ax=ax, cmap='Blues', xticks_rotation='vertical')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def visualize_accuracy(model, X_test, y_test, class_names, num_examples_per_row=3, num_rows=5):
    """Show the first test images in a grid, titled with true and predicted class.

    Args:
        model: Object with a ``predict`` method returning one score vector per
            input image.
        X_test: Array of images; the first ``num_rows * num_examples_per_row``
            (or fewer, if there are not enough) are displayed.
        y_test: Class index of each image in ``X_test``.
        class_names: Sequence mapping a class index to its display name.
        num_examples_per_row: Number of grid columns.
        num_rows: Number of grid rows.
    """
    n_slots = num_rows * num_examples_per_row
    n_shown = min(n_slots, len(X_test))

    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
    fig, axes = plt.subplots(num_rows, num_examples_per_row, figsize=(15, 20), squeeze=False)

    # One batched forward pass instead of one predict() call per image.
    if n_shown > 0:
        predicted_indices = np.argmax(model.predict(X_test[:n_shown]), axis=1)
    else:
        predicted_indices = []

    # axes.flat walks the grid row by row, matching the order of the test images.
    for slot, ax in enumerate(axes.flat):
        # Turned off for every cell so unused grid cells stay blank.
        ax.axis('off')
        if slot >= n_shown:
            continue

        image = X_test[slot]
        true_label = class_names[int(y_test[slot])]
        predicted_label = class_names[int(predicted_indices[slot])]

        # Min-max scale pixel values to [0, 1] for display; a constant image has
        # zero range and is shown as all zeros instead of dividing by zero.
        lowest = image.min()
        value_range = image.max() - lowest
        if value_range > 0:
            image = (image - lowest) / value_range
        else:
            image = np.zeros_like(image)

        ax.imshow(image)
        ax.set_title(f"True Label: {true_label}\nPredicted Label: {predicted_label}")

    plt.tight_layout()
    plt.show()

# Show a grid of 5 rows x 3 test images with their true and predicted classes.
visualize_accuracy(
    model=vit_model,
    X_test=X_test,
    y_test=y_test,
    class_names=class_names,
    num_examples_per_row=3,
    num_rows=5,
)


In [None]:
from sklearn.metrics import classification_report

# Predicted class of each test sample = index of its largest model output.
y_pred = vit_model.predict(X_test).argmax(axis=1)

# Text report comparing the predictions with the true labels, with one row per
# class named after `class_names`.
class_report = classification_report(y_test, y_pred, target_names=class_names)

print("Classification Report:", class_report, sep="\n")
