# Training Transfer Learning Model Template

## Details about implementation

This notebook provides a template for implementing a transfer learning model.

### Importing Libraries

In [None]:
import os
import math
import json
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

2025-01-28 06:02:27.785234: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738044147.966431   20444 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738044148.011604   20444 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Conv2D,
    MaxPool2D,
    Flatten,
    Dense,
    Dropout,
    GlobalAveragePooling2D,
    BatchNormalization,
    Activation,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import image_dataset_from_directory, load_img, img_to_array
from tensorflow.keras.applications import (
    InceptionV3,
    EfficientNetV2B3,
    Xception,
    MobileNetV2,
    MobileNetV3Large,
)
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [None]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay
)
from sklearn.utils.class_weight import compute_class_weight
from datetime import datetime

In [None]:
# Global Variables
# Identifier embedded in the checkpoint, saved-model and training-history file names.
MODEL_NAME = "tl_model_template"

# Width of the softmax output layer (one unit per class).
NUM_CLASSES = 38
# Side length of the square RGB model input, i.e. (IMAGE_SIZE, IMAGE_SIZE, 3).
IMAGE_SIZE = 224
# Used when deriving the per-epoch step counts; presumably also the batch size
# of the datasets -- TODO confirm once the dataset loading cell is filled in.
BATCH_SIZE = 32

# Captured once so every artifact written by this run shares the same timestamp.
now = datetime.now()

### Importing Dataset

In [None]:
# TODO: template placeholders -- each call still needs its dataset directory and
# loading options (image size, batch size, label mode, shuffling, ...).
training_set = image_dataset_from_directory()
validation_set = image_dataset_from_directory()
test_set = image_dataset_from_directory()
# No normalization is applied here: these names are plain aliases of the raw
# datasets, so later cells can already refer to the "normalized" variants.
normalized_training_set = training_set
normalized_validation_set = validation_set
normalized_test_set = test_set
# Likewise, no augmentation is applied here; this is an alias as well.
augmented_training_set = normalized_training_set

### Training Model

#### Building Model

In [None]:
# Load InceptionV3 pre-trained on ImageNet without its top (classification) layers
base_model = InceptionV3(
    weights="imagenet", include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)
)

# Fine-tuning: keep only the last 20 layers of the base model trainable and
# freeze every layer before them.
# (For pure feature extraction instead, set `base_model.trainable = False`
# and skip the loop below.)
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

In [None]:
# Classification head stacked on top of the base model's feature maps.
# Layers are listed in the order they are applied.
head_layers = [
    GlobalAveragePooling2D(),  # collapse the spatial dimensions
    BatchNormalization(),  # normalize the pooled features
    Dense(512),
    BatchNormalization(),
    Activation("relu"),
    Dropout(0.4),  # regularization
    Dense(
        256,
        activation="relu",
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    ),  # narrower, L2-regularized dense layer
    Dropout(0.3),  # lighter dropout before the output layer
]

x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Output layer: one softmax unit per class
predictions = Dense(NUM_CLASSES, activation="softmax")(x)

In [None]:
# Define the complete model: it maps the base model's input tensor to the
# softmax output of the custom head built above.
model = Model(inputs=base_model.input, outputs=predictions)

#### Setting Up Callbacks for Early Stopping and Model Checkpointing

In [None]:
# Define the callbacks

# The timestamp is formatted outside the f-string: reusing the enclosing quote
# character inside an f-string expression is a syntax error before Python 3.12.
run_timestamp = now.strftime("%Y_%m_%d_%I_%M_%S_%p")

# Save the model whenever validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    filepath=f"../models/checkpoints/{MODEL_NAME}_best_weights_{run_timestamp}.keras",
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
    mode="max",
)

# Stop once validation loss has not improved by at least 0.001 for 5 epochs,
# and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=5,
    verbose=1,
    mode="min",
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without validation-loss improvement,
# never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    verbose=1,
    min_lr=1e-6,
)

callbacks_list = [checkpoint, early_stopping, lr_scheduler]

#### Compiling Model

In [None]:
# Adam optimizer with a learning rate of 1e-4, tracking accuracy as the metric.
# NOTE(review): "categorical_crossentropy" presumably expects one-hot encoded
# labels; if the datasets yield integer labels, "sparse_categorical_crossentropy"
# would be the matching loss -- TODO confirm against the dataset loading cell.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

#### Training Model

In [None]:
# Steps per epoch and validation steps.
# len() of a batched tf.data dataset is its number of BATCHES, which is already
# the number of steps needed to see every sample once. Dividing it by BATCH_SIZE
# again would make each epoch cover only a fraction of the data.
# (Assumes the datasets are batched -- TODO confirm in the dataset loading cell.)
steps_per_epoch = len(training_set)
validation_steps = len(validation_set)

print("steps_per_epoch:", steps_per_epoch)
print("validation_steps:", validation_steps)

# Compute class weights to balance the dataset.
# The weights must be derived from the per-sample labels of the TRAINING data:
# the list of class names contains every class exactly once, so it always
# produces a weight of 1.0 for each class.
# Note: this makes one full pass over the training set to read its labels.
train_labels = np.concatenate(
    [batch_labels.numpy() for _images, batch_labels in training_set], axis=0
)
if train_labels.ndim > 1:  # one-hot encoded -> class indices
    train_labels = np.argmax(train_labels, axis=1)

# Only classes that actually occur can be weighted.
present_classes = np.unique(train_labels)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=train_labels,
)
# Map class index -> weight, using plain Python types for the keys and values.
class_weights_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

print("Class weights:", class_weights_dict)

steps_per_epoch: 69
validation_steps: 18


In [None]:
# Train the model
# Fits on the augmented training split and validates on the normalized
# validation split for at most 30 epochs; the callbacks (checkpointing, early
# stopping, learning-rate reduction) may stop or adjust training earlier.
# steps_per_epoch, validation_steps and class_weights_dict come from the
# previous cell. The returned History object is kept for saving and plotting.
training_history = model.fit(
    augmented_training_set,
    epochs=30,
    validation_data=normalized_validation_set,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    class_weight=class_weights_dict,
    callbacks=callbacks_list,
    verbose=1,
)

Epoch 1/30


I0000 00:00:1738044324.744625   20632 service.cc:148] XLA service 0xe5f2d90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1738044324.745422   20632 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1738044327.201833   20632 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/69[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20:55[0m 18s/step - accuracy: 0.0312 - loss: 7.5459

I0000 00:00:1738044337.339537   20632 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.0543 - loss: 7.3627
Epoch 1: val_accuracy improved from -inf to 0.21354, saving model to checkpoints/new_model_best_weights.keras
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 253ms/step - accuracy: 0.0547 - loss: 7.3592 - val_accuracy: 0.2135 - val_loss: 6.5993 - learning_rate: 1.0000e-04
Epoch 2/30
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.1583 - loss: 6.4739
Epoch 2: val_accuracy improved from 0.21354 to 0.39236, saving model to checkpoints/new_model_best_weights.keras
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 173ms/step - accuracy: 0.1587 - loss: 6.4718 - val_accuracy: 0.3924 - val_loss: 6.0680 - learning_rate: 1.0000e-04
Epoch 3/30
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - accuracy: 0.2795 - loss: 5.9267
Epoch 3: val_accuracy improved from 0.39236 to 0.49479, saving model

#### Evaluating Model

In [None]:
# Training set Accuracy
# NOTE(review): this evaluates on augmented_training_set; if the augmentation is
# random, the score is measured on augmented images -- confirm that is intended.
train_loss, train_acc = model.evaluate(augmented_training_set)
print("Training accuracy:", train_acc)

[1m2197/2197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 179ms/step - accuracy: 0.8342 - loss: 2.0235
Training accuracy: 0.8354790806770325


In [None]:
# Validation set Accuracy
# Evaluates on the full normalized validation split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
val_loss, val_acc = model.evaluate(normalized_validation_set)
print("Validation accuracy:", val_acc)

[1m550/550[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 52ms/step - accuracy: 0.8001 - loss: 2.1428
Validation accuracy: 0.7982586026191711


#### Saving Model

In [None]:
# Save the final model in the .keras format, named after MODEL_NAME and the
# run timestamp.
# NOTE(review): presumably the ../models directory must already exist -- confirm.
model.save(f'../models/{MODEL_NAME}_{now.strftime("%Y_%m_%d_%I_%M_%S_%p")}.keras')

In [None]:
# Save the training history
# The timestamp is formatted outside the f-string: reusing the enclosing quote
# character inside an f-string expression is a syntax error before Python 3.12.
history_timestamp = now.strftime("%Y_%m_%d_%I_%M_%S_%p")
history_path = (
    f"training_histories/training_history_{MODEL_NAME}_{history_timestamp}.json"
)

# open() does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(history_path), exist_ok=True)

# Metric values may be NumPy scalars, which json cannot serialize;
# convert everything to plain Python floats first.
serializable_history = {
    metric_name: [float(value) for value in metric_values]
    for metric_name, metric_values in training_history.history.items()
}

with open(history_path, "w") as f:
    json.dump(serializable_history, f)

#### Model Details

##### Model Architecture

In [None]:
# Show the layer-by-layer summary of the assembled model.
model.summary()

##### Model Performance Metrics

In [None]:
# Collect the ground-truth labels of the test split, batch by batch
label_batches = [labels.numpy() for _images, labels in test_set]
y_true = np.concatenate(label_batches, axis=0)

# One-hot encoded labels (2-D) are reduced to class indices; 1-D labels are kept
y_true = np.argmax(y_true, axis=1) if y_true.ndim > 1 else y_true

print(f"y_true shape: {y_true.shape}")

In [None]:
# Predict labels using the trained model.
# Labels and predictions are gathered in ONE pass over the dataset. Reading the
# labels in one iteration and calling model.predict() in another would pair
# predictions with the wrong labels whenever the dataset reshuffles between
# iterations, so y_true is (re)built here alongside y_pred to keep them aligned.
label_batches, probability_batches = [], []
for batch_images, batch_labels in normalized_test_set:
    label_batches.append(batch_labels.numpy())
    probability_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(label_batches, axis=0)
y_pred = np.concatenate(probability_batches, axis=0)

if y_true.ndim > 1:  # If it's one-hot encoded
    y_true = np.argmax(y_true, axis=1)

if y_pred.ndim > 1:  # If it's one-hot encoded or probabilities
    y_pred = np.argmax(y_pred, axis=1)

print(f"y_pred shape: {y_pred.shape}")

In [None]:
# Generate the classification report.
# `labels` pins the report to every known class index, so the rows always line
# up with the class names even when a class is missing from both the true and
# the predicted labels (otherwise the name count would not match and
# classification_report would raise).
report_class_names = test_set.class_names
report = classification_report(
    y_true,
    y_pred,
    labels=np.arange(len(report_class_names)),
    target_names=report_class_names,
)

print("Classification Report:")
print(report)

##### Model Confusion Matrix

In [None]:
def plot_confusion_matrix_heatmap(
    model, test_set, class_names, title="Plant Disease Prediction Confusion Matrix"
):
    """
    Plots the confusion matrix as a heatmap for a given model and test dataset.
    Uses human-readable class names for display.

    Labels and predictions are collected in a single pass over `test_set`, so
    they stay aligned even if the dataset yields its batches in a different
    order on every iteration.

    Parameters:
        model: Trained model exposing `predict_on_batch`.
        test_set: Iterable of (images, labels) batches; labels may be class
            indices or one-hot encoded.
        class_names: List of class names, ordered by class index.
        title: Title drawn above the heatmap.

    Raises:
        ValueError: If `test_set` yields no batches.
    """
    # Gather true labels and predicted probabilities batch by batch
    label_batches, probability_batches = [], []
    for batch_images, batch_labels in test_set:
        label_batches.append(np.asarray(batch_labels))
        probability_batches.append(np.asarray(model.predict_on_batch(batch_images)))

    if not label_batches:
        raise ValueError("test_set yielded no batches; nothing to plot.")

    true_labels = np.concatenate(label_batches, axis=0)
    predicted_probs = np.concatenate(probability_batches, axis=0)

    # If true_labels are one-hot encoded, convert them to class indices
    if true_labels.ndim > 1:
        true_labels = np.argmax(true_labels, axis=1)

    # Convert predicted probabilities to class indices
    predicted_labels = np.argmax(predicted_probs, axis=1)

    # Fix the label set so the matrix always has one row/column per class name,
    # even for classes that never occur in this dataset
    cm = confusion_matrix(
        true_labels, predicted_labels, labels=np.arange(len(class_names))
    )

    # Plot confusion matrix as a heatmap
    plt.figure(figsize=(40, 40))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",  # show counts as whole numbers rather than in ".2g" notation
        annot_kws={"size": 10},
        cmap="magma",
        xticklabels=class_names,
        yticklabels=class_names,
    )

    plt.xlabel("Predicted Class", fontsize=20)
    plt.ylabel("Actual Class", fontsize=20)
    plt.title(title, fontsize=25)
    plt.show()

In [None]:
# Draw the confusion matrix for the normalized test split, labelling both axes
# with the class names taken from test_set.
plot_confusion_matrix_heatmap(model, normalized_test_set, test_set.class_names)

##### Training vs. Validation Accuracy & Loss Graphs

In [None]:
def plot_training_history(history):
    """
    Draws the training/validation accuracy and loss curves side by side.

    Parameters:
        history: The History object returned by model.fit(); its `.history`
            dict must contain "accuracy", "val_accuracy", "loss" and "val_loss".
    """
    recorded = history.history

    # One entry per panel:
    # (training curve, validation curve, training legend label,
    #  validation legend label, panel title, y-axis label)
    panels = [
        (
            recorded["accuracy"],
            recorded["val_accuracy"],
            "Training Accuracy",
            "Validation Accuracy",
            "Training and Validation Accuracy",
            "Accuracy",
        ),
        (
            recorded["loss"],
            recorded["val_loss"],
            "Training Loss",
            "Validation Loss",
            "Training and Validation Loss",
            "Loss",
        ),
    ]

    # Epochs are numbered from 1, one per recorded accuracy value
    epoch_numbers = range(1, len(panels[0][0]) + 1)

    plt.figure(figsize=(12, 5))
    for position, panel in enumerate(panels, start=1):
        train_curve, valid_curve, train_label, valid_label, heading, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epoch_numbers, train_curve, label=train_label)
        plt.plot(epoch_numbers, valid_curve, label=valid_label)
        plt.title(heading)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Plot the accuracy and loss curves recorded by model.fit() for this run
plot_training_history(training_history)

## Conclusion