### **SVM**


In [None]:
class SVMInitiate():
    """
    Encapsulate the entire SVM pipeline, including data preprocessing,
    feature extraction, hyper parameter tuning, training, and evaluation

    This class is a template: `_get_features` and `_save_model` only raise
    NotImplementedError here and are meant to be overridden by subclasses.
    """

    def __init__(self, params=None, input_size=(64,64), max_iter=-1, augmentation=False, balancing=False, filepath=None):
        """
        Params:
          - params: Dictionary containing parameters to be optimized using grid search
                    (a fresh empty dictionary is used when omitted)
          - input_size: Tuple indicating the desired input image size for resizing
          - max_iter: Maximum number of iterations for SVM training
          - augmentation: Boolean indicating whether to apply data augmentation
          - balancing: Falsy to skip class balancing. When truthy it is invoked as
                       `balancing(X, y)` inside `_preprocess` and must return the
                       resampled `(X, y)`, so it has to be a callable (a bare True fails)
          - filepath: String indicating the model filename to be saved
        """

        # A literal `{}` default would be one dict shared by every instance
        # created without `params`; build a new one per instance instead.
        self.params = {} if params is None else params
        self.input_size = input_size
        self.augmentation = augmentation
        self.balancing = balancing
        self.max_iter = max_iter
        self.filepath = filepath


    def _get_data(self, path):
        """
        Import data

        Params:
          - path: String indicating the path from which to import the data (train, test)

        Returns:
          - Whatever `_import_data(path)` returns (unpacked as values and labels by callers)
        """

        return _import_data(path)


    def _preprocess(self, X, y, equalizehist=False, augmentation=False, balancing=False):
        """
        Preprocess input data

        Params:
          - X: Input data
          - y: Labels corresponding to input data
          - equalizehist: Boolean indicating whether to apply histogram equalization
          - augmentation: Boolean indicating whether to apply data augmentation
          - balancing: Boolean indicating whether to apply class balancing

        Returns:
          - Array of preprocessed input data and labels
        """

        # Resize input data, then convert images and labels to arrays
        X = _resize(X, self.input_size)
        X, y = _convert_to_array(X, y)

        # Apply histogram equalization, if requested
        if equalizehist:
            X = _equalizehist(X)

        # Apply balancing, if requested
        if balancing:
            is_majority = (y == 1)

            # Downsample class 1 to 500 samples without replacement.
            # NOTE(review): presumably this fails when class 1 has fewer than
            # 500 samples, because replace=False - confirm against the dataset.
            X_major, y_major = resample(X[is_majority], y[is_majority], replace=False, n_samples=500, random_state=42)

            # Recombine the reduced class 1 with all remaining classes
            X_combined = np.concatenate([X_major, X[~is_majority]])
            y_combined = np.concatenate([y_major, y[~is_majority]])

            # The balancing strategy supplied to the constructor does the upsampling
            X, y = self.balancing(X_combined, y_combined)

            # Shuffle input data and labels, preventing ordering bias
            X, y = _shuffle(X, y)

        # Apply augmentation, if requested
        if augmentation:
            X, y = _gen_augmented_data(X, y)
            X, y = _shuffle(X, y)

        return X, y


    def _get_features(self, X=None, y=None, **kwargs):
        """
        Extract features (hook to be overridden by subclasses)

        The signature accepts the arguments that `_main`, `_evaluate_frame` and
        `_evaluate_test_set` pass (`X`, `y`, `types`, `fn`), so a missing override
        is reported clearly instead of as an argument-count error.

        Params:
          - X: Input images
          - y: Labels corresponding to the input images
          - **kwargs: Additional keyword arguments (e.g. types, fn)

        Raises:
          - NotImplementedError: always, in this base class
        """

        raise NotImplementedError(f"{type(self).__name__} must implement _get_features")


    def _save_model(self, model, fn):
        """
        Save the best model (hook to be overridden by subclasses)

        Params:
          - model: Trained SVM model to be saved
          - fn: Filename for the saved model

        Raises:
          - NotImplementedError: always, in this base class
        """

        raise NotImplementedError(f"{type(self).__name__} must implement _save_model")


    def _main(self):
        """
        Encapsulate the training process, from importing data to evaluation on train and validation set

        Returns:
          - None
        """

        # Use current timestamp as model filename
        model_fn = _get_timestamp()

        # Import input data.
        # NOTE(review): this calls the module-level `_get_data`, not `self._get_data`
        # (which does not accept `split_size`).
        self.X_train, self.X_validate, self.y_train, self.y_validate = _get_data('train', split_size=0.2)

        # Preprocess train data
        self.X_train, self.y_train = self._preprocess(self.X_train, self.y_train, equalizehist=True, augmentation=self.augmentation, balancing=self.balancing)

        # Feature extraction using either SIFT or HOG
        self.X_train, self.y_train = self._get_features(self.X_train, self.y_train, types='train', fn=model_fn)

        # Later 'test' feature extraction reads self.filepath, so set it before validating
        self.filepath = model_fn

        # Preprocess validation data (no equalization, augmentation or balancing)
        self.X_validate, self.y_validate = self._preprocess(self.X_validate, self.y_validate)

        # Feature extraction using either SIFT or HOG
        self.X_validate, self.y_validate = self._get_features(self.X_validate, self.y_validate, types='test')

        # Initialize SVM model
        classifier = svm.SVC(class_weight='balanced', random_state=42, tol=1e-3, max_iter=self.max_iter)

        # Perform parameter tuning using grid search cross validation
        grid_search = _gridsearchcv(classifier, self.X_train, self.y_train, self.params)

        # Get best model
        best_classifier = grid_search.best_estimator_

        # Save best model to drive, and only then report it as saved
        self._save_model(best_classifier, model_fn)
        print(f"Model saved to {model_fn}")

        # Evaluate on train set
        y_pred = best_classifier.predict(self.X_train)
        _gen_performance_metrics(self.y_train, y_pred)

        # Evaluate on validation set
        y_pred = best_classifier.predict(self.X_validate)
        _gen_performance_metrics(self.y_validate, y_pred)


    def _evaluate_frame(self, X, y, model):
        """
        Evaluate subset of test images, including preprocessing, feature extraction, and model prediction

        Params:
          - X: Input data
          - y: True label corresponding to the input data
          - model: Trained model for prediction

        Returns:
          - True labels, predicted labels, and None (placeholder for additional results)
        """

        # Preprocess test data
        X, y = self._preprocess(X, y)

        # Feature extraction either SIFT or HOG
        X, y = self._get_features(X, y, types='test')

        # Get predicted results
        y_pred = model.predict(X)

        return y, y_pred, None


    def _evaluate_test_set(self, model, label_true=None, label_pred=None):
        """
        Evaluate the entire test data

        Params:
          - model: Trained model for prediction
          - label_true: True label, to visualize image with this true label
          - label_pred: Predicted label, to visualize image with this predicted label

        Returns:
          - None
        """

        # Import test data
        X_test, y_test = self._get_data('test')

        # Preprocess test data
        X_test, y_test = self._preprocess(X_test, y_test)

        # Feature extraction using either SIFT or HOG
        X_test, y_test = self._get_features(X_test, y_test, types='test')

        # Get predicted results
        y_pred = model.predict(X_test)

        # If label_true is not provided, show classification report and confusion matrix
        if label_true is None:
            _gen_performance_metrics(y_test, y_pred)
            _confusion_matrix(y_test, y_pred)

        # Otherwise visualize images with the given true and predicted labels
        else:
            _plot_images_with_labels(X_test, y_test, y_pred, label_true, label_pred)


### **HOG**

In [None]:
class SVMWithHOG(SVMInitiate):
    """
    SVM pipeline whose feature extraction step is HOG
    (Histogram of Oriented Gradients); everything else comes from SVMInitiate.
    Code derived from Lab tutorial 05
    """

    def _get_features(self, X, y, **kwargs):
        """
        Compute one HOG descriptor per image

        Params:
          - X: Input images (indexed as X[0] .. X[len(X) - 1])
          - y: Labels corresponding to the input images, returned untouched
          - **kwargs: Accepted and ignored (e.g. types, fn passed by the base class)

        Returns:
          - Array of HOG feature vectors, and the labels
        """

        # One descriptor per image, computed over the colour image (channels on axis 2)
        descriptors = [
            hog(X[idx],
                orientations=9,
                pixels_per_cell=(8, 8),
                cells_per_block=(2, 2),
                visualize=False,
                channel_axis=2,
                )
            for idx in range(len(X))
        ]

        return np.array(descriptors), y


    def _save_model(self, model, fn):
        """
        Persist the best model under the SVM-HOG folder

        Params:
          - model: Trained SVM model to be saved
          - fn: Filename (without extension) for the saved model

        Returns:
          - None
        """

        target_dir = f'{MODEL_PATH}/SVM-HOG'

        # Make sure the destination folder exists before writing
        os.makedirs(target_dir, exist_ok=True)

        # Serialize the model as <fn>.pkl
        joblib.dump(model, f'{target_dir}/{fn}.pkl')

### **SIFT**

In [None]:
class SVMWithSIFT(SVMInitiate):
    """
    Inherit from SVMInitiate class to replace feature extraction using SIFT
    descriptors encoded as a Bag of Visual Words (BoVW)
    Code obtained from Lab tutorial 06
    """

    def _get_features(self, X, y, types=None, fn=None):
        """
        Extract features using Scale-Invariant Feature Transform (SIFT)

        With types='train' a KMeans codebook is fitted on the training descriptors and
        saved to `{MODEL_PATH}/SVM-SIFT/{fn}/kmeans.pkl`. With types='test' the codebook
        is loaded from `{MODEL_PATH}/{self.filepath}/kmeans.pkl`; `self.filepath` is
        prefixed with 'SVM-SIFT/' first if it does not already contain it.
        Any other value of `types` returns X and y unchanged.

        Images for which SIFT yields no descriptors are dropped, together with
        their labels.

        Params:
          - X: Input images
          - y: Labels corresponding to the input images
          - types: Type of data (train, test)
          - fn: Filename for the saved model (used for types='train')

        Returns:
          - The extracted SIFT-BoVW features and labels

        Raises:
          - ValueError: if no image yields any descriptor, or if types='test'
            is requested while self.filepath is None
        """

        cluster = 30

        def _bovw_histogram(des, kmeans):
            """
            Turn the descriptors of one image into a normalized histogram of codewords
            """

            hist = np.zeros(cluster)

            # Each descriptor contributes 1/len(des) to the bin of its nearest codeword
            for j in kmeans.predict(des):
                hist[j] = hist[j] + (1 / len(des))

            return hist

        def _sift_bovw_features(X, y):
            """
            Generate SIFT - Bag of Visual Words (BoVW) representation feature descriptors for training data
            """

            # Convert to grey scale; augmented data is converted to int first
            if self.augmentation:
                X = _rgb2gray(_convert_to_int(X))
            else:
                X = _rgb2gray(X)

            # Initialise SIFT
            sift = cv2.SIFT_create()

            des_list = []
            y_list = []

            for i in range(len(X)):

                # Identify keypoints and descriptors
                _, des = sift.detectAndCompute(X[i], None)

                # Keep descriptors and the matching label only when something was found
                if des is not None:
                    des_list.append(des)
                    y_list.append(y[i])

            if not des_list:
                raise ValueError("SIFT found no descriptors in any training image")

            # Stack all descriptors into a single array
            descriptors = np.vstack(des_list)

            # Generate BoVW representation using MiniBatchKMeans for faster computation and lower memory usage
            batch_size = descriptors.shape[0] // 4
            kmeans = MiniBatchKMeans(n_clusters=cluster, batch_size=batch_size, n_init='auto').fit(descriptors)

            # Convert descriptors into histograms of codewords, one row per image
            hist_array = np.vstack([_bovw_histogram(des, kmeans) for des in des_list])

            return hist_array, y_list, kmeans


        def _sift_bovw_features_test(X, y, kmeans):
            """
            Generate SIFT - Bag of Visual Words (BoVW) representation feature descriptors for test data
            """

            # Convert to grey scale
            X = _rgb2gray(X)

            # One detector is enough; it is reused for every image
            sift = cv2.SIFT_create()

            hist_list = []
            y_kept = []

            for i in range(len(X)):

                # Identify keypoints and descriptors
                _, des = sift.detectAndCompute(X[i], None)

                # Skip images with no descriptors, dropping their labels as well
                if des is None:
                    continue

                hist_list.append(_bovw_histogram(des, kmeans))
                y_kept.append(y[i])

            if not hist_list:
                raise ValueError("SIFT found no descriptors in any test image")

            # Stack histograms into an array
            hist_array = np.vstack(hist_list)

            return hist_array, y_kept

        if types == 'train':
            # Extract features for training data
            X, y, kmeans = _sift_bovw_features(X, y)

            # Create directory to save model
            os.makedirs(f'{MODEL_PATH}/SVM-SIFT/{fn}/', exist_ok=True)

            # Save KMeans model
            joblib.dump(kmeans, f'{MODEL_PATH}/SVM-SIFT/{fn}/kmeans.pkl')

        elif types == 'test':
            if self.filepath is None:
                raise ValueError("filepath must be set to locate the saved KMeans model")

            # Normalize the stored path so it always points inside SVM-SIFT
            if 'SVM-SIFT' not in self.filepath:
                self.filepath = f"SVM-SIFT/{self.filepath}"

            # Load KMeans model
            kmeans = joblib.load(f'{MODEL_PATH}/{self.filepath}/kmeans.pkl')

            # Extract features for test data
            X, y = _sift_bovw_features_test(X, y, kmeans)

        return X, y


    def _save_model(self, model, fn):
        """
        Save the best model

        Params:
          - model: Trained SVM model to be saved
          - fn: Filename for the saved model (used as the folder name)

        Returns:
          - None
        """

        # Ensure the target folder exists even when training features were not
        # extracted in this session
        os.makedirs(f'{MODEL_PATH}/SVM-SIFT/{fn}', exist_ok=True)

        # Save model to drive
        joblib.dump(model, f'{MODEL_PATH}/SVM-SIFT/{fn}/model.pkl')

### **CNN - Custom**

In [None]:
class customCNN():
    """
    Encapsulate entire process of CNN pipeline, including data preprocessing,
    CNN structure, hyper parameter tuning, training, and evaluation
    """

    def __init__(self, params=None, input_size=(64,64), augmentation=False,
                 class_weight=False, sr=False,
                 epochs=100, batch_size=32):
        """
        Initialize the customCNN object

        Params:
          - params: Dictionary of hyperparameters for the CNN model, searched with
                    randomized search (a fresh empty dictionary is used when omitted)
          - input_size: Tuple representing the desired input image size
          - augmentation: Boolean indicating whether data augmentation should be applied
          - class_weight: Boolean indicating whether class weights should be used during training
          - sr: Boolean indicating whether to apply super resolution
          - epochs: Number of maximum epochs
          - batch_size: Batch size
        """

        # A literal `{}` default would be one dict shared by every instance
        # created without `params`; build a new one per instance instead.
        self.params = {} if params is None else params
        self.input_size = input_size
        self.augmentation = augmentation
        self.epochs = epochs
        self.batch_size = batch_size
        self.class_weight = class_weight
        self.sr = sr


    def _preprocess_data(self, X, y, augmentation=False):
        """
        Preprocess input data

        Params:
          - X: Input images
          - y: Labels corresponding to the input images
          - augmentation: Boolean indicating whether data augmentation should be applied

        Returns:
          - Preprocessed input data and one-hot encoded labels (3 classes)
        """

        # Resize and convert to array
        X = np.array(_resize(X, self.input_size))
        y = np.array(y)

        # Apply histogram equalization
        X = _equalizehist(X)

        # Apply augmentation, if requested
        if augmentation:
            X, y = _gen_augmented_data(X, y)
            X, y = _shuffle(X, y)

        # Normalize pixel values to the range 0 and 1
        X = X / 255.0

        # Convert class labels to one-hot encoded vectors
        y = to_categorical(y, 3)

        return X, y


    def _callback(self):
        """
        Define callback function for training

        Each call derives a new checkpoint path from the current timestamp.

        Returns:
          - List of callbacks including Model checkpoint, Early stopping, and Learning rate scheduler
        """

        # Model filepath
        model_fp = f"{MODEL_PATH}/Custom-CNN/{_get_timestamp()}.h5"
        print(f"Model saved to {model_fp}")

        # Define model checkpoint to save the best model based on validation loss
        check_point = ModelCheckpoint(filepath=model_fp, monitor="val_loss", mode="min", save_best_only=True, verbose=1)

        # Define early stopping to stop training if validation loss doesn't improve after 15 epochs
        early_stop = EarlyStopping(monitor='val_loss', patience=15, mode='min', verbose=1)

        # Define learning rate scheduler to adjust learning rate if validation loss doesn't improve after 5 epochs
        lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-5)

        return [check_point, early_stop, lr_scheduler]


    def _train(self, **kwargs):
        """
        Train the CNN model

        Runs a randomized hyperparameter search, then fits the best estimator
        again with the validation set attached.

        Params:
          - kwargs: Additional keyword arguments forwarded to `_create_keras_classifier`

        Returns:
          - The result of the final `classifier.fit(...)` call, and the best classifier
        """

        # Perform randomized search with cross validation
        grid = RandomizedSearchCV(estimator=self._create_keras_classifier(callbacks=self._callback(), **kwargs),
                                  param_distributions=self.params, n_jobs=1, cv=5, verbose=1, scoring="f1_macro")

        # Class weights are passed to both fits only when enabled; self.weight is set by `_main`
        fit_params = {'class_weight': self.weight} if self.class_weight else {}

        # Fit the search and obtain the best model
        grid_result = grid.fit(self.X_train, self.y_train, **fit_params)
        classifier = grid_result.best_estimator_

        print(f"Best Estimator Parameters: {classifier.get_params()}")

        # summary() prints the table itself; wrapping it in print() also printed "None"
        classifier.model_.summary()

        # Fit the best estimator, this time monitoring the validation set
        history = classifier.fit(self.X_train,
                                 self.y_train,
                                 validation_data=(self.X_validate, self.y_validate),
                                 epochs=self.epochs,
                                 batch_size=self.batch_size,
                                 **fit_params)

        return history, classifier


    def _main(self, **kwargs):
        """
        Execute the entire training process, including importing data,
        training, and evaluation

        Params:
          - kwargs: Additional keyword arguments forwarded to `_train`

        Returns:
          - None
        """

        # Import data
        self.X_train, self.X_validate, self.y_train, self.y_validate = _get_data('train', split_size=0.2)

        # Apply super resolution, if requested
        if self.sr:
            # Unzip super resolution images
            if not os.path.exists('esrgan_train_imgs.pkl'):
                _unzip_data(filename='esrgan_train_imgs.zip')

            # Load super resolution images.
            # NOTE(review): unpickling executes arbitrary code; only load archives
            # from a trusted source.
            with open('esrgan_train_imgs.pkl', 'rb') as f:
                data = pickle.load(f)

            # Replace the training split with the super resolution images and labels
            self.X_train = data['X']
            self.y_train = data['y']

        # Calculate class weights using class frequency (before labels are one-hot encoded)
        self.weight = dict(enumerate(compute_class_weight('balanced', classes=np.unique(self.y_train), y=self.y_train)))

        # Preprocess train data
        self.X_train, self.y_train = self._preprocess_data(self.X_train, self.y_train, augmentation=self.augmentation)

        # Preprocess validation data
        self.X_validate, self.y_validate = self._preprocess_data(self.X_validate, self.y_validate)

        # Get trained history, and trained model
        history, classifier = self._train(**kwargs)

        # Evaluate model's performance on train and validation set
        _evaluate(classifier, history.history_, self.X_train, self.y_train, self.X_validate.astype('float32'), self.y_validate)


    def _evaluate_test_set(self, model, label_true=None, label_pred=None):
        """
        Evaluate the entire test set

        Params:
          - model: Trained model to be evaluated
          - label_true: True label, to visualize image with this true label
          - label_pred: Predicted label, to visualize image with this predicted label

        Returns:
          - None
        """

        # Import and preprocess test set
        self.X_test, self.y_test = _get_data('test')
        self.X_test, self.y_test = self._preprocess_data(self.X_test, self.y_test)

        # Predict test data, and convert one-hot labels back to class indices
        y_pred_test = np.argmax(model.predict(self.X_test), axis=1)
        self.y_test = np.argmax(self.y_test, axis=1)

        # If label_true is not provided, show classification report and confusion matrix
        if label_true is None:
            _gen_performance_metrics(self.y_test, y_pred_test)
            _confusion_matrix(self.y_test, y_pred_test)

        # Otherwise visualize images with the given true and predicted labels
        else:
            _plot_images_with_labels(self.X_test, self.y_test, y_pred_test, label_true, label_pred)


    def _evaluate_frame(self, X, y, model):
        """
        Evaluate subset of test images, including preprocessing and model prediction

        Params:
          - X: Input test image
          - y: True label of the test image (returned as given)
          - model: Trained model for prediction

        Returns:
          - True label, predicted label, and confidence scores
        """

        # Preprocess test image; labels are not needed here, so an empty list is passed
        X, _ = self._preprocess_data(X, [])

        # Get predicted class probabilities
        y_probs = model.predict(X, verbose=0)

        # Get the predicted labels with the highest probability
        y_pred = np.argmax(y_probs, axis=1)

        # Get the confidence scores
        confidence_scores = np.max(y_probs, axis=1)

        return y, y_pred, confidence_scores


    def _create_keras_classifier(self, **kwargs):
        """
        Create a Keras classifier model

        Params:
          - kwargs: Additional keyword arguments forwarded to KerasClassifier

        Returns:
          - KerasClassifier object
        """

        def _create_cnn_model(learning_rate, dense_units, dropout_rates, weight_decay):
            """
            Create a CNN model

            Params:
              - learning_rate: Learning rate for the optimizer
              - dense_units: Hidden neuron size
              - dropout_rates: Dropout rate for regularization
              - weight_decay: Weight decay for regularization

            Returns:
              - Compiled Keras model
            """

            model = Sequential()

            # First convolution layer
            model.add(Conv2D(32, (5, 5), activation='relu', kernel_initializer='he_uniform', input_shape=self.input_size + (3,)))

            # Max pooling layer
            model.add(MaxPooling2D((2, 2)))

            # Drop out
            model.add(Dropout(dropout_rates))

            # Second convolution layer
            model.add(Conv2D(32, (5, 5), activation='relu', kernel_initializer='he_uniform'))

            # Max pooling layer
            model.add(MaxPooling2D((2, 2)))

            # Drop out
            model.add(Dropout(dropout_rates))

            # Flatten layer
            model.add(Flatten())

            # Fully connected layer
            model.add(Dense(dense_units, activation='relu', kernel_initializer='he_uniform'))

            # Drop out
            model.add(Dropout(dropout_rates))

            # Output layer with softmax activation
            model.add(Dense(3, activation='softmax'))

            # Compile model with the specified optimizer, learning rate, and weight decay
            model.compile(Adam(learning_rate=learning_rate, weight_decay=weight_decay), loss='categorical_crossentropy', metrics=['accuracy'])

            return model

        return KerasClassifier(build_fn=_create_cnn_model, **kwargs)

time: 1.97 ms (started: 2024-03-30 01:19:36 +00:00)


### **CNN - Transfer learning using pre-trained model**

In [None]:
class pretrainedCNN():
    """
    Transfer-learning CNN pipeline: data preprocessing, hyper parameter
    tuning on top of a pretrained base model, training, and evaluation.
    """

    def __init__(self, input_size=(224, 224), augmentation=False, sr=False,
                 base_model=MobileNetV2, preprocess=mobilenet_preprocess,
                 finetune=False,
                 early_stop_patience=10, batch_size=32, epochs=200,
                 model_path=None, class_weight=False):
        """
        Store the configuration of the pipeline

        Params:
          - input_size: Tuple representing the desired input image size
          - augmentation: Whether to apply data augmentation to the training data
          - sr: Whether to replace the training data with super-resolution images
          - base_model: Pretrained model constructor passed to `_build_model`
          - preprocess: Function applied to the image array in `_preprocess_data`
          - finetune: Whether the base model is trainable
          - early_stop_patience: Patience (epochs) of the early stopping callback
          - batch_size: Batch size for the final training run
          - epochs: Number of epochs for the final training run
          - model_path: Path to save the trained model (only stored here)
          - class_weight: Whether to apply class weighting during training
        """

        # Data handling
        self.input_size = input_size
        self.augmentation = augmentation
        self.sr = sr
        self.preprocess = preprocess

        # Model definition
        self.base_model = base_model
        self.finetune = finetune

        # Training schedule
        self.early_stop_patience = early_stop_patience
        self.batch_size = batch_size
        self.epochs = epochs
        self.class_weight = class_weight
        self.model_path = model_path


    def _preprocess_data(self, X, y, augmentation=False, **kwargs):
        """
        Resize, optionally augment, and encode the data for the network

        Params:
          - X: Input data
          - y: Target labels
          - augmentation: Whether to apply data augmentation
          - **kwargs: Accepted and ignored

        Returns:
          - Preprocessed input data and one-hot encoded labels (3 classes)
        """

        # Bring every image to the configured size and work on arrays
        images = np.array(_resize(X, self.input_size))
        labels = np.array(y)

        # Optional augmentation
        if augmentation:
            images, labels = _gen_augmented_data(images, labels)

        # Model-specific input preprocessing, then one-hot encoding of the labels
        return self.preprocess(images), to_categorical(labels, 3)


    def _train(self, **kwargs):
        """
        Search hyperparameters, then train the best model found

        Params:
          - kwargs: Additional arguments (not used here)

        Returns:
            - Training history and trained model
        """

        # Model builder with everything but the hyperparameter object bound
        model_builder = partial(_build_model, base_model=self.base_model, input_size=self.input_size, finetune=self.finetune)

        # Random search over the space declared in `_build_model`
        tuner = RandomSearch(model_builder,
                             objective='val_loss',
                             max_trials=10,
                             executions_per_trial=1,
                             directory='tmp/',
                             project_name='pretrainedCNN')

        validation = (self.X_validate, self.y_validate)

        # Class weights are only handed to the fits when enabled
        weighting = {'class_weight': self.weight} if self.class_weight else {}

        # Short hyperparameter search (5 epochs per trial)
        tuner.search(self.X_train, self.y_train, epochs=5, validation_data=validation, callbacks=self._callback(), **weighting)

        # Pick the winner of the search and report its hyperparameters
        classifier = tuner.get_best_models(num_models=1)[0]
        best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(f"Best Hyperparameters: {best_hyperparameters.values}")

        # Full training run of the best model, with a fresh set of callbacks
        history = classifier.fit(self.X_train,
                                 self.y_train,
                                 validation_data=validation,
                                 epochs=self.epochs,
                                 batch_size=self.batch_size,
                                 callbacks=self._callback(),
                                 **weighting)

        return history, classifier


    def _evaluate_test_set(self, model, label_true=None, label_pred=None):
        """
        Evaluate the entire test set

        Params:
          - model: Trained model
          - label_true: True label, to visualize image with this true label
          - label_pred: Predicted label, to visualize image with this predicted label

        Returns:
          - None
        """

        # Import the test set and keep the raw images for plotting
        X_test, y_test = _get_data('test')
        X_ready, y_onehot = self._preprocess_data(X_test, y_test)

        # Class indices for predictions and for the one-hot encoded truth
        y_pred_test = np.argmax(model.predict(X_ready), axis=1)
        y_test = np.argmax(y_onehot, axis=1)

        # With a requested true label, show the matching images and stop
        if label_true is not None:
            _plot_images_with_labels(X_test, y_test, y_pred_test, label_true, label_pred)
            return

        # Otherwise report classification metrics and the confusion matrix
        _gen_performance_metrics(y_test, y_pred_test)
        _confusion_matrix(y_test, y_pred_test)


    def _evaluate_frame(self, X, y, model):
        """
        Predict labels and confidences for a subset of test images

        Params:
          - X: Input test image
          - y: True label of the test image (returned as given)
          - model: Trained model for prediction

        Returns:
          - True label, predicted label, and confidence scores
        """

        # Labels are irrelevant for prediction, so an empty list is passed
        X, _ = self._preprocess_data(X, [])

        # Class probabilities for every image
        probabilities = model.predict(X, verbose=0)

        # Most likely class, and its probability as the confidence score
        predicted = np.argmax(probabilities, axis=1)
        confidence_scores = np.max(probabilities, axis=1)

        return y, predicted, confidence_scores


    def _callback(self):
        """
        Build the training callbacks

        Each call derives a new checkpoint path from the current timestamp.

        Returns:
          - List of callbacks including Model checkpoint, Early stopping, and Learning rate scheduler
        """

        # Checkpoint location
        model_fp = f"{MODEL_PATH}/Pretrained-CNN/{_get_timestamp()}.h5"
        print(f"Model saved to {model_fp}")

        return [
            # Keep only the weights with the lowest validation loss
            ModelCheckpoint(filepath=model_fp, monitor="val_loss", mode="min", save_best_only=True, verbose=1),
            # Stop when validation loss has not improved for the configured patience
            EarlyStopping(monitor='val_loss', patience=self.early_stop_patience, mode='min', verbose=1),
            # Reduce the learning rate after 5 epochs without improvement
            ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-5),
        ]


    def _main(self, **kwargs):
        """
        Entire training process, from importing data to evaluation on train and validation set

        Params:
        - kwargs: Additional arguments forwarded to `_train`
        """

        # Import data
        self.X_train, self.X_validate, self.y_train, self.y_validate = _get_data('train', split_size=0.2)

        # Swap in the super resolution training images, if requested
        if self.sr:
            sr_archive = 'esrgan_train_imgs.pkl'

            # Extract the archive on first use
            if not os.path.exists(sr_archive):
                _unzip_data(filename='esrgan_train_imgs.zip')

            with open(sr_archive, 'rb') as handle:
                sr_data = pickle.load(handle)

            self.X_train = sr_data['X']
            self.y_train = sr_data['y']

        # Class weights from class frequency (labels are still class indices here)
        balanced = compute_class_weight('balanced', classes=np.unique(self.y_train), y=self.y_train)
        self.weight = dict(enumerate(balanced))

        # Preprocess train data (with optional augmentation) and validation data
        self.X_train, self.y_train = self._preprocess_data(self.X_train, self.y_train, augmentation=self.augmentation)
        self.X_validate, self.y_validate = self._preprocess_data(self.X_validate, self.y_validate)

        # Tune and train
        history, classifier = self._train(**kwargs)

        # Evaluate model's performance on train and validation set
        _evaluate(classifier, history.history, self.X_train, self.y_train, self.X_validate, self.y_validate)

def _build_model(hp, base_model, input_size, finetune):
    """
    Build and compile a transfer-learning model for hyperparameter tuning

    Params:
      - hp: Hyperparameter tuning object (its `Choice` method is used)
      - base_model: Pretrained base model constructor
      - input_size: Tuple specifying the input size of the model (channels are appended)
      - finetune: Boolean indicating whether the base model is trainable

    Returns:
      - Compiled Keras model
    """

    # Pretrained backbone without its classification head, globally average-pooled
    backbone = base_model(include_top=False,
                          weights="imagenet",
                          input_tensor=K.Input(shape=input_size + (3,)),
                          pooling='avg')

    # Freeze or unfreeze the backbone
    backbone.trainable = finetune

    # Three-way softmax head on top of the pooled backbone output
    predictions = Dense(3, activation='softmax')(backbone.output)
    model = Model(inputs=backbone.input, outputs=predictions)

    # Search space: optimizer, learning rate, and weight decay (declared in this order)
    optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
    learning_rate = hp.Choice('learning_rate', [1e-5, 1e-4, 1e-3, 1e-2, 1e-1])
    weight_decay = hp.Choice('weight_decay', [1e-5, 1e-4, 1e-3, 1e-2, 1e-1])

    # Resolve the optimizer class; anything other than 'adam' or 'sgd' means RMSprop
    optimizer_cls = Adam if optimizer_choice == 'adam' else SGD if optimizer_choice == 'sgd' else RMSprop
    optimizer = optimizer_cls(learning_rate=learning_rate, weight_decay=weight_decay)

    # Compile the model with the selected optimizer and loss function
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model