In [19]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern, hog
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.preprocessing.image import img_to_array, load_img


from tensorflow.keras.preprocessing import image
import numpy as np
import tensorflow as tf
import keras
import pandas as pd
import cv2
from PIL import Image
from tqdm import tqdm

from skimage.io import imread
from skimage.transform import rescale
from skimage.feature import hog
from skimage import exposure
from skimage import color

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers
from keras.models import Sequential, Model, load_model
from tensorflow.keras import callbacks
from tensorflow.keras.optimizers import Adam, Adamax
from keras.layers import Input, UpSampling2D, Conv2D, concatenate, MaxPooling2D, Flatten, Dense, Dropout, Activation, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.utils import load_img, img_to_array
from keras.regularizers import l1, l2

from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input # Changed import path

import os
from math import ceil
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's plot styling and render matplotlib figures inline in the notebook.
sns.set()
%matplotlib inline

import warnings
# NOTE(review): this suppresses every warning for the rest of the session, which
# can hide e.g. convergence or data-shape warnings from the classifiers trained
# later in the notebook.
warnings.filterwarnings('ignore')


Create a Model


In [30]:
def compute_hog(image):
    """Return the HOG descriptor of `image` as a flat feature vector.

    The descriptor uses 6 orientation bins, 16x16-pixel cells, 2x2-cell
    blocks and 'L2-Hys' block normalisation. No visualisation image is
    requested, so only the feature vector is returned.
    """
    hog_params = dict(
        orientations=6,
        pixels_per_cell=(16, 16),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=False,
        feature_vector=True,
    )
    return hog(image, **hog_params)



In [31]:
# LBP sampling configuration: radius of the sampling circle and number of
# sample points placed on it.
LBP_RADIUS = 2
LBP_N_POINTS = 16


def compute_lbp(image):
    """Return the normalised histogram of the LBP codes of `image`.

    The LBP map is computed with `LBP_N_POINTS` sample points at radius
    `LBP_RADIUS` (method 'default'). Codes are counted in unit-width bins
    covering 0 .. 2**LBP_N_POINTS, so the result has 2**LBP_N_POINTS entries.
    The counts are divided by their sum (plus a small epsilon).
    """
    n_codes = 2 ** LBP_N_POINTS
    codes = local_binary_pattern(image, P=LBP_N_POINTS, R=LBP_RADIUS, method='default')
    bin_edges = np.arange(0, n_codes + 1)
    counts, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, n_codes))
    counts = counts.astype("float")
    # The epsilon keeps the division defined even if every bin were empty.
    total = counts.sum() + 1e-6
    return counts / total

In [32]:

def compute_sift(image):
    """Return a fixed-length (128-value) SIFT feature vector for `image`.

    Keypoints are detected and described with OpenCV's SIFT. The descriptor
    array is flattened and only its first 128 values are kept; if fewer are
    available the vector is zero-padded to length 128. When no descriptors
    are returned at all, a zero vector of length 128 is returned.

    NOTE(review): presumably each SIFT descriptor has 128 values, in which
    case this keeps only the first keypoint's descriptor -- confirm that is
    the intended feature.
    """
    feature_len = 128
    _, raw_descriptors = cv2.SIFT_create().detectAndCompute(image, None)

    if raw_descriptors is None:
        return np.zeros(feature_len)

    head = raw_descriptors.flatten()[:feature_len]
    missing = feature_len - head.shape[0]
    if missing > 0:
        head = np.pad(head, (0, missing), 'constant')
    return head

**Try feat_cnn_model with tuned HOG parameters on classification**

In [33]:
from tensorflow.keras.models import Model

# Turn a trained CNN into a feature extractor by exposing the output of its
# second-to-last layer instead of the final layer's output.
def create_cnn_feature_extractor(model):
    """Return a Model sharing `model`'s input that outputs `model.layers[-2]`'s activations."""
    penultimate_layer = model.layers[-2]
    return Model(inputs=model.input, outputs=penultimate_layer.output)


# NOTE: change this path to wherever the trained model file is stored (e.g. a drive location).
base_model = load_model('feat_cnn_model_softmax_rois_256.h5')
# `model` is the feature extractor (output of the second-to-last layer); the
# feature-extraction function below reads it as a module-level name.
model = create_cnn_feature_extractor(base_model)
model.summary()


Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4_input (InputLayer)  [(None, 256, 256, 3)]    0         
                                                                 
 conv2d_4 (Conv2D)           (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 127, 127, 32)     0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 123, 123, 20)      16020     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 61, 61, 20)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 59, 59, 52)        9412

In [35]:
def process_and_extract_features_from_folders(image_folder):
    """Extract HOG, LBP, CNN and SIFT features for every image under `image_folder`.

    `image_folder` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label of every image inside it. Entries
    that are not directories are ignored, and files OpenCV cannot decode are
    skipped with a printed warning.

    CNN features come from the module-level `model`; HOG, LBP and SIFT are
    computed on a grayscale copy of the image. Both copies are resized to
    256x256 so every image yields feature vectors of the same length.

    Returns
    -------
    (features, labels)
        `features` is a dict with keys 'HOG', 'LBP', 'CNN' and 'SIFT', each a
        NumPy array with one row per processed image. `labels` is a NumPy
        array of the matching folder names. All arrays share the same row
        order.
    """
    target_size = (256, 256)

    hog_features_list = []
    lbp_features_list = []
    cnn_features_list = []
    sift_features_list = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any later seeded train/test split) reproducible.
    for label_folder in sorted(os.listdir(image_folder)):
        folder_path = os.path.join(image_folder, label_folder)
        if not os.path.isdir(folder_path):
            continue

        for image_name in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, image_name)

            # Read both versions before extracting anything: if either read
            # fails the image is skipped as a whole, so the four feature
            # lists and the label list can never get out of step.
            img = cv2.imread(img_path)
            img_gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None or img_gray is None:
                print(f"Warning: Unable to read image at {img_path}. Skipping.")
                continue

            img = cv2.resize(img, target_size)
            # Resize the grayscale copy too; otherwise HOG vector length
            # depends on the source image size and the rows cannot be stacked.
            img_gray = cv2.resize(img_gray, target_size)

            # Prepare a single-image batch scaled to [0, 1] for the CNN.
            # NOTE(review): cv2.imread yields BGR channel order -- confirm this
            # matches how the CNN's training images were loaded.
            img_array = img_to_array(img) / 255.0
            img_array = np.expand_dims(img_array, axis=0)

            # verbose=0 avoids printing one progress bar per image.
            cnn_features = model.predict(img_array, verbose=0).flatten()

            hog_features = compute_hog(img_gray).flatten()
            lbp_features = compute_lbp(img_gray).flatten()
            sift_features = compute_sift(img_gray).flatten()

            # Append only after every extractor has succeeded.
            cnn_features_list.append(cnn_features)
            hog_features_list.append(hog_features)
            lbp_features_list.append(lbp_features)
            sift_features_list.append(sift_features)
            labels.append(label_folder)

    return {
            'HOG': np.array(hog_features_list),
            'LBP': np.array(lbp_features_list),
            'CNN': np.array(cnn_features_list),
            'SIFT': np.array(sift_features_list)
        }, np.array(labels)


In [37]:

# Build the path from its components. Written as the plain string literal
# 'dataset\3_dataset\filtered_images', the sequences \3 and \f are interpreted
# as escape characters (octal 3 and form feed), which corrupts the path.
image_folder = os.path.join('dataset', '3_dataset', 'filtered_images')
# Process images and extract features
features_dict, labels = process_and_extract_features_from_folders(image_folder)

print("Feature extraction and normalization completed.")
# features_dict is a dict of arrays and has no .shape of its own; inspect an
# individual entry instead, e.g. features_dict['HOG'].shape.
print("Labels shape:", labels.shape)

Feature extraction and normalization completed.
Labels shape: (9846,)


In [9]:
from sklearn.preprocessing import LabelEncoder


# Learn the label vocabulary from the folder names, then map every label to
# its integer class id. Despite the name, `encoded_labels_dict` holds the
# encoder's array output, not a dict.
label_encoder = LabelEncoder().fit(labels)
encoded_labels_dict = label_encoder.transform(labels)

print(encoded_labels_dict)

In [10]:
# Wrap the encoded labels in a DataFrame to use as the training target.
y_training = pd.DataFrame(data=encoded_labels_dict)
print(y_training)

In [11]:
from sklearn.svm import SVC                         # Support Vector Machine
from sklearn.ensemble import RandomForestClassifier  # Random Forest
from sklearn.neighbors import KNeighborsClassifier   # K-Nearest Neighbors
from sklearn.neural_network import MLPClassifier     # Multi-Layer Perceptron
from sklearn.linear_model import LogisticRegression  # Logistic Regression
from sklearn.naive_bayes import GaussianNB           # Gaussian Naive Bayes
from xgboost import XGBClassifier                    # Extreme Gradient Boosting
import lightgbm as lgb 

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pandas as pd
import numpy as np
import itertools
import time
from sklearn.decomposition import PCA
from mrmr import mrmr_classif
from xgboost import XGBClassifier
import lightgbm as lgb
import itertools


In [12]:
y_training = pd.DataFrame(encoded_labels_dict)


# Evaluation metrics
def evaluate_model(y_test, y_pred, model_name):
    """Return (accuracy, precision, recall, f1) for `y_pred` against `y_test`.

    Precision, recall and F1 use the 'weighted' average across classes.
    `model_name` is accepted but not used.
    """
    averaging = {'average': 'weighted'}
    return (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred, **averaging),
        recall_score(y_test, y_pred, **averaging),
        f1_score(y_test, y_pred, **averaging),
    )

# Classifiers to evaluate (only top performers), keyed by display name.
# Seeded estimators all use random_state=42.
# NOTE(review): SVC(probability=True) makes fitting noticeably slower; the
# cells visible here only call predict() -- confirm probabilities are needed.
classifiers = dict([
    ('SVM', SVC(probability=True, random_state=42)),
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('KNN', KNeighborsClassifier()),
    ('MLP', MLPClassifier(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42)),
    ('GaussianNB', GaussianNB()),
    ('XGB', XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)),
    ('LightGBM', lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, random_state=42)),
])


# PCA and computing combinations
# Every non-empty subset of the four feature families is evaluated.
feature_keys = ['HOG', 'LBP', 'CNN', 'SIFT']
combinations = []
for i in range(1, len(feature_keys) + 1):
    combinations.extend(itertools.combinations(feature_keys, i))

# Row indices of the training partition. Without stratification the shuffle
# performed by train_test_split depends only on the number of samples,
# test_size and random_state, so these are the same rows that end up in
# X_train when the transformed features are split later with the same
# settings. Fitting the scaler and PCA on these rows only keeps test-set
# statistics out of the preprocessing (no data leakage).
pca_fit_idx, _ = train_test_split(np.arange(len(y_training)), test_size=0.2, random_state=42)

pca_transformed_features = {}

# Loop through each feature combination
for feature_combination in combinations:
    # Concatenate selected features column-wise
    combined_features = np.hstack([features_dict[key] for key in feature_combination])

    # Normalize: statistics learned from training rows, applied to all rows
    scaler = StandardScaler().fit(combined_features[pca_fit_idx])
    combined_features_normalized = scaler.transform(combined_features)

    # Keep at most 100 components; PCA cannot produce more components than
    # the number of features or the number of rows it is fitted on.
    n_components = min(100, combined_features_normalized.shape[1], len(pca_fit_idx))
    # random_state makes the result reproducible when a randomized solver is used.
    pca = PCA(n_components=n_components, random_state=42)
    pca.fit(combined_features_normalized[pca_fit_idx])
    combined_features_pca = pca.transform(combined_features_normalized)

    # Store the PCA-transformed features for all rows, keyed e.g. 'HOG+CNN'
    pca_transformed_features['+'.join(feature_combination)] = combined_features_pca

# Results list to store one record per (feature combination, classifier) pair
results = []

# Initialize highest accuracy tracker
highest_acc = {
    'Classifier': None,
    'Accuracy': 0.0,
    'Features': None
}

# Loop through each feature combination stored in pca_transformed_features
for feature_combination_str, combined_features_selected in pca_transformed_features.items():
    # Train-test split first, so the scaler below never sees the test rows
    X_train, X_test, y_train, y_test = train_test_split(
        combined_features_selected, y_training, test_size=0.2, random_state=42)

    # Normalize features: fit on the training rows only, then apply to both
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # y_training is a single-column DataFrame; the estimators expect a 1-D
    # target, so flatten it once per split.
    y_train = np.ravel(y_train)
    y_test = np.ravel(y_test)

    for clf_name, clf in classifiers.items():
        start_time = time.time()

        # Train and predict
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Calculate evaluation metrics
        accuracy, precision, recall, f1 = evaluate_model(y_test, y_pred, clf_name)
        computation_time = time.time() - start_time

        # Update highest accuracy if current accuracy is better
        if accuracy > highest_acc['Accuracy']:
            highest_acc = {
                'Classifier': clf_name,
                'Accuracy': accuracy,
                'Features': feature_combination_str
            }

        # Save the results
        results.append({
            'Classifier': clf_name,
            'Features': feature_combination_str,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1,
            'Computation Time (s)': computation_time
        })

# Convert results to DataFrame for better visualization
print("Highest Accuracy:", highest_acc)
results_df = pd.DataFrame(results)
print(results_df)





In [13]:
# results_df.to_csv('inception_full_244.csv')
sorted_df = results_df.sort_values(by='Accuracy', ascending=False).reset_index(drop=True)
sorted_df.head(10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrix(cm, classifier_name, feature_combination_str):
    """Show confusion matrix `cm` as an annotated heatmap.

    Cells are annotated with integer counts; the title names the classifier
    and the feature combination the matrix belongs to.
    """
    _, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.set_title(f'Confusion Matrix for {classifier_name} with Features: {feature_combination_str}')
    plt.show()

    
# Extract the best-performing classifier and feature combination
best_classifier_name = highest_acc['Classifier']
best_features = highest_acc['Features']
if best_classifier_name is None:
    # highest_acc still holds its initial values: nothing was evaluated.
    raise RuntimeError("No best classifier recorded; run the evaluation cell first.")

# PCA-transformed feature set of the best feature combination
best_combined_features = pca_transformed_features[best_features]

# Split the data first so the scaler below is fitted on training rows only
X_train, X_test, y_train, y_test = train_test_split(best_combined_features, y_training, test_size=0.2, random_state=42)

# Normalize: statistics from the training rows, applied to both partitions
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# y_training is a single-column DataFrame; flatten to the 1-D target the
# estimators expect.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Look up the best classifier (the same estimator object used during evaluation)
best_classifier = classifiers[best_classifier_name]

# Train and predict using the best classifier
best_classifier.fit(X_train, y_train)
y_pred = best_classifier.predict(X_test)

# Compute the confusion matrix
best_cm = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix
plot_confusion_matrix(best_cm, best_classifier_name, best_features)
   


