In [3]:
import cv2
import numpy as np
import os
import numpy as np
import pickle
from skimage.feature import hog
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import randint, uniform
import warnings
from sklearn.decomposition import PCA
from skimage.feature import hog, local_binary_pattern
from skimage.measure import moments_hu
import sklearn
sklearn.__version__

'1.0.2'

In [2]:
def preprocess_image(image):
        # Convert the image to YCrCb color space
        image_ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)

        # Split the image into channels
        y, cr, cb = cv2.split(image_ycrcb)

        # Apply thresholding to the Cr channel using Otsu's thresholding
        _, cr_thresholded = cv2.threshold(cr, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

       # Apply Gaussian blur multiple times
        for _ in range(5):
            image_filtered = cv2.GaussianBlur(cr_thresholded, (5, 5), 0)
            cr_thresholded=image_filtered

        # Define the kernel for morphological operations
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))  # Elliptical kernel
        
        # Perform morphological opening multiple times
        iterations = 10
        for _ in range(iterations):
            opened = cv2.morphologyEx(image_filtered, cv2.MORPH_OPEN, kernel)
            # Update the image for the next iteration
            image_filtered = opened
    
        #cv2.imwrite(output_path, opened)
        return opened


In [3]:
def LBP_features(image):
    lbp = cv2.resize(image, (60, 60))
    lbp_features = np.array([])
    for i in range(0, lbp.shape[0], 8):
        for j in range(0, lbp.shape[1], 8):
            block = lbp[i:i+8, j:j+8]
            hist, _ = np.histogram(block, bins=np.arange(256), density=True)
            lbp_features = np.concatenate((lbp_features, hist))
    lbp_features.flatten()
    return lbp_features

In [4]:
# Feature Extraction Module
def extract_features(image):
    # Extract HOG features
    hog_features = hog(image, orientations=9, pixels_per_cell=(12, 12), cells_per_block=(2, 2), visualize=False)
    # Calculate Hu moments
    moments = cv2.moments(image)
    hu_moments = cv2.HuMoments(moments).flatten()
       # Extract LBP features
    # lbp = cv2.resize(image, (60, 60))
    # lbp_features = np.array([])
    # for i in range(0, lbp.shape[0], 8):
    #     for j in range(0, lbp.shape[1], 8):
    #         block = lbp[i:i+8, j:j+8]
    #         hist, _ = np.histogram(block, bins=np.arange(256), density=True)
    #         lbp_features = np.concatenate((lbp_features, hist))
    features = np.concatenate((hog_features, hu_moments))
    return features

In [5]:
# Load the dataset
dataset = []
labels = []
all_LBP=[]
# Assuming you have a dataset with images and corresponding labels
# Add your own code to load the dataset and labels
dataset_features = []
features=[]
# Path to the main directory containing the six folders
main_directory = "./Dataset_0-5"

# List of folders for each digit (0 to 5)
digit_folders = ["0","1","2","3","4","5"]

# Iterate over each digit folder
for digit, folder in enumerate(digit_folders):
    folder_path = main_directory + "/" + folder
    
    # Iterate over images in the folder
    for filename in os.listdir(folder_path):
        image_path = folder_path + "/" + filename
        # Load the image
        image = cv2.imread(image_path)
        # Add the images and labels 
        # dataset.append(image)
        image=cv2.resize(image, (128, 64))
        roi = preprocess_image(image)
        # cv2.imshow("Image", roi)
        # print("Label:", label)    
        features=extract_features(roi)
        # lbp_features=LBP_features(roi)
        # all_LBP.append(lbp_features)
        dataset_features.append(features)
        labels.append(digit)


In [6]:
def reduce_features(features, n_components):
    features_array = np.array(features)
    # Reshape the feature matrix
    features_reshaped = features_array.reshape(len(features), -1)

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(features_reshaped)

    return reduced_features

In [8]:
# Split the dataset into training and testing sets
dataset_features = np.array(dataset_features)
X_train, X_test, y_train, y_test = train_test_split(dataset_features, labels, test_size=0.2, random_state=42)
X_train=np.array(X_train)
y_train=np.array(y_train)


In [10]:
# Define the classifiers and their respective parameter grids for RandomizedSearchCV
# Define a directory to save the pickle files
pickle_directory = "./classifiers/"
classifiers = {
        'Logistic Regression':(LogisticRegression(),
                            {'C': np.logspace(-4, 4, 100),
                             'penalty': ['l1', 'l2', 'none'],
                             'solver': ['lbfgs', 'liblinear', 'sag', 'newton-cg', 'saga'],
                             'max_iter': randint(500, 1000)}),
        'Decision Tree': (DecisionTreeClassifier(),#87
                        {'criterion': ['gini', 'entropy'],
                        'splitter': ['best', 'random'],
                        'max_depth': randint(3, 100),
                        'min_samples_split': randint(2, 20),
                        'min_samples_leaf': randint(1, 5),
                        'max_features': ['sqrt', 'log2', None]}),
        'K-Nearest Neighbors': (KNeighborsClassifier(),#82.5
                                {'n_neighbors': np.arange(3, 45),
                                'weights': ['uniform', 'distance'],
                                'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                                'leaf_size': randint(15, 40)}),
        'Gaussian Naive Bayes': (GaussianNB(),#79.5
                                {
                                'var_smoothing': uniform(1e-10, 1e-05)
                                }),
        'AdaBoost': (AdaBoostClassifier(base_estimator=DecisionTreeClassifier(),#89.5
                                        random_state=42),
                    {'n_estimators': randint(100, 200),
                    'learning_rate': uniform(0.01, 1),
                    'base_estimator__max_depth': randint(3, 100)}),
        'Random Forest': (RandomForestClassifier(),#91
                        {'n_estimators': randint(100, 200),
                        'criterion': ['gini', 'entropy'],
                        'max_depth': randint(3, 100),
                        'min_samples_split': randint(2, 20),
                        'min_samples_leaf': randint(1, 5),
                        'max_features': ['sqrt', 'log2', None]}),
        'Gradient Boosting': (GradientBoostingClassifier(),#90
                            {'n_estimators': randint(100, 200),
                            'learning_rate': uniform(0.01, 1),
                            'max_depth': randint(3, 100),
                            'min_samples_split': randint(2, 20),
                            'min_samples_leaf': randint(1, 5),
                            'max_features': ['sqrt', 'log2', None]}),
        'SVC': (SVC(),#87
                {'C': np.logspace(-4, 4, 100),
                'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'degree': randint(2, 10),
                'gamma': ['scale', 'auto']})
    }
best_classifier = None
best_params = None
best_accuracy = 0

# Iterate over classifiers and perform RandomizedSearchCV
for name, (classifier, param_grid) in classifiers.items():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rs = RandomizedSearchCV(classifier, param_grid, n_iter=10, scoring='accuracy', random_state=42)
        rs.fit(X_train, y_train)
    y_pred = rs.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{name}: Best params - {rs.best_params_}, Accuracy - {accuracy}')

    if accuracy > best_accuracy:
        best_classifier = rs.best_estimator_
        best_params = rs.best_params_
        best_accuracy = accuracy
# Save the classifier as a pickle file
    pickle_filename = f"{pickle_directory}{name}.pickle"
    with open(pickle_filename, 'wb') as f:
        pickle.dump(rs.best_estimator_, f)
# Print the best classifier and its parameters
print('\nBest Classifier:')
print(best_classifier)
print('Best Params:')
print(best_params)
print('Best Accurancy:')
print(best_accuracy)

Logistic Regression: Best params - {'C': 1.3219411484660315, 'max_iter': 848, 'penalty': 'none', 'solver': 'sag'}, Accuracy - 0.7726027397260274
Decision Tree: Best params - {'criterion': 'gini', 'max_depth': 23, 'max_features': None, 'min_samples_leaf': 2, 'min_samples_split': 12, 'splitter': 'best'}, Accuracy - 0.6054794520547945
K-Nearest Neighbors: Best params - {'algorithm': 'brute', 'leaf_size': 36, 'n_neighbors': 4, 'weights': 'distance'}, Accuracy - 0.821917808219178

Best Classifier:
KNeighborsClassifier(algorithm='brute', leaf_size=36, n_neighbors=4,
                     weights='distance')
Best Params:
{'algorithm': 'brute', 'leaf_size': 36, 'n_neighbors': 4, 'weights': 'distance'}
Best Accurancy:
0.821917808219178
