In [1]:
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from flask import Flask, request, jsonify, render_template
import os


In [2]:
# Root folder of the image dataset; load_images() treats each sub-folder as one class.
file_path = '/content/drive/MyDrive/dataset_full'

In [3]:
import os
import cv2

def load_images(file_path, target_size=(256, 256)):
    """Load a folder-per-class image dataset as resized grayscale arrays.

    Every sub-directory of ``file_path`` is treated as one class. The
    sub-directories are visited in sorted name order and numbered 0, 1, 2, ...
    so the folder-to-label mapping (and the order of the returned samples) is
    identical on every run; ``os.listdir`` on its own gives no ordering
    guarantee.

    Args:
        file_path: Root directory that contains one sub-directory per class.
        target_size: Size tuple handed to ``cv2.resize`` for every image.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        resized grayscale images and ``labels`` the integer class id of each
        image, in the same order. Both arrays are empty when no readable
        image is found.
    """
    images = []
    labels = []

    # Only directories are classes: stray top-level files (notes, .DS_Store,
    # ...) used to crash the inner os.listdir call.
    class_folders = sorted(
        entry
        for entry in os.listdir(file_path)
        if os.path.isdir(os.path.join(file_path, entry))
    )

    for label, folder in enumerate(class_folders):
        folder_path = os.path.join(file_path, folder)
        for file in sorted(os.listdir(folder_path)):
            # Read image in grayscale (single channel).
            image = cv2.imread(os.path.join(folder_path, file), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # Entry could not be read as an image; skip it.
                continue
            # Resize so every sample has the same dimensions.
            images.append(cv2.resize(image, target_size))
            labels.append(label)

    return np.array(images), np.array(labels)




Images are read using OpenCV's cv2.imread() function with the flag cv2.IMREAD_GRAYSCALE, which reads the image in grayscale mode. Grayscale images have only one channel representing pixel intensity, which simplifies processing.

After reading each image, it is resized to a target size specified by the target_size parameter. Resizing is important for ensuring consistency in the dimensions of all images. In this case, the target size is set to (256, 256) pixels.

In [4]:
def extract_features(images, canny_low=100, canny_high=200):
    """Build one histogram-based feature vector per grayscale image.

    For every image two 256-bin histograms are computed and concatenated:
    the intensity histogram of the histogram-equalized image, followed by
    the histogram of its Canny edge map. Each feature vector therefore has
    512 entries.

    Args:
        images: Iterable of single-channel images as accepted by
            ``cv2.equalizeHist`` and ``cv2.Canny``.
        canny_low: Lower hysteresis threshold passed to ``cv2.Canny``.
        canny_high: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        NumPy array with one row of features per input image (an empty
        array when ``images`` is empty).
    """
    features = []
    for image in images:
        # 1. Histogram equalization, summarized as an intensity histogram.
        equalized = cv2.equalizeHist(image)
        hist_equalized, _ = np.histogram(equalized.ravel(), bins=256, range=[0, 256])

        # 2. Canny edge detection, summarized the same way.
        #    (The former GRAY2RGB conversion was dropped: its result was never
        #    used, so it only cost time and memory per image.)
        edges = cv2.Canny(image, canny_low, canny_high)
        edges_hist, _ = np.histogram(edges.ravel(), bins=256, range=[0, 256])

        # Append features to the feature vector
        features.append(np.concatenate([hist_equalized, edges_hist]))

    return np.array(features)


Histogram Equalization: Enhances contrast and reveals hidden details in the image, making it easier for the classifier to distinguish between different classes based on intensity variations.

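Grayscale Transformation: The images are already loaded as single-channel grayscale, which simplifies their representation and reduces computational complexity while retaining the essential information about intensity values. This step contributes no values of its own to the feature vector; only the two histograms described here are used.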

Canny Edge Detection: Identifies significant changes in intensity, which often correspond to edges and boundaries between objects in the image. These edges contain important structural information that can be valuable for classification, helping the model to recognize distinct patterns and shapes.

In [5]:
# Function for dimensionality reduction (PCA)
def apply_pca(features, n_components=100, random_state=42):
    """Reduce the feature matrix with Principal Component Analysis.

    Args:
        features: 2-D array-like of shape ``(n_samples, n_features)``.
        n_components: Number of components to keep. An integer larger than
            what the data supports is lowered to
            ``min(n_samples, n_features)`` instead of letting PCA raise, so
            small datasets still work with the default. Non-integer values
            are passed to ``PCA`` unchanged.
        random_state: Seed forwarded to ``PCA`` so repeated runs give the
            same projection when a randomized solver is selected.

    Returns:
        The transformed features returned by ``PCA.fit_transform``.
    """
    shape = np.shape(features)
    if isinstance(n_components, int) and len(shape) == 2:
        # PCA cannot extract more components than samples or features.
        n_components = min(n_components, *shape)

    pca = PCA(n_components=n_components, random_state=random_state)
    reduced_features = pca.fit_transform(features)
    return reduced_features

The `apply_pca` function performs Principal Component Analysis (PCA) on the input feature set to reduce its dimensionality. By specifying the number of components, it transforms the original features into a lower-dimensional space while retaining the most important information. This technique is crucial for managing high-dimensional data in image classification tasks, as it helps mitigate computational complexity and overfitting, and facilitates better visualization and interpretation of the data.

In [6]:
# Function to train and evaluate model
def train_evaluate_model(X_train, X_test, y_train, y_test, clf=None):
    """Fit a classifier and report its accuracy on both splits.

    Args:
        X_train: Training feature matrix.
        X_test: Held-out feature matrix.
        y_train: Labels for ``X_train``.
        y_test: Labels for ``X_test``.
        clf: Optional unfitted estimator exposing ``fit`` and ``predict``.
            When omitted, a linear-kernel ``SVC`` (``C=1.0``,
            ``random_state=42``) is used, so existing four-argument calls
            behave as before. Passing an estimator avoids redefining this
            function for every model that is tried.

    Returns:
        Tuple ``(clf, train_accuracy, test_accuracy)``: the fitted estimator
        and its accuracy on the training and test data.
    """
    if clf is None:
        clf = SVC(kernel='linear', C=1.0, random_state=42)

    # Train model
    clf.fit(X_train, y_train)

    # Evaluate model on the data it was fitted on and on the held-out data
    train_accuracy = accuracy_score(y_train, clf.predict(X_train))
    test_accuracy = accuracy_score(y_test, clf.predict(X_test))
    return clf, train_accuracy, test_accuracy

In [7]:
# Same dataset root that an earlier cell already assigned to file_path.
file_path = "/content/drive/MyDrive/dataset_full"
# images: resized grayscale images; labels: integer class id for each image.
images, labels = load_images(file_path)

In [8]:
# One row per image: equalized-intensity histogram followed by the Canny edge histogram.
features = extract_features(images)

In [9]:
# Project the histogram features with PCA using apply_pca's default n_components.
# NOTE(review): PCA is fitted on every sample here, before the train/test split
# in a later cell, so the held-out images influence the projection.
reduced_features = apply_pca(features)

In [10]:
# Hold out 30% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(reduced_features, labels, test_size=0.3, random_state=42)

In [11]:
# Fit the train_evaluate_model defined in the preceding cell and report accuracy
# on both splits. `model` and the two accuracy variables are overwritten by the
# later evaluation cells that reuse the same names.
model, train_accuracy, test_accuracy = train_evaluate_model(X_train, X_test, y_train, y_test)
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Train Accuracy: 1.0
Test Accuracy: 0.2857142857142857


In [12]:
from sklearn.ensemble import RandomForestClassifier

def train_evaluate_model(X_train, X_test, y_train, y_test, clf=None):
    """Fit a classifier and report its accuracy on both splits.

    Args:
        X_train: Training feature matrix.
        X_test: Held-out feature matrix.
        y_train: Labels for ``X_train``.
        y_test: Labels for ``X_test``.
        clf: Optional unfitted estimator exposing ``fit`` and ``predict``.
            When omitted, a ``RandomForestClassifier`` with 100 trees and
            ``random_state=42`` is used, so existing four-argument calls
            behave as before. Passing an estimator avoids redefining this
            function for every model that is tried.

    Returns:
        Tuple ``(clf, train_accuracy, test_accuracy)``: the fitted estimator
        and its accuracy on the training and test data.
    """
    if clf is None:
        clf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train model
    clf.fit(X_train, y_train)

    # Evaluate model on the data it was fitted on and on the held-out data
    train_accuracy = accuracy_score(y_train, clf.predict(X_train))
    test_accuracy = accuracy_score(y_test, clf.predict(X_test))
    return clf, train_accuracy, test_accuracy


In [13]:
# Re-run training with the train_evaluate_model redefined in the preceding cell,
# on the same split; this replaces the previous `model` and accuracy values.
model, train_accuracy, test_accuracy = train_evaluate_model(X_train, X_test, y_train, y_test)
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Train Accuracy: 1.0
Test Accuracy: 0.2857142857142857


In [14]:
from sklearn.neighbors import KNeighborsClassifier

def train_evaluate_model(X_train, X_test, y_train, y_test, clf=None):
    """Fit a classifier and report its accuracy on both splits.

    Args:
        X_train: Training feature matrix.
        X_test: Held-out feature matrix.
        y_train: Labels for ``X_train``.
        y_test: Labels for ``X_test``.
        clf: Optional unfitted estimator exposing ``fit`` and ``predict``.
            When omitted, a ``KNeighborsClassifier`` with ``n_neighbors=5``
            is used, so existing four-argument calls behave as before.
            Passing an estimator (for example one with a different number of
            neighbors) avoids redefining this function for every model that
            is tried.

    Returns:
        Tuple ``(clf, train_accuracy, test_accuracy)``: the fitted estimator
        and its accuracy on the training and test data.
    """
    if clf is None:
        # Set the number of neighbors (you can adjust this)
        clf = KNeighborsClassifier(n_neighbors=5)

    # Train model
    clf.fit(X_train, y_train)

    # Evaluate model on the data it was fitted on and on the held-out data
    train_accuracy = accuracy_score(y_train, clf.predict(X_train))
    test_accuracy = accuracy_score(y_test, clf.predict(X_test))
    return clf, train_accuracy, test_accuracy


In [15]:
# Re-run training with the train_evaluate_model redefined in the preceding cell,
# on the same split; this replaces the previous `model` and accuracy values.
model, train_accuracy, test_accuracy = train_evaluate_model(X_train, X_test, y_train, y_test)
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Train Accuracy: 0.4965986394557823
Test Accuracy: 0.37566137566137564


Due to constraints posed by a slow machine processor, the dataset size was reduced to facilitate smoother processing, leading to a compromise in model accuracy across all models.

To improve model performance, consider data augmentation to enrich the dataset and hyperparameter optimization for fine-tuning model settings. Additionally, explore transfer learning with pre-trained deep learning models for feature extraction. Ensemble learning methods like model stacking and automated feature selection techniques can further enhance classification accuracy while streamlining the feature extraction process.