In [None]:
import os
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.feature_selection import SequentialFeatureSelector as SFS
from sklearn.decomposition import PCA
from joblib import Parallel, delayed
import matplotlib.pyplot as plt


In [None]:
def load_images_from_folder(folder):
    """Load every readable image under ``folder``, labelled by sub-directory.

    Each immediate sub-directory of ``folder`` is treated as one class.
    Classes are numbered ``0..K-1`` following the sorted order of their
    directory names.

    Args:
        folder: Path of the dataset root containing one sub-directory per class.

    Returns:
        A ``(images, labels)`` tuple of two parallel lists: the objects
        returned by ``cv2.imread`` and the integer class index of each one.
        Files for which ``cv2.imread`` returns ``None`` are skipped.

    Note:
        Labels depend on the set of sub-directories present, so two roots
        (e.g. train and test) only share a label mapping when they contain
        the same class directories.
    """
    images = []
    labels = []
    # Only directories are classes: a stray file in the root (e.g. .DS_Store)
    # would otherwise consume a label index and make os.listdir() raise below.
    class_names = sorted(
        entry for entry in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, entry))
    )
    for label, class_name in enumerate(class_names):
        class_folder = os.path.join(folder, class_name)
        # Sort so the sample order does not depend on the filesystem.
        for filename in sorted(os.listdir(class_folder)):
            img_path = os.path.join(class_folder, filename)
            if not os.path.isfile(img_path):
                continue  # nested directories are not images
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
                labels.append(label)
    return images, labels

# Dataset roots on the mounted Drive; each holds one sub-folder per class.
train_folder = '/content/drive/My Drive/HWData/train'
test_folder = '/content/drive/My Drive/HWData/test'

# Load the training split first, then the test split, with the same loader.
(train_images, train_labels), (test_images, test_labels) = (
    load_images_from_folder(split_folder)
    for split_folder in (train_folder, test_folder)
)

def extract_features(images, max_descriptors=128):
    """Turn each image into one fixed-length vector of SIFT descriptors.

    For every image, SIFT descriptors are computed on a grayscale version,
    the first ``max_descriptors`` of them are kept, and the result is
    zero-padded up to ``max_descriptors`` rows before being flattened.

    Args:
        images: Iterable of image arrays. 2-D arrays are used as-is;
            anything else is converted with ``cv2.COLOR_BGR2GRAY``.
        max_descriptors: Number of descriptor rows stored per image.

    Returns:
        A float32 array of shape
        ``(len(images), max_descriptors * sift.descriptorSize())``.
        Images with no descriptors yield an all-zero row, and an empty
        input yields an array with zero rows but the same width.
    """
    images = list(images)
    sift = cv2.SIFT_create()
    row_width = max_descriptors * sift.descriptorSize()

    # Pre-allocating zeros gives the padding for free and fixes dtype and
    # shape up front, so a descriptor-less image cannot upcast the matrix
    # and an empty input still comes back 2-D.
    features = np.zeros((len(images), row_width), dtype=np.float32)

    for row, img in enumerate(images):
        # An already single-channel image cannot go through BGR2GRAY.
        img_gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, descriptors = sift.detectAndCompute(img_gray, None)
        if descriptors is None:
            continue  # no descriptors: leave the row as zeros
        # NOTE(review): rows are kept in the order the detector returns them,
        # not ranked by keypoint response; confirm that is the intent.
        kept = np.asarray(descriptors[:max_descriptors], dtype=np.float32).ravel()
        features[row, :kept.size] = kept
    return features

# Build the SIFT feature matrices for both splits (train first, then test).
train_features, test_features = (
    extract_features(split_images)
    for split_images in (train_images, test_images)
)


In [None]:
# Reduce the flattened SIFT vectors to 100 principal components (tune as needed).
# random_state is fixed because PCA may choose a randomized SVD solver for
# inputs of this size; without a seed the projection, and everything
# downstream of it, can differ between runs.
pca = PCA(n_components=100, random_state=0)
# Fit on the training split only, then apply the same projection to the test split.
train_features_pca = pca.fit_transform(train_features)
test_features_pca = pca.transform(test_features)


In [None]:
# Forward sequential selection scored with a linear SVM: keep 50 of the PCA
# components (adjust to the reduced dimensionality), using all available cores.
sfs = SFS(
    estimator=SVC(kernel='linear'),
    n_features_to_select=50,
    direction='forward',
    n_jobs=-1,
)
# The selector is fitted on the training split only; both splits are then
# reduced to the same selected columns.
sfs.fit(train_features_pca, train_labels)
train_features_reduced = sfs.transform(train_features_pca)
test_features_reduced = sfs.transform(test_features_pca)


In [None]:
# Fit the final linear-kernel SVM on the selected training features.
svm = SVC(kernel='linear').fit(train_features_reduced, train_labels)

# Classify the held-out test split.
test_predictions = svm.predict(test_features_reduced)

# Score the predictions against the ground-truth test labels.
accuracy = accuracy_score(test_labels, test_predictions)
conf_matrix = confusion_matrix(test_labels, test_predictions)

# Report the per-class confusion counts followed by the overall accuracy.
print("Confusion Matrix:\n", conf_matrix)
print("Accuracy:", accuracy)


Confusion Matrix:
[43  0  0  0  0  0  0  0  0  0  3  0  0  0  0]
[ 1  3  0  0  0  0  0  0  0  0  0  0  0  0  0]
[ 0  0 13  0  0  0  0  0  0  0  0  0  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  2  0  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  0  0  2  0  0]
[ 1  0  0  0  0  2  0  0  0  0  1  0  0  0  0]
[ 1  0  0  0  0  0  0  3  0  0  0  0  0  0  0]
[ 4  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[ 3  0  0  0  0  0  0  0  0  8  0  0  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  6  0  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  0  1  0  0  0]
[ 2  0  0  0  0  0  0  0  0  0  0  0  6  0  0]
[ 2  0  0  0  0  0  0  0  0  0  0  0  0  6  0]
[ 2  0  0  0  0  0  0  0  0  0  0  0  0  0  2]
Accuracy: 0.7277853355945519
