# In [None]:
# Importing the necessary modules
import cv2
import glob
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans

# Defining the BikeVSHorse class
class BikeVSHorse:
    """Bag-of-visual-words benchmark separating bike images from horse images.

    Constructing an instance runs the whole pipeline: image paths are
    collected from the ``Bikes`` and ``Horses`` folders, split 80/20 into
    stratified train/test sets, a KMeans vocabulary is fitted on the SIFT
    descriptors of the training images, every image is turned into a
    histogram of visual words, and the accuracy of three classifiers is
    printed.

    Labels are ``0`` for bikes and ``1`` for horses.
    """

    def __init__(self, path, n_clusters=128, random_state=None):
        """Collect the images under *path* and run the full pipeline.

        Args:
            path: Prefix placed directly in front of the ``Bikes`` and
                ``Horses`` folder names, so it normally ends with a path
                separator (e.g. ``'./data/'``).
            n_clusters: Size of the visual vocabulary (number of KMeans
                clusters).
            random_state: Optional seed forwarded to the train/test split
                and to KMeans; ``None`` keeps the runs unseeded.

        Raises:
            ValueError: If no image files are found under *path*.
        """
        self.labels = []  # Class label of every image, aligned with image_path
        self.n_clusters = n_clusters  # Number of clusters for KMeans
        self.random_state = random_state

        # Sorting makes the file order independent of the file system, so a
        # fixed random_state yields a reproducible split.
        bikes = sorted(glob.glob(f"{path}Bikes/*.*"))
        horse = sorted(glob.glob(f"{path}Horses/*.*"))
        if not bikes and not horse:
            raise ValueError(f"No images found under {path!r}")

        # Labels follow the same order as the concatenated path list
        self.labels.extend([0] * len(bikes))
        self.labels.extend([1] * len(horse))
        self.image_path = bikes + horse

        # Split the data and learn the visual vocabulary
        self.preprocessing()

        # Bag-of-words features for the training set, then the testing set
        self.BOW(1)
        self.BOW(0)

        # Calculating and printing the accuracy of different classification algorithms
        self.accuracy_calc()

    def _extract_descriptors(self, img_path):
        """Return the SIFT descriptors of the image at *img_path*.

        Returns ``None`` when SIFT reports no descriptors for the image.

        Raises:
            ValueError: If OpenCV cannot read the file as an image.
        """
        img = cv2.imread(img_path)
        if img is None:
            # The '*.*' glob also matches non-image files; fail with the
            # offending path instead of an opaque OpenCV assertion.
            raise ValueError(f"Could not read image: {img_path}")
        # cv2.imread yields BGR channel order, so convert from BGR (not RGB)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, descriptors = self.sift.detectAndCompute(gray, None)
        return descriptors

    def _bow_histogram(self, descriptors):
        """Count how many of *descriptors* fall into each vocabulary cluster.

        An image without descriptors (``None`` or empty) gets an all-zero
        histogram of length ``n_clusters``.
        """
        if descriptors is None or len(descriptors) == 0:
            return np.zeros(self.n_clusters, dtype=np.intp)
        words = self.vocabulary.predict(descriptors)
        return np.bincount(words, minlength=self.n_clusters)

    def preprocessing(self):
        """Split the data set and fit the KMeans vocabulary on the train split.

        Raises:
            ValueError: If no training image produced any SIFT descriptor.
        """
        # Splitting the image data into training and testing sets
        (self.train_images, self.test_images,
         self.train_labels, self.test_labels) = train_test_split(
            self.image_path,
            self.labels,
            train_size=0.8,
            shuffle=True,
            stratify=self.labels,
            random_state=self.random_state,
        )

        # Creating a SIFT object
        self.sift = cv2.SIFT_create()

        # Extracting SIFT descriptors for each image in the training set;
        # images without descriptors contribute nothing to the vocabulary.
        descriptors_train = []
        for img_path in self.train_images:
            descriptors = self._extract_descriptors(img_path)
            if descriptors is not None and len(descriptors):
                descriptors_train.append(descriptors)
        if not descriptors_train:
            raise ValueError("No SIFT descriptors found in the training images")

        # Stacking the per-image descriptor matrices into one array for clustering
        self.descriptors_train = np.vstack(descriptors_train)

        # Clustering the descriptors using KMeans
        self.vocabulary = KMeans(n_clusters=self.n_clusters,
                                 random_state=self.random_state)
        self.vocabulary.fit(self.descriptors_train)

    def BOW(self, flag):
        """Build bag-of-words feature vectors for one of the two splits.

        Args:
            flag: Truthy to process the training images and store the result
                in ``train_features``; falsy to process the testing images
                and store the result in ``test_features``.
        """
        # Selecting the appropriate set of images based on the flag value
        images = self.train_images if flag else self.test_images

        # One histogram of visual-word counts per image, in image order so
        # the rows stay aligned with the corresponding label list.
        features = np.asarray([
            self._bow_histogram(self._extract_descriptors(img_path))
            for img_path in images
        ])

        if flag:
            self.train_features = features
        else:
            self.test_features = features

    def accuracy_calc(self):
        """Train and evaluate SVM, logistic regression and KNN classifiers."""
        self.print_accuracy(svm.SVC(C=0.005, kernel='linear'), "Accuracy (SVM): ")
        self.print_accuracy(LogisticRegression(max_iter=1000), "Accuracy (LR): ")
        self.print_accuracy(KNeighborsClassifier(n_neighbors=5), "Accuracy(KNN): ")

    def print_accuracy(self, model, score):
        """Fit *model* on the train features and print its test accuracy.

        Args:
            model: Estimator exposing ``fit`` and ``predict``.
            score: Text printed in front of the accuracy value.

        Returns:
            The accuracy on the testing set, as computed by ``accuracy_score``.
        """
        model.fit(self.train_features, self.train_labels)
        prediction = model.predict(self.test_features)
        accuracy = accuracy_score(self.test_labels, prediction)
        print(score, accuracy)
        return accuracy


# Root of the data set. The trailing slash matters: the class places this
# string directly in front of the 'Bikes' and 'Horses' folder names.
path = './Assignment2_BikeHorses/'
# Instantiating the class runs the whole pipeline and prints the accuracies.
BVH = BikeVSHorse(path=path)