SIFT OBJECT RECOGNITION

In [1]:
import numpy as np
import cv2 as cv
import os
import pandas as pd
import sklearn
import sklearn.cluster
import sklearn.metrics
import sklearn.preprocessing

class RecognizerSift:
    """SIFT + k-means object-recognition experiment.

    Instantiating the class runs the whole experiment through ``dataLoader``:
    SIFT descriptors are extracted from a random sample of every class folder
    under ``<cwd>/dataset``, clustered with k-means, and every cluster is
    assigned the majority class of the descriptors it contains. The resulting
    per-descriptor accuracy and confusion matrix are printed.

    NOTE: ``trainingSet`` and ``testSet`` are initialised but not populated by
    the code in this class; the entries that are not sampled for training are
    not evaluated yet.
    """

    # Fraction of the entries of each class folder that is sampled for training.
    TRAINING_FRACTION = 0.9
    # Upper bound on the number of k-means clusters.
    NUMBER_OF_CLUSTERS = 256

    def __init__(self) -> None:
        """Build the class-name -> label map and run the pipeline."""
        self.trainingSet = []
        self.testSet = []
        nameOfClasses = ['banana_1', 'calculator_1', 'camera_1', 'cell_phone_1', 'flashlight_1',
                         'food_bag_1', 'lemon_1', 'lightbulb_1', 'lime_1', 'marker_1']
        nameLabelsOfClasses = {nameOfClass: i for i, nameOfClass in enumerate(nameOfClasses)}

        self.dataLoader(nameLabelsOfClasses)

    def siftDetectsAndComputes(self, image):
        """Detect SIFT keypoints in a BGR image and compute their descriptors.

        Args:
            image: BGR image as returned by ``cv.imread``. It is also passed to
                ``cv.drawKeypoints`` as the output image.

        Returns:
            A tuple ``(imageWithKeyPoints, descriptors)``. ``descriptors`` is
            whatever ``detectAndCompute`` returns, which is ``None`` when no
            keypoint was found, so callers must check for that.
        """
        sift = cv.SIFT_create()
        grayscaleImage1 = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

        keyPoints1, descriptors1 = sift.detectAndCompute(grayscaleImage1, None)
        image = cv.drawKeypoints(grayscaleImage1, keyPoints1, image)
        return image, descriptors1

    def _selectTrainingFiles(self, imageFolderPath):
        """Return the sorted paths of the colour images sampled for training.

        A random ``TRAINING_FRACTION`` of all entries of the folder is drawn
        first; the sample is then reduced to ``.png`` files that are neither
        depth crops nor mask crops. Because sampling happens before filtering,
        the share of colour images that ends up selected is only approximately
        ``TRAINING_FRACTION``.
        """
        folderEntries = os.listdir(imageFolderPath)
        if not folderEntries:
            return []

        sampleSize = int(len(folderEntries) * self.TRAINING_FRACTION)
        sampledEntries = np.random.choice(folderEntries, sampleSize, replace=False)
        sampledPaths = [os.path.join(imageFolderPath, str(entry)) for entry in sampledEntries]

        return sorted(
            path for path in sampledPaths
            if os.path.isfile(path)
            and path.endswith(".png")
            and not path.endswith(("maskcrop.png", "depthcrop.png"))
        )

    def dataLoader(self, nameLabelsOfClasses):
        """Extract descriptors, cluster them and print the evaluation.

        Args:
            nameLabelsOfClasses: Mapping from class-folder name to integer
                label. It is used when every class folder found on disk has an
                entry in it; otherwise folders are labelled by their position
                in alphabetical order.

        Raises:
            ValueError: If no SIFT descriptor could be extracted at all.
        """
        datasetDirectory = os.path.join(os.getcwd(), "dataset")
        setOfDescriptors = []
        setOfImages = []
        labelsOfData = []

        print("Please wait while the data is being loaded (it may take 1-2 minutes)...")

        # os.listdir returns entries in arbitrary order, so sort them to get
        # reproducible labels, and ignore stray files next to the class folders.
        classFolders = sorted(
            entry for entry in os.listdir(datasetDirectory)
            if os.path.isdir(os.path.join(datasetDirectory, entry))
        )
        # Use the explicit name -> label map only when it covers every folder,
        # so that the two labelling schemes can never be mixed.
        useNameLabels = all(folder in nameLabelsOfClasses for folder in classFolders)

        for objectIndex, imageFolder in enumerate(classFolders):
            label = nameLabelsOfClasses[imageFolder] if useNameLabels else objectIndex
            imageFolderPath = os.path.join(datasetDirectory, imageFolder)

            for file in self._selectTrainingFiles(imageFolderPath):
                image = cv.imread(file)
                if image is None:
                    # cv.imread signals an unreadable file by returning None.
                    print("Skipping unreadable image: ", file)
                    continue

                imageWithKeyPoints, descriptors = self.siftDetectsAndComputes(image)
                if descriptors is None or len(descriptors) == 0:
                    # No keypoints in this image: nothing to cluster.
                    continue

                setOfImages.append(imageWithKeyPoints)
                setOfDescriptors.append(descriptors)
                # Every descriptor inherits the class label of its image.
                labelsOfData.extend([label] * len(descriptors))

        print("Total images checked ", len(setOfImages))
        print("Total descriptor sets: ", len(setOfDescriptors))
        print("Total descriptors with respect to labels added: ", len(labelsOfData))

        if not setOfDescriptors:
            raise ValueError("No SIFT descriptors were extracted from " + datasetDirectory)

        # Stack the per-image descriptor arrays into one array, one row per descriptor.
        descriptors = np.vstack(setOfDescriptors)
        print("New shape of Descriptors: ", np.shape(descriptors))

        print("KMEANS CLUSTERING (it may take 1 minute)...")
        # Rescale every descriptor dimension to [0, 1] before clustering.
        scalerObject = sklearn.preprocessing.MinMaxScaler()
        newDescriptorData = scalerObject.fit_transform(descriptors)

        # k-means cannot build more clusters than there are samples.
        numberOfClusters = min(self.NUMBER_OF_CLUSTERS, len(newDescriptorData))
        kmeans = sklearn.cluster.KMeans(n_clusters=numberOfClusters)
        kmeans.fit(newDescriptorData)

        # One cluster id per descriptor.
        labelsOfClustering = kmeans.predict(newDescriptorData)
        print("clusteringLabels.shape :" , np.shape(labelsOfClustering))
        print("len(labelsOfData) :" , len(labelsOfData), " and len(labelsOfClustering) : ", len(labelsOfClustering))

        # Translate cluster ids into class labels by majority vote.
        predictedLabels = self.getLabelArrayOfClusters(labelsOfClustering, labelsOfData)

        print("Accuracy of classicifaction: ", sklearn.metrics.accuracy_score(labelsOfData, predictedLabels))

        classNames = ["banana", "calculator", "camera", "cell_phone", "flashlight",
                      "food_bag", "lemon", "lightbulb", "lime", "marker"]
        confusionMatrix = sklearn.metrics.confusion_matrix(
            labelsOfData, predictedLabels, labels=list(range(len(classNames))))
        matrixTable = pd.DataFrame(confusionMatrix, classNames, classNames)
        print("Confusion Matrix: \n", matrixTable)

    def getLabelArrayOfClusters(self, labelsOfClustering, labelsOfData):
        """Map every sample to the majority class label of its cluster.

        Args:
            labelsOfClustering: Sequence of cluster ids, one per sample. The
                ids do not have to be contiguous.
            labelsOfData: Sequence of non-negative integer class labels, one
                per sample, aligned with ``labelsOfClustering``.

        Returns:
            Integer NumPy array of the same length holding, for each sample,
            the most frequent class label inside that sample's cluster. Ties
            are resolved in favour of the smallest label.

        Raises:
            ValueError: If the two inputs do not have the same length.
        """
        clusterIds = np.asarray(labelsOfClustering)
        trueLabels = np.asarray(labelsOfData)
        if len(clusterIds) != len(trueLabels):
            raise ValueError("labelsOfClustering and labelsOfData must have the same length")

        predictedLabels = np.zeros(len(clusterIds), dtype=np.int64)
        # Iterate over the ids that actually occur, so gaps in the numbering
        # (clusters without members) cannot produce an empty vote.
        for clusterId in np.unique(clusterIds):
            members = clusterIds == clusterId
            # bincount counts the occurrences of each label; argmax picks the
            # most frequent one (the first one on ties).
            predictedLabels[members] = np.bincount(trueLabels[members]).argmax()
        return predictedLabels


if __name__ == "__main__":
    # Constructing the recognizer is enough: its __init__ triggers dataLoader.
    RecognizerSift()


Please wait while the data is being loaded (it may take 1-2 minutes)...
Total images checked  111
Total descriptor sets:  111
Total descriptors with respect to labels added:  6046
New shape of Descriptors:  (6046, 128)
KMEANS CLUSTERING (it may take 10-15 minutes for big data, 0-1 minute for small data)...
clusteringLabels.shape : (6046,)
len(labelsOfData) : 6046  and len(labelsOfClustering) :  6046
Accuracy of classicifaction:  0.6366192523982799
Confusion Matrix: 
             banana  calculator  camera  cell_phone  flashlight  food_bag  \
banana          86          10      23           0          30        28   
calculator       6        1005       5          12          79       229   
camera           2          35      67           4          35       128   
cell_phone       0          51      14          63          20        37   
flashlight      17          52      41           1         288       168   
food_bag        32         189      42          38          98      2079