In [37]:
import numpy as np
import cv2
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from tqdm import tqdm

In [40]:
# takes all images and convert them to grayscale. 
# return a dictionary that holds all images category by category. 
def load_images_from_folder(folder):
    images = {}
    for filename in tqdm(os.listdir(folder)):
        category = []
        path = folder + "/" + filename
        for cat in os.listdir(path):
            img = cv2.imread(path + "/" + cat,0)
            #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if img is not None:
                category.append(img)
        images[filename] = category
    return images

# images = load_images_from_folder("data")
images = load_images_from_folder('dataset/train')  # take all images category by category 
test = load_images_from_folder("dataset/test") # take test images 

100%|██████████| 7/7 [00:04<00:00,  1.65it/s]
100%|██████████| 7/7 [00:00<00:00,  7.41it/s]


In [41]:
# Creates descriptors using sift 
# Takes one parameter that is images dictionary
# Return an array whose first index holds the decriptor_list without an order
# And the second index holds the sift_vectors dictionary which holds the descriptors but this is seperated class by class
def orb_features(images):
    orb_vectors = {}
    descriptor_list = []
    orb = cv2.ORB_create()

    # Loop over classes
    for key,value in tqdm(images.items()):
        features = []
        for img in value:
            kp, des = orb.detectAndCompute(img,None)

            if des is not None:
                descriptor_list.extend(des)
                features.append(des)
        orb_vectors[key] = features
    return [descriptor_list, orb_vectors]

# descriptor list is unordered one, sift features that is seperated class by class for train data
descriptor_list, all_bovw_feature = orb_features(images) 
# Takes the sift features that is seperated class by class for test data
test_bovw_feature = orb_features(test)[1] 

100%|██████████| 7/7 [00:10<00:00,  1.48s/it]
100%|██████████| 7/7 [00:02<00:00,  3.02it/s]


In [42]:

# A k-means clustering algorithm who takes 2 parameter which is number 
# of cluster(k) and the other is descriptors list(unordered 1d array)
# Returns an array that holds central points.
def kmeans(k, descriptor_list):
    # kmeans = KMeans(n_clusters = k, n_init=10, max_iter=50)
    kmeans = MiniBatchKMeans(n_clusters=k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_ 
    return visual_words
    
# Takes the central points which is visual words    
descriptor_list = np.stack(descriptor_list, dtype=np.float32)
visual_words = kmeans(150, descriptor_list) 
print(visual_words.shape)
print(visual_words[0])

(150, 32)
[134.47598  110.96585   86.05241  173.00012   67.23574  172.29462
  70.49426   89.048256 156.54669   85.549286 180.01643  175.21315
 125.39979  178.02411   96.57973  164.29842  185.2338   159.26035
 217.13612  144.36104   79.92036   67.27581  143.49489  186.40923
 180.09035  199.6814    90.15073  174.53725   68.434395  66.42967
  63.082005  84.7094  ]


In [43]:
def closest_centroid(x, centroids):
    """Finds and returns the index of the closest centroid for a given vector x"""
    distances = np.empty(len(centroids))
    for i in range(len(centroids)):
        distances[i] = distance.euclidean(centroids[i], x)
    return np.argmin(distances) # return the index of the lowest distance

# Takes 2 parameters. The first one is a dictionary that holds the descriptors that are separated class by class 
# And the second parameter is an array that holds the central points (visual words) of the k means clustering
# Returns a dictionary that holds the histograms for each images that are separated class by class. 
def image_class(all_bovw, centers):
    dict_feature = {}
    for key,value in tqdm(all_bovw.items()):
        category = []
        for img in value:
            histogram = np.zeros(len(centers))
            for each_feature in img:
                # ind = np.where(centers == each_feature)[0] # 
                ind = closest_centroid(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
        print(f"{key}: {len(category)}")
    return dict_feature

# Creates histograms for train data    
print("Classifying training data")
bovw_train = image_class(all_bovw_feature, visual_words) 
# Creates histograms for test data
print("Classifying testing data")
bovw_test = image_class(test_bovw_feature, visual_words) 

## TODO: Should probably Pickle this

Classifying training data


 14%|█▍        | 1/7 [01:15<07:35, 75.91s/it]

green: 133


 29%|██▊       | 2/7 [01:45<04:02, 48.44s/it]

house_indoor: 42


 43%|████▎     | 3/7 [02:56<03:55, 58.90s/it]

sea: 141


 57%|█████▋    | 4/7 [03:46<02:46, 55.41s/it]

house_building: 70


 71%|███████▏  | 5/7 [04:56<02:01, 60.68s/it]

city: 140


 86%|████████▌ | 6/7 [06:04<01:03, 63.22s/it]

face: 140


100%|██████████| 7/7 [07:13<00:00, 61.86s/it]


office: 140
Classifying testing data


 14%|█▍        | 1/7 [00:13<01:22, 13.82s/it]

green: 30


 29%|██▊       | 2/7 [00:28<01:11, 14.32s/it]

house_indoor: 30


 43%|████▎     | 3/7 [00:42<00:55, 13.98s/it]

sea: 30


 57%|█████▋    | 4/7 [00:55<00:40, 13.61s/it]

house_building: 30


 71%|███████▏  | 5/7 [01:09<00:27, 13.95s/it]

city: 30


 86%|████████▌ | 6/7 [01:23<00:13, 14.00s/it]

face: 30


100%|██████████| 7/7 [01:38<00:00, 14.11s/it]

office: 30





In [46]:
# 1-NN algorithm. We use this for predict the class of test images.
# Takes 2 parameters. images is the feature vectors of train images and tests is the feature vectors of test images
# Returns an array that holds number of test images, number of correctly predicted images and records of class based images respectively
def knn(images, tests):
    num_test = 0
    correct_predict = 0
    class_based = {}
    
    for test_key, test_val in tests.items():
        class_based[test_key] = [0, 0] # [correct, all]
        
        for tst in test_val:
            predict_start = 0
            minimum = 0
            key = "a" #predicted

            # Find the closest match
            for train_key, train_val in images.items():
                for train in train_val:
                    if(predict_start == 0):
                        minimum = distance.euclidean(tst, train)
                        #minimum = L1_dist(tst,train)
                        key = train_key
                        predict_start += 1
                    else:
                        dist = distance.euclidean(tst, train)
                        #dist = L1_dist(tst,train)
                        if(dist < minimum):
                            minimum = dist
                            key = train_key
            
            if(test_key == key):
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1

    return [num_test, correct_predict, class_based]
    
# Call the knn function    
test, correct, classes = knn(bovw_train, bovw_test) 
print(classes)

Accuracy: 47.14285714285714%
{'green': [13, 30], 'house_indoor': [2, 30], 'sea': [15, 30], 'house_building': [6, 30], 'city': [18, 30], 'face': [26, 30], 'office': [19, 30]}


In [47]:
# Calculates the average accuracy and class based accuracies.  
def accuracy(results):
    avg_accuracy = (results[1] / results[0]) * 100
    print("Average accuracy: %" + str(avg_accuracy))
    print("\nClass based accuracies: \n")
    for key,value in results[2].items():
        acc = (value[0] / value[1]) * 100
        print(key + " : %" + str(acc))
        
# Calculates the accuracies and write the results to the console.       
accuracy((test, correct, classes)) 

Average accuracy: %47.14285714285714

Class based accuracies: 

green : %43.333333333333336
house_indoor : %6.666666666666667
sea : %50.0
house_building : %20.0
city : %60.0
face : %86.66666666666667
office : %63.33333333333333
