In [2]:
import os
import numpy as np
import shutil
import random

def create_main_dir(file_path):
    script_dir = os.path.dirname(os.path.abspath("__file__"))
    dest_dir = os.path.join(script_dir, file_path)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

def create_sub_dir(file_path,parent_dir):
    script_dir = os.path.dirname(os.path.abspath("_file_"))
    dest_dir = os.path.join(script_dir, parent_dir, file_path)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)


def process_data():
    directory='../btp/dataset/original'
    create_main_dir(directory)
    classes = ['Aboard', 'All_Gone','Baby','Beside','Book','Bowl','Bridge','Camp','Cartridge','Eight','Five','Fond','Four','Friend','Glove','Hang','High','House','How_Many','IorMe','Man','Marry','Meat','Medal','Mid_Day','Middle','Money','Moon','Mother','Nine','One','Opposite','Prisoner','Ring','Rose','See','Seven','Short','Six','Superior','Ten','Thick','Thin','Three','Tobacco','Two','Up','Watch','Write','You']
    for cls in classes:
        create_sub_dir(cls,directory)


process_data()

In [3]:

root_dir = '../btp/dataset/' # data root path
classes_dir = ['Aboard', 'All_Gone','Baby','Beside','Book','Bowl','Bridge','Camp','Cartridge','Eight','Five','Fond','Four','Friend','Glove','Hang','High','House','How_Many','IorMe','Man','Marry','Meat','Medal','Mid_Day','Middle','Money','Moon','Mother','Nine','One','Opposite','Prisoner','Ring','Rose','See','Seven','Short','Six','Superior','Ten','Thick','Thin','Three','Tobacco','Two','Up','Watch','Write','You']  #total labels

test_ratio = 0.30

for cls in classes_dir:
    if not os.path.exists(root_dir +'train/' + cls):
        os.makedirs(root_dir +'train/' + cls)
    if not os.path.exists(root_dir +'test/' + cls):
        os.makedirs(root_dir +'test/' + cls)


# Creating partitions of the data after shuffling

    src = root_dir + 'original/'+cls # Folder to copy images from

    allFileNames = os.listdir(src)
    np.random.shuffle(allFileNames) 
    train_FileNames,test_FileNames = np.split(np.array(allFileNames),
                                                          [int(len(allFileNames)* (1 - (test_ratio))), 
                                                          ])


    train_FileNames = [src+'/'+ name for name in train_FileNames.tolist()]
    test_FileNames = [src+'/' + name for name in test_FileNames.tolist()]

# print('Total images: ', len(allFileNames))
# print('Training: ', len(train_FileNames))
# print('Testing: ', len(test_FileNames))

# Copy-pasting image

    for name in train_FileNames:
#         print(name)
#         print(cls)
        shutil.copy(name, root_dir +'train/' + cls)

    for name in test_FileNames:
        shutil.copy(name, root_dir +'test/' + cls)

In [4]:
pip install opencv-python==4.5.1.48 opencv-contrib-python==4.5.1.48

Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import cv2

def load_images_from_folder(folder):
    images = {}
    for filename in os.listdir(folder):
        category = []
        path = folder + "/" + filename
        for cat in os.listdir(path):
            img = cv2.imread(path + "/" + cat,0)  #grayscale img loading 
            if img is not None:
                category.append(img)
        images[filename] = category
    return images



In [6]:
train = load_images_from_folder('dataset/train')  
test = load_images_from_folder("dataset/test") 

In [7]:
from scipy import ndimage
from scipy.spatial import distance
import numpy as np

def sift_features(images):
    sift_vectors = {}
    descriptor_list = []
    sift = cv2.xfeatures2d.SIFT_create()
    for key,value in images.items():
        features = []
        for img in value:
            kp, des = sift.detectAndCompute(img,None)
#             print(des)
            if des is not None:
                descriptor_list.extend(des)
                features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

sifts = sift_features(train) 
# Takes the descriptor list which is unordered one
descriptor_list = sifts[0] 
# Takes the sift features that is seperated class by class for train data
all_bovw_feature = sifts[1] 
# Takes the sift features that is seperated class by class for test data
test_bovw_feature = sift_features(test)[1] 

In [8]:
from sklearn.cluster import KMeans

def kmeans(k, descriptor_list):
    kmeans = KMeans(n_clusters = k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_ 
    return visual_words
    
# Takes the central points which is visual words    
visual_words = kmeans(150, descriptor_list) 

In [9]:
def find_index(image, center):
    count = 0
    ind = 0
    for i in range(len(center)):
        if(i == 0):
           count = distance.euclidean(image, center[i]) 
           #count = L1_dist(image, center[i])
        else:
            dist = distance.euclidean(image, center[i]) 
            #dist = L1_dist(image, center[i])
            if(dist < count):
                ind = i
                count = dist
    return ind

In [10]:
import numpy as np
 
def image_class(all_bovw, centers):
    dict_feature = {}
    for key,value in all_bovw.items():
        category = []
        for img in value:
            histogram = np.zeros(len(centers))
            for each_feature in img:
                ind = find_index(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature
    
# Creates histograms for train data    
bovw_train = image_class(all_bovw_feature, visual_words) 
# Creates histograms for test data
bovw_test = image_class(test_bovw_feature, visual_words) 

In [11]:
type(bovw_train)

dict

In [12]:
print(len(bovw_train))

50


In [13]:
print(bovw_train['One'])  #dict of labels vs array of floats

[array([0., 0., 0., 0., 0., 0., 0., 1., 0., 2., 0., 1., 1., 0., 0., 0., 0.,
       1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 2., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 2., 1., 1., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0., 0., 1., 0., 0.,
       2., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 5., 0., 0., 0., 0., 2., 0., 0., 1., 0., 0.]), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.,

In [14]:
import matplotlib.pyplot as plt
from numpy import array

data = list(bovw_train.values())
a = np.array(data)
print(a)
print(type(a))
a=a.flatten()
plt.hist(a.astype('float'))
plt.show()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



ValueError: setting an array element with a sequence.

In [15]:
def knn(images, tests):
    num_test = 0
    correct_predict = 0
    class_based = {}
    
    for test_key, test_val in tests.items():
        class_based[test_key] = [0, 0] # [correct, all]
        for tst in test_val:
            predict_start = 0
            #print(test_key)
            minimum = 0
            key = "a" #predicted
            for train_key, train_val in images.items():
                for train in train_val:
                    if(predict_start == 0):
                        minimum = distance.euclidean(tst, train)
                        #minimum = L1_dist(tst,train)
                        key = train_key
                        predict_start += 1
                    else:
                        dist = distance.euclidean(tst, train)
                        #dist = L1_dist(tst,train)
                        if(dist < minimum):
                            minimum = dist
                            key = train_key
            
            if(test_key == key):
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1
            #print(minimum)
    return [num_test, correct_predict, class_based]
    
# Call the knn function    
results_bowl = knn(bovw_train, bovw_test) 

In [16]:
def accuracy(results):
    avg_accuracy = (results[1] / results[0]) * 100
    print("Average accuracy: %" + str(avg_accuracy))
    print("\nClass based accuracies: \n")
    for key,value in results[2].items():
        acc = (value[0] / value[1]) * 100
        print(key + " : %" + str(acc))
        
# Calculates the accuracies and write the results to the console.       
accuracy(results_bowl) 

Average accuracy: %95.52814186584425

Class based accuracies: 

Aboard : %92.3076923076923
All_Gone : %92.15686274509804
Baby : %90.56603773584906
Beside : %92.15686274509804
Book : %92.5925925925926
Bowl : %90.38461538461539
Bridge : %100.0
Camp : %98.14814814814815
Cartridge : %100.0
Eight : %100.0
Five : %95.83333333333334
Fond : %100.0
Four : %96.0
Friend : %90.56603773584906
Glove : %100.0
Hang : %100.0
High : %95.83333333333334
House : %92.0
How_Many : %98.21428571428571
IorMe : %92.3076923076923
Man : %100.0
Marry : %94.33962264150944
Meat : %100.0
Medal : %94.0
Middle : %98.11320754716981
Mid_Day : %96.0
Money : %84.61538461538461
Moon : %96.22641509433963
Mother : %100.0
Nine : %96.15384615384616
One : %97.95918367346938
Opposite : %94.23076923076923
Prisoner : %80.0
Ring : %92.0
Rose : %96.07843137254902
See : %96.07843137254902
Seven : %98.18181818181819
Short : %87.75510204081633
Six : %97.91666666666666
Superior : %96.0
Ten : %94.0
Thick : %96.0
Thin : %93.87755102040816
T