In [1]:
!pip3 install sklearn



In [2]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import math
from sklearn.cluster import KMeans

In [3]:
def fix_image_to_show(img):
    """Linearly stretch an image to the 0-255 range and return it as uint8.

    Args:
        img: NumPy array of any dimensionality (e.g. 2-D grayscale or 3-D
            colour) with a numeric dtype. The input is not modified.

    Returns:
        A new uint8 array with the same shape as ``img``. The smallest input
        value maps to 0 and the largest to (about) 255; the final uint8 cast
        truncates rather than rounds. An image whose pixels are all equal is
        returned as uniform mid-grey (127).
    """
    # astype returns a fresh array, so the caller's image is never touched.
    image = img.astype('float32')
    max_pixel = np.max(image)
    min_pixel = np.min(image)
    if max_pixel == min_pixel:
        # No contrast to stretch (and the scale factor would divide by zero).
        # Build the result from the shape so this works for 2-D images too.
        return np.full(image.shape, 127, dtype='uint8')
    m = 255 / (max_pixel - min_pixel)
    image = image * m - min_pixel * m
    return image.astype('uint8')

def show_image(img, name=None, cmap=None):
    """Display an image with matplotlib and optionally save it to disk.

    The image is first passed through ``fix_image_to_show`` so that it is
    drawn as uint8 data in the 0-255 range.

    Args:
        img: Image array to display; a copy is used, the input is untouched.
        name: Optional output path without extension. When given, the
            displayed image is also written to ``name + '.jpg'``.
        cmap: Optional matplotlib colormap, forwarded to both ``imshow`` and
            ``imsave``.
    """
    stretched = fix_image_to_show(img.copy())
    plt.imshow(stretched, vmin=0, vmax=255, cmap=cmap)
    plt.show()
    if name is None:
        return
    plt.imsave(name + '.jpg', stretched, cmap=cmap)

In [4]:
import os
# Load the training set. Each sub-directory of `directory` is treated as one
# class: its name is the label and the files inside it are the samples.
directory = 'resources/Data/Train'
# Sorted so the sample order does not depend on the file system's listing order.
files = sorted(os.listdir(directory))
images = []      # grayscale training images
class_list = []  # class_list[i] is the label (folder name) of images[i]
for file in files:
    path = directory + '/' + file
    if not os.path.isdir(path):
        # Stray files next to the class folders would make os.listdir fail.
        continue
    image_names = sorted(os.listdir(path))
    for image_name in image_names:
        image = cv2.imread(path + '/' + image_name)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash inside cvtColor.
            print('Skipping unreadable image:', path + '/' + image_name)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        images.append(image)
        class_list.append(file)

In [None]:
# Collect the SIFT descriptors of every training image into one flat list;
# the next cell clusters them.

min_kp = 100000  # upper bound on the number of descriptors used per image

feature_vectors = []

sift = cv2.SIFT_create()

# NOTE(review): `deb` and `n` are not referenced anywhere else in the visible
# notebook; kept in case something outside this view relies on them.
deb = False

n = 2 # 2n + 1

for image in images:
    kp, desc = sift.detectAndCompute(image, None)
    usable = min(len(kp), min_kp)
    # Index-based access: with zero keypoints nothing is read from `desc`.
    feature_vectors.extend(desc[i] for i in range(usable))
print(len(feature_vectors))

In [None]:
# Cluster all collected descriptors into 100 groups.
# random_state is fixed so the clustering (and every result derived from it)
# is the same on each Restart & Run All.
kmeans = KMeans(n_clusters=100, max_iter=300, random_state=0)
# NOTE(review): the uint8 cast presumably relies on the descriptor entries
# being integers in 0..255 - confirm for the OpenCV build in use.
feature_vectors = np.asarray(feature_vectors, dtype='uint8')
kmeans.fit(feature_vectors)
kmeans.labels_

In [None]:
# Display the fitted cluster centres; later cells use them (as `centers`)
# to assign each descriptor to its nearest cluster.
kmeans.cluster_centers_

In [None]:
def L1_distance(image1, image2):
    """Return the L1 (sum of absolute differences) distance between two arrays.

    Args:
        image1, image2: Array-likes of numeric values with broadcast-compatible
            shapes.

    Returns:
        The sum of ``|image1 - image2|`` over all elements, as a float64.
    """
    # Subtract in float64: with unsigned integer inputs (e.g. uint8 images)
    # a negative difference would otherwise wrap around to a large value.
    diff = np.asarray(image1, dtype='float64') - np.asarray(image2, dtype='float64')
    return np.sum(np.abs(diff))

def L2_distance(image1, image2):
    """Return the squared L2 distance between two arrays.

    Note that no square root is taken: the result is the sum of squared
    differences. That gives the same ordering as the true Euclidean distance,
    which is all a nearest-neighbour comparison needs.

    Args:
        image1, image2: Array-likes of numeric values with broadcast-compatible
            shapes.

    Returns:
        The sum of ``(image1 - image2) ** 2`` over all elements, as a float64.
    """
    # Work in float64: unsigned integer inputs would wrap on subtraction and
    # the squares would overflow narrow integer dtypes.
    diff = np.asarray(image1, dtype='float64') - np.asarray(image2, dtype='float64')
    return np.sum(np.power(diff, 2))



# Build one normalised cluster-assignment histogram per training image.
centers = kmeans.cluster_centers_


sift = cv2.SIFT_create()

histograms = []

for image in images:
    kp, desc = sift.detectAndCompute(image, None)
    hist = np.zeros((len(centers)), dtype='uint32')
    # `desc` is None when no keypoints were detected.
    if desc is not None:
        for descriptor in desc[:min_kp]:
            # L1 distance from this descriptor to every centre in one
            # vectorised step; argmin picks the first nearest centre.
            nearest = np.argmin(np.abs(centers - descriptor).sum(axis=1))
            hist[nearest] += 1

    hist = hist.astype('float32')
    total = np.sum(hist)
    if total > 0:
        # Skipped for an image without descriptors: 0 / 0 would fill the
        # histogram with NaN and break the classifiers downstream.
        hist /= total
    histograms.append(hist)


In [None]:
# Re-normalise every training histogram so that it sums to 1.
hist_c = []
for hist in histograms:
    hist = hist.astype('float32')
    total = np.sum(hist)
    if np.isfinite(total) and total > 0:
        hist /= total
    else:
        # A histogram with no counts sums to 0 (or is already NaN from an
        # earlier 0 / 0); store zeros rather than propagate NaN.
        hist = np.zeros_like(hist)
    hist_c.append(hist)
histograms = hist_c.copy()


In [None]:
import os
# Load the test set. Each sub-directory of `directory` is treated as one
# class: its name is the label and the files inside it are the samples.
directory = 'resources/Data/Test'
# Sorted so the sample order does not depend on the file system's listing order.
files = sorted(os.listdir(directory))
test_images = []      # grayscale test images
test_class_list = []  # test_class_list[i] is the label of test_images[i]
for file in files:
    path = directory + '/' + file
    if not os.path.isdir(path):
        # Stray files next to the class folders would make os.listdir fail.
        continue
    image_names = sorted(os.listdir(path))
    for image_name in image_names:
        image = cv2.imread(path + '/' + image_name)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash inside cvtColor.
            print('Skipping unreadable image:', path + '/' + image_name)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        test_images.append(image)
        test_class_list.append(file)
        
        

In [None]:
def predict_class_bow(image, k):
    """Classify an image by k-nearest-neighbour vote over training histograms.

    The image's SIFT descriptors are assigned to their nearest cluster centre
    (L1 distance) to form a normalised histogram. That histogram is compared
    (L1 distance) with every entry of the module-level ``histograms`` list,
    and the most frequent label among the ``k`` closest entries is returned.

    Reads the module-level ``sift``, ``centers``, ``min_kp``, ``histograms``
    and ``class_list``.

    Args:
        image: Grayscale image passed to ``sift.detectAndCompute``.
        k: Number of nearest training histograms that take part in the vote.

    Returns:
        The winning entry of ``class_list``. When several labels are equally
        frequent, the one whose closest neighbour ranks first wins.
    """
    kp, desc = sift.detectAndCompute(image, None)
    hist = np.zeros((len(centers)), dtype='uint64')
    # `desc` is None when no keypoints were detected. The min_kp cap mirrors
    # the one applied when the training histograms were built.
    if desc is not None:
        for descriptor in desc[:min_kp]:
            # L1 distance to every centre at once; argmin keeps the first
            # nearest centre.
            nearest = np.argmin(np.abs(centers - descriptor).sum(axis=1))
            hist[nearest] += 1

    hist = hist.astype('float32')
    total = np.sum(hist)
    if total > 0:
        # Leave an empty histogram as zeros instead of producing 0 / 0 = NaN.
        hist /= total

    # L1 distance from this histogram to every training histogram.
    train_hists = np.asarray(histograms, dtype='float32')
    distances = np.abs(train_hists - hist).sum(axis=1)
    # Stable sort so equally distant neighbours keep their training order.
    nearest_idx = np.argsort(distances, kind='stable')[:k]
    neighbours = [class_list[j] for j in nearest_idx]
    # max() returns the first label that reaches the highest count, i.e. ties
    # are broken in favour of the closer neighbour.
    return max(neighbours, key=neighbours.count)

In [None]:
# Count the test images whose k-NN (k = 30) prediction equals the true label.
correct = sum(
    1
    for image, image_class in zip(test_images, test_class_list)
    if predict_class_bow(image, 30) == image_class
)
print(correct)

In [None]:
# Accuracy of the k-NN classifier on the test set, in percent.
bow_accuracy = (correct / len(test_images)) * 100
print(bow_accuracy)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Training data: one histogram per training image, labelled by class name.
X = list(histograms)
Y = class_list

# Standardise each histogram bin, then fit a support vector classifier.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X, Y)

In [None]:
def predict_class_svm(image):
    """Classify an image with the fitted ``clf`` pipeline.

    The image's SIFT descriptors (at most ``min_kp`` of them) are assigned to
    their nearest cluster centre (L1 distance) to form a normalised histogram,
    which is then passed to ``clf.predict``.

    Reads the module-level ``sift``, ``centers``, ``min_kp`` and ``clf``.

    Args:
        image: Grayscale image passed to ``sift.detectAndCompute``.

    Returns:
        The result of ``clf.predict`` for the single histogram, i.e. an array
        holding one predicted label.
    """
    kp, desc = sift.detectAndCompute(image, None)
    hist = np.zeros((len(centers)), dtype='uint32')
    # `desc` is None when no keypoints were detected.
    if desc is not None:
        for descriptor in desc[:min_kp]:
            # L1 distance to every centre at once; argmin keeps the first
            # nearest centre.
            nearest = np.argmin(np.abs(centers - descriptor).sum(axis=1))
            hist[nearest] += 1
    hist = hist.astype('float32')
    total = np.sum(hist)
    if total > 0:
        # An empty histogram stays all-zero; 0 / 0 would give NaN, which
        # clf.predict rejects.
        hist /= total
    return clf.predict(hist.reshape(1, -1))

In [None]:
# Count the test images whose SVM prediction equals the true label.
correct = sum(
    1
    for image, image_class in zip(test_images, test_class_list)
    if predict_class_svm(image)[0] == image_class
)
print(correct)

In [None]:
# Accuracy of the SVM classifier on the test set, in percent.
svm_accuracy = correct / len(test_images) * 100
print(svm_accuracy)