In [1]:
# Python 2
import cv2
import numpy as np
import pandas as pd
import random
from __future__ import print_function
from os import listdir
from os.path import join
from scipy.spatial.distance import cosine
from sklearn import neighbors

In [2]:
img_dir = "data"
df = pd.DataFrame(filter(lambda x: ".JPG" in x, listdir(img_dir)), columns=["name"])
print(df.dtypes)
print(df.head())

name    object
dtype: object
           name
0  image001.JPG
1  image002.JPG
2  image003.JPG
3  image004.JPG
4  image006.JPG


In [3]:
df["class"] = np.repeat(np.linspace(0, 49, num=50), 4)
df["class"] = df["class"].astype("category")
print(df.dtypes)
print(df.head())

name       object
class    category
dtype: object
           name class
0  image001.JPG     0
1  image002.JPG     0
2  image003.JPG     0
3  image004.JPG     0
4  image006.JPG     1


In [4]:
split = ["train"] * 200
for i in range(0, 50):
    split[i * 4 + random.randint(0, 3)] = "test"
df["split"] = split
df["split"] = df["split"].astype("category")
print(df.dtypes)
print(df.head())

name       object
class    category
split    category
dtype: object
           name class  split
0  image001.JPG     0  train
1  image002.JPG     0  train
2  image003.JPG     0   test
3  image004.JPG     0  train
4  image006.JPG     1   test


In [5]:
df_train = df[df["split"] == "train"]
df_train_name = list(df_train["name"])
df_train_class = list(df_train["class"])
sift = cv2.SIFT(nfeatures=20)
train_x, train_y = list(), list()
for i in range(0, 150):
    progress = int(i / 150.0 * 1000) / 10.0
    print("{}: {}%".format(df_train_name[i], progress), end="\r")
    img = cv2.imread(join(img_dir, df_train_name[i]))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp, des = sift.detectAndCompute(img, None)
    for d in des:
        train_x.append(tuple(d.astype(np.int32)))
        train_y.append(int(df_train_class[i]))



In [6]:
kNN = neighbors.KNeighborsClassifier(n_neighbors=5, weights="distance", metric=cosine)
# scipy.spatial.distance.cosine is much faster than sklearn.metrics.pairwise.cosine_distances
kNN.fit(train_x, train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30,
           metric=<function cosine at 0x110243140>, metric_params=None,
           n_neighbors=5, p=2, weights='distance')

In [7]:
df_test = df[df["split"] == "test"]
df_test_name = list(df_test["name"])
sift = cv2.SIFT(nfeatures=10)
predict = list()
for i in range(0, 50):
    progress = int(i / 50.0 * 1000) / 10.0
    print("{}: {}%".format(df_train_name[i], progress), end="\r")
    img = cv2.imread(join(img_dir, df_test_name[i]))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp, des = sift.detectAndCompute(img, None)
    vote = np.zeros(50, dtype=np.uint16)
    for d in des:
        vote[kNN.predict(d)[0]] += 1
    predict.append(vote.argmax())



In [8]:
correct = 0
for i in range(0, 50):
    if predict[i] == i:
        correct += 1
print(predict)
print(correct)

[0, 1, 2, 3, 4, 26, 6, 7, 8, 9, 38, 11, 12, 13, 14, 15, 23, 17, 15, 19, 20, 17, 22, 31, 24, 45, 26, 27, 46, 29, 38, 31, 32, 13, 34, 35, 45, 38, 38, 15, 1, 41, 25, 43, 44, 45, 46, 47, 48, 49]
35
