In [1]:
%load_ext autoreload
%autoreload 2

In [51]:
import numpy as np
import cv2
from tqdm import trange
from sklearn.model_selection import train_test_split
from skimage import io, img_as_ubyte
from sklearn.cluster import KMeans

from src.data.ucmerced_dataset import UcMercedDataset
from src.settings import DATA_DIRECTORY

In [3]:
dataset = UcMercedDataset(DATA_DIRECTORY)

In [4]:
x = np.empty(shape=(dataset.__len__(), 256, 256, 3))
y = np.empty(shape=(dataset.__len__(), ), dtype=np.int)

for idx in trange(dataset.__len__()):
    item = dataset.__getitem__(idx)

    x[idx] = item['a']
    y[idx] = item['a_y']

100%|██████████| 2056/2056 [00:11<00:00, 179.97it/s]


In [40]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.8)

In [41]:
sift = cv2.SIFT_create()

In [48]:
def get_descriptors(images):
    desc = []

    for img in images:
        cv_img = img_as_ubyte(img)
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_RGB2GRAY)
        _, d = sift.detectAndCompute(cv_img, None)
        if d is not None:
            desc.append(d)

    return np.vstack(desc)

In [49]:
descriptors = get_descriptors(x_train)

In [59]:
k_means = KMeans(n_clusters=15)

k_means.fit(descriptors[:1000])

KMeans(n_clusters=15)

In [60]:
test_ex = x_test[:1]

test_ex_desc = get_descriptors(test_ex)

In [61]:
codewords = k_means.predict(test_ex_desc)

In [68]:
np.histogram(codewords, bins=15)[0] / np.shape(codewords)[0]

array([0.08471455, 0.05064457, 0.04143646, 0.0718232 , 0.04880295,
       0.08195212, 0.09760589, 0.09116022, 0.053407  , 0.04143646,
       0.06537753, 0.05985267, 0.11878453, 0.03959484, 0.053407  ])

In [65]:
np.unique(codewords, return_counts=True)[1]

array([ 92,  55,  45,  78,  53,  89, 106,  99,  58,  45,  71,  65, 129,
        43,  58])