In [1]:
import os
import cv2
import time
import sklearn
from sklearn.cluster import KMeans
from scipy.spatial import distance
import numpy as np
import pickle
from PIL import Image
# SIFT detector / descriptor extractor used by the cells below.
# Since OpenCV 4.4 SIFT is exposed as cv2.SIFT_create(); the cv2.xfeatures2d
# location only exists in contrib builds, so it is kept as a fallback for
# older installations.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

In [2]:
import json
def _load_json_file(path):
    """Parse and return the JSON document stored in the text file `path`.

    The file is opened in a `with` block so the handle is closed even when
    the content is not valid JSON and parsing raises.
    """
    with open(path, 'r') as handle:
        return json.load(handle)


# `names` is iterated later as the image file names to process.
names = _load_json_file('mylist1.txt')
# NOTE(review): presumably per-image labels and the category list -- confirm
# against the code that wrote these files.
labels = _load_json_file('mylist2.txt')
categories = _load_json_file('mylist3.txt')

In [3]:
def bag_of_words(centroids, img_descriptors):
    """Build the Bag-of-Words histogram of one image.

    Every descriptor is assigned to its nearest centroid (Euclidean distance),
    and the histogram counts how many descriptors fell on each centroid. The
    centroids act as the visual words, so the result has one bin per centroid
    and its entries sum to the number of descriptors.

    Parameters
    ----------
    centroids : array of shape (n_centroids, n_features)
        Cluster centres, e.g. `cluster_centers_` of a fitted KMeans model.
    img_descriptors : array of shape (n_descriptors, n_features) or None
        Descriptors extracted from the image. None or an empty array yields
        an all-zero histogram.

    Returns
    -------
    numpy.ndarray of shape (n_centroids,), dtype float64
        Number of descriptors assigned to each centroid.
    """
    n_centroids = centroids.shape[0]

    # The BoW vector has one bin per visual word (cluster centroid).
    bow_vector = np.zeros(n_centroids)

    # Nothing to count: no descriptors for this image, or no words at all.
    if img_descriptors is None or len(img_descriptors) == 0 or n_centroids == 0:
        return bow_vector

    # (n_descriptors, n_centroids) matrix of descriptor-to-centroid distances.
    pairwise = distance.cdist(img_descriptors, centroids)
    # Index of the closest centroid for each descriptor (first one on ties).
    nearest = np.argmin(pairwise, axis=1)

    # Histogram of the assigned word indices, padded to cover unused words.
    bow_vector += np.bincount(nearest, minlength=n_centroids)
    return bow_vector

In [None]:
# Pipeline of this cell:
#   1. extract the SIFT descriptors of every map (training) image,
#   2. cluster all descriptors with KMeans; the K centroids are the visual words,
#   3. describe every map image as a Bag-of-Words histogram over those words,
#   4. save the fitted model, the centroids and the histograms to disk.


def _sift_descriptors(img_name):
    """Return the SIFT descriptors of the image '../images/<img_name>'.

    Returns whatever `sift.detectAndCompute` reports as descriptors, which is
    None when no keypoint is found. Raises FileNotFoundError when OpenCV
    cannot read the file (cv2.imread signals that by returning None), so the
    failing path is named instead of surfacing as an obscure cvtColor error.
    """
    img_path = os.path.join('../images/', img_name)
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError("Could not read image: " + img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img_desc = sift.detectAndCompute(gray, None)
    return img_desc


## DESCRIPTOR EXTRACTION ##

# One entry per image, in the same order as `names`. The descriptors are
# computed once here and reused for the BoW step further down.
descriptors_per_image = [_sift_descriptors(img_name) for img_name in names]

# Images without any keypoint contribute nothing to the clustering.
usable_descriptors = [d for d in descriptors_per_image if d is not None]
if not usable_descriptors:
    raise ValueError("No SIFT descriptors could be extracted from the map images")

# Stack once at the end: calling np.vstack inside the loop re-copies the
# whole matrix for every image.
descriptors = np.vstack(usable_descriptors)

print(descriptors.shape)

## CLUSTERING ##

K = 100  # number of clusters (equivalent to the number of words) we want to estimate
kmeans = KMeans(n_clusters=K, random_state=0, n_init=4)
clusters = kmeans.fit(descriptors)  # fitted on the descriptors of the map (training) images
centroids = clusters.cluster_centers_
with open("kmeanmodel.pkl","wb") as f:
    pickle.dump(kmeans,f)
print("Shape of the centroids matrix: ", centroids.shape)
print("We computed ", centroids.shape[0], "centroids of lengh ", centroids.shape[1], " (the same of the descriptor)")
# Remember: the centroids can be considered as the words that compose our documents
# -> in this case the basic components of the images

## BAG OF WORDS REPRESENTATION OF MAP IMAGES ##

bow_rows = []
for img_descriptors in descriptors_per_image:
    if img_descriptors is None:
        # No keypoints in this image: keep an all-zero histogram so that
        # row i of the result still corresponds to names[i].
        bow = np.zeros(centroids.shape[0])
    else:
        # BoW representation of the image using the visual words (centroids)
        bow = bag_of_words(centroids, img_descriptors)
    bow_rows.append(bow)

# Always 2-D: one row per map image, one column per visual word.
bow_map_images = np.vstack(bow_rows)

np.save("centroids.npy", centroids)
np.save("bow_map_images.npy", bow_map_images)

(174716, 128)
