In [82]:
import numpy as np
import cv2
import os
from sklearn.cluster import KMeans
from sklearn.naive_bayes import MultinomialNB
from scipy.cluster.vq import vq,kmeans
from sklearn.preprocessing import StandardScaler


In [6]:
#Step 1
#Choose three different categories of objects from the Caltech 101 dataset, as diverse from each other as possible.

In [9]:
def read_dataset_metadata(dir_path):
    dataset = {}
    for filename in os.listdir(dir_path):
        if os.path.isdir(dir_path + filename):
            dataset[filename] = []
            for img in os.listdir(os.path.join(dir_path + filename)):
                dataset[filename].append(os.path.join(dir_path,filename,img))
    return dataset

def get_frequent_object(dataset,count=3):
    """
    dataset : Dictionary object returned by the function 'read_dataset'
    
    Returns a list of objects with their frequency in descending order
    """
    key_count = {}
    for key in dataset.keys():
        key_count[key] = len(dataset[key])
    
    
    object_keys = []
    for key, value in sorted(key_count.iteritems(), key=lambda (k,v): (v,k), reverse=True):
        object_keys.append(key)
        count-=1
        if count == 0:
            break
    return object_keys
    

In [10]:
dataset_metadata = read_dataset_metadata('./101_ObjectCategories/')

In [11]:
frequent_objects = get_frequent_object(dataset_metadata,5)
print frequent_objects

['airplanes', 'Motorbikes', 'BACKGROUND_Google', 'Faces_easy', 'Faces']


In [12]:
#The output of the above cell shows the 5 most frequent objects
#In the interest of choosing three different categories as diverse from each other as possible
#We choosse 'airplanes', 'Motorbikes' and 'Faces_easy' classes

In [13]:
categories = ['airplanes', 'Motorbikes','Faces_easy']

new_dataset_metadata = {}
for category in categories:
    new_dataset_metadata[category] = dataset_metadata[category]

In [61]:
def make_dataset(metadata):
    try:
        dataset = []
        for key in metadata.keys():
            for img_path in metadata[key]:
                img = cv2.imread(img_path,0)
                dataset.append((img,key))
        dataset = np.array(dataset)
        np.save('dataset',dataset)
        return 'dataset.npy'
    except:
        raise "Error in creating Dataset File"

In [62]:
dataset_path = make_dataset(new_dataset_metadata)

In [63]:
dataset = np.load(dataset_path)

In [7]:
"""
Step 2
Extract some local features (SIFT/SURF), cluster them using k-Means algorithm, and
create a bag-of-words representation for images. This bag-of-word representation is to be
used as image feature in the subsequent steps.
"""

'\nStep 2\nExtract some local features (SIFT/SURF), cluster them using k-Means algorithm, and\ncreate a bag-of-words representation for images. This bag-of-word representation is to be\nused as image feature in the subsequent steps.\n'

In [97]:
sift = cv2.xfeatures2d.SIFT_create()

kp, des = sift.detectAndCompute(dataset[0][0],None)


TypeError: Required argument 'mask' (pos 2) not found

In [76]:
des_list = []
for img_data in dataset:
    im = img_data[0]
    kpts = sift.detect(im)
    kpts, des = sift.compute(im, kpts)
    des_list.append((im, des))

In [77]:
descriptors = des_list[0][1]
for img, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))  


In [98]:
descriptors.shape

(619879, 128)

In [83]:
k = 20
voc, variance = kmeans(descriptors, k, 1) 

In [91]:
im_features = np.zeros((len(dataset), k), "float32")
for i in xrange(len(dataset)):
    words, distance = vq(des_list[i][1],voc)
    for w in words:
        im_features[i][w] += 1

In [95]:
stdSlr = StandardScaler().fit(im_features)
im_features = stdSlr.transform(im_features)

In [96]:
im_features

array([[ 0.91382909, -1.30561399, -0.94648373, ..., -1.41317773,
         0.28676912, -1.23465788],
       [-0.71712714, -0.90231484, -0.88215715, ..., -0.83762997,
        -0.93375099, -0.93811244],
       [-0.71712714, -1.10396445, -1.0751369 , ..., -0.40596923,
         1.1004492 , -0.93811244],
       ..., 
       [ 0.83616447,  0.91253138,  0.08274174, ...,  1.75233459,
         0.61224115,  0.24806936],
       [ 0.91382909,  1.71912968,  1.75523317, ...,  2.18399525,
         3.05328131,  1.92849362],
       [ 1.53514576,  0.8117066 ,  0.98331404, ...,  0.02569152,
         1.26318514,  2.12619042]], dtype=float32)

In [108]:
def make_descriptor_data(dataset):
    try:
        descriptor_list = []
        sift = cv2.xfeatures2d.SIFT_create()
        for img_data in dataset:
            img = img_data[0]
            label = img_data[1]
            #Computing descriptors for every image
            kpts = sift.detect(im)
            kpts, des = sift.compute(im, kpts)
            descriptor_list.append((img, des, label))
            
        descriptor_list = np.array(descriptor_list)
        np.save('descriptor_data',descriptor_list)
        return True
    except:
        raise "Error in creating Bag of Words File"

In [109]:
make_descriptor_data(dataset)

True

In [110]:
descriptor_data = np.load('descriptor_data.npy')

In [114]:
descriptors = descriptor_data[0][1]
for img, descriptor,label in descriptor_data[1:]:
    descriptors = np.vstack((descriptors, descriptor))  


In [117]:
#K-Means clustering of the descriptors
k = 20
voc, variance = kmeans(descriptors, k, 1) 

In [128]:
#Crearing Histogram of the Bag of Words
im_features = np.zeros((len(dataset), k), "float32")
class_labels =[]
for i in xrange(len(dataset)):
    words, distance = vq(descriptor_data[i][1],voc)
    class_labels.append(descriptor_data[i][2])
    for w in words:
        im_features[i][w] += 1
class_labels = np.array(class_labels)

In [136]:
feature_dataset = []
for im_feature,label in zip(im_features,class_labels):
    feature_dataset.append((im_feature,label))

feature_dataset=np.array(feature_dataset)

In [138]:
feature_dataset = np.load('bag_of_words_features.npy')

In [8]:
"""
Step 3:
Choose approximately half of the images from each category, as training data. Save the
rest as test data.
"""

'\nStep 3:\nChoose approximately half of the images from each category, as training data. Save the\nrest as test data.\n'

In [14]:
category_map = {'airplanes':1, 'Motorbikes':2,'Faces_easy':3}

In [140]:
#Splitting bow_data according to categories
category_bow_data = {}
for data in feature_dataset:
    if category_bow_data.has_key(data[1]):
        category_bow_data[data[1]].append(data[0])
    else:
        category_bow_data[data[1]]=[]    

In [141]:
#Splitting data into training and testing data
x_train,y_train,x_test,y_test = [[],[],[],[]]
category_count = {'airplanes':len(category_bow_data['airplanes']), 'Motorbikes':len(category_bow_data['Motorbikes']),
                  'Faces_easy':len(category_bow_data['Faces_easy'])}
for category in category_map:
    x_train.extend(category_bow_data[category][0:category_count[category]/2+1])
    y_train.extend([category_map[category] for _ in range(len(category_bow_data[category][0:category_count[category]/2+1]))])
    
    x_test.extend(category_bow_data[category][category_count[category]/2+1:])
    y_test.extend([category_map[category] for _ in range(len(category_bow_data[category][category_count[category]/2+1:]))])
    
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

In [155]:
print x_train.shape,y_train.shape

(1017, 20) (1017,)


In [53]:
"""
Step 4:
Train Normal Bayesian Classifier of OpenCV to distinguish between the three object
categories using the chosen training data.
"""

'\nStep 4:\nTrain Normal Bayesian Classifier of OpenCV to distinguish between the three object\ncategories using the chosen training data.\n'

In [143]:
classifier = MultinomialNB()

In [144]:
classifier.fit(x_train,y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [145]:
classifier.score(x_test,y_test)

0.39387956564659427

In [158]:
help(vq)

Help on function vq in module scipy.cluster.vq:

vq(obs, code_book, check_finite=True)
    Assign codes from a code book to observations.
    
    Assigns a code from a code book to each observation. Each
    observation vector in the 'M' by 'N' `obs` array is compared with the
    centroids in the code book and assigned the code of the closest
    centroid.
    
    The features in `obs` should have unit variance, which can be
    achieved by passing them through the whiten function.  The code
    book can be created with the k-means algorithm or a different
    encoding algorithm.
    
    Parameters
    ----------
    obs : ndarray
        Each row of the 'M' x 'N' array is an observation.  The columns are
        the "features" seen during each observation. The features must be
        whitened first using the whiten function or something equivalent.
    code_book : ndarray
        The code book is usually generated using the k-means algorithm.
        Each row of the array holds a