In [1]:
import cv2
import os
import os.path as osp
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd

from datetime import datetime

from skimage.morphology import skeletonize
from skimage.morphology import thin
from skimage import data
from skimage.util import invert
from skimage.color import rgb2gray
from skimage.feature import hog
from skimage.transform import resize


from scipy.spatial.distance import cdist


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler #, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.svm import SVC


In [2]:
# Extract SIFT descriptors for a list of images
def extract_sift_features(list_image):
    """Compute SIFT descriptors for every image in ``list_image``.

    Args:
        list_image: Iterable of images. Each one is passed unchanged to
            ``detectAndCompute`` with no mask.

    Returns:
        A list with one entry per input image, in input order, holding the
        descriptor part of the ``detectAndCompute`` result for that image
        (callers in this notebook check these entries for ``None``).
    """
    # One detector instance is shared by all images.
    detector = cv2.SIFT_create()

    # detectAndCompute returns a (keypoints, descriptors) pair; only the
    # second element is kept.
    return [detector.detectAndCompute(img, None)[1] for img in list_image]

# Create the k-means bag-of-words (BoW) dictionary.
# We take the descriptors of all images (all_descriptors = every descriptor
# of every image pooled together), run k-means clustering to put them in
# groups, and return the cluster centers as the bag-of-words dictionary.

def kmean_bow(all_descriptors, num_cluster, random_state=None):
    """Cluster the pooled descriptors and return the cluster centers.

    Args:
        all_descriptors: Descriptors pooled over all images, one descriptor
            per row, in any form ``KMeans.fit`` accepts.
        num_cluster: Number of clusters to fit (the vocabulary size).
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` matches the previous unseeded behaviour; pass an
            integer to get the same vocabulary on every run.

    Returns:
        The ``cluster_centers_`` array of the fitted model, one row per
        cluster.
    """
    kmeans = KMeans(n_clusters=num_cluster, random_state=random_state)
    kmeans.fit(all_descriptors)

    # Persisting the dictionary to 'bow_dictionary.pkl' is currently
    # disabled; the centers are only returned to the caller.
    return kmeans.cluster_centers_





### Now let us build a feature vector

##### a) Each image has some number of descriptors (let us call these its descriptor-set)
##### b) For each descriptor in the descriptor-set, compute the distance to each cluster's center
##### c) For every descriptor in the descriptor-set, find the cluster it is nearest to
##### d) Now we have, for each image, a vector of cluster indices: one entry per descriptor, naming the cluster that descriptor is closest to
##### e) Finally, build a feature vector of size num_cluster, where feature k is the number of the image's descriptors that belong to cluster k
##### f) So each value in this feature vector ranges from 0 to the number of descriptors of the image

In [3]:
# Create the feature vectors.
# Each vector is a histogram over the clusters: entry k counts how many of
# the image's descriptors are nearest to cluster center k.
def create_feature_bow(image_descriptors, BoW, num_cluster):
    """Build one bag-of-words histogram per image.

    Args:
        image_descriptors: Sequence with one entry per image. Each entry is
            either a 2-D array of descriptors (one per row) or ``None`` /
            an empty array when the image has no descriptors.
        BoW: 2-D array of cluster centers, one per row.
        num_cluster: Length of every returned histogram.

    Returns:
        A list of 1-D integer arrays of length ``num_cluster``, in the same
        order as ``image_descriptors``. Images without descriptors get an
        all-zero histogram.

    Raises:
        IndexError: If a descriptor's nearest center has an index
            ``>= num_cluster`` (``BoW`` has more rows than ``num_cluster``).
    """
    X_features = []

    for descriptors in image_descriptors:
        # No descriptors (None or zero rows): nothing to count, and cdist
        # would reject a non-2-D empty array.
        if descriptors is None or len(descriptors) == 0:
            X_features.append(np.zeros(num_cluster, dtype=np.int_))
            continue

        # distance[r, k] is the distance from descriptor r to center k.
        distance = cdist(descriptors, BoW)
        nearest = np.argmin(distance, axis=1)

        # Histogram of nearest-center indices, padded to num_cluster bins.
        counts = np.bincount(nearest, minlength=num_cluster)
        if counts.size > num_cluster:
            # bincount grows past minlength when an index is out of range;
            # keep the histogram length fixed by rejecting that case.
            raise IndexError(
                "nearest cluster index %d is out of bounds for num_cluster=%d"
                % (counts.size - 1, num_cluster)
            )
        X_features.append(counts.astype(np.int_, copy=False))

    return X_features


## Extract SIFT features from the images in the yg_ar dataframe

In [4]:
# Root directory of the yg_ar data. Set the YG_AR_DATA_DIR environment
# variable to relocate it; the default is the original hard-coded location.
DATA_DIR = os.environ.get('YG_AR_DATA_DIR', '/home/ubuntu/data/yg_ar')

# Input: pickled dataframe holding the images and their labels.
df_path = osp.join(DATA_DIR, 'image_hard_df.pkl')
# Output: the same dataframe with the "image" column replaced by BoW features.
siftdesc_df = osp.join(DATA_DIR, 'image_hard_df_sift_amiya.pkl')

In [5]:
NUM_CLUSTERS = 60  # number of k-means clusters, i.e. the BoW vocabulary size

###########################################################
# Get the images (builds a list of images)
#
###########################################################
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle(df_path)
images = [np.array(img) for img in df["image"]]

###########################################################
# Extract SIFT descriptors
#
###########################################################
print("Now extracting descriptors BOW..")
# Pass the list itself: wrapping it in np.array would copy the whole dataset
# and breaks when the images do not all share one shape.
image_desctiptors = extract_sift_features(images)

###########################################################
# Now we will use k-means for building bag of words
#
###########################################################
print("Now building descriptors BOW..")
# For this, we need the entire vocabulary (all the descriptors of all the
# images). Images without descriptors (None) are skipped.
descriptors_found = [d for d in image_desctiptors if d is not None]
if not descriptors_found:
    raise ValueError("No SIFT descriptors were found in any image; cannot build the bag of words.")
# Stack the per-image descriptor arrays into one array, one descriptor per row.
all_descriptors = np.vstack(descriptors_found)

# We will create NUM_CLUSTERS clusters for the bag of words
BoW = kmean_bow(all_descriptors, NUM_CLUSTERS)

###########################################################
# Now utilizing the bag of words, we will create feature for each image
#
###########################################################
print("Now creating the features..")
X_features = create_feature_bow(image_desctiptors, BoW, NUM_CLUSTERS)

# Then we save it to the df: the "image" column is overwritten with the
# feature vectors before the dataframe is pickled.
print("Now saving the dataframe...")
df["image"] = X_features
df.to_pickle(siftdesc_df)
print("All Done.")


Now extracting descriptors BOW..
Now building descriptors BOW..




Now creating the features..
Now saving the dataframe...
All Done.


In [6]:
# Drop the in-memory dataframe so that the following cell works from the
# pickle saved on disk rather than from leftover kernel state.
del df

In [7]:
# Show which file is being loaded, then read the saved feature dataframe back.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
print(siftdesc_df)
df = pd.read_pickle(siftdesc_df)

/home/ubuntu/data/yg_ar/image_hard_df_sift_amiya.pkl


In [8]:

# Preview the first rows of the reloaded dataframe.
df.head()

Unnamed: 0,image,label_a,label_at,file_name
0,"[1, 2, 1, 4, 1, 2, 1, 1, 0, 0, 3, 2, 0, 0, 2, ...",camel,camel_1,camel_1_hair_0_cloth_0_pants_0_Z1031_XON17_YON...
1,"[0, 0, 1, 1, 0, 3, 2, 0, 0, 0, 2, 2, 3, 1, 2, ...",camel,camel_1,camel_1_hair_0_cloth_0_pants_0_Z1095_XOP17_YON...
2,"[0, 0, 0, 0, 0, 7, 0, 2, 1, 0, 0, 0, 0, 0, 2, ...",camel,camel_1,camel_1_hair_0_cloth_0_pants_0_Z1117_XON28_YOP...
3,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 2, ...",camel,camel_1,camel_1_hair_0_cloth_0_pants_0_Z1120_XON1_YOP0...
4,"[0, 2, 3, 1, 0, 4, 0, 0, 1, 1, 0, 0, 1, 2, 1, ...",camel,camel_1,camel_1_hair_0_cloth_0_pants_0_Z1144_XOP5_YOP1...


In [9]:
# Inspect the "image" entry of the first row (its BoW count vector).
df["image"].iloc[0]

array([1, 2, 1, 4, 1, 2, 1, 1, 0, 0, 3, 2, 0, 0, 2, 2, 1, 1, 0, 1, 0, 2,
       1, 2, 6, 2, 2, 0, 1, 0, 1, 0, 3, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 2,
       0, 2, 0, 1, 0, 1, 0, 3, 1, 2, 3, 0, 4, 3, 0, 0])

### test the newly built dataframe

In [10]:
# Collect the feature vectors and class labels column-wise instead of
# walking the dataframe row by row with iterrows().
features = [np.array(vec) for vec in df["image"]]
labels = list(df["label_a"])

# Feature array (one entry per dataframe row) and the matching label array.
data_train = np.array(features)
label = np.array(labels)

In [35]:
# Hold out 20% of the samples for testing; random_state pins the split so it
# is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(data_train,label, test_size = 0.2, random_state = 1)


In [36]:
# Polynomial-kernel SVM (degree 8, C=20) fitted on the training split.
# NOTE(review): the features are fed in unscaled (StandardScaler is imported
# but not used here) — confirm that is intended.
svclassifier = SVC(C=20, degree=8, kernel="poly").fit(X_train, Y_train)

# Predictions for the held-out split.
Y_pred = svclassifier.predict(X_test)


In [37]:
# Evaluate the predictions on the held-out split: first the confusion matrix
# (true labels passed first, predictions second), then the per-class report.
print(confusion_matrix(Y_test,Y_pred))
print(classification_report(Y_test, Y_pred))

[[263  10  74  18  15  11   1  11  10   3]
 [ 24 182  96  24   7  14   4   9  10  35]
 [ 53  24 267   9   7  39   8   7   1   7]
 [  6   9   1 317   3   2   7   2  20  13]
 [ 47  13 149  20 142  23  10   7   4   7]
 [ 42  12 116  17   6 177   8   7   3  26]
 [  6  15  82  56  11  13 148   9  25  23]
 [ 49  13 134   6   4  13   5 143   1  16]
 [  1   2   1   9   0   3  10   0 380   2]
 [  3  16  67  23   5   4   2   4   8 229]]
                   precision    recall  f1-score   support

            camel       0.53      0.63      0.58       416
            chair       0.61      0.45      0.52       405
           childs       0.27      0.63      0.38       422
lord_of_the_dance       0.64      0.83      0.72       380
            lotus       0.71      0.34      0.46       422
      thunderbolt       0.59      0.43      0.50       414
         triangle       0.73      0.38      0.50       388
       upward_dog       0.72      0.37      0.49       384
       warrior_II       0.82      0.9

In [26]:
# Base estimator for the search. No kernel is given, so the library default
# applies; the C set here is replaced by each candidate from the grid.
model_svm = SVC(C=30, random_state=0)

# Candidate values of C to compare.
parameters = [{'C': [20, 25, 30, 35, 40, 45]}]

# 10-fold cross-validated grid search over C, run on the training split only.
grid_model = GridSearchCV(estimator=model_svm, param_grid=parameters, cv=10)
grid_model.fit(X_train, Y_train)

# File name reserved for saving the model; the pickle dump and the
# train/test score printouts are currently disabled.
filename = 'svm_model.sav'

print("best score: ", grid_model.best_score_)
print("best_params: ", grid_model.best_params_)

best score:  0.7691874999999999
best_params:  {'C': 20}
