In [1]:
from pandas import *
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import MiniBatchKMeans
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report ,confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [2]:
def load_data(path):
    classes = os.listdir(path+"/train")[:-1]
    train = []
    test = []
    test_names = []
    for label in classes:
        train.append([])
        for img_name in os.listdir(path + "/train/" + label):
            im = cv2.imread(path + "/train/" + label + "/" + img_name,0)
            if im is not None:
                im = cv2.resize(im, (150, 150))
                train[classes.index(label)].append(im)
        test.append([])
        test_names.append([])
        test_img_path = path + "/test/" + label
        for img_name in os.listdir(test_img_path):
            img = cv2.imread(test_img_path + "/" + img_name,0)
            if img is not None:
                img = cv2.resize(img, (150, 150))
                test[classes.index(label)].append(img)
                test_names[classes.index(label)].append(img_name)
    return classes,train,test,test_names
# classes,train_imgs,test_imgs,test_names = load_data("../dataset/SUN_data/SUN_data")

In [3]:
def sftfeat(imgs):
    sift_vectors = []
    descriptor_list = []
    sift = cv2.xfeatures2d.SIFT_create()
    for i in range(len(imgs)):
        features = []
        for img in imgs[i]:
            kp, des = sift.detectAndCompute(img,None)           
            descriptor_list.extend(des)
            features.append(des)
        sift_vectors.append(features)
    return [descriptor_list, sift_vectors]

In [4]:
def findex(image, center):
    c_dist = distance.euclidean(image,center[0])
    ret = 0
    for i in range(len(center[1:])):
        d = distance.euclidean(image,center[i])
        if(d<c_dist):
            c_dist=d
            ret = i+1
    return ret

In [5]:
def iclass(bovw, centers,norm_flag):
    feature = []
    for i in range(len(bovw)):
        category = []
        for img in bovw[i]:
            hist = np.zeros(len(centers))
            for j in range(len(img)):
                index = findex(img[j], centers)
                hist[index] += 1
            if(norm_flag==1):
                hist = hist/np.sum(hist)
            category.append(hist)
        feature.append(category)
    return feature

In [6]:
def reorganise_SVM(bovw_train,bovw_test,classes,test_names):
    train_labels = []
    X_train = []
    X_test=  []
    X_test_names=  []
    test_labels = []
    for i in range(len(bovw_train)):
        for j in range(len(bovw_train[i])):
            X_train.append(bovw_train[i][j])
            train_labels.append(classes[i])
        for j in range(len(bovw_test[i])):
            X_test.append(bovw_test[i][j])
            X_test_names.append(test_names[i][j])
            test_labels.append(classes[i])
    return X_train,train_labels,X_test,test_labels,X_test_names

In [7]:
def SVM_fit(X_train,train_labels,X_test,test_labels):
    model = OneVsRestClassifier(SVC(kernel='linear',C=0.03))
    model.fit(X_train, train_labels)
    prediction = model.predict(X_test)
    print(f"Test Set Accuracy : {accuracy_score(test_labels, prediction) * 100} %\n")
    print(f"Classification Report : \n{classification_report(test_labels, prediction)}")
    return prediction

In [8]:
def confs_matrix(classes,prediction,test_labels):
    conf_matrix = []
    conf_names = []
    for i in range(len(classes)):
        conf_matrix.append([0 for j in range(len(classes))])
        conf_names.append([[] for j in range(len(classes))])
    for i in range(len(prediction)):
        conf_matrix[classes.index(prediction[i])][classes.index(test_labels[i])]+=1
        conf_names[classes.index(prediction[i])][classes.index(test_labels[i])].append(X_test_names[i])
    return conf_matrix,conf_names

In [9]:
classes,train_imgs,test_imgs,test_names = load_data("../dataset/SUN_data/SUN_data")

In [10]:
sifts = sftfeat(train_imgs) 
descriptor_list = sifts[0] 
all_bovw_feature = sifts[1] 
test_bovw_feature = sftfeat(test_imgs)[1] 

In [11]:
We kmeans = MiniBatchKMeans(n_clusters = 8, n_init = 10)
kmeans.fit(descriptor_list)
visual_words = kmeans.cluster_centers_ 

In [12]:
bovw_train = iclass(all_bovw_feature, visual_words,0) 
bovw_test = iclass(test_bovw_feature, visual_words,0) 
bovw_train_norm = iclass(all_bovw_feature, visual_words,1) 
bovw_test_norm = iclass(test_bovw_feature, visual_words,1) 

In [13]:
X_train,train_labels,X_test,test_labels,X_test_names=reorganise_SVM(bovw_train,bovw_test,classes,test_names)
X_train_norm,train_labels_norm,X_test_norm,test_labels_norm,X_test_names_norm=reorganise_SVM(bovw_train_norm,bovw_test_norm,classes,test_names)

In [14]:
print("Classification without Normalisation\n----------------------------------------")
predictions = SVM_fit(X_train,train_labels,X_test,test_labels)


Classification without Normalisation
----------------------------------------
Test Set Accuracy : 35.625 %

Classification Report : 
              precision    recall  f1-score   support

    aquarium       0.33      0.20      0.25        20
      desert       0.52      0.80      0.63        20
     highway       0.33      0.05      0.09        20
     kitchen       0.33      0.15      0.21        20
  laundromat       0.31      0.45      0.37        20
        park       0.31      0.55      0.40        20
   waterfall       0.38      0.40      0.39        20
    windmill       0.25      0.25      0.25        20

    accuracy                           0.36       160
   macro avg       0.35      0.36      0.32       160
weighted avg       0.35      0.36      0.32       160



In [15]:
print("Classification with Normalisation\n----------------------------------------")
predictions_norm = SVM_fit(X_train_norm,train_labels_norm,X_test_norm,test_labels_norm)


Classification with Normalisation
----------------------------------------
Test Set Accuracy : 22.5 %

Classification Report : 
              precision    recall  f1-score   support

    aquarium       0.00      0.00      0.00        20
      desert       0.35      0.60      0.44        20
     highway       0.33      0.05      0.09        20
     kitchen       0.11      0.05      0.07        20
  laundromat       0.20      0.60      0.30        20
        park       0.24      0.25      0.24        20
   waterfall       0.00      0.00      0.00        20
    windmill       0.19      0.25      0.21        20

    accuracy                           0.23       160
   macro avg       0.18      0.23      0.17       160
weighted avg       0.18      0.23      0.17       160



In [16]:
conf_matrix,conf_names = confs_matrix(classes,predictions,test_labels)
conf_matrix_norm,conf_names_norm = confs_matrix(classes,predictions_norm,test_labels_norm)

In [17]:
for i in range(len(classes)):
    print(i,"->",classes[i])
print("\n")
print("Confusion Matrix with Normalisation\n")
print(DataFrame(conf_matrix))
print("\nConfusion Matrix without Normalisation\n")
print(DataFrame(conf_matrix_norm))

0 -> highway
1 -> waterfall
2 -> park
3 -> windmill
4 -> aquarium
5 -> kitchen
6 -> desert
7 -> laundromat


Confusion Matrix without Normalisation

   0   1   2  3  4  5   6  7
0  1   0   0  0  1  0   0  1
1  1   8   3  2  4  2   0  1
2  2  10  11  4  5  2   0  1
3  5   0   1  5  3  3   1  2
4  2   2   0  2  4  0   1  1
5  2   0   0  1  0  3   0  3
6  6   0   1  3  2  1  16  2
7  1   0   4  3  1  9   2  9

Confusion Matrix without Normalisation

    0  1  2  3   4   5   6   7
0   1  0  0  1   0   0   1   0
1   0  0  1  0   1   1   0   0
2   2  3  5  5   3   1   1   1
3   5  2  2  5   2   5   3   3
4   0  2  0  1   0   0   0   0
5   1  3  3  0   1   1   0   0
6  10  1  1  3   3   0  12   4
7   1  9  8  5  10  12   3  12


In [18]:
# print(os.getcwd())
# os.chdir("../dataset/SUN_data/SUN_data/test/")
for i in range(len(conf_names)):
    if(len(conf_names[i][i])>0):
        print(conf_names[i][i][0] + " Image in "+classes[i]+" class is corectly classified")
        break
fl = 1
for i in range(len(conf_names)):
    for j in range(len(conf_names[i])):
        if(i!=j and len(conf_names[i][j])>0):
            print(conf_names[i][j][0] + " Image in "+classes[j]+" class is wrongly classified as "+classes[i])
            fl = 0
            break
    if(fl==0):
        break

sun_bwbhvftrpsasgenv.jpg Image in highway class is corectly classified
sun_avdirymfvklsislz.jpg Image in aquarium class is wrongly classified as highway


- Bag of Visual Words is used to classify by defining words from images using Kmeans 
- The following are the parameters to play around
    - no of clusters in k-Means
    - c in svm
- It was observed that the higher the k the better the result 
- Accuracy values are  around 20-30% for k=8 and around 45-55% at k=150
- Normalisation doesn't guarentee increase in inaccuracy