In [None]:

# Mount Google Drive into the Colab runtime so the dataset folders stored on
# Drive can be read through the local filesystem.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Read the images from the directory

In [21]:
#Function for getting the image and corresponding label from directory

import cv2
import os
import glob
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

def label_image(path_to_data):
    """Load every readable image under ``path_to_data`` together with its class id.

    The directory is expected to contain one sub-folder per class; the folder
    name is the class name and every file inside it is treated as an image of
    that class.

    Args:
        path_to_data: Path of the dataset root directory.

    Returns:
        A tuple ``(x, label, labelmapping)`` where ``x`` is the list of images
        as returned by ``cv2.imread``, ``label`` is the list of integer class
        ids aligned with ``x``, and ``labelmapping`` maps class name -> id.
    """
    # os.listdir() returns entries in arbitrary order, so the names are sorted
    # to give every class the same id on every run (and for every dataset root
    # that contains the same class folders). Plain files and hidden folders
    # such as `.ipynb_checkpoints` are not classes and are left out.
    class_names = sorted(
        entry for entry in os.listdir(path_to_data)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(path_to_data, entry))
    )
    labelmapping = {name: ind for ind, name in enumerate(class_names)}

    x = []
    label = []

    for name in class_names:
        # glob.escape keeps folder names containing `[`, `]`, `*` or `?` literal.
        class_dir = glob.escape(os.path.join(path_to_data, name))
        for img_path in sorted(glob.glob(os.path.join(class_dir, '*'))):
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals an unreadable / non-image file with None;
                # keeping it would break feature extraction later on.
                continue
            x.append(image)
            label.append(labelmapping[name])

    return x, label, labelmapping

Creation of Feature vector (SIFT + BOW)

In [22]:
import cv2
from sklearn.cluster import KMeans
import pickle
from scipy.spatial.distance import cdist
import os
import numpy as np

def SIFT_Feature_extraction(list_image):
    """Compute SIFT descriptors for every image in ``list_image``.

    Args:
        list_image: Iterable of images as loaded by ``cv2.imread``. An entry
            may be ``None`` (an image that failed to load).

    Returns:
        A list aligned with ``list_image``. Each element is whatever
        ``detectAndCompute`` returned as descriptors for that image, or
        ``None`` when the image itself was ``None``.
    """
    # SIFT lives in the main cv2 module in OpenCV >= 4.4; older builds only
    # expose it through the contrib module `cv2.xfeatures2d`.
    sift_factory = getattr(cv2, "SIFT_create", None)
    if sift_factory is None:
        sift_factory = cv2.xfeatures2d.SIFT_create
    sift = sift_factory()

    descriptors = []
    for image in list_image:
        if image is None:
            # Keep the position so descriptors stay aligned with the labels;
            # callers already treat a None descriptor as "no features".
            descriptors.append(None)
            continue
        _, descriptor = sift.detectAndCompute(image, None)    # keypoints are not needed here
        descriptors.append(descriptor)

    return descriptors

def Clustering(descriptors, vocab_size, random_state=None):
    """Group descriptors into ``vocab_size`` clusters and return the centres.

    The cluster centres are the "visual words" of the bag-of-words dictionary.

    Args:
        descriptors: Collection of descriptor vectors passed to ``KMeans.fit``.
        vocab_size: Number of clusters (size of the visual vocabulary).
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` keeps the previous, unseeded behaviour.

    Returns:
        ``kmeans.cluster_centers_`` of the fitted model.

    Side effects:
        Pickles the dictionary to ``BoW_dict.pkl`` in the current working
        directory, but only when that file does not exist yet. An existing
        file is never overwritten, so delete it to persist a new dictionary.
    """
    kmeans = KMeans(n_clusters = vocab_size, random_state = random_state)
    kmeans.fit(descriptors)

    bow_dict = kmeans.cluster_centers_

    if not os.path.isfile('BoW_dict.pkl'):
        # `with` closes the file even if pickling fails.
        with open('BoW_dict.pkl', 'wb') as dict_file:
            pickle.dump(bow_dict, dict_file)

    return bow_dict

def feature_vector_creation(image_descriptors, BoW, num_cluster):
    """Build one bag-of-words histogram per image.

    Every descriptor of an image votes for the visual word in ``BoW`` it is
    closest to (distance computed by ``cdist``); the histogram counts the
    votes each word received.

    Args:
        image_descriptors: Sequence with one descriptor array per image, or
            ``None`` for an image without descriptors.
        BoW: Array of visual words, one row per word.
        num_cluster: Length of each histogram.

    Returns:
        A list of NumPy count arrays of length ``num_cluster``, in the same
        order as ``image_descriptors``. Images whose descriptors are ``None``
        get an all-zero histogram.
    """
    def _word_histogram(descriptor_set):
        # Start from an all-zero vote count for this image.
        counts = np.array([0] * num_cluster)
        if descriptor_set is None:
            return counts

        # Row r holds the distances from descriptor r to every visual word.
        nearest_word = np.argmin(cdist(descriptor_set, BoW), axis = 1)
        for word_index in nearest_word:
            counts[word_index] += 1
        return counts

    return [_word_histogram(descriptor_set) for descriptor_set in image_descriptors]

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
import sklearn
import pickle
import argparse
import os
from matplotlib import pyplot as plt

# Imported explicitly: a bare `import sklearn` does not guarantee that the
# `sklearn.svm` submodule has been loaded.
from sklearn.svm import SVC

SEED = 1                                       # seed for the SVMs and the train/hold-out split
SAMPLE_INDEX = 0                               # image written to test.jpg and described below
MODEL_FILENAME = 'Support_Vector_Machine.sav'  # where the selected model is pickled
vocab_size = 20                                # number of visual words in the dictionary

##################### Training Data #############################

training_data, data_label, label2id = label_image('/content/drive/MyDrive/data/Training_data/') # Use the required path here

# Sanity check: save one image and print the class it belongs to. The same
# index is used for the image and for the label so the two actually match.
cv2.imwrite('test.jpg', training_data[SAMPLE_INDEX])
print("the id mapping of this class for this image is:", data_label[SAMPLE_INDEX])
print(" The make and model is:")
for u in label2id.keys():
    if label2id[u] == data_label[SAMPLE_INDEX]:
        print(u)

image_desctiptors = SIFT_Feature_extraction(training_data)

# Flatten the per-image descriptors into one list for clustering. Images for
# which SIFT found nothing have a None descriptor and are skipped.
all_descriptors = [
    des
    for descriptor in image_desctiptors
    if descriptor is not None
    for des in descriptor
]

# The visual dictionary is learnt from the training images only.
BoW = Clustering(all_descriptors, vocab_size)

X_features = feature_vector_creation(image_desctiptors, BoW, vocab_size)

# Seeded so the split (and therefore every number below) is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_features, data_label, test_size = 0.3, random_state = SEED
)

##################### Manual sweep over kernel and C #############################

parameters = {'kernel':('linear','poly', 'rbf'), 'C': [1,2,4,6,8,10,12]} # Which function to fit and what value of regularization constant to fit?

ker = parameters['kernel']
Cc = parameters['C']

# Accuracy (in %) per kernel, one entry per value of C.
train_acc = {kernel: [] for kernel in ker}
test_acc = {kernel: [] for kernel in ker}

for kernel in ker:
    for c_value in Cc:
        model_svm = SVC(C = c_value, kernel = kernel, random_state = SEED)
        model_svm.fit(X_train, Y_train)

        # Score once and reuse the values for printing and plotting.
        train_score = model_svm.score(X_train, Y_train)
        test_score = model_svm.score(X_test, Y_test)
        print("score on training set params: ", train_score)
        print("score on testing set params: ", test_score)

        train_acc[kernel].append(100 * float(train_score))
        test_acc[kernel].append(100 * float(test_score))

# Original per-kernel names, kept in case other cells read them.
Train_acc_linear, Test_acc_linear = train_acc['linear'], test_acc['linear']
Train_acc_rbf, Test_acc_rbf = train_acc['rbf'], test_acc['rbf']
Train_acc_poly, Test_acc_poly = train_acc['poly'], test_acc['poly']

# One figure per kernel. These curves come from the single 70/30 hold-out
# split above, not from cross validation, and the titles say so.
for kernel in ('linear', 'rbf', 'poly'):
    fig, ax = plt.subplots()
    ax.plot(Cc, test_acc[kernel], label='Testing Accuracy')
    ax.plot(Cc, train_acc[kernel], label='Training Accuracy')
    ax.legend()
    ax.set_xlabel("Regularization constant")
    ax.set_ylabel("Accuracy (%)")
    ax.set_title("Train and Test Accuracy on hold-out split using " + kernel + " SVM kernel")

##################### Grid search #############################

# Model selection uses the training split only; fitting the search on the
# hold-out or test data would leak it into the reported accuracy.
grid_model = GridSearchCV(SVC(random_state = SEED), parameters)
grid_model.fit(X_train, Y_train)
print("best choice of parameters using grid search CV: ", grid_model.best_params_)
print("best accuracy using grid search CV: ", grid_model.best_score_)
print("accuracy of the selected model on the hold-out split: ", grid_model.score(X_test, Y_test))

# Persist the selected model once, closing the file afterwards.
with open(MODEL_FILENAME, 'wb') as model_file:
    pickle.dump(grid_model.best_estimator_, model_file)

##################### Testing Data #############################

testing_data, testing_label_raw, test_label2id = label_image('/content/drive/MyDrive/data/Test_data/')

# Translate test ids to training ids through the class names, so the labels
# agree with what the model was trained on even if the two folders were
# enumerated differently.
test_id2name = {idx: name for name, idx in test_label2id.items()}
unknown_classes = sorted({test_id2name[lab] for lab in testing_label_raw} - set(label2id))
if unknown_classes:
    raise ValueError("Test data contains classes missing from the training data: %s" % unknown_classes)
testing_label = [label2id[test_id2name[lab]] for lab in testing_label_raw]

image_desctiptors1 = SIFT_Feature_extraction(testing_data)

# Test images are encoded with the dictionary learnt on the training data.
# A dictionary re-clustered on the test set would give histogram bins that
# mean something different from the ones the model was trained on.
X_features1 = feature_vector_creation(image_desctiptors1, BoW, vocab_size)

# Evaluate the already-fitted model; it is never re-fitted on test data.
test_accuracy = grid_model.score(X_features1, testing_label)

print("--------------------------------------")
print("Best Accuracy for test data")
print("accuracy on test set of the model selected by grid search: ", test_accuracy)
print("parameters selected by grid search on the training data: ", grid_model.best_params_)
