In [89]:
import pickle
import cv2
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn import svm

In [90]:
# Number of k-means clusters: the visual-vocabulary size and the length of
# every bag-of-keypoints histogram built from it.
nClusters = 256
# Hyper-parameters forwarded to svm.SVC when the classifier is created.
svm_c = 10
svm_gamma = 0.01
# NOTE(review): "kernal" is a misspelling of "kernel"; the name is read by the
# training cell, so any rename has to be done in both places together.
svm_kernal = 'rbf'

# Alternative dataset location, currently disabled:
# datasetPath = './dataset/cifar-10-batches-py'
# Directory prefix; '/data_batch_<n>' and '/test_batch' are appended to it.
datasetPath = '../../'
# Output pickle files for the fitted KMeans vocabulary and the trained SVM.
vocabFile = 'vocab.sav'
clsFile = 'cls.sav'
# Training batches numbered 1..trainBatch (inclusive) are loaded.
trainBatch = 1
# Disabled per-batch image caps, kept as a bare string literal (it defines no
# variables); only commented-out slicing lines elsewhere refer to these names.
'''trainImgPerBatch = 50
testImgPerBatch = 50'''

In [91]:
def getLabel(id):
    """Translate a numeric class index into its text label.

    The names are stored in index order (0 -> 'airplane', ..., 9 -> 'truck').
    Lookup uses ordinary list indexing, so an index outside the list raises
    IndexError and negative indices count from the end.
    """
    classNames = [
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    ]
    return classNames[id]

In [92]:
def loadBatch(filePath):
    """Read one pickled batch file and return its images and labels.

    The file must unpickle to a mapping with a 'data' array (one flattened
    image per row) and a 'labels' entry.

    Returns:
        (imgs, labels): imgs is 'data' reshaped to (n, 3, 32, 32) and then
        transposed to channel-last (n, 32, 32, 3); labels is returned as
        stored in the file.
    """
    with open(filePath, 'rb') as batchFile:
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        batch = pickle.load(batchFile, encoding='latin1')

    flatPixels = batch['data']
    count = len(flatPixels)
    # Rows are laid out as three 32x32 planes; move the plane axis last.
    planar = flatPixels.reshape(count, 3, 32, 32)
    channelLast = planar.transpose(0, 2, 3, 1)
    return (channelLast, batch['labels'])

In [93]:
def getDescriptors(img, extractor):
    """Compute local feature descriptors for a single RGB image.

    The image is converted to grayscale and handed to
    `extractor.detectAndCompute` with no mask. The keypoints are discarded
    and only the descriptors value is returned, exactly as the extractor
    produced it.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, features = extractor.detectAndCompute(grayscale, None)
    return features

In [94]:
def getBatchDesc(imgs):
    """Collect the SIFT descriptors of every image in a batch into one array.

    Args:
        imgs: iterable of RGB images accepted by getDescriptors.

    Returns:
        The per-image descriptor arrays stacked along axis 0, or an empty
        1-D array (np.asarray([])) when no image produced descriptors.
    """
    # Newer OpenCV builds expose SIFT as cv2.SIFT_create; older contrib
    # builds only provide cv2.xfeatures2d.SIFT_create. Support both.
    createSift = getattr(cv2, 'SIFT_create', None)
    if createSift is None:
        createSift = cv2.xfeatures2d.SIFT_create
    extractor = createSift()

    # Gather the pieces and concatenate once; concatenating inside the loop
    # re-copies the accumulated array on every iteration.
    perImage = []
    for img in imgs:
        desc = getDescriptors(img, extractor)
        # Images without usable descriptors yield a non-array result; skip them.
        if isinstance(desc, np.ndarray):
            perImage.append(desc)

    if not perImage:
        return np.asarray([])
    return np.concatenate(perImage, axis=0)

In [95]:
def getDatasetDesc():
    """Gather the descriptors of all training batches into a single array.

    Batches 'data_batch_1' .. 'data_batch_<trainBatch>' under datasetPath are
    loaded in order and their descriptors stacked along axis 0.

    Returns:
        The stacked descriptor array, or an empty 1-D array (np.asarray([]))
        when no batch contributed any descriptors.
    """
    batchParts = []
    for batchId in range(1, trainBatch + 1):
        batchPath = datasetPath+'/'+'data_batch_'+str(batchId)
        imgs, _ = loadBatch(batchPath)
        batchDesc = getBatchDesc(imgs)
        # getBatchDesc signals "nothing found" with an empty 1-D array, which
        # cannot be concatenated with 2-D descriptor arrays -- leave it out.
        if batchDesc.shape[0] > 0:
            batchParts.append(batchDesc)

    if not batchParts:
        return np.asarray([])
    # Single concatenate instead of growing the array batch by batch.
    return np.concatenate(batchParts, axis=0)

In [96]:
def getVocabularies(randomState=None):
    """Fit the k-means visual vocabulary on the training descriptors.

    Args:
        randomState: optional seed forwarded to KMeans as `random_state`.
            The default None keeps KMeans' own default (unseeded) behaviour;
            pass an int to make the clustering reproducible between runs.

    Returns:
        The fitted KMeans model with nClusters clusters.
    """
    descriptors = getDatasetDesc()
    kmeans = KMeans(n_clusters=nClusters, random_state=randomState)
    kmeans.fit(descriptors)
    return kmeans

In [97]:
def getBagOfKP(img,extractor,vocab):
    """Build the bag-of-keypoints histogram of one image.

    Args:
        img: RGB image accepted by getDescriptors.
        extractor: feature extractor passed through to getDescriptors.
        vocab: fitted clustering model whose `predict` maps each descriptor
            to a cluster index.

    Returns:
        A list of nClusters integer counts; entry i is the number of the
        image's descriptors assigned to cluster i. All zeros when the image
        yields no descriptors.
    """
    bok = [0]*nClusters
    # Reuse the shared helper rather than repeating the grayscale conversion
    # and detectAndCompute call here.
    descriptors = getDescriptors(img, extractor)
    if isinstance(descriptors, np.ndarray):
        for clusterId in vocab.predict(descriptors):
            bok[clusterId] += 1
    return bok

In [98]:
def getBatchBOK(imgs,vocab):
    """Compute the bag-of-keypoints histogram for every image in a batch.

    Args:
        imgs: iterable of RGB images.
        vocab: fitted clustering model passed through to getBagOfKP.

    Returns:
        np.ndarray built from the list of per-image histograms, one row per
        image in input order (an empty 1-D array when imgs is empty).
    """
    # Newer OpenCV builds expose SIFT as cv2.SIFT_create; older contrib
    # builds only provide cv2.xfeatures2d.SIFT_create. Support both.
    createSift = getattr(cv2, 'SIFT_create', None)
    if createSift is None:
        createSift = cv2.xfeatures2d.SIFT_create
    extractor = createSift()

    # One extractor instance is shared by all images of the batch.
    return np.asarray([getBagOfKP(img, extractor, vocab) for img in imgs])

In [2]:
def getTrainingDT(vocab):
    """Build the training feature matrix and label vector.

    Batches 'data_batch_1' .. 'data_batch_<trainBatch>' under datasetPath are
    loaded in order; each image becomes one bag-of-keypoints row.

    Args:
        vocab: fitted clustering model passed through to getBatchBOK.

    Returns:
        (dataset, labels): dataset stacks the per-batch histogram arrays along
        axis 0 (an empty 1-D array when nothing was loaded); labels is an
        np.ndarray of all batch labels in the same order.
    """
    featureParts = []
    # A fresh list, so extending it never mutates a list returned by loadBatch.
    labels = []
    for batchId in range(1,trainBatch+1):
        print('collecting batch ',str(batchId),' features ...')
        batchPath = datasetPath+'/'+'data_batch_'+str(batchId)
        imgs,labs = loadBatch(batchPath)
        batchFeatures = getBatchBOK(imgs,vocab)
        # An image-less batch produces an empty 1-D array that cannot be
        # concatenated with 2-D histogram arrays, so leave it out.
        if batchFeatures.shape[0] > 0:
            featureParts.append(batchFeatures)
        labels.extend(labs)

    if featureParts:
        # Single concatenate instead of re-copying the matrix per batch.
        dataset = np.concatenate(featureParts, axis=0)
    else:
        dataset = np.asarray([])
    return (dataset,np.asarray(labels))

In [100]:
def getTestingDT(vocab):
    """Build the test feature matrix and label vector.

    Loads '<datasetPath>/test_batch', converts every image to its
    bag-of-keypoints histogram with `vocab`, and returns
    (features, labels-as-ndarray).
    """
    testImgs, testLabels = loadBatch(datasetPath+'/test_batch')
    return (getBatchBOK(testImgs, vocab), np.asarray(testLabels))

In [101]:
# Fit the visual vocabulary and persist it for reuse.
vocab = getVocabularies()
# Use a context manager so the file is flushed and closed deterministically.
with open(vocabFile, 'wb') as vocabOut:
    pickle.dump(vocab, vocabOut)

In [102]:
# Build the training features; the SVM training cell reads `dataset` and
# `labels`, so this must run for the notebook to work on a fresh kernel.
dataset,labels = getTrainingDT(vocab)

In [103]:
# Train the SVM on the bag-of-keypoints features using the configured
# hyper-parameters, then persist the fitted classifier.
model = svm.SVC(C=svm_c,kernel=svm_kernal,gamma=svm_gamma)
print("training started...")
model.fit(dataset,labels)
# Use a context manager so the file is flushed and closed deterministically.
with open(clsFile, 'wb') as modelOut:
    pickle.dump(model, modelOut)

In [104]:
# testDt,testLb = getTestingDT(vocab)

In [106]:
# #out = model.predict(testDt)
# acc = model.score(testDt,testLb)

In [107]:
# print(acc)

0.1
