In [1]:
import cv2
from cv2.xfeatures2d import SIFT_create
import os
import configparser
import numpy as np
import sklearn
import sklearn.cluster as cls
import pickle
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from scipy.spatial.distance import euclidean, pdist, squareform,mahalanobis,sqeuclidean,seuclidean,correlation

In [2]:
class DSG:
    
    def __init__(self):
        self.sift = SIFT_create()#nfeatures,contrastThreshold,edgethreshold,nOctaveLayers = 3, sigma = 1.5
        self.classifier = SVC(C=1, kernel='rbf', probability=True) #C inversely proportional to regularisation
        self.features_len = []
        self.all_features = np.array([[]])
        self.trainimage_label = []
        self.test_set = np.array([[]])
        self.testimage_list = []
        self.testimage_label = []
        self.config_path = '/home/soliton/work/projects/dataset_generator/models/dsgconfig.ini'
        self.configure()
    
    def configure(self):
        config = configparser.ConfigParser()
        config.read(self.config_path)
        self.positive_training_images = config['Paths']['positive_training_images']
        self.random_training_images = config['Paths']['random_training_images']
        self.positive_testing_images = config['Paths']['positive_testing_images']
        self.random_testing_images = config['Paths']['random_testing_images']
        self.resize_height = int(config['Image']['resize_height'])
        self.resize_width = int(config['Image']['resize_width'])
        self.number_of_clusters = int(config['Cluster']['number_of_clusters'])
    
    def load_trainingset(self, path, trainimage_label):
        print("loading trainingset")
        for image in os.listdir(path):
            self.trainingset(path+'/'+image, trainimage_label)
    
    def trainingset(self, image_path, trainimage_label):
        des = self.get_features(image_path)
        #cv2.imshow('img',des)
        #cv2.waitKey(1000)
        #cv2.destroyAllWindows()
        self.trainimage_label.append(trainimage_label)
        self.features_len.append(len(des))
        if(self.all_features.shape == (1, 0)):
            self.all_features = np.array(des)
        else:
            self.all_features = np.concatenate((self.all_features, des), axis=0)
        
    def get_features(self, path):
        img = cv2.imread(path, 1)
        re_img=cv2.resize(img, (self.resize_height,self.resize_width))
        gray= cv2.cvtColor(re_img,cv2.COLOR_BGR2GRAY)
        kp,des = self.sift.detectAndCompute(gray, None)
        return des
    
    def cluster(self):
        self.k_means()
        #self.affinityprop()
        print(len(self.centroids),len(self.all_features))
    
    def affinityprop(self): 
        s_matrix = squareform(pdist(self.all_features, metric='euclidean'))
        self.centroids, self.cluster_labels = cls.affinity_propagation(s_matrix)
    
    def k_means(self):
         self.centroids, self.cluster_labels, _ = cls.k_means(self.all_features, self.number_of_clusters)
  
    def meanshift(self):
        self.centroids, self.cluster_labels = cls.mean_shift(self.all_features, bandwidth = 100)
    
    def train(self):
        self.load_trainingset(self.positive_training_images,0)
        self.load_trainingset(self.random_training_images,1)
        self.cluster()
        training_data = np.zeros((len(self.trainimage_label), max(self.cluster_labels)+1))
        feature_index = 0
        for image in range(len(self.trainimage_label)):
            for feature in range(self.features_len[image]):
                training_data[image][self.cluster_labels[feature_index]] = 1 + training_data[image][self.cluster_labels[feature_index]]
                feature_index += 1
        self.classifier.fit(training_data, self.trainimage_label)
    
    def load_testset(self, path, flag = -1):
        print("loading testset")
        for image in os.listdir(path):
            self.testimage_list.append(image)
            self.testimage_label.append(flag)
            self.testset(path+'/'+image)

    
    def testset(self, imagepath):
        test_set = np.zeros((1, max(self.cluster_labels)+1))
        des = self.get_features(imagepath)
        for feature in des:
            low_dif, bst_label = 0,0 
            for label in range(len(self.centroids)):
                dist = sum(abs(self.centroids[label]-feature))
                if(low_dif == 0 or dist <= low_dif):
                    low_dif = dist
                    bst_label = label
            test_set[0][bst_label] += 1 
        if(self.test_set.shape == (1,0)):
            self.test_set = np.array(test_set)
        else:
            self.test_set = np.concatenate((self.test_set, test_set), axis=0)
            
    def predict(self):
        self.load_testset(self.positive_testing_images,0)
        self.load_testset(self.random_testing_images,1)
        return self.format_result(self.classifier.predict_proba(self.test_set)[:,0])
    
    def format_result(self, result):
        self.testimage_list = [x for _,x in sorted(zip(result,self.testimage_list))]
        self.testimage_label = [x for _,x in sorted(zip(result,self.testimage_label))]
        result.sort()
        result = result[::-1]
        self.testimage_list.reverse()
        self.testimage_label.reverse()
        result = [[value0,value1, value2] for value0,value1, value2 in zip(self.testimage_list,self.testimage_label,result)]
        print(self.classifier.classes_)
        return result
        
    def store_model(self):
        model = {}
        model['classfier'] = self.classifier
        model['centroids'] = self.centroids
        model['cluster_labels'] = self.cluster_labels
        with open(model_path,'wb') as f:
            pickle.dump(model, f) 
    
    def load_model(self):
        with open(model_path,'rb') as f:
            model = pickle.load(f)
        self.sift = cv2.xfeatures2d.SIFT_create() 
        self.classifier = model['classfier']
        self.centroids = model['centroids']
        self.cluster_labels = model['cluster_labels']
        
    def score(self,predicted_label):
        x=0
        for a,b in zip(predicted_label,self.testimage_label):
            if(a == b):
                x = x+1  
        return x/len(self.testimage_label)

In [3]:
if __name__ == '__main__':
    dsg = DSG()
    dsg.train()
    result = dsg.predict()

loading trainingset
loading trainingset
500 4061
loading testset
loading testset
[0 1]


In [4]:
x=[]
for i in result:
    print (i)
    x.append(i[2]<0.5)
x = list(map(int,x))
print(x)
print(dsg.score(x))

['chair.jpg', 1, 0.5195308862325948]
['download.jpeg', 0, 0.5165659053151621]
['images (1).jpeg', 0, 0.51298861620849]
['images.jpeg', 1, 0.5124264061225745]
['download (1).jpeg', 0, 0.5121821256906787]
['images.jpeg', 0, 0.5118527580730468]
['images (5).jpeg', 1, 0.5114576537421237]
['images (14).jpeg', 0, 0.5113690902109758]
['images (9).jpeg', 0, 0.5110958449924237]
['car.jpeg', 0, 0.5110111597888606]
['images (6).jpeg', 1, 0.5105704601588709]
['truck', 1, 0.5103817458563633]
['images (2).jpeg', 0, 0.5102949475436337]
['images (4).jpeg', 1, 0.5101636791778007]
['images (4).jpeg', 0, 0.5099916006402694]
['images (3).jpeg', 0, 0.5098504813081952]
['images (12).jpeg', 1, 0.5094777600818227]
['images (6).jpeg', 0, 0.5094619712413632]
['images (3).jpeg', 1, 0.5093070460013708]
['images (11).jpeg', 0, 0.5091862615296975]
['images (5).jpeg', 0, 0.5091452203730079]
['images (2).jpeg', 1, 0.5089345605716861]
['images (10).jpeg', 0, 0.508719027444864]
['images (8).jpeg', 0, 0.5085863536379127