In [1]:
import numpy as np
import os

class CIFAR10:    
    def __init__(self, path):
        self.path = path
        # Pre-load all data
        self.train = {}
        self.test = {}
        print('Pre-loading training data')
        self.train['images'] = np.load(os.path.join(path, 'images.npy')).astype('uint8')
        self.train['images_gs'] = np.load(os.path.join(path, 'images_gs.npy')).astype('uint8')
        self.train['images_hog'] = np.load(os.path.join(path, 'images_hog.npy'))
        self.train['images_pca'] = np.load(os.path.join(path, 'images_pca.npy'))
        self.train['labels'] = np.load(os.path.join(path, 'labels_.npy')).astype('uint8')
        print('Pre-loading test data')
        self.test['images'] = np.load(os.path.join(path, 'test_images.npy')).astype('uint8')
        self.test['images_gs'] = np.load(os.path.join(path, 'test_images_gs.npy')).astype('uint8')
        self.test['images_hog'] = np.load(os.path.join(path, 'test_images_hog.npy'))
        self.test['images_pca'] = np.load(os.path.join(path, 'test_images_pca.npy'))
        self.test['labels'] = np.load(os.path.join(path, 'test_labels.npy')).astype('uint8')
        
        self.labels = ['Airplane', 'Bird', 'Horse']

        from sklearn.metrics import confusion_matrix

from sklearn.metrics import confusion_matrix

def evaluate_classifier(clf, test_data, test_labels):
    pred = clf.predict(test_data)
    C = confusion_matrix(pred, test_labels)
    return C.diagonal().sum()*100./C.sum(),C 

dataset = CIFAR10('../CIFAR10')

Pre-loading training data
Pre-loading test data


In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold

In [9]:
train_norm = (dataset.train['images_hog'] - dataset.train['images_hog'].mean(axis=0))/dataset.train['images_hog'].std(axis=0)
test_norm = (dataset.test['images_hog'] - dataset.train['images_hog'].mean(axis=0))/dataset.train['images_hog'].std(axis=0)

clf = SVC(C=100000.)
clf.fit(train_norm, dataset.train['labels'])

SVC(C=100000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [4]:
print(dataset.train['labels'].shape)

(15000,)


In [11]:
print(evaluate_classifier(clf, train_norm, dataset.train['labels']))
print(evaluate_classifier(clf, test_norm, dataset.test['labels']))

(100.0, array([[5000,    0,    0],
       [   0, 5000,    0],
       [   0,    0, 5000]], dtype=int64))
(83.233333333333334, array([[859, 117,  36],
       [109, 789, 115],
       [ 32,  94, 849]], dtype=int64))


In [10]:
all_support_vectors = clf.support_vectors_ #Each line = 1 "Support Vector" 
vectors_per_class = clf.n_support_ #Number of "Support Vector" for each class

print(all_support_vectors.shape)
print(vectors_per_class)

(9938, 256)
[3119 3946 2873]


In [12]:
n_splits = 3
kf = StratifiedKFold(n_splits)

In [None]:
g = [1e6,1e7]

for c in Cs:
    acc_s = 0
    cm_s = np.zeros((3,3))
    for train,test in kf.split(dataset.train['images_hog'], dataset.train['labels']):
        train_x = dataset.train['images_hog'][train]
        train_y = dataset.train['labels'][train]

        clf = SVC(C=c)
        clf.fit(train_x, train_y)

        test_x = dataset.train['images_hog'][test]
        test_y = dataset.train['labels'][test]

        acc, cm = evaluate_classifier(clf, test_x, test_y)
        acc_s += acc/n_splits
        cm_s += cm
    
    print("C = %d"%c)
    print("Accuracy: %.2f %%"%acc_s)
    print("Confusion matrix:")
    print(cm_s)

C = 1000000
Accuracy: 74.74 %
Confusion matrix:
[[ 3843.   735.   296.]
 [  765.  3445.   781.]
 [  392.   820.  3923.]]
C = 10000000
Accuracy: 73.94 %
Confusion matrix:
[[ 3866.   793.   337.]
 [  763.  3361.   799.]
 [  371.   846.  3864.]]
C = 100000000
Accuracy: 74.18 %
Confusion matrix:
[[ 3859.   773.   324.]
 [  766.  3386.   794.]
 [  375.   841.  3882.]]
C = 1000000000
Accuracy: 74.05 %
Confusion matrix:
[[ 3880.   793.   364.]
 [  748.  3377.   786.]
 [  372.   830.  3850.]]


In [14]:
acc_s = 0
cm_s = np.zeros((3,3))
for train,test in kf.split(dataset.train['images_hog'], dataset.train['labels']):
    train_x = dataset.train['images_hog'][train]
    train_y = dataset.train['labels'][train]

    clf = SVC(C=1e5)
    clf.fit(train_x, train_y)

    test_x = dataset.train['images_hog'][test]
    test_y = dataset.train['labels'][test]

    acc, cm = evaluate_classifier(clf, test_x, test_y)
    acc_s += acc/n_splits
    cm_s += cm

print("C = %d"%c)
print("Accuracy: %.2f %%"%acc_s)
print("Confusion matrix:")
print(cm_s)

C = 1000000000
Accuracy: 74.87 %
Confusion matrix:
[[ 3836.   716.   302.]
 [  765.  3474.   777.]
 [  399.   810.  3921.]]


In [15]:
gammas = [1./512,1./256,1./128,1./64]

for g in gammas:
    acc_s = 0
    cm_s = np.zeros((3,3))
    for train,test in kf.split(dataset.train['images_hog'], dataset.train['labels']):
        train_x = dataset.train['images_hog'][train]
        train_y = dataset.train['labels'][train]

        clf = SVC(C=1e5, gamma=g)
        clf.fit(train_x, train_y)

        test_x = dataset.train['images_hog'][test]
        test_y = dataset.train['labels'][test]

        acc, cm = evaluate_classifier(clf, test_x, test_y)
        acc_s += acc/n_splits
        cm_s += cm
    
    print("g = %.6f"%g)
    print("Accuracy: %.2f %%"%acc_s)
    print("Confusion matrix:")
    print(cm_s)

C = 1000000000
Accuracy: 74.83 %
Confusion matrix:
[[ 3829.   704.   289.]
 [  759.  3467.   783.]
 [  412.   829.  3928.]]
C = 1000000000
Accuracy: 74.87 %
Confusion matrix:
[[ 3836.   716.   302.]
 [  765.  3474.   777.]
 [  399.   810.  3921.]]
C = 1000000000
Accuracy: 75.02 %
Confusion matrix:
[[ 3837.   712.   291.]
 [  775.  3499.   792.]
 [  388.   789.  3917.]]
C = 1000000000
Accuracy: 75.35 %
Confusion matrix:
[[ 3867.   709.   272.]
 [  763.  3485.   778.]
 [  370.   806.  3950.]]


In [16]:
acc_s = 0
cm_s = np.zeros((3,3))
for train,test in kf.split(dataset.train['images_hog'], dataset.train['labels']):
    train_x = dataset.train['images_hog'][train]
    train_y = dataset.train['labels'][train]

    clf = SVC(C=1e5, gamma=1./32)
    clf.fit(train_x, train_y)

    test_x = dataset.train['images_hog'][test]
    test_y = dataset.train['labels'][test]

    acc, cm = evaluate_classifier(clf, test_x, test_y)
    acc_s += acc/n_splits
    cm_s += cm

print("g = %.6f"%g)
print("Accuracy: %.2f %%"%acc_s)
print("Confusion matrix:")
print(cm_s)

g = 0.015625
Accuracy: 76.07 %
Confusion matrix:
[[ 3920.   699.   253.]
 [  749.  3496.   753.]
 [  331.   805.  3994.]]


In [17]:
clf = SVC(C=1e5, gamma=1./32)
clf.fit( dataset.train['images_hog'], dataset.train['labels'] )

print("Descriptive")
print(evaluate_classifier(clf, dataset.train['images_hog'], dataset.train['labels']))
print("Predictive")
print(evaluate_classifier(clf, dataset.test['images_hog'], dataset.test['labels']))

Descriptive
(79.053333333333327, array([[4061,  624,  194],
       [ 650, 3656,  665],
       [ 289,  720, 4141]], dtype=int64))
Predictive
(75.966666666666669, array([[790, 129,  37],
       [162, 692, 166],
       [ 48, 179, 797]], dtype=int64))
