In [1]:
import cv2
from skimage.feature import hog
import numpy as np
import mahotas
from sklearn.svm import LinearSVC, SVC, NuSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from skopt import gp_minimize
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
def color_hist(input_image, nbins=32):
    ch1 = np.histogram(input_image[:,:,0], bins = nbins, range = (0, 256))[0] # [0] is because we need only the histogram, not bins edges
    ch2 = np.histogram(input_image[:,:,1], bins = nbins, range = (0, 256))[0]
    ch3 = np.histogram(input_image[:,:,2], bins = nbins, range = (0, 256))[0]
    return np.hstack((ch1, ch2, ch3))

In [3]:
def extract_features(input_image, nbins, orient, pix_per_cell, cell_per_block):
    output = []
    for img in input_image:
        img_ = np.asarray(Image.fromarray((img*255).astype('uint8')).convert('L'))
        features = hog(img_, orientations = orient, pixels_per_cell = (pix_per_cell, pix_per_cell), 
                       cells_per_block = (cell_per_block, cell_per_block), 
                       multichannel=False, transform_sqrt = True, visualize = False, feature_vector = True )
        output.append(np.hstack((cv2.resize(img, dsize=(16,16)).ravel(),color_hist(img, nbins),features)))
    return np.array(output)


In [None]:
#labels = np.load('labels_SVM.npy')
#features = np.load('features.npy')

#X_test_ = np.load('test_features.npy')
#y_test_ = np.load('test_labels_SVM.npy')

In [4]:
X_train = np.load('train_data.npy')
y_train = np.load('train_labels.npy')
#X_test = np.load("test_data.npy")
#y_test = np.load('test_labels.npy')
X_val = np.load('val_data.npy')
y_val = np.load('val_labels.npy')

X_train = np.concatenate((X_train,X_val), axis=0)
y_train = np.concatenate((y_train,y_val), axis=0)

In [5]:
label_list = []
for label in y_train:
    label_list.append(np.argmax(label))


In [6]:
nbins, orient, pix_per_cell, cell_per_block = [64, 8, 4, 1]
features = extract_features(X_train, nbins, orient, pix_per_cell, cell_per_block)
y_train = np.array(label_list)
                  
# Fit a per-column scaler
X_scaler = StandardScaler().fit(features)
# Apply the scaler to X
scaled_X = X_scaler.transform(features)
X_train, X_val, y_train, y_val = train_test_split(scaled_X, y_train, test_size=0.2, random_state=22)

In [7]:
print(X_train.shape)

(34488, 2112)


In [8]:
#np.save('SVM_train_data',X_train)
#np.save('SVM_train_labels', y_train)
#np.save('SVM_val_data', X_val)
#np.save('SVM_val_labels', y_val)

In [4]:
X_train = np.load('SVM_train_data.npy')
y_train = np.load('SVM_train_labels.npy')
X_val = np.load('SVM_val_data.npy')
y_val = np.load('SVM_val_labels.npy')

In [10]:
# C = 0.2 optimal (not much difference)
reg = np.arange(0.1,2,0.1)
test_acc = []
for c in reg:
    svc = LinearSVC(dual=True, C=c, max_iter=10)
    svc.fit(X_train, y_train)
    test_acc.append(svc.score(X_val, y_val))
    print(test_acc)



[0.964857341684064]




[0.964857341684064, 0.9675249362096962]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064, 0.9647413593133843]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064, 0.9647413593133843, 0.965553235908142]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064, 0.9647413593133843, 0.965553235908142, 0.9631176061238692]




[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064, 0.9647413593133843, 0.965553235908142, 0.9631176061238692, 0.9630016237531895]
[0.964857341684064, 0.9675249362096962, 0.9654372535374623, 0.9671769890976571, 0.9659011830201809, 0.9603340292275574, 0.964509394572025, 0.9663651125028996, 0.9638135003479471, 0.9639294827186268, 0.965553235908142, 0.9633495708652284, 0.9624217118997912, 0.9640454650893064, 0.9647413593133843, 0.965553235908142, 0.9631176061238692, 0.9630016237531895, 0.9625376942704709]




In [6]:
svc = LinearSVC(dual=True, C=0.2, max_iter=20)
svc.fit(X_train, y_train)
print(svc.score(X_val, y_val))


0.9654372535374623




In [7]:
X_test = np.load("test_data.npy")
y_test = np.load('test_labels.npy')

In [8]:

label_list = []
for label in y_test:
    label_list.append(np.argmax(label))
y_test = np.array(label_list)

nbins, orient, pix_per_cell, cell_per_block = [64, 8, 4, 1]
features = extract_features(X_test, nbins, orient, pix_per_cell, cell_per_block)
                   
# Fit a per-column scaler
X_scaler = StandardScaler().fit(features)
# Apply the scaler to X
scaled_X_test = X_scaler.transform(features)
#X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=22)

In [9]:
svc.score(scaled_X_test, y_test)

0.964690094774805

In [None]:
img_ = np.asarray(Image.fromarray(features[2]).convert('L'))
features_ = hog(img_, orientations = orient, pixels_per_cell = (pix_per_cell, pix_per_cell), 
               cells_per_block = (cell_per_block, cell_per_block), 
               multichannel=False, transform_sqrt = True, visualize = False, feature_vector = True )
print(features_.shape)

In [None]:
svc = SVC(gamma = 'auto', cache_size = 2048)
svc.fit(X_train, y_train)
test_acc = svc.score(X_test, y_test)

In [None]:
space  = [(8, 32),                  # Number of bins for color histogram
          (4, 8),                  # HOG number of orientations
          (4, 8),                  # HOG pixels per cell
          (1, 2)]                   # HOG cells per block
i = 0

def obj(params):
    global i
    nbins, orient, pix_per_cell, cell_per_block = params
    # Use only every 10th images to speed things up.
    features_ = extract_features(features[::20,:,:,:], nbins, orient, pix_per_cell, cell_per_block)
    y = labels[::20]
    X = features_                    
    # Fit a per-column scaler
    X_scaler = StandardScaler().fit(X)
    # Apply the scaler to X
    scaled_X = X_scaler.transform(X)
    X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=22)
    svc = LinearSVC()
    svc.fit(X_train, y_train)
    test_acc = svc.score(X_test, y_test)
    print (i, params, test_acc)
    i += 1
    return 1.0 - test_acc
    
res = gp_minimize(obj, space, n_calls = 20, random_state = 22)
print("Best score=%.4f" % res.fun)