In [17]:
%pylab inline --no-import-all
# OpenCV bindings
import cv2 as cv
# To performing path manipulations 
import os
# Local Binary Pattern function
from skimage.feature import local_binary_pattern
# To calculate a normalized histogram 
from scipy.stats import itemfreq
from sklearn.preprocessing import normalize
# Utility package -- use pip install cvutils to install
import cvutils
# To read class from file
import csv
import glob
from sklearn.svm import LinearSVC

Populating the interactive namespace from numpy and matplotlib


In [10]:
# Prepare dataset
DataPath = '/Users/zhuonijie/Desktop/TrainingImages copy/'

# load images
def loadImg(fileName,dsize1,dsize2): 
    '''
    load image, resize, rgb2gray, equal histo 
    :param fileName: file name 
    :param dsize: same size, tuple 
    :return: image 
    '''
    img = cv.imread(fileName)
    retImg = cv.resize(img,(dsize1,dsize2)) 
    img = cv.normalize(img, img, 0, 255, cv.NORM_MINMAX)
    retImg = cv.cvtColor(retImg,cv.COLOR_BGR2GRAY) 
    retImg = cv.equalizeHist(retImg)
    return retImg 

In [11]:
test_face_image_files = glob.glob(os.path.join(DataPath + 'test/FACES', '*.bmp'), recursive=True)
test_nface_image_files = glob.glob(os.path.join(DataPath + 'test/NFACES', '*.bmp'), recursive=True)
train_face_image_files = glob.glob(os.path.join(DataPath + 'train/FACES', '*.bmp'), recursive=True)
train_nface_image_files = glob.glob(os.path.join(DataPath + 'train/NFACES', '*.bmp'), recursive=True)
print (len(test_face_image_files), len(test_nface_image_files), len(train_face_image_files),len(train_nface_image_files))

1458 2729 3400 6367


In [12]:
# decide width * height of image, select training set size
IMG_SIZE = 24
TRAIN_SIZE_FACE = 2000
TRAIN_SIZE_NFACE = 2000
TEST_SIZE_FACE = 500
TEST_SIZE_NFACE = 500

In [13]:
train_images = []
for i in range(0, TRAIN_SIZE_FACE):
    train_images.append(loadImg(train_face_image_files[i],IMG_SIZE,IMG_SIZE))
for i in range(0, TRAIN_SIZE_NFACE):
    train_images.append(loadImg(train_nface_image_files[i],IMG_SIZE,IMG_SIZE))
    
test_images = []
for i in range(0, TEST_SIZE_FACE):
    test_images.append(loadImg(test_face_image_files[i],IMG_SIZE,IMG_SIZE))
for i in range(0, TEST_SIZE_NFACE):
    test_images.append(loadImg(test_nface_image_files[i],IMG_SIZE,IMG_SIZE))

In [75]:
tpos = TRAIN_SIZE_FACE
tneg = TRAIN_SIZE_NFACE
texamples = tneg + tpos
    
# create vector of correct classifications
y_train = np.ones(texamples, int)
y_train[tpos:texamples] = 0


ttpos = TEST_SIZE_FACE
ttneg = TEST_SIZE_NFACE
ttexamples = ttneg + ttpos
    
# create vector of correct classifications
y_test = np.ones(ttexamples, int)
y_test[ttpos:ttexamples] = 0

In [76]:
# List for storing the LBP Histograms, address of images and the corresponding label 
X_train = []

# For each image in the training set calculate the LBP histogram
# and update X_test, X_name and y_test
for train_image in train_images:
    radius = 3
    # Number of points to be considered as neighbourers 
    no_points = 8 * radius
    # Uniform LBP is used
    lbp = local_binary_pattern(train_image, no_points, radius, method='uniform')
    # Calculate the histogram
    x = itemfreq(lbp.ravel())
    # Normalize the histogram
    hist = x[:, 1]/sum(x[:, 1])
    # Append histogram to X_name
    X_train.append(hist)

`itemfreq` is deprecated and will be removed in a future version. Use instead `np.unique(..., return_counts=True)`
  del sys.path[0]


In [77]:
y_train = list(y_train)

ind = []
for i in range(len(X_train)):
    if len(X_train[i]) != len(X_train[0]):
        print (len(X_train[i]))
        ind.append(i)

25
24
25
25
25
25
25
25
24
25
25
25
25
24
25
25
25
25
25
25
25
25
25
25
24
25
25
25
24
25
25
25
25
25
25
25
25
25
25
25
25
25
23
25
24
25
25
25
24
25
25
24
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
24
25
24
25
24
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
25
24
25
25
25
25
25
23
25
25
25
25
24
25
24
25
23
24
25
25
25
21
25
23
25
25
24
25
25
25
25
25
25
25
24
25
25
25
21
25
25
24
25
25
25
25
25
25
25
24
23
25
25
21
25
25
24
25
25
25
25
24
25
25
25
25
25
25
25
25
25
25
24
24
23
25
23
22
25
25
25
25
23
25
25
25
25
20
24
25
24
25
25
11
25
25
25
22
21
25
24
24
21
22
25
25
25
25
25
24
25
25
25
25
25
25
25
23
22
25
25
25
25
25
25
25
25
22
20
13
25
25
23
22
22
25
5
24
24
22
25
25
24
25
24
25
24
25
25
25
25
25
23
25
1
23
20
24
24
24
22
25
24
22
24
25
25
1
25
25
25
24
15
25
25
25
25
25
25
23
25
24
25
13
25
25
17
25
24
22
25
25
1
24
25
22
18
25
25
25
6
25
25
25
23
24
25
25
23
25
1
1


In [84]:
X_train_ft = []
y_train_ft = []

for i in range(len(X_train)):
    if i not in ind:
        X_train_ft.append(X_train[i])
        y_train_ft.append(y_train[i])

In [85]:
model = LinearSVC(C=100.0, random_state=42)
model.fit(np.asarray(X_train_ft), np.asarray(y_train_ft))



LinearSVC(C=100.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [87]:
# List for storing the LBP Histograms, address of images and the corresponding label 
X_test = []

# For each image in the training set calculate the LBP histogram
# and update X_test, X_name and y_test
for test_image in test_images:
    radius = 3
    # Number of points to be considered as neighbours 
    no_points = 8 * radius
    # Uniform LBP is used
    lbp = local_binary_pattern(test_image, no_points, radius, method='uniform')
    # Calculate the histogram
    x = itemfreq(lbp.ravel())
    # Normalize the histogram
    hist = x[:, 1]/sum(x[:, 1])
    # Append histogram to X_name
    X_test.append(hist)

`itemfreq` is deprecated and will be removed in a future version. Use instead `np.unique(..., return_counts=True)`
  del sys.path[0]


In [88]:
y_test = list(y_test)

ind = []
for i in range(len(X_test)):
    if len(X_test[i]) != len(X_test[0]):
        print (len(X_test[i]))
        ind.append(i)

X_test_ft = []
y_test_ft = []

for i in range(len(X_test)):
    if i not in ind:
        X_test_ft.append(X_test[i])
        y_test_ft.append(y_test[i])

25
24
25
25
25
25
25
24
24
25
24
25
25
24
25
25
25
25
25
23
25
25
25
25
24
25
25
25
25
25
25
25
25
25
24
25
25
23
25
20
25
25
25
25
25
25
25
20
21
25
25
21
20
25
24
25
25
25
25
24
25
25
25
25
25
25
25
25
25
25
25
22
22
25
24
25
25
25
25
24
25
24
25
25
25
24
24
25
25
20
25
25
25
25
25
25


In [89]:
prediction = model.predict(X_test_ft)

In [92]:
output = sum(abs(y_test_ft-prediction))

In [94]:
output/len(prediction)

0.13827433628318583

In [98]:
# List for storing the LBP Histograms, address of images and the corresponding label 
X_train_hog = []

# For each image in the training set calculate the LBP histogram
# and update X_test, X_name and y_test
for train_image in train_images:
    cell_size = (8, 8)  # h x w in pixels
    block_size = (2, 2)  # h x w in cells
    nbins = 9  # number of orientation bins

    # winSize is the size of the image cropped to an multiple of the cell size
    hog = cv.HOGDescriptor(_winSize=(train_image.shape[1] // cell_size[1] * cell_size[1],
                                  train_image.shape[0] // cell_size[0] * cell_size[0]),
                        _blockSize=(block_size[1] * cell_size[1],
                                    block_size[0] * cell_size[0]),
                        _blockStride=(cell_size[1], cell_size[0]),
                        _cellSize=(cell_size[1], cell_size[0]),
                        _nbins=nbins)

    n_cells = (train_image.shape[0] // cell_size[0], train_image.shape[1] // cell_size[1])
    hog_feats = hog.compute(train_image)\
               .reshape(n_cells[1] - block_size[1] + 1,
                        n_cells[0] - block_size[0] + 1,
                        block_size[0], block_size[1], nbins) \
               .transpose((1, 0, 2, 3, 4))  # index blocks by rows first
    # hog_feats now contains the gradient amplitudes for each direction,
    # for each cell of its group for each group. Indexing is by rows then columns.

    gradients = np.zeros((n_cells[0], n_cells[1], nbins))

    # count cells (border cells appear less often across overlapping groups)
    cell_count = np.full((n_cells[0], n_cells[1], 1), 0, dtype=int)

    for off_y in range(block_size[0]):
        for off_x in range(block_size[1]):
            gradients[off_y:n_cells[0] - block_size[0] + off_y + 1,
                      off_x:n_cells[1] - block_size[1] + off_x + 1] += \
                hog_feats[:, :, off_y, off_x, :]
            cell_count[off_y:n_cells[0] - block_size[0] + off_y + 1,
                       off_x:n_cells[1] - block_size[1] + off_x + 1] += 1

    # Average gradients
    gradients /= cell_count
        
    h = hog.compute(train_image)

    # Append histogram to X_name
    X_train_hog.append(h)

In [110]:
X_train_hog_cvt = []
for i in range(len(X_train_hog)):
    X_train_hog_cvt.append([float(j) for j in X_train_hog[i]])

In [113]:
model = LinearSVC(C=100.0, random_state=42)
model.fit(X_train_hog_cvt, y_train)



LinearSVC(C=100.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [114]:
# List for storing the LBP Histograms, address of images and the corresponding label 
X_test_hog = []

# For each image in the training set calculate the LBP histogram
# and update X_test, X_name and y_test
for test_image in test_images:
    cell_size = (8, 8)  # h x w in pixels
    block_size = (2, 2)  # h x w in cells
    nbins = 9  # number of orientation bins

    # winSize is the size of the image cropped to an multiple of the cell size
    hog = cv.HOGDescriptor(_winSize=(test_image.shape[1] // cell_size[1] * cell_size[1],
                                  test_image.shape[0] // cell_size[0] * cell_size[0]),
                        _blockSize=(block_size[1] * cell_size[1],
                                    block_size[0] * cell_size[0]),
                        _blockStride=(cell_size[1], cell_size[0]),
                        _cellSize=(cell_size[1], cell_size[0]),
                        _nbins=nbins)
        
    h = hog.compute(test_image)

    # Append histogram to X_name
    X_test_hog.append(h)

In [115]:
X_test_hog_cvt = []
for i in range(len(X_test_hog)):
    X_test_hog_cvt.append([float(j) for j in X_test_hog[i]])

In [116]:
prediction = model.predict(X_test_hog_cvt)

In [117]:
output = sum(abs(y_test-prediction))

In [119]:
output/len(prediction)

0.018