In [1]:
import numpy as np
from glob import iglob
from time import time

from skimage.io import imread, imsave, show, imshow
from skimage.filters import threshold_otsu
from skimage.morphology import skeletonize
from skimage.transform import resize
from skimage.measure import label, regionprops

from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import KFold
from sklearn.grid_search import GridSearchCV

In [2]:
# Обрезаем изображения в базе
koef = 0.3
new_size = (50, 50)
input_dirs_path = ['02', '07']
for input_dir_path in input_dirs_path:
    output_dir_path = input_dir_path + '_output'
    for filename in iglob(input_dir_path + '/*.jpeg'):
        img = imread(filename)
        img = resize(img, new_size) * 255
        img = img[1:-1, 1:-1]
        img = img.astype('uint8')
        thresh = threshold_otsu(img)
        bin = img < thresh
        nr, nc = new_size
        cat_bin = bin[:, : int(nc * koef)]
        cat_top = cat_bin.sum(axis = 1).nonzero()[0][0]
        cat_bin = cat_bin[cat_top :, :]
        cat_bot = cat_bin.sum(axis = 1).argmin()
        cat_bin = cat_bin[: cat_bot, :]
        img = img[cat_top : cat_bot, :]
        imsave(output_dir_path + '/' + filename[len(input_dir_path) + 1:], img)

In [86]:
# Горизонтальные линии
def get_val(img):
    thresh = threshold_otsu(img)
    bin = img < thresh
    skl = skeletonize(bin)
    lbl = label(skl)
    cnt_lbl = np.bincount(lbl.reshape(-1))
    max_lbl = np.argmax(cnt_lbl[1:]) + 1
    lbl[lbl != max_lbl] = 0
    lbl[lbl == max_lbl] = 1
    pxl = lbl.sum()
    r, c = lbl.shape
    dist = lbl[np.arange(r - 1, -1, -1), :].argmax(axis = 0)
    hist = list(np.histogram(dist, bins=np.arange(dist.max() + 1)))
    hist[0] -= hist[0].min()
    return np.array(hist[0]).nonzero()[0].shape[0]

In [9]:
# Горизонтальные линии с resize'ом
def get_val(img):
    new_size = (18, 50)
    thresh = threshold_otsu(img)
    bin = img < thresh
    bin = resize(bin, new_size)
    bin = bin[1:-1, 1:-1] * 255
    bin = bin.astype('uint8')
    thresh = threshold_otsu(bin)
    bin = bin > thresh
    skl = skeletonize(bin)
    lbl = label(skl)
    cnt_lbl = np.bincount(lbl.reshape(-1))
    max_lbl = np.argmax(cnt_lbl[1:]) + 1
    lbl[lbl != max_lbl] = 0
    lbl[lbl == max_lbl] = 1
    pxl = lbl.sum()
    r, c = lbl.shape
    dist = lbl[np.arange(r - 1, -1, -1), :].argmax(axis = 0)
    hist = list(np.histogram(dist, bins=np.arange(dist.max() + 1)))
    hist[0] -= hist[0].min()
    return np.array(hist[0]).nonzero()[0].shape[0]

In [121]:
# Гистограмма расстояний от нижнего края
def get_val(img):
    thresh = threshold_otsu(img)
    bin = img < thresh
    skl = skeletonize(bin)
    lbl = label(skl)
    cnt_lbl = np.bincount(lbl.reshape(-1))
    max_lbl = np.argmax(cnt_lbl[1:]) + 1
    lbl[lbl != max_lbl] = 0
    lbl[lbl == max_lbl] = 1
    pxl = lbl.sum()
    r, c = lbl.shape
    dist = lbl[np.arange(r - 1, -1, -1), :].argmax(axis = 0)
    hist = list(np.histogram(dist, bins=np.arange(dist.max() + 1)))
    hist[0] -= hist[0].min()
    return hist[0].max()

In [91]:
# Ориентация скелета
def get_val(img):
    thresh = threshold_otsu(img)
    bin = img < thresh
    skl = skeletonize(bin)
    lbl = label(skl)
#    lbl = label(bin)
    cnt_lbl = np.bincount(lbl.reshape(-1))
    max_lbl = np.argmax(cnt_lbl[1:]) + 1
    lbl[lbl != max_lbl] = 0
    lbl[lbl == max_lbl] = 1
    r, c = lbl.shape
    lbl = lbl[:, int(c / 2) :]
    rgp = regionprops(lbl)
    if len(rgp) == 0:
        return 0
    return rgp[0]['orientation']

In [92]:
# Сбор данных
output_dirs_path = [input_dir_path + '_output' for input_dir_path in input_dirs_path]
val = dict([])
for output_dir_path in output_dirs_path:
    val[output_dir_path] = list()
    for num, filename in enumerate(iglob(output_dir_path + '/*.jpeg')):
        img = imread(filename)
        val[output_dir_path].append(get_val(img))
#        if num == 20: break

# Список в numpy-массив
for k in val.keys():
    val[k] = np.array(val[k])

In [93]:
res = dict()
for k in val.keys():
    print(k, val[k].mean(), val[k].std())
    res[k] = val[k].mean()

07_output -0.0705918465367 0.084657964371
02_output -0.392867745325 0.102428090527


In [94]:
s = 0
n = 0
output_dirs_path = [input_dir_path + '_output' for input_dir_path in input_dirs_path]
tic = time()
for output_dir_path in output_dirs_path:
    for filename in iglob(output_dir_path + '/*.jpeg'):
        img = imread(filename)
        res_v = get_val(img)
        
        dist_to_res = {k: v - res_v for k, v in res.items()}
        res_k = min(dist_to_res, key = dist_to_res.get)
        res_keys = list(res.keys())
        if abs(res[res_keys[0]] - res_v) < abs(res[res_keys[1]] - res_v):
            res_k = res_keys[0]
        else:
            res_k = res_keys[1]
        n += 1
        if res_k == output_dir_path:
            s += 1
toc = time()
print('%s seconds for one image' % str((toc - tic) / n))
ip_time = (toc - tic) / n

0.0006719167669751653 seconds for one image


In [95]:
print('Accuracy: %s' % str(s / n))

Accuracy: 0.972318339100346


In [78]:
# Make data
output_dirs_path = [input_dir_path + '_output' for input_dir_path in input_dirs_path]
X = np.array([])
for n_class, output_dir_path in enumerate(output_dirs_path):
    for num, filename in enumerate(iglob(output_dir_path + '/*.jpeg')):
        img = imread(filename)
        new_size = (18, 50)
        thresh = threshold_otsu(img)
        bin = img < thresh
        bin = resize(bin, new_size)
        bin = bin[1:-1, 1:-1] * 255
        bin = bin.astype('uint8')
        thresh = threshold_otsu(bin)
        bin = bin > thresh
        
        r, c = bin.shape
        row = bin.reshape(1, r * c).squeeze()
        
        if X.shape[0] == 0:
            X = np.array(row)
            y = np.array(n_class)
        else:
            X = np.vstack((X, row))
            y = np.hstack((y, n_class))
            
print(X.shape)
print(y.shape)

(578, 768)
(578,)


In [84]:
# Split data
train_part = 0.8
idx = np.random.permutation(X.shape[0])

train_idx = idx[: int(X.shape[0] * train_part)]
test_idx = idx[int(X.shape[0] * train_part) :]

X_train = X[train_idx]
y_train = y[train_idx]
X_test = X[test_idx]
y_test = y[test_idx]

print(X_train.shape)
print(y_train.shape)

print(X_test.shape)
print(y_test.shape)

(462, 768)
(462,)
(116, 768)
(116,)


In [147]:
# Grid Search logreg

kf = KFold(y_train.shape[0], n_folds = 5, shuffle = True)
grid = {'C': np.power(10.0, np.arange(-5, 6))}
clf = LogisticRegression(penalty = 'l2')
gs = GridSearchCV(clf, grid, scoring = 'accuracy', cv = kf)
gs.fit(X_train, y_train)

GridSearchCV(cv=sklearn.cross_validation.KFold(n=462, n_folds=5, shuffle=True, random_state=None),
       error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': array([  1.00000e-05,   1.00000e-04,   1.00000e-03,   1.00000e-02,
         1.00000e-01,   1.00000e+00,   1.00000e+01,   1.00000e+02,
         1.00000e+03,   1.00000e+04,   1.00000e+05])},
       pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)

In [148]:
# Train logreg
clf = LogisticRegression(penalty = 'l2', **gs.best_params_)
clf.fit(X_train, y_train)

LogisticRegression(C=0.10000000000000001, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [149]:
# Test logreg
tic = time()
print('Accuracy on test_set: %s' % str(clf.score(X_test, y_test)))
toc = time()
print('Accuracy on all data: %s' % str(clf.score(X, y)))
print('%s seconds for one image' % str((toc - tic) / y_test.shape[0]))
logreg_time = (toc - tic) / y_test.shape[0]

Accuracy on test_set: 1.0
Accuracy on all data: 0.998269896194
1.2956816574622845e-05 seconds for one image


In [150]:
# Grid Search SVM

kf = KFold(y_train.shape[0], n_folds = 5, shuffle = True)
grid = {'C': np.power(2.0, np.arange(-5, 15)), 'gamma': np.power(2.0, np.arange(-13, 3))}
clf = svm.SVC(kernel = 'rbf')
gs = GridSearchCV(clf, grid, scoring = 'accuracy', cv = kf, verbose = True)
gs.fit(X_train, y_train)

Fitting 5 folds for each of 320 candidates, totalling 1600 fits


[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    6.7s
[Parallel(n_jobs=1)]: Done 199 tasks       | elapsed:   26.6s
[Parallel(n_jobs=1)]: Done 449 tasks       | elapsed:   56.1s
[Parallel(n_jobs=1)]: Done 799 tasks       | elapsed:  1.5min
[Parallel(n_jobs=1)]: Done 1249 tasks       | elapsed:  2.2min
[Parallel(n_jobs=1)]: Done 1600 out of 1600 | elapsed:  2.8min finished


GridSearchCV(cv=sklearn.cross_validation.KFold(n=462, n_folds=5, shuffle=True, random_state=None),
       error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'gamma': array([  1.22070e-04,   2.44141e-04,   4.88281e-04,   9.76562e-04,
         1.95312e-03,   3.90625e-03,   7.81250e-03,   1.56250e-02,
         3.12500e-02,   6.25000e-02,   1.25000e-01,   2.50000e-01,
         5.00000e-01,   1.00000e+00,   2.00000e+00,   4.00000e+00]), 'C': arra...,   5.12000e+02,   1.02400e+03,
         2.04800e+03,   4.09600e+03,   8.19200e+03,   1.63840e+04])},
       pre_dispatch='2*n_jobs', refit=True, scoring='accuracy',
       verbose=True)

In [151]:
# Train SVM
clf = svm.SVC(kernel = 'rbf', **gs.best_params_)
clf.fit(X_train, y_train)

SVC(C=4.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0078125, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [152]:
# Test SVM
tic = time()
print('Accuracy on test_set: %s' % str(clf.score(X_test, y_test)))
toc = time()
print('Accuracy on all data: %s' % str(clf.score(X, y)))
print('%s seconds for one image' % str((toc - tic) / y_test.shape[0]))
svm_time = (toc - tic) / y_test.shape[0]

Accuracy on test_set: 1.0
Accuracy on all data: 1.0
8.977487169463059e-05 seconds for one image


In [153]:
print('svm faster, than logreg: %sx' % str(logreg_time / svm_time))

print('logreg faster, than image processing methods: %sx' % str(ip_time / logreg_time))
print('svm faster, than image processing methods: %sx' % str(ip_time / svm_time))

svm faster, than logreg: 0.14432564848096338x
logreg faster, than image processing methods: 86.10595897809706x
svm faster, than image processing methods: 12.42729836758909x
