In [1]:
from array import array
import numpy as np
import random
import struct
from sklearn.metrics import confusion_matrix
import os
import cv2
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import pickle

In [2]:
def read(fname_img, fname_lbl):
    f = open(fname_lbl, 'rb')
    magic_nr, size = struct.unpack(">II", f.read(8))
    lbl = array("b", f.read())
    f.close()

    f = open(fname_img, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", f.read(16))
    img = array("B", f.read())
    f.close()

    tmp = []
    cur_img = []
    img_matrix = []
    for x in img.tolist():
        tmp.append(x)
        if len(tmp) == 28:
            cur_img.append(tmp)
            tmp = []
        if len(cur_img) == 28:
            img_matrix.append(cur_img)
            cur_img = []

    return lbl.tolist(), img_matrix

In [3]:
train_size = 60000

train_lbl, train_img = read("train-images-idx3-ubyte", "train-labels-idx1-ubyte")
test_lbl, test_img = read("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte")

index_shuf = range(train_size)
random.shuffle(index_shuf)
x = []
y = []
for i in range(train_size):
    x.append(train_img[index_shuf[i]])
    y.append(train_lbl[index_shuf[i]])
train_lbl, train_img = y, x

train_img = np.uint8(np.array(train_img))
test_img = np.uint8(np.array(test_img))

In [4]:
def cal_sift(in_img):
    sift = cv2.xfeatures2d.SIFT_create()
    kp, des = sift.detectAndCompute(in_img, None)
    return kp, des

In [17]:
def sift_sample(imgs, train_lbl, sample_num):
    descriptors = np.array([])
    samples = [0] * 10
    
    cur = 0
    while (sum(samples) < sample_num):
        if (samples[train_lbl[cur]] >= (sample_num/10)):
            cur += 1
            continue
        samples[train_lbl[cur]] += 1
        
        kp, des = cal_sift(np.array(imgs[cur]))
        if (des is None):
            cur += 1
            continue
        if len(descriptors):
            descriptors = np.append(descriptors, des, axis=0)
        else:
            descriptors = des
        cur += 1
        
    return descriptors

In [18]:
cluster_num = 200
sample_num = 10000
iter_num = 300

descriptors = sift_sample(train_img, train_lbl, sample_num)
print descriptors.shape

(79081, 128)


In [19]:
def descriptor_cluster(descriptors, cluster_num, iter_nume):
    cluster = KMeans(n_clusters=cluster_num, max_iter=iter_num).fit(descriptors)
    return cluster

In [20]:
cluster = descriptor_cluster(descriptors, cluster_num, iter_num)

In [21]:
def img_to_feature(img, cluster, cluster_num):
    sift = cv2.xfeatures2d.SIFT_create()
    feature_layer1 = [0.0] * cluster_num
    feature_layer2 = [0.0] * cluster_num * 4
    feature_layer3 = [0.0] * cluster_num * 16
    
    kp, des = cal_sift(img)
    if (des is not None):
        cluster_result = cluster.predict(des)
        for c in cluster_result:
            feature_layer1[c] += 0.5
            
    cur = 0
    for i in range(2):
        for j in range(2):
            cur += 1
            crop_img = img[i*14 : (i+1)*14 -1, j*14 : (j+1)*14 -1]
            kp, des = cal_sift(crop_img)
            if (des is not None):
                cluster_result = cluster.predict(des)
                for c in cluster_result:
                    feature_layer2[(cur-1)*cluster_num + c] += 0.25
                    
    cur = 0
    for i in range(4):
        for j in range(4):
            cur += 1
            crop_img = img[i*7 : (i+1)*7 -1, j*7 : (j+1)*7 -1]
            kp, des = cal_sift(crop_img)
            if (des is not None):
                cluster_result = cluster.predict(des)
                for c in cluster_result:
                    feature_layer3[(cur-1)*cluster_num + c] += 0.25
    
    feature = feature_layer1 + feature_layer2 + feature_layer3
    return np.array(feature)

In [22]:
def generate_features(imgs, cluster, cluster_num):
    features = []
    for img in imgs:
        f = img_to_feature(img, cluster, cluster_num)
        features.append(f)
    return np.array(features)

In [23]:
features_train = generate_features(train_img, cluster, cluster_num)

In [24]:
features_test = generate_features(test_img, cluster, cluster_num)

In [25]:
def train_and_test(features_train, features_test, lbl_train, lbl_test):    
    clf = svm.LinearSVC()
    clf.fit(features_train, lbl_train)
    result = clf.predict(features_test)
    error = sum([int(result[i] != test_lbl[i]) for i in range(len(result))])
    print "Accuracy: ", 1 - float(error)/len(result)
    print confusion_matrix(result, test_lbl)
    return clf

In [26]:
train_and_test(features_train, features_test, train_lbl, test_lbl)

Accuracy:  0.8661
[[ 920    0   28    3    3   34   43   11   12   12]
 [   7 1120   28    9   18   21   20   69    2   13]
 [   6    4  850   21   21   10   13   44   18    7]
 [   2    1   13  881    4   23    7    8   18   11]
 [   2    1   10   10  864    8    9   18   12   20]
 [   9    1    8   43    7  714   27    4   12   19]
 [  16    1   13   12   11   40  761   11   12   42]
 [   8    6   58   18   11   12   20  843    9   11]
 [   7    1   15   11   13   10    8    2  855   21]
 [   3    0    9    2   30   20   50   18   24  853]]


LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)