In [1]:
%matplotlib inline
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Number of leading training samples used to fit the model; the samples from
# index N_split onward are held out and passed to Keras as validation data.
N_split = 30000

In [3]:
# Read both CSV files as float arrays. skiprows=1 drops the first line of each
# file (presumably the column-name header).
train, test = (
    np.loadtxt(csv_path, delimiter=',', skiprows=1)
    for csv_path in ('./data/digit/train.csv', './data/digit/test.csv')
)

In [4]:
# Column 0 of the training table is used as the label; the remaining columns
# are the pixel values.
train_label = train[:, 0]
# Bring the flat pixel rows into a (n_samples, 28, 28) layout that is convenient
# for processing. reshape is used instead of np.resize on purpose: np.resize
# silently repeats or truncates data when the element count does not match,
# whereas reshape raises if a row does not hold exactly 28 * 28 values.
train_img = train[:, 1:].reshape(train.shape[0], 28, 28)
test_img = test.reshape(test.shape[0], 28, 28)

In [5]:
# Sanity check on the reshaped training images: (n_samples, 28, 28) is expected.
train_img.shape

(42000, 28, 28)

In [6]:
def split_to(img):
    """Split a batch of images into four quadrants.

    Parameters
    ----------
    img : array of shape (n_images, height, width)
        Batch of 2-D images stacked along the first axis.

    Returns
    -------
    list
        Four slices of ``img`` in the order
        [top-left, top-right, bottom-left, bottom-right]. For odd sizes the
        extra row/column goes to the bottom/right quadrants.
    """
    # Rows are halved using the height (axis 1) and columns using the width
    # (axis 2); using the width for both only works for square images.
    half_h = img.shape[1] // 2
    half_w = img.shape[2] // 2
    return [
        img[:, :half_h, :half_w],
        img[:, :half_h, half_w:],
        img[:, half_h:, :half_w],
        img[:, half_h:, half_w:],
    ]

In [7]:
# Cut every image of both sets into its four quadrants.
train_parts, test_parts = (split_to(images) for images in (train_img, test_img))

In [8]:
# Per-image Sobel derivatives (3x3 kernel, 64-bit float output) of the training
# images: *_x is the derivative along x, *_y the derivative along y.
train_sobel_x = np.zeros_like(train_img)
train_sobel_y = np.zeros_like(train_img)
for idx, image in enumerate(train_img):
    train_sobel_x[idx] = cv2.Sobel(image, cv2.CV_64F, dx=1, dy=0, ksize=3)
    train_sobel_y[idx] = cv2.Sobel(image, cv2.CV_64F, dx=0, dy=1, ksize=3)

In [9]:
# Per-image Sobel derivatives (3x3 kernel, 64-bit float output) of the test
# images: *_x is the derivative along x, *_y the derivative along y.
test_sobel_x = np.zeros_like(test_img)
test_sobel_y = np.zeros_like(test_img)
for idx, image in enumerate(test_img):
    test_sobel_x[idx] = cv2.Sobel(image, cv2.CV_64F, dx=1, dy=0, ksize=3)
    test_sobel_y[idx] = cv2.Sobel(image, cv2.CV_64F, dx=0, dy=1, ksize=3)

In [10]:
# Split the derivative maps into the same four quadrants as the images.
train_parts_sobel_x, train_parts_sobel_y = (
    split_to(derivative) for derivative in (train_sobel_x, train_sobel_y)
)
test_parts_sobel_x, test_parts_sobel_y = (
    split_to(derivative) for derivative in (test_sobel_x, test_sobel_y)
)

In [11]:
def get_part_len_and_theta(x, y):
    """Convert Cartesian gradient components to polar form.

    Thin wrapper around ``cv2.cartToPolar``: ``x`` and ``y`` are passed
    through as-is and the call's result is returned unchanged.
    """
    polar = cv2.cartToPolar(x, y)
    return polar

In [12]:
def get_cart_polar(parts_sobel_x, parts_sobel_y):
    """Convert per-part Sobel derivatives to polar form.

    Parameters
    ----------
    parts_sobel_x, parts_sobel_y : sequences of arrays
        Matching x- and y-derivative arrays, one entry per image part.

    Returns
    -------
    list of (g, theta) tuples
        The two arrays returned by ``cv2.cartToPolar`` for every part
        (magnitude and angle), in input order.
    """
    # Walk over however many parts were supplied instead of assuming exactly
    # four, so the function keeps working if the image split changes.
    parts = []
    for part_dx, part_dy in zip(parts_sobel_x, parts_sobel_y):
        g, theta = cv2.cartToPolar(part_dx, part_dy)
        parts.append((g, theta))
    return parts

In [13]:
# Gradient magnitude/angle pairs for every quadrant of both sets.
train_part_sobel, test_part_sobel = (
    get_cart_polar(dx_parts, dy_parts)
    for dx_parts, dy_parts in (
        (train_parts_sobel_x, train_parts_sobel_y),
        (test_parts_sobel_x, test_parts_sobel_y),
    )
)

In [14]:
# Histograms are weighted by the length (magnitude) of the gradient vector
def get_part_hist(train_img, train_g, train_theta):
    """Build a magnitude-weighted orientation histogram for every sample.

    Parameters
    ----------
    train_img : sequence
        Only its length is used; it fixes the number of output rows.
    train_g : sequence of arrays
        Per-sample weights (gradient magnitudes) for the histogram.
    train_theta : sequence of arrays
        Per-sample angles, binned over the range [0, 2*pi].

    Returns
    -------
    numpy.ndarray of shape (len(train_img), 16)
        One 16-bin histogram per sample.
    """
    n_samples = len(train_img)
    histograms = np.zeros((n_samples, 16))
    for k in range(n_samples):
        # np.histogram also returns the bin edges, which are not needed here.
        histograms[k], _ = np.histogram(
            train_theta[k],
            bins=16,
            range=(0., 2. * np.pi),
            weights=train_g[k],
        )
    return histograms

In [15]:
def get_hist(train_part, train_part_sobel):
    """Build the concatenated, per-part normalised histogram features.

    Parameters
    ----------
    train_part : sequence
        Image parts, one entry per part; forwarded to ``get_part_hist``.
    train_part_sobel : sequence
        For every part, an indexable pair whose element 0 is the gradient
        magnitude and element 1 the gradient angle.

    Returns
    -------
    numpy.ndarray
        Per-sample feature rows: the histograms of all parts, each
        L2-normalised on its own, joined along axis 1.
    """
    # Iterate over however many parts were supplied rather than assuming four.
    normalised = []
    for part_imgs, polar in zip(train_part, train_part_sobel):
        part = get_part_hist(part_imgs, polar[0], polar[1])
        # Divide every row by its L2 norm; the tiny epsilon keeps all-zero
        # histograms from causing a division by zero.
        part = part / (np.linalg.norm(part, axis=1)[:, None] + 1e-26)
        normalised.append(part)
    return np.concatenate(normalised, axis=1)

In [16]:
# Final histogram features for both sets.
train_part_hist, test_part_hist = (
    get_hist(image_parts, polar_parts)
    for image_parts, polar_parts in (
        (train_parts, train_part_sobel),
        (test_parts, test_part_sobel),
    )
)

In [17]:
# NOTE(review): exploratory leftover - the result is displayed but never assigned.
# train_part_hist has only 64 columns per row, so np.resize has to repeat the
# data to fill an (n, 28, 28) array; the values shown are not 28x28 images.
np.resize(train_part_hist[:, 1:], (train_part_hist.shape[0], 28, 28))

array([[[  9.46646041e-01,   0.00000000e+00,   0.00000000e+00, ...,
           5.63769831e-02,   0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
           0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
        [  9.96751793e-01,   8.05348548e-02,   0.00000000e+00, ...,
           0.00000000e+00,   3.50265892e-01,   5.21447724e-01],
        ..., 
        [  9.33628089e-02,   0.00000000e+00,   3.64815597e-01, ...,
           7.84430289e-01,   4.93914669e-02,   3.22169042e-03],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
           3.44904696e-02,   6.92514324e-02,   2.61992709e-02],
        [  2.25512691e-01,   3.57233228e-02,   0.00000000e+00, ...,
           0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],

       [[  0.00000000e+00,   0.00000000e+00,   9.51834488e-02, ...,
           0.00000000e+00,   4.74031714e-01,   8.48818927e-01],
        [  2.30732046e-01,   3.95358942e-02,   0.00000000e+0

In [18]:
# The histogram features are the model inputs.
X_train, X_test = train_part_hist, test_part_hist

In [19]:
# Feature matrix shape: 4 image parts x 16 histogram bins = 64 columns per sample.
X_train.shape

(42000, 64)

In [20]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Conv2D
from keras.utils import np_utils
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta

Using TensorFlow backend.


In [21]:
# X_train.shape

In [22]:
# Arrange the 64 features of each sample as an 8x8 single-channel "image",
# the input layout expected by the Conv2D layers.
X_train = X_train.reshape((len(X_train), 8, 8, 1))
X_test = X_test.reshape((len(X_test), 8, 8, 1))

# One-hot encode the labels for the categorical cross-entropy loss.
y_train = np_utils.to_categorical(train_label)


In [23]:
# Small CNN over the 8x8x1 feature "image": two convolutions, max pooling,
# dropout, and a dense head with a 10-way softmax output.
model = Sequential([
    Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(8, 8, 1)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model.compile(
    optimizer=Adadelta(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Fit on the first N_split samples; the remaining samples serve as validation data.
model.fit(
    X_train[:N_split], y_train[:N_split],
    batch_size=32,
    epochs=100,
    verbose=1,
    validation_data=(X_train[N_split:], y_train[N_split:]),
)

# NOTE: the figures labelled "Test" below are computed on the same hold-out
# slice that was used as validation data during fitting, not on the test set.
score = model.evaluate(X_train[N_split:], y_train[N_split:], verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 30000 samples, validate on 12000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/1

In [26]:
# Predicted digit for every test sample: the index of the largest softmax output.
# predict + argmax is what Sequential.predict_classes computed for multi-class
# outputs; predict_classes itself is deprecated and was removed from later
# Keras releases, so the explicit form is used here.
pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)

In [27]:
# Write the predictions as a two-column CSV; image ids are numbered from 1.
with open('submit.txt', 'w') as dst:
    dst.write('ImageId,Label\n')
    dst.writelines(
        '%s,%s\n' % (image_id, label)
        for image_id, label in enumerate(pred, 1)
    )

![Lab 2 result](result/res_2_lab.png "Lab 2 result")