In [1]:
import cv2
import numpy as np
import pandas as pd

In [2]:
# Read the labelled training table and the test table from disk, in that order.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/digit/train.csv', 'data/digit/test.csv')
)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
def convert(data, size=(28, 28), win_size=(64, 64), block_size=(16, 16),
            block_stride=(8, 8), cell_size=(8, 8), nbins=9):
    """Turn flattened images into HOG feature vectors.

    Each row of ``data`` is cast to ``uint8``, reshaped to ``size``, resized
    to ``win_size`` with ``cv2.resize`` and passed through a
    ``cv2.HOGDescriptor`` built from the remaining arguments.

    Parameters
    ----------
    data : array-like
        One flattened image per row; every row must hold exactly
        ``size[0] * size[1]`` values.
    size : tuple of int
        Shape each row is reshaped to before resizing.
    win_size, block_size, block_stride, cell_size : tuple of int
        Geometry handed to ``cv2.HOGDescriptor``. ``win_size`` is also the
        target size of the resize step.
    nbins : int
        Number of orientation bins handed to ``cv2.HOGDescriptor``.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with one flattened descriptor per input row.
        For empty input the result has shape ``(0, descriptor_size)``.
    """
    pixels = np.array(data, dtype=np.uint8)
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)

    if pixels.size == 0:
        # Stay two-dimensional so callers can still select feature columns.
        return np.empty((0, hog.getDescriptorSize()), dtype=np.float32)

    # ravel() flattens whatever shape hog.compute returns into a 1-D vector.
    return np.array([
        hog.compute(cv2.resize(image.reshape(size), win_size)).ravel()
        for image in pixels
    ])

In [5]:
from sklearn.ensemble import RandomForestClassifier

# HOG features for every training image, plus one-hot encoded labels.
X, y = convert(train.drop(columns=['label'])), pd.get_dummies(train.label)

# A small random forest is fitted only to rank the HOG features by importance.
# NOTE(review): the ranking is fitted on every training row, before the
# hold-out split in the following cell, so hold-out rows influence which
# features are kept — confirm this is acceptable for the reported score.
dr = RandomForestClassifier(n_estimators=10, max_depth=10, n_jobs=-1, random_state=123123)
dr.fit(X, y)

# argsort is ascending, so the last 300 positions are the highest-ranked features.
indices = dr.feature_importances_.argsort()[-300:]

# Keep only the selected columns with a single vectorised indexing step
# instead of rebuilding the matrix row by row in Python.
X = X[:, indices]

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123123,
    test_size=0.2,
)

In [7]:
from keras.models import Sequential
from keras.layers import Dense, Activation

Using TensorFlow backend.


In [8]:
# Fully connected classifier: two ReLU hidden layers (100 and 32 units)
# followed by a 10-unit softmax output. The input width is taken from the
# training matrix, so it follows the number of selected features.
model = Sequential([
    Dense(100, input_shape=(X_train.shape[1],)),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
activation_1 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                3232      
_________________________________________________________________
activation_2 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 33,662
Trainable params: 33,662
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train for 30 passes over the training split in mini-batches of 100 rows.
model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fd960ca6eb8>

In [10]:
# Score the model on the held-out split; the result lists the loss followed
# by the metrics requested at compile time.
model.evaluate(x=X_test, y=y_test)



[0.075624419894346442, 0.98392857142857137]

In [11]:
# Compute HOG features for the test images and keep the same columns that
# were selected for training, using one vectorised indexing step instead of
# a per-row Python loop.
# NOTE: this rebinds X, which held the training features until now.
X = convert(test)[:, indices]

In [12]:
# The predicted class is the position of the largest softmax output.
# `predict_classes` no longer exists in recent Keras/TensorFlow releases;
# arg-max over `predict` gives the same labels and works on old and new versions.
preds = np.argmax(model.predict(X), axis=-1)

# ImageId is the 1-based row number of each test image.
df = pd.DataFrame(
    {'ImageId': np.arange(1, len(preds) + 1), 'Label': preds},
    columns=['ImageId', 'Label'],
)

In [13]:
# Write the predictions to disk without the DataFrame index column.
df.to_csv(path_or_buf='result.csv', index=False)

In [14]:
# Show the first lines of the written file as a quick sanity check (shell escape).
! head result.csv

ImageId,Label
1,2
2,0
3,9
4,0
5,3
6,7
7,0
8,3
9,0
