In [1]:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle
%matplotlib inline

# Current working directory of the kernel; the data and model directories
# used by this notebook are resolved relative to it.
cwd = os.getcwd()

In [2]:
# Height and width used to reshape each flat image row into a 2-D image.
img_rows, img_cols = 28, 28

data_dir = os.path.join(cwd, "..", "data")
model_dir = os.path.join(cwd, "..", "model")

# One class per "<label>.npy" file found in data_dir; the label is the file
# name without its ".npy" suffix.
# A list comprehension is used instead of map()/filter(): on Python 3 those
# return lazy iterators, and np.array(<map object>) produces a 0-d object
# array, which breaks both len(LABELS) and LABELS[i].
# NOTE: class order follows os.listdir(), the same listing data_prepare()
# enumerates to assign integer labels.
LABELS = np.array([
    file_name[:-len(".npy")]
    for file_name in os.listdir(data_dir)
    if file_name.endswith(".npy")
])

num_classes = len(LABELS)

In [3]:
def data_prepare(source_dir=None, samples_per_class=5000, train_fraction=0.9, feature_count=None):
    """Build shuffled train/test pickles from the per-class ``.npy`` files.

    Every ``*.npy`` file in ``source_dir`` is treated as one class. Its rows
    are prefixed with an integer label (the file's position in the
    ``os.listdir`` listing), shuffled, and truncated to ``samples_per_class``
    rows. All classes are then merged, shuffled again and split into
    ``train.pickle`` / ``test.pickle`` written to ``source_dir``.

    Args:
        source_dir: Directory holding the ``.npy`` files and receiving the
            pickles. Defaults to the notebook-level ``data_dir``.
        samples_per_class: Maximum number of rows kept per class.
        train_fraction: Fraction of the merged dataset written to the train
            pickle; the remainder goes to the test pickle.
        feature_count: Number of columns in each ``.npy`` row. Defaults to
            ``img_rows * img_cols``.

    Returns:
        None. The results are written to disk.
    """
    if source_dir is None:
        source_dir = data_dir
    if feature_count is None:
        feature_count = img_rows * img_cols

    npy_files = [name for name in os.listdir(source_dir) if name.endswith('.npy')]

    # Start from an empty float array with the final column count so the
    # merged dataset keeps a float dtype and is well-formed even when no
    # .npy file is present.
    chunks = [np.array([]).reshape(0, feature_count + 1)]

    for class_index, file_name in enumerate(npy_files):
        data = np.load(os.path.join(source_dir, file_name))
        labels = np.ones(len(data), dtype=int) * class_index
        # Column 0 carries the label, the remaining columns the features.
        labelled = np.concatenate((labels[:, np.newaxis], data), axis=1)

        np.random.shuffle(labelled)

        chunks.append(labelled[0:samples_per_class])
        print("Load {}".format(file_name))

    # Concatenate once instead of growing the array inside the loop, which
    # would re-copy the whole dataset for every class.
    dataset = np.concatenate(chunks, axis=0)

    np.random.shuffle(dataset)
    dataset_len = len(dataset)
    split_x = int(dataset_len * train_fraction)

    print("Dataset {} images".format(dataset_len))
    print("Train {} images".format(split_x))
    print("Test {} images".format(dataset_len - split_x))

    print("Write data to pickle files...")

    # Context managers guarantee the files are flushed and closed.
    with open(os.path.join(source_dir, "train.pickle"), "wb") as train_file:
        pickle.dump(dataset[0:split_x], train_file)
    # Slice to the end of the array: the previous `[split_x:-1]` silently
    # dropped the last sample from the test set.
    with open(os.path.join(source_dir, "test.pickle"), "wb") as test_file:
        pickle.dump(dataset[split_x:], test_file)

    print("Finish")

# Regenerate the pickles when either file is missing: load_dataset() reads
# both train.pickle and test.pickle, so checking only one of them could leave
# the notebook unable to load the dataset.
train_pickle_path = os.path.join(data_dir, "train.pickle")
test_pickle_path = os.path.join(data_dir, "test.pickle")
if not (os.path.exists(train_pickle_path) and os.path.exists(test_pickle_path)):
    print("Prepare pickle data")
    data_prepare()

In [4]:
# Module-level placeholders for the dataset splits; load_dataset() rebinds them.
x_train = y_train = x_test = y_test = None

def load_dataset(source_dir=None):
    """Load the pickled splits into the module-level dataset variables.

    Reads ``train.pickle`` and ``test.pickle`` from ``source_dir`` and rebinds
    the globals ``x_train`` / ``x_test`` to every column but the first and
    ``y_train`` / ``y_test`` to the first column.

    Args:
        source_dir: Directory containing the two pickles. Defaults to the
            notebook-level ``data_dir``.

    Returns:
        None. The results are stored in the module-level globals.
    """
    global x_train, y_train, x_test, y_test
    if source_dir is None:
        source_dir = data_dir

    # SECURITY NOTE: pickle.load can execute arbitrary code. Only load pickle
    # files produced by this notebook, never files from an untrusted source.
    # Context managers close the file handles deterministically.
    with open(os.path.join(source_dir, "train.pickle"), "rb") as train_file:
        train_data = pickle.load(train_file)
    with open(os.path.join(source_dir, "test.pickle"), "rb") as test_file:
        test_data = pickle.load(test_file)

    # Column 0 is the label; the remaining columns are the features.
    x_train = train_data[:, 1:]
    y_train = train_data[:, 0]
    x_test = test_data[:, 1:]
    y_test = test_data[:, 0]
    print("Load dataset complete")

# Populate the module-level x_train / y_train / x_test / y_test arrays from the pickles.
load_dataset()

Load dataset complete


In [None]:
# Preview the first few training images together with their class names.
# The count is capped so the cell also works when fewer than 10 samples exist.
preview_count = min(10, len(x_train))
for i in range(preview_count):
    raw_label = y_train[i]
    # Once the preprocessing cell has one-hot encoded y_train the label is a
    # vector rather than a scalar; recover the class index in both cases so
    # this cell can be run before or after that step.
    if np.ndim(raw_label) > 0:
        label_index = int(np.argmax(raw_label))
    else:
        label_index = int(raw_label)
    # Use the configured image size instead of a hard-coded 28x28.
    plt.imshow(x_train[i].reshape(img_rows, img_cols))
    plt.title(LABELS[label_index])
    plt.show()

### Data Preprocessing

In [6]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

Using TensorFlow backend.


In [7]:
# Convert the features to float32, divide by 255, and reshape every sample to
# (img_rows, img_cols, 1), the input shape declared for the first Conv2D layer.
x_train = (x_train.astype('float32') / 255).reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = (x_test.astype('float32') / 255).reshape(x_test.shape[0], img_rows, img_cols, 1)

# Turn the integer class labels into one-hot rows of width num_classes.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Report the resulting array shapes.
for split_array in (x_train, y_train, x_test, y_test):
    print(split_array.shape)

(45000, 28, 28, 1)
(45000, 10)
(4999, 28, 28, 1)
(4999, 10)


### Model

In [None]:
# Convolutional classifier: a conv/pool stage, three stacked conv layers with
# pooling and dropout, then two dense layers feeding a softmax over the classes.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(img_rows, img_cols, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# Create the output directory up front: nothing else in the notebook creates
# model_dir, and a missing directory would otherwise only surface as a save
# error after the whole training run has completed.
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

model.fit(
    x_train, y_train,
    batch_size = 32,
    epochs = 2,
    verbose = 1
)

# Persist the trained model so the evaluation cell can reload it from disk.
model.save(os.path.join(model_dir, 'model.h5'))

Epoch 1/2
Epoch 2/2

In [13]:
# Reload the saved model from disk and evaluate it on the test split.
saved_model_path = os.path.join(model_dir, 'model.h5')
model = keras.models.load_model(saved_model_path)

# With metrics=['accuracy'] at compile time, index 1 of the result is the accuracy.
score = model.evaluate(x_test, y_test, verbose=1)
print("\nAccuracy {}".format(score[1]))

Accuracy 0.893978795759
