# Optical Character Recognition for Japanese Character (Hiragana)
## Data Science Questions
- What character is this handwriting?
- Multiple Classification (A, B, .....or z?)
- Machine Learning Model: kNN, Naive Bayes, Random Forest

In [39]:
import numpy as np
import scipy.misc
from sklearn.model_selection import train_test_split

## kNN

## Every single character; 71 classes

In [40]:
nb_classes = 71 # every sigle character
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [41]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(9088, 1024)

In [42]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(2272, 1024)

In [43]:
from sklearn.neighbors import KNeighborsClassifier
import time
from sklearn.metrics import accuracy_score

In [44]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---43.304912090301514s


In [45]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

0.5448943661971831% accurate


# Regualr characters; 46 classes

In [46]:
nb_classes = 46 # regular charcters
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("regular_hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [47]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(5888, 1024)

In [48]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(1472, 1024)

In [49]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---17.14592671394348s


In [50]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

0.6766304347826086% accurate


# Voiced characters; 25 classes

In [51]:
nb_classes = 25 # regular charcters
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("voiced_hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [52]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(3200, 1024)

In [53]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(800, 1024)

In [54]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---4.860741138458252s


In [55]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

0.61875% accurate
