# Optical Character Recognition for Japanese Character (Hiragana)
## Dataset
- ETL Character Database: ETL-8 (number of writers: 160, number of characters: 956)

## Data Science Questions
- What character is this handwriting?
- Multiple Classification (A, B, .....or Z?)
- Machine Learning Model: kNN, Random Forest

In [56]:
import numpy as np
import scipy.misc
from sklearn.model_selection import train_test_split

## kNN

## Every single character; 71 classes

In [57]:
nb_classes = 71 # every single character
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [58]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(9088, 1024)

In [59]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(2272, 1024)

In [60]:
from sklearn.neighbors import KNeighborsClassifier
import time
from sklearn.metrics import accuracy_score

In [61]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---42.515969038009644s


In [62]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

54.84154929577465% accurate


# Regualr characters; 46 classes

In [63]:
nb_classes = 46 # regular charcters
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("regular_hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [64]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(5888, 1024)

In [65]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(1472, 1024)

In [66]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---17.093602895736694s


In [67]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

69.36141304347827% accurate


# Voiced characters; 25 classes

In [68]:
nb_classes = 25 # regular charcters
# input image dimensions
img_rows, img_cols = 32, 32
# img_rows, img_cols = 127, 128

ary = np.load("voiced_hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2)

In [69]:
# change demention
number_of_samples, nx, ny = X_train.shape
train_dataset = X_train.reshape((number_of_samples,nx*ny))
train_dataset.shape

(3200, 1024)

In [70]:
# change demention
number_of_samples, nx, ny = X_test.shape
test_dataset = X_test.reshape((number_of_samples,nx*ny))
test_dataset.shape

(800, 1024)

In [71]:
start_time = time.time()
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(train_dataset, y_train)
predict = clf.predict(test_dataset)
end_time = time.time()
print("---{}s".format(end_time - start_time))

---4.916400909423828s


In [72]:
accuracy = accuracy_score(y_test, predict)
print("{}% accurate".format(accuracy * 100))

61.625% accurate
