# くずし字

## KMNIST Data Setup

くずし字　[KMNIST Dataset description](http://codh.rois.ac.jp/kmnist/)

[Resources for hentaigana](https://wakancambridge.files.wordpress.com/2017/05/useful-resources-for-the-study-of-hentaigana-with-recommended1.pdf)
<img src="images/hentaigana.png" alt="hentaigana chart" width="48%" align="left"/>

[Dataset on GitHub](https://github.com/rois-codh/kmnist)

| File            | Examples | Download (MNIST format)    | Download (NumPy format)      |
|-----------------|--------------------|----------------------------|------------------------------|
| Training images | 60,000             | [train-images-idx3-ubyte.gz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz) (18MB) | [kmnist-train-imgs.npz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz) (18MB)   |
| Training labels | 60,000             | [train-labels-idx1-ubyte.gz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz) (30KB) | [kmnist-train-labels.npz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-labels.npz) (30KB)  |
| Testing images  | 10,000             | [t10k-images-idx3-ubyte.gz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz) (3MB) | [kmnist-test-imgs.npz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-imgs.npz) (3MB)   |
| Testing labels  | 10,000             | [t10k-labels-idx1-ubyte.gz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz) (5KB)  | [kmnist-test-labels.npz](http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-labels.npz) (5KB) |

In [None]:
from pathlib import Path
import requests
import gzip

import struct
from pathlib import Path
import numpy as np
import pandas as pd
from io import BytesIO
from PIL import Image as pi
from ipywidgets import HBox, VBox, Layout, HTML
from ipywidgets import Image as Image_widget

# fastai is optional: the cells that train or inspect the model are guarded by
# `fastai_imported` and are skipped when the import fails.
try:
    from fastai.vision import *
    from fastai.metrics import error_rate
    fastai_imported = True
except Exception as ex:
    # Deliberately broad: any failure while importing fastai only disables the
    # training cells.  Report the reason so the failure is not silent.
    print('Switch to fastapi-cpu kernel to train model.')
    print(f'fastai import failed: {ex!r}')
    fastai_imported = False

In [None]:
from pathlib import Path
import requests

# Local cache directory for the raw KMNIST archives.
DATA_PATH = Path("../data/raw")
PATH = DATA_PATH / "kmnist"

PATH.mkdir(parents=True, exist_ok=True)

# Base URL and the four gzip-compressed MNIST-format archives to fetch.
URL = "http://codh.rois.ac.jp/kmnist/dataset/kmnist/"
FILENAMES = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz',
             't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

# Download only the archives that are not already present in PATH.
for FILENAME in FILENAMES:
    target = PATH / FILENAME
    if not target.exists():
        response = requests.get(URL + FILENAME, timeout=60)
        # Fail loudly on an HTTP error instead of caching an error page as a ".gz" file.
        response.raise_for_status()
        # write_bytes opens, writes and closes the file in one call.
        target.write_bytes(response.content)

In [None]:
def _read_idx_labels(archive):
    """Read a gzip-compressed IDX label file, print its header and return the labels."""
    with gzip.open(archive, 'rb') as fp:
        magic, size = struct.unpack(">II", fp.read(8))
        # Labels are single bytes, so no byte-order handling is needed.
        labels = np.frombuffer(fp.read(), dtype=np.uint8)
    print(magic, size)
    return labels


def _read_idx_images(archive):
    """Read a gzip-compressed IDX image file, print its header and return a (size, nrows, ncols) array."""
    with gzip.open(archive, 'rb') as fp:
        magic, size, nrows, ncols = struct.unpack(">IIII", fp.read(16))
        pixels = np.frombuffer(fp.read(), dtype=np.uint8)
    images = pixels.reshape((size, nrows, ncols))
    print(magic, size, nrows, ncols)
    return images


def _export_split(split, images_archive, labels_archive):
    """Write PATH/<split>/<n>.png for every image and return [relative name, label] rows."""
    labels = _read_idx_labels(PATH / labels_archive)
    images = _read_idx_images(PATH / images_archive)
    if len(labels) != len(images):
        # zip() would silently drop the surplus entries; surface the corruption instead.
        raise ValueError(
            f'{split}: {len(images)} images but {len(labels)} labels'
        )

    out_dir = PATH / split
    out_dir.mkdir(parents=True, exist_ok=True)

    rows = []
    for n, (pixels, label) in enumerate(zip(images, labels)):
        outfilename = str(n) + '.png'
        pi.fromarray(pixels).save(out_dir / outfilename, format='png')
        rows.append([split + '/' + outfilename, label])
    return rows


def extract_kmnist():
    """Unpack the downloaded KMNIST archives into PNG files plus a CSV index.

    The "t10k" archives are written to PATH/valid and the "train" archives to
    PATH/train, one ``<n>.png`` per image.  PATH/labels.csv is written last and
    lists every image as ``name,label`` (valid rows first, then train rows),
    with ``name`` relative to PATH.

    Raises:
        ValueError: if an archive pair holds different numbers of images and labels.
    """
    metadata = _export_split('valid', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz')
    metadata += _export_split('train', 'train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')

    metadata_df = pd.DataFrame(metadata, columns=['name', 'label'])
    metadata_df.to_csv(PATH / 'labels.csv', index=False)

In [None]:
# labels.csv is the last file extract_kmnist() writes, so its presence marks a
# completed extraction.  The 'train' directory alone can exist after an
# interrupted run.
if not (PATH / 'labels.csv').exists():
    extract_kmnist()

In [None]:
# Build the fastai data bunch from the CSV index under PATH and preview a 3x3 batch.
if fastai_imported:
    data = ImageDataBunch.from_csv(PATH)
    data.show_batch(rows=3, figsize=(5,5))

In [None]:
# Create the ResNet-50 learner and reuse the fine-tuned weights when available.
if fastai_imported:
    learn = cnn_learner(data, models.resnet50, metrics=accuracy)
    try:
        # Learner.load returns the learner itself and raises when the checkpoint
        # file is missing, so the fallback has to be an exception handler.
        learn.load('kmnist-stage-2-50')
    except FileNotFoundError:
        print('Could not load model, training instead.')
        learn.fit(4)
        learn.save('kmnist-stage-1-50')

<img src="images/learning_rate_example.png" alt="learning rate chart" width="40%" align="right"/>

In [None]:
# learn = cnn_learner(data, models.resnet50, metrics=accuracy)
# learn.fit(4, 3e-3)
# learn.save('kmnist-stage-1-50');
# learn.lr_find()
# learn.recorder.plot()
# learn.unfreeze()
# learn.fit_one_cycle(4, slice(3e-5, 3e-4))
# learn.save('kmnist-stage-2-50');

In [None]:
# Collect per-sample losses on the validation set for the plots below.
if fastai_imported:
    interp = ClassificationInterpretation.from_learner(learn)
    losses,idxs = interp.top_losses()
    # Sanity check: one loss and one index per validation item.  Printed
    # explicitly because an expression nested in an `if` block is not echoed
    # as cell output.
    print(len(data.valid_ds)==len(losses)==len(idxs))

In [None]:
# Plot the 9 validation samples with the highest loss.
if fastai_imported:
    interp.plot_top_losses(9, figsize=(15,11))

In [None]:
# Plot the confusion matrix of the trained model.
if fastai_imported:
    interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# Load the test-set pixels directly from the IDX archive: a 16-byte big-endian
# header (magic, count, rows, cols) followed by one byte per pixel.
with gzip.open(PATH / 't10k-images-idx3-ubyte.gz', 'rb') as fp:
    magic, size, nrows, ncols = struct.unpack(">IIII", fp.read(16))
    data_raw = np.frombuffer(fp.read(), dtype=np.dtype('>u1')).reshape((size, nrows, ncols))
    print(magic, size, nrows, ncols)

# One PIL image per test sample.
images = [pi.fromarray(frame) for frame in data_raw]

# Matching labels: an 8-byte header (magic, count) followed by one byte per label.
with gzip.open(PATH / 't10k-labels-idx1-ubyte.gz', 'rb') as fp:
    label_header = fp.read(8)
    magic, size = struct.unpack(">II", label_header)
    labels = np.frombuffer(fp.read(), dtype=np.dtype('>u1'))
    print(magic, size)

In [None]:
# Rebuild the list of PIL images, one per slice along the first axis of data_raw.
images = [pi.fromarray(frame) for frame in data_raw]

In [None]:
# Display the first entry of `images` as the cell output.
images[0]

In [None]:
# Re-encode every test image as PNG bytes, the form the ipywidgets Image widget takes as `value`.
images = []
for n, frame in enumerate(data_raw):
    b = BytesIO()
    im = pi.fromarray(frame)
    im.save(b, format='png')
    images.append(b.getvalue())

In [None]:
# Show the first entry of `images` in an ipywidgets Image widget.
Image_widget(value=images[0])

In [None]:
# Grid of the first 130 KMNIST test images: 13 rows of `no_boxes_per_line` images.
# Layout has no `length` trait (it was silently ignored); `height` is the
# attribute that pairs with `width`.
z_container_layout = Layout(border='0px solid  grey', width='50px', height='50px', margin='0px 0px 0px 0px')
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='flex-start')
c_container_layout = Layout(border='0px solid black', width='50%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 10
fig1 = VBox(
    children=[
        HBox(
            children=[
                VBox(children=[Image_widget(value=image, layout=z_container_layout)], layout=a_container_layout)
                for image in images[no_boxes_per_line*m:no_boxes_per_line*m+no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for m in range(13)
    ],
    layout=c_container_layout,
)

In [None]:
# Grid of labels: the modern hiragana for each of the first 130 test labels,
# laid out as 13 rows of `no_boxes_per_line` cells.
hiragana = ['お', 'き', 'す', 'つ', 'な', 'は', 'ま', 'や', 'れ', 'を']
h_labels = [hiragana[class_id] for class_id in labels]
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='space-between')
c_container_layout = Layout(border='0px solid black', width='50%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 10
fig2 = VBox(
    children=[
        HBox(
            children=[
                HBox(children=[HTML(value=str(kana))], layout=a_container_layout)
                for kana in h_labels[row_start:row_start + no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for row_start in range(0, 13 * no_boxes_per_line, no_boxes_per_line)
    ],
    layout=c_container_layout,
)

In [None]:
# Show the image grid (fig1) and the label grid (fig2) side by side.
HBox(children=[fig1, fig2], layout=Layout(border='0px solid black', justify_content='space-around'))

In [None]:
# Collect, class by class (labels 0..9), the first 30 test images of each class.
character_images = []
for label in range(10):
    subscripts = [idx for idx, value in enumerate(labels) if value == label]
    character_images += [images[idx] for idx in subscripts[:30]]

In [None]:
# Display how many images were collected into character_images.
len(character_images)

In [None]:
# Grid of KMNIST samples: 10 rows of `no_boxes_per_line` images, i.e. one
# 30-image slice of character_images per row.
# Layout has no `length` trait (it was silently ignored); `height` is the
# attribute that pairs with `width`.
z_container_layout = Layout(border='0px solid  grey', width='80px', height='80px', margin='0px 0px 0px 0px')
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='flex-start')
c_container_layout = Layout(border='0px solid black', width='100%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 30
fig3 = VBox(
    children=[
        HBox(
            children=[
                VBox(children=[Image_widget(value=image, layout=z_container_layout)], layout=a_container_layout)
                for image in character_images[no_boxes_per_line*m:no_boxes_per_line*m+no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for m in range(10)
    ],
    layout=c_container_layout,
)

In [None]:
# Display the fig3 grid.
fig3

In [None]:
# 10 x 10 grid: one row per class, showing the first 10 samples of that class.
# Layout has no `length` trait (it was silently ignored); `height` is the
# attribute that pairs with `width`.
z_container_layout = Layout(border='0px solid  grey', width='80px', height='80px', margin='0px 0px 0px 0px')
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='flex-start')
c_container_layout = Layout(border='0px solid black', width='30%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 10
# NOTE(review): assumes character_images currently holds 30 consecutive samples
# per class (the state left by the cell that feeds fig3).  Stepping by 10
# through that list would put three rows of the same class in a row.
samples_per_class = 30
fig4 = VBox(
    children=[
        HBox(
            children=[
                VBox(children=[Image_widget(value=image, layout=z_container_layout)], layout=a_container_layout)
                for image in character_images[samples_per_class*m:samples_per_class*m+no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for m in range(10)
    ],
    layout=c_container_layout,
)

In [None]:
# Display the fig4 grid.
fig4

In [None]:
# Open the source JPEG from the local images/ directory (presumably a scanned
# manuscript page — TODO confirm).
b = BytesIO()
im = pi.open('images/200014735/image/200014735_00014.jpg')
# NOTE(review): the PNG copy written to `b` does not appear to be read by any
# later cell in view — confirm before removing.
im.save(b, format='png')

In [None]:
# Crop the (left, upper, right, lower) rectangle out of `im` and keep the
# result as PNG bytes for display in an Image widget.
box = (1000, 820, 5300, 2950)
region = im.crop(box)
_png_buffer = BytesIO()
region.save(_png_buffer, format='PNG')
imgByteArr = _png_buffer.getvalue()

In [None]:
# Show the fig4 grid next to the cropped image held in imgByteArr.
HBox(children=[fig4, VBox(children=[Image_widget(value=imgByteArr)], layout=Layout(width='60%'))], layout=Layout(border='0px solid black', justify_content='space-around'))

In [None]:
# The ten modern hiragana, in the same order as the `hiragana` list used for the label grid.
['お', 'き', 'す', 'つ', 'な', 'は', 'ま', 'や', 'れ', 'を']

In [None]:
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw

# Render the ten modern hiragana in black on a white 500x50 strip.
# NOTE: the font path points at a macOS system font; adjust it on other platforms.
font = ImageFont.truetype("/System/Library/Fonts/ヒラギノ明朝 ProN.ttc",40)
img = Image.new("RGBA", (500,50), (255,255,255))
draw = ImageDraw.Draw(img)
draw.text((0, 0), "お き す つ な は ま や れ を", (0,0,0), font=font)

In [None]:
# Display the rendered strip.
img

In [None]:
# Same strip with inverted colours: white glyphs on a black 520x50 background.
# NOTE: the font path points at a macOS system font; adjust it on other platforms.
font = ImageFont.truetype("/System/Library/Fonts/ヒラギノ明朝 ProN.ttc",40)
img = Image.new("RGBA", (520,50), (0,0,0))
draw = ImageDraw.Draw(img)
draw.text((0, 0), "お き す つ な は ま や れ を", (255,255,255), font=font)

In [None]:
# Display the rendered strip.
img

In [None]:
# Render each modern hiragana as a 28x28 greyscale tile (white glyph on black)
# and keep the tiles as PNG bytes in modern_images.
# NOTE: the font path points at a macOS system font; adjust it on other platforms.
font = ImageFont.truetype("/System/Library/Fonts/ヒラギノ明朝 ProN.ttc", 28)
modern_images = []
for char in ['お', 'き', 'す', 'つ', 'な', 'は', 'ま', 'や', 'れ', 'を']:
    img = Image.new("L", (28,28), 0)
    draw = ImageDraw.Draw(img)
    draw.text((0, 0), char, 255, font=font)
    b = BytesIO()
    img.save(b, format='png')
    modern_images.append(b.getvalue())

In [None]:
# Collect the first 10 test images of every class (labels 0..9) ...
character_images = []
for label in range(10):
    subscripts = [idx for idx, value in enumerate(labels) if value == label]
    character_images += [images[idx] for idx in subscripts[:10]]

# ... then overwrite the first image of each group of 10 with the rendered modern glyph.
for n in range(10):
    character_images[n * 10] = modern_images[n]

In [None]:
# 10 x 10 grid built from character_images, `no_boxes_per_line` images per row.
# Layout has no `length` trait (it was silently ignored); `height` is the
# attribute that pairs with `width`.
z_container_layout = Layout(border='0px solid  grey', width='80px', height='80px', margin='0px 0px 0px 0px')
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='flex-start')
c_container_layout = Layout(border='0px solid black', width='33%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 10
fig5 = VBox(
    children=[
        HBox(
            children=[
                VBox(children=[Image_widget(value=image, layout=z_container_layout)], layout=a_container_layout)
                for image in character_images[no_boxes_per_line*m:no_boxes_per_line*m+no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for m in range(10)
    ],
    layout=c_container_layout,
)

In [None]:
# Show fig4 and fig5 side by side inside a 1px red border.
HBox(children=[fig4, fig5], layout=Layout(border='1px solid red', justify_content='space-around'))

In [None]:
# Show the fig5 grid next to the cropped image held in imgByteArr.
HBox(children=[fig5, VBox(children=[Image_widget(value=imgByteArr)], layout=Layout(width='66%'))], 
     layout=Layout(border='0px solid black', justify_content='space-around'))

In [None]:
# Collect the first 30 test images of every class (labels 0..9) ...
character_images = []
for label in range(10):
    subscripts = [idx for idx, value in enumerate(labels) if value == label]
    character_images += [images[idx] for idx in subscripts[:30]]

# ... then overwrite the first image of each group of 30 with the rendered modern glyph.
for n in range(10):
    character_images[n * 30] = modern_images[n]

In [None]:
# Grid of KMNIST samples: 10 rows of `no_boxes_per_line` images, i.e. one
# 30-image slice of character_images per row.
# Layout has no `length` trait (it was silently ignored); `height` is the
# attribute that pairs with `width`.
z_container_layout = Layout(border='0px solid  grey', width='80px', height='80px', margin='0px 0px 0px 0px')
a_container_layout = Layout(border='0px solid red')
b_container_layout = Layout(border='0px solid green', justify_content='flex-start')
c_container_layout = Layout(border='0px solid black', width='100%', flex_direction='column', justify_content='space-around')
no_boxes_per_line = 30
fig6 = VBox(
    children=[
        HBox(
            children=[
                VBox(children=[Image_widget(value=image, layout=z_container_layout)], layout=a_container_layout)
                for image in character_images[no_boxes_per_line*m:no_boxes_per_line*m+no_boxes_per_line]
            ],
            layout=b_container_layout,
        )
        for m in range(10)
    ],
    layout=c_container_layout,
)

In [None]:
# Display the fig6 grid.
fig6