In [1]:
import glob

from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from PIL import Image
import keras

In [2]:
image_paths = glob.glob('./output/*.png')
image_paths

['./output/a39a.png',
 './output/1f64.png',
 './output/1a02.png',
 './output/0b2f.png',
 './output/1804.png',
 './output/2cc6.png',
 './output/2d3a.png',
 './output/2cb2.png',
 './output/1a16.png',
 './output/1faa.png',
 './output/0b08.png',
 './output/1810.png',
 './output/119d.png',
 './output/1f70.png',
 './output/2c79.png',
 './output/2c4c.png',
 './output/2dd4.png',
 './output/a38e.png',
 './output/0e46.png',
 './output/0298.png',
 './output/18f3.png',
 './output/1186.png',
 './output/1f6b.png',
 './output/2c51.png',
 './output/a397.png',
 './output/2d37.png',
 './output/1a0d.png',
 './output/1838.png',
 './output/180b.png',
 './output/0b20.png',
 './output/1fa7.png',
 './output/2cbd.png',
 './output/2d23.png',
 './output/2c45.png',
 './output/a383.png',
 './output/2ddb.png',
 './output/0e52.png',
 './output/0bee.png',
 './output/1192.png',
 './output/1179.png',
 './output/114c.png',
 './output/16d4.png',
 './output/0e8c.png',
 './output/0267.png',
 './output/2c9d.png',
 './output

In [3]:
X = np.array(Image.open(image_paths[0])).reshape(1, 28, 28)
for image_path in image_paths[1:]:
    X = np.concatenate((X, np.array(Image.open(image_path)).reshape(1, 28, 28)))

X.shape

(12828, 28, 28)

In [4]:
X = X.astype('float32') / 255

In [5]:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import optimizers
from keras import backend as K

input_img = Input(shape=(28, 28, 1))

x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(4, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)

# x = Flatten()(x),
# encoded = Dense(16, activation='relu')(x),
# x = Dense(64, activation='relu')(encoded),
# x = Reshape([4, 4, 4], input_shape=(64,))(x),

x = Conv2D(4, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 16)        160       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 8)         1160      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 8)           0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 4)           292       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 4)           0     

In [6]:
model = autoencoder
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9)
model.compile(optimizer=sgd, loss='binary_crossentropy')

model.fit(X, X,
          epochs=25,
          batch_size=16,
          shuffle=True)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x13eae7110>

In [7]:
prediction_model = keras.Sequential()
for i in range(7):
    prediction_model.add(model.layers[i])

prediction_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 16)        160       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 8)         1160      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 8)           0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 4)           292       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 4)           0         
Total params: 1,612
Trainable params: 1,612
Non-trainable params: 0
______________________________________________________

In [8]:
v = prediction_model.predict_on_batch(X.reshape(-1, 28, 28, 1))
v.shape

(12828, 4, 4, 4)

In [9]:
df = pd.DataFrame(v.reshape(-1, v.shape[1] * v.shape[2] * v.shape[3]))
code = [path.split('/')[2].split('.')[0] for path in image_paths]
df = df.assign(Code=code)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,Code
0,0.122313,1.087518,0.000000,1.268470,3.679622,4.010160,0.000000,5.043204,2.624470,1.823944,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,a39a
1,0.235773,1.591107,0.000000,1.074342,3.119168,4.061254,0.000000,2.766952,2.060194,1.083211,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,1f64
2,0.202460,0.623280,0.000000,0.461253,0.829757,0.380272,0.176684,0.724164,0.431529,0.650142,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,1a02
3,0.390531,1.186383,0.000000,0.743788,1.852367,2.047860,0.000000,3.780098,1.506696,1.184449,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,0b2f
4,0.080888,0.516111,0.000000,0.704435,1.679603,2.085731,0.000000,2.160766,0.653180,0.847321,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,1804
5,0.147023,0.565901,0.000000,0.503410,0.909958,1.204132,0.000000,1.017088,0.842522,0.710913,...,0.583558,0.108954,0.527491,0.067597,0.551539,0.043329,0.190522,0.028578,0.490774,2cc6
6,0.267012,0.834626,0.000000,0.795506,2.103314,1.535362,0.000000,2.695942,1.323042,0.591649,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,2d3a
7,0.370144,0.728472,0.000000,0.750826,1.978030,1.243275,0.325313,1.231810,0.739678,0.673404,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,2cb2
8,0.486709,0.540050,0.000000,0.575943,1.180828,0.549761,0.000000,0.254759,1.120549,0.685078,...,0.491451,0.031572,0.507155,0.011767,0.493734,0.043329,0.190522,0.028578,0.490774,1a16
9,0.644648,4.579720,0.064800,1.189977,3.469482,5.135531,0.000000,4.250576,2.106666,1.232667,...,0.498496,0.539712,0.692774,0.093628,0.618884,0.043329,0.190522,0.028578,0.490774,1faa


In [11]:
from sklearn.decomposition import PCA

pca = PCA().fit(v.reshape(-1, v.shape[1] * v.shape[2] * v.shape[3]))
cumulative_contribution_ratio = pca.explained_variance_ratio_.cumsum()
cumulative_contribution_ratio

array([0.4579105 , 0.62828934, 0.7334829 , 0.79893106, 0.8497356 ,
       0.8730796 , 0.89345056, 0.9115216 , 0.92367345, 0.93466765,
       0.9446089 , 0.95182234, 0.9586344 , 0.9640743 , 0.96928203,
       0.9726888 , 0.9757795 , 0.97838145, 0.9808539 , 0.9831015 ,
       0.9851157 , 0.9868203 , 0.98820055, 0.9894499 , 0.99064356,
       0.9917271 , 0.99273115, 0.9935545 , 0.99429053, 0.9949416 ,
       0.99557066, 0.9961801 , 0.9966738 , 0.99709606, 0.99748296,
       0.9978239 , 0.9981113 , 0.99837875, 0.9986328 , 0.9988656 ,
       0.9990639 , 0.9992284 , 0.9993828 , 0.99952936, 0.9996592 ,
       0.9997484 , 0.9998217 , 0.9998818 , 0.99992365, 0.9999459 ,
       0.99996626, 0.99997514, 0.999982  , 0.9999876 , 0.9999923 ,
       0.999996  , 0.9999977 , 0.9999988 , 0.9999993 , 0.9999996 ,
       0.9999997 , 0.99999976, 0.9999998 , 0.9999998 ], dtype=float32)

In [12]:
decomposed_v = PCA(n_components=16).fit_transform(v.reshape(-1, v.shape[1] * v.shape[2] * v.shape[3]))
df = pd.DataFrame(decomposed_v)
code = [path.split('/')[2].split('.')[0] for path in image_paths]
df = df.assign(Code=code)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,Code
0,3.106442,1.841746,1.471731,1.175872,-1.668063,-1.479198,-0.399084,-1.729137,0.536944,0.093548,-0.525621,-0.528956,0.192087,-0.284958,0.002506,-0.470604,a39a
1,0.654448,-0.251579,0.937545,1.817708,-0.511268,0.890338,0.296542,-1.003554,0.631095,-0.601195,0.566769,0.570161,0.231691,0.354147,-0.136003,-0.171589,1f64
2,-6.189570,-2.018452,-1.200590,1.836893,-0.286404,0.633353,0.958065,0.408850,-0.697468,-0.025899,0.015872,-0.021477,0.187608,0.286263,-0.342344,0.153008,1a02
3,2.998296,-0.346382,-2.075089,-1.943368,0.704311,-0.808703,-0.168690,-1.665234,-0.586548,-0.548458,-0.855040,0.366730,-0.277910,-0.127139,0.490649,-0.061825,0b2f
4,-6.066229,1.619178,0.018200,0.702063,-0.835954,0.055822,0.144378,-0.907101,-1.359327,0.419818,0.150712,0.239901,-0.234855,0.154709,0.271618,0.022399,1804
5,-6.538302,-0.255378,0.007245,1.892340,0.485622,-0.276396,-1.120747,0.126411,-0.422665,0.747624,0.504343,-0.227303,0.312870,-0.198956,0.672300,0.589360,2cc6
6,-2.037426,-1.682822,1.859403,0.169517,-0.649945,-0.991639,0.296762,0.516223,0.529198,-0.050331,-0.838102,0.330741,0.364350,-0.253398,-0.427752,-0.208479,2d3a
7,-5.333674,1.750943,-1.918792,-2.751790,-0.080464,0.983234,0.397305,0.263313,0.834899,0.436256,-0.609805,-0.288871,-0.086332,0.465225,-0.239532,-0.050083,2cb2
8,-0.905101,-4.282963,-2.967629,0.220225,0.368117,1.364718,0.098975,-0.493194,0.399301,-0.804985,0.069234,-0.319924,0.126542,0.334417,-0.140512,0.089771,1a16
9,2.267342,0.986296,3.585218,2.607619,0.588987,0.544692,0.949983,0.642096,0.309071,-0.853858,0.685863,0.370527,1.158023,-0.041958,0.786542,-0.103272,1faa


In [13]:
ord('S')

83

In [14]:
df[df['Code'] == '0083']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,Code
12697,8.971355,2.442003,-3.681734,1.233327,1.933487,2.665909,-0.150377,2.894092,-0.291634,2.672046,-2.332411,0.086173,0.307735,0.042796,1.491852,0.313369,83


In [16]:
s_vec = np.array(df[df['Code'] == '0083'].iloc[:, 0:16])[0]
s_vec.shape

(16,)

In [18]:
def cos_sims(sc_vec):
    res = []
    for idx, row in df.iterrows():
        v1 = row.iloc[0:16]
        cos_sim = np.dot(v1, sc_vec) / (np.linalg.norm(v1) * np.linalg.norm(sc_vec))
        res.append([cos_sim, row['Code']])
    return res

In [20]:
search_vec = np.array(df[df['Code'] == '0fd6'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x0fd6', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f accuracy)' % (candidate, chr(int('0x' + candidate, base=16)), score))

࿖ に類似する見た目の文字：
0fd6: ࿖  (1.000000 accuracy)
0fd8: ࿘  (0.996058 accuracy)
24fe: ⓾  (0.964396 accuracy)
270c: ✌  (0.964000 accuracy)
24f8: ⓸  (0.957751 accuracy)
0fd5: ࿕  (0.952654 accuracy)
24fa: ⓺  (0.952103 accuracy)
24fd: ⓽  (0.951915 accuracy)
24f9: ⓹  (0.951811 accuracy)
2473: ⑳  (0.951671 accuracy)
0bf5: ௵  (0.951220 accuracy)
246a: ⑪  (0.951106 accuracy)
26fe: ⛾  (0.950702 accuracy)
278e: ➎  (0.950461 accuracy)
0fd7: ࿗  (0.950323 accuracy)
246d: ⑭  (0.949502 accuracy)
24f5: ⓵  (0.949401 accuracy)
24fc: ⓼  (0.949156 accuracy)
206e: ⁮  (0.948605 accuracy)
278c: ➌  (0.948133 accuracy)
2472: ⑲  (0.947828 accuracy)
269c: ⚜  (0.947510 accuracy)
206f: ⁯  (0.947163 accuracy)
2791: ➑  (0.945987 accuracy)
24f6: ⓶  (0.945937 accuracy)
278b: ➋  (0.945792 accuracy)
278f: ➏  (0.945412 accuracy)
2469: ⑩  (0.945351 accuracy)
24fb: ⓻  (0.944977 accuracy)
2792: ➒  (0.944932 accuracy)
24f7: ⓷  (0.944904 accuracy)
246f: ⑯  (0.943887 accuracy)
2a37: ⨷  (0.943792 accuracy)
246b: ⑫  (0.941264 accuracy)

In [23]:
search_vec = np.array(df[df['Code'] == '004c'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x004c', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

L に類似する見た目の文字：
a4e1: ꓡ  (1.000000 %)
13de: Ꮮ  (1.000000 %)
004c: L  (1.000000 %)
2cd0: Ⳑ  (0.995905 %)
14aa: ᒪ  (0.995347 %)
a6da: ꛚ  (0.992697 %)
053c: Լ  (0.992605 %)
1e36: Ḷ  (0.980792 %)
013f: Ŀ  (0.978215 %)
abae: ꮮ  (0.972701 %)
029f: ʟ  (0.972701 %)
a746: Ꝇ  (0.971995 %)
2cd1: ⳑ  (0.967773 %)
221f: ∟  (0.959293 %)
1602: ᘂ  (0.955071 %)
1490: ᒐ  (0.955071 %)
00bf: ¿  (0.950733 %)
ffa4: ﾤ  (0.947918 %)
a6e2: ꛢ  (0.936310 %)
1ca1: Ს  (0.935910 %)
10e1: ს  (0.935910 %)
a6b3: ꚳ  (0.932501 %)
013b: Ļ  (0.928695 %)
06ce: ێ  (0.928314 %)
07c7: ߇  (0.926541 %)
063d: ؽ  (0.925057 %)
227d: ≽  (0.924393 %)
2d3e: ⴾ  (0.922373 %)
29a3: ⦣  (0.920853 %)
10b1: Ⴑ  (0.920074 %)
2c97: ⲗ  (0.916013 %)
063f: ؿ  (0.912526 %)
06d3: ۓ  (0.912517 %)
a492: ꒒  (0.912222 %)
13d3: Ꮣ  (0.911868 %)
2d34: ⴴ  (0.910775 %)
231e: ⌞  (0.910654 %)
25fa: ◺  (0.910400 %)
a721: ꜡  (0.909772 %)
1d64: ᵤ  (0.909480 %)
2099: ₙ  (0.908639 %)
227f: ≿  (0.906153 %)
063e: ؾ  (0.905431 %)
06bd: ڽ  (0.905141 %)
1efc: Ỽ  (0.90444

In [24]:
search_vec = np.array(df[df['Code'] == '0052'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x0052', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

R に類似する見た目の文字：
a4e3: ꓣ  (1.000000 %)
0052: R  (1.000000 %)
1e5a: Ṛ  (0.957866 %)
1ca9: Ჩ  (0.933543 %)
10e9: ჩ  (0.933543 %)
2d0c: ⴌ  (0.923481 %)
13d2: Ꮢ  (0.910768 %)
0494: Ҕ  (0.905420 %)
02ad: ʭ  (0.902464 %)
13f2: Ᏺ  (0.900712 %)
2a43: ⩃  (0.900004 %)
2c64: Ɽ  (0.892653 %)
10ac: Ⴌ  (0.892532 %)
1c5e: ᱞ  (0.881261 %)
053e: Ծ  (0.877575 %)
1fb1: ᾱ  (0.873972 %)
a7c7: Ꟈ  (0.871768 %)
13a1: Ꭱ  (0.871699 %)
028e: ʎ  (0.868829 %)
0266: ɦ  (0.867949 %)
a27f: ꉿ  (0.864683 %)
04e8: Ө  (0.864493 %)
0472: Ѳ  (0.864493 %)
03f4: ϴ  (0.864493 %)
019f: Ɵ  (0.864493 %)
042e: Ю  (0.862574 %)
a4e8: ꓨ  (0.862002 %)
042f: Я  (0.861585 %)
10a3: Ⴃ  (0.857543 %)
a7a8: Ꞩ  (0.856353 %)
2c02: Ⰲ  (0.852408 %)
a4d0: ꓐ  (0.850959 %)
13f4: Ᏼ  (0.850959 %)
0412: В  (0.850959 %)
0392: Β  (0.850959 %)
0042: B  (0.850959 %)
211e: ℞  (0.850511 %)
0182: Ƃ  (0.850398 %)
a668: Ꙩ  (0.848251 %)
0398: Θ  (0.848251 %)
0298: ʘ  (0.848251 %)
2647: ♇  (0.847379 %)
1c5c: ᱜ  (0.845646 %)
13eb: Ꮻ  (0.842631 %)
0156: Ŗ  (0.84231

In [26]:
search_vec = np.array(df[df['Code'] == '0079'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x0079', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

y に類似する見た目の文字：
0079: y  (1.000000 %)
ab5a: ꭚ  (0.996572 %)
10e7: ყ  (0.985214 %)
1e8f: ẏ  (0.982449 %)
0ed8: ໘  (0.977707 %)
1e73: ṳ  (0.977487 %)
1ef5: ỵ  (0.975661 %)
1fb3: ᾳ  (0.975567 %)
2d18: ⴘ  (0.972671 %)
1e77: ṷ  (0.966272 %)
10e4: ფ  (0.966099 %)
10d5: ვ  (0.963923 %)
0581: ց  (0.963332 %)
0261: ɡ  (0.963332 %)
a76f: ꝯ  (0.962651 %)
1d8c: ᶌ  (0.960398 %)
0173: ų  (0.960001 %)
1ee5: ụ  (0.957244 %)
018d: ƍ  (0.957129 %)
1d8d: ᶍ  (0.955795 %)
1d99: ᶙ  (0.954627 %)
10df: ჟ  (0.954463 %)
10f8: ჸ  (0.952431 %)
01eb: ǫ  (0.950737 %)
10ea: ც  (0.950524 %)
024b: ɋ  (0.948481 %)
1e75: ṵ  (0.948227 %)
01b4: ƴ  (0.948091 %)
057e: վ  (0.947706 %)
2d1e: ⴞ  (0.947627 %)
1ecd: ọ  (0.946740 %)
0758: ݘ  (0.945819 %)
04ab: ҫ  (0.945190 %)
10e3: უ  (0.944951 %)
015f: ş  (0.944050 %)
0263: ɣ  (0.942355 %)
0219: ș  (0.941879 %)
0499: ҙ  (0.939367 %)
051b: ԛ  (0.939141 %)
0071: q  (0.939141 %)
1ea1: ạ  (0.939057 %)
10fd: ჽ  (0.938963 %)
ab53: ꭓ  (0.938777 %)
03c2: ς  (0.937409 %)
10f7: ჷ  (0.93736

In [27]:
search_vec = np.array(df[df['Code'] == '006f'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x006f', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

o に類似する見た目の文字：
1d0f: ᴏ  (1.000000 %)
0ed0: ໐  (1.000000 %)
0585: օ  (1.000000 %)
043e: о  (1.000000 %)
03bf: ο  (1.000000 %)
006f: o  (1.000000 %)
0e84: ຄ  (0.989822 %)
0e8d: ຍ  (0.989710 %)
03b1: α  (0.988955 %)
0e50: ๐  (0.988048 %)
237a: ⍺  (0.987319 %)
0e9a: ບ  (0.983164 %)
0e22: ย  (0.982518 %)
222a: ∪  (0.980065 %)
1ecd: ọ  (0.979523 %)
0e86: ຆ  (0.979504 %)
0e89: ຉ  (0.978662 %)
1d1c: ᴜ  (0.977479 %)
03c5: υ  (0.977479 %)
abad: ꮭ  (0.976825 %)
0e99: ນ  (0.976167 %)
0251: ɑ  (0.975858 %)
029a: ʚ  (0.975394 %)
0e81: ກ  (0.974443 %)
057d: ս  (0.974301 %)
0075: u  (0.974301 %)
022f: ȯ  (0.973956 %)
a79f: ꞟ  (0.973574 %)
0e8f: ຏ  (0.971180 %)
ab70: ꭰ  (0.970821 %)
0ea7: ວ  (0.969742 %)
ab9b: ꮛ  (0.969693 %)
ab64: ꭤ  (0.968576 %)
a79d: ꞝ  (0.967671 %)
0ed7: ໗  (0.967433 %)
0e94: ດ  (0.967308 %)
abaf: ꮯ  (0.965382 %)
1d04: ᴄ  (0.965382 %)
0441: с  (0.965382 %)
03f2: ϲ  (0.965382 %)
0063: c  (0.965382 %)
050d: ԍ  (0.964635 %)
abaa: ꮪ  (0.964191 %)
a731: ꜱ  (0.964191 %)
0455: ѕ  (0.96419

In [28]:
search_vec = np.array(df[df['Code'] == '0068'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x0068', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

h に類似する見た目の文字：
0068: h  (1.000000 %)
1e23: ḣ  (0.975681 %)
1e25: ḥ  (0.966625 %)
0570: հ  (0.936403 %)
04ba: Һ  (0.936403 %)
028e: ʎ  (0.926064 %)
a694: Ꚕ  (0.924225 %)
056b: ի  (0.912628 %)
1c9d: Ო  (0.903540 %)
1e45: ṅ  (0.902866 %)
13c2: Ꮒ  (0.888352 %)
10b9: Ⴙ  (0.881372 %)
2c8e: Ⲏ  (0.876936 %)
2c94: Ⲕ  (0.874594 %)
1c60: ᱠ  (0.874031 %)
01f9: ǹ  (0.873883 %)
a4e7: ꓧ  (0.869465 %)
13bb: Ꮋ  (0.869465 %)
041d: Н  (0.869465 %)
0397: Η  (0.869465 %)
0048: H  (0.869465 %)
a6ce: ꛎ  (0.867773 %)
00df: ß  (0.864468 %)
a44d: ꑍ  (0.863205 %)
a727: ꜧ  (0.862397 %)
0195: ƕ  (0.860729 %)
0266: ɦ  (0.857522 %)
043f: п  (0.855861 %)
1500: ᔀ  (0.853368 %)
a743: ꝃ  (0.852835 %)
1e22: Ḣ  (0.850745 %)
22c2: ⋂  (0.847946 %)
1e27: ḧ  (0.847601 %)
a6b8: ꚸ  (0.846287 %)
a4f5: ꓵ  (0.844815 %)
0548: Ո  (0.844815 %)
1c55: ᱕  (0.843956 %)
154a: ᕊ  (0.843167 %)
1f70: ὰ  (0.841816 %)
13d2: Ꮢ  (0.840094 %)
006b: k  (0.839835 %)
1f75: ή  (0.837528 %)
1c98: Ი  (0.837037 %)
0578: ո  (0.836750 %)
006e: n  (0.83675

In [29]:
search_vec = np.array(df[df['Code'] == '0053'].iloc[:, 0:16])[0]
res = cos_sims(search_vec)
res.sort(reverse=True)

print('%s に類似する見た目の文字：' % chr(int('0x0053', base=16)))

for (score, candidate) in res[:200]:
    print('%s: %s  (%.6f %%)' % (candidate, chr(int('0x' + candidate, base=16)), score))

S に類似する見た目の文字：
a4e2: ꓢ  (1.000000 %)
13da: Ꮪ  (1.000000 %)
054f: Տ  (1.000000 %)
0405: Ѕ  (1.000000 %)
0053: S  (1.000000 %)
1cbd: Ჽ  (0.988203 %)
0033: 3  (0.987167 %)
1c95: Ვ  (0.982961 %)
a644: Ꙅ  (0.982101 %)
01a7: Ƨ  (0.982101 %)
1cb7: Ჷ  (0.978336 %)
1c94: Ე  (0.975750 %)
0551: Ց  (0.970811 %)
a73e: Ꜿ  (0.970620 %)
03ff: Ͽ  (0.970620 %)
a4db: ꓛ  (0.967856 %)
2183: Ↄ  (0.967856 %)
03fd: Ͻ  (0.967856 %)
0186: Ɔ  (0.967856 %)
0038: 8  (0.967203 %)
0190: Ɛ  (0.967068 %)
13bc: Ꮌ  (0.965023 %)
0417: З  (0.962512 %)
218a: ↊  (0.962202 %)
a7ab: Ɜ  (0.960520 %)
13cb: Ꮛ  (0.959914 %)
1e62: Ṣ  (0.958762 %)
2cde: Ⳟ  (0.958742 %)
218b: ↋  (0.958076 %)
215e: ⅞  (0.953398 %)
03e8: Ϩ  (0.953104 %)
a6ef: ꛯ  (0.952759 %)
0510: Ԑ  (0.952627 %)
0035: 5  (0.952482 %)
1cbf: Ჿ  (0.952212 %)
01bc: Ƽ  (0.950337 %)
a4f3: ꓳ  (0.950072 %)
0555: Օ  (0.950072 %)
041e: О  (0.950072 %)
039f: Ο  (0.950072 %)
004f: O  (0.950072 %)
a4da: ꓚ  (0.949517 %)
13df: Ꮯ  (0.949517 %)
0421: С  (0.949517 %)
03f9: Ϲ  (0.94951

In [30]:
df.to_csv('vector.csv')