In [1]:
import numpy as np
from sklearn.cluster import KMeans

from gensim.models.keyedvectors import KeyedVectors
import procrustes
from scipy.stats import ortho_group, wasserstein_distance

import os
import sys
import urllib.request
import requests
import time
import datetime
import pickle
import json

In [2]:
def embedding_load(en_path='./fasttext/wiki.en.vec', ko_path='./fasttext/wiki.ko.vec'):
    """
    Load the pretrained English and Korean embedding files.

    Parameters
    ----------
    en_path : str, optional
        Path handed to ``KeyedVectors.load_word2vec_format`` for the English
        vectors. Defaults to the location this notebook originally hard-coded.
    ko_path : str, optional
        Path handed to ``KeyedVectors.load_word2vec_format`` for the Korean
        vectors. Defaults to the location this notebook originally hard-coded.

    Returns
    -------
    tuple
        ``(en_model, ko_model)``, in that order.
    """
    en_model = KeyedVectors.load_word2vec_format(en_path)
    ko_model = KeyedVectors.load_word2vec_format(ko_path)

    return en_model, ko_model

In [113]:
# Load both pretrained embedding models through embedding_load().
en_model, ko_model = embedding_load()

In [206]:
def _read_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: pickle.load can execute arbitrary code; only use files from a trusted source.
# Noun and first verb dictionaries (Korean / English).
ko_noun_dict = _read_pickle('./data/ko_noun_dict(vocab_size-1250).pkl')
en_noun_dict = _read_pickle('./data/en_noun_dict(vocab_size-1250).pkl')
ko_verb_dict = _read_pickle('./data/ko_verb_dict(vocab_size-547).pkl')
en_verb_dict = _read_pickle('./data/en_verb_dict(vocab_size-547).pkl')

# NOTE(review): the "*_v_1.pkl" files are bound to the adjective names;
# confirm that these files really hold adjectives.
ko_adjective_dict = _read_pickle('./data/ko_v_1.pkl')
en_adjective_dict = _read_pickle('./data/en_v_1.pkl')

# Second pair of verb dictionaries.
ko_verb_dict_2 = _read_pickle('./data/ko_verb_1.pkl')
en_verb_dict_2 = _read_pickle('./data/en_verb_1.pkl')

In [208]:
def _stack_values(*word_dicts):
    """Collect the values of each dict, in the order given, into one NumPy array."""
    return np.array([vec for word_dict in word_dicts for vec in word_dict.values()])


# Per-category matrices; the Korean and English arrays are built from the
# matching dictionaries in the same order.
ko_noun_data = _stack_values(ko_noun_dict)
en_noun_data = _stack_values(en_noun_dict)
ko_verb_data = _stack_values(ko_verb_dict, ko_verb_dict_2)
en_verb_data = _stack_values(en_verb_dict, en_verb_dict_2)
ko_adjective_data = _stack_values(ko_adjective_dict)
en_adjective_data = _stack_values(en_adjective_dict)

# Combined matrices: nouns, then both verb dictionaries, then adjectives.
ko_data = _stack_values(ko_noun_dict, ko_verb_dict, ko_verb_dict_2, ko_adjective_dict)
en_data = _stack_values(en_noun_dict, en_verb_dict, en_verb_dict_2, en_adjective_dict)

In [253]:
# Inspect the shape of the stacked English adjective matrix.
en_adjective_data.shape

(763, 300)

In [67]:
# Fit each Procrustes variant once on the full stacked ko/en matrices.
# result_generic.t is reused further down to map ko_model.vectors.
result_orth = procrustes.orthogonal(ko_data, en_data, scale=False, translate=False)
result_generic = procrustes.generic(ko_data, en_data, scale=False, translate=False)
# result_orth_2side = procrustes.orthogonal_2sided(ko_data, en_data, scale=False, translate=False)
result_orth_2side_2 = procrustes.orthogonal_2sided(ko_data, en_data, single=False, scale=False, translate=False)
# permutation / rotational / symmetric are called with the library defaults.
result_permut = procrustes.permutation(ko_data, en_data)
# result_soft = procrustes.softassign(ko_data, en_data, kopt=2)
result_rotate = procrustes.rotational(ko_data, en_data)
result_symmetric = procrustes.symmetric(ko_data, en_data)

# result_kopt = procrustes.kopt.ko

In [214]:
def train_procrustes(ko_data, en_data, print_f=True, return_state='Generic', num_data=500):
    """
    Fit Procrustes transformations between two row-aligned matrices.

    Only the first ``num_data`` rows of each input are used. The selected
    method and the number of rows actually used are always printed.

    Parameters
    ----------
    ko_data, en_data : array-like
        Passed, after truncation, as the first and second argument of each
        ``procrustes`` solver. Callers in this notebook also pass the English
        matrix first to obtain the reverse mapping.
    print_f : bool, optional
        When True, all five variants are fitted and each result's ``error``
        divided by the number of rows used is printed. When False, only the
        requested variant is fitted and nothing beyond the two header lines
        is printed.
    return_state : {'Generic', 'Orth', 'Permute', 'Rotate', 'Symmetric'}, optional
        Which fit to return.
    num_data : int, optional
        Number of leading rows to keep from each input.

    Returns
    -------
    object
        The result object returned by the selected ``procrustes`` solver.

    Raises
    ------
    ValueError
        If ``return_state`` is not one of the supported names.
    """
    ko_data = ko_data[:num_data]
    en_data = en_data[:num_data]

    # Deferred calls so that print_f=False only pays for the requested fit.
    # Insertion order is also the order in which the errors are printed.
    solvers = {
        'Orth': lambda: procrustes.orthogonal(ko_data, en_data, scale=False, translate=False),
        'Generic': lambda: procrustes.generic(ko_data, en_data, scale=False, translate=False),
        'Permute': lambda: procrustes.permutation(ko_data, en_data),
        'Rotate': lambda: procrustes.rotational(ko_data, en_data),
        'Symmetric': lambda: procrustes.symmetric(ko_data, en_data),
    }
    # The earlier version also ran procrustes.orthogonal_2sided here, but its
    # result was never printed or returned, so the call has been dropped.

    if return_state not in solvers:
        # Previously an unknown name fell through and returned None.
        raise ValueError(
            'return_state must be one of {}, got {!r}'.format(list(solvers), return_state)
        )

    print(return_state)
    n_rows = len(ko_data)
    print(n_rows)

    if not print_f:
        # Previously this path returned 0 instead of the requested fit.
        return solvers[return_state]()

    results = {label: fit() for label, fit in solvers.items()}
    for label, result in results.items():
        print(label + ' error : ', result.error / n_rows)
    return results[return_state]

In [193]:
# Fit per-category maps in both directions; with no return_state given, each
# call returns the 'Generic' fit.
# NOTE(review): the recorded output reports 1250 / 547 rows, which does not
# match the current default num_data=500 of train_procrustes -- the output
# appears to come from an earlier version of the function; re-run to confirm.
noun_ko2en = train_procrustes(ko_noun_data, en_noun_data)
noun_en2ko = train_procrustes(en_noun_data, ko_noun_data)
verb_ko2en = train_procrustes(ko_verb_data, en_verb_data)
verb_en2ko = train_procrustes(en_verb_data, ko_verb_data)

Generic
1250
Orth error :  16.40404037708212
Generic error :  9.524618462204234
Permute error :  33.82008071566402
Rotate error :  16.404044191924118
Symmetric error :  12.009193847268763
Generic
1250
Orth error :  16.40404037708212
Generic error :  9.711636035447986
Permute error :  33.82008071566402
Rotate error :  16.40404296303643
Symmetric error :  12.906590739041302
Generic
547
Orth error :  12.555398170736241
Generic error :  4.819750563867699
Permute error :  23.232146164195456
Rotate error :  12.555442788835053
Symmetric error :  8.104849233360943
Generic


KeyboardInterrupt: 

In [None]:
# Cross-category transfer: map the English verb vectors with the noun-trained
# en->ko transform, and the English noun vectors with the verb-trained one.
changee = np.dot(en_verb_data, noun_en2ko.t)
changee2 = np.dot(en_noun_data, verb_en2ko.t)

In [216]:
def wasserstein_error(ko_data, en_data, ko_eval_data, en_eval_data, method='Generic', num_data=500):
    """
    Train an en->ko Procrustes map and score it on an evaluation set.

    The map is fitted with ``train_procrustes(en_data, ko_data, ...)``, applied
    to ``en_eval_data`` by matrix product with the result's ``t`` attribute, and
    each mapped row is compared with the row of ``ko_eval_data`` at the same
    index using ``scipy.stats.wasserstein_distance``.

    NOTE(review): ``wasserstein_distance`` treats each vector as a 1-D sample
    of values, so the score compares the distributions of the components and
    is insensitive to their order -- confirm this is the intended metric.

    Parameters
    ----------
    ko_data, en_data : array-like
        Training matrices forwarded to ``train_procrustes``.
    ko_eval_data, en_eval_data : array-like
        Evaluation matrices; row ``i`` of the mapped ``en_eval_data`` is
        compared with row ``i`` of ``ko_eval_data``.
    method : str, optional
        Forwarded as ``return_state`` to ``train_procrustes``.
    num_data : int, optional
        Forwarded as ``num_data`` to ``train_procrustes``.

    Returns
    -------
    float
        Mean Wasserstein distance over the evaluation rows (also printed).

    Raises
    ------
    ValueError
        If the mapped evaluation set has no rows.
    """
    en2ko = train_procrustes(en_data, ko_data, return_state=method, num_data=num_data)

    mapped_eval = np.dot(en_eval_data, en2ko.t)
    n_rows = len(mapped_eval)
    if n_rows == 0:
        raise ValueError('en_eval_data is empty; cannot average over zero rows')

    # Accumulate under a name that does not shadow this function.
    total_distance = 0
    for i in range(n_rows):
        total_distance += wasserstein_distance(np.ndarray.flatten(mapped_eval[i]), ko_eval_data[i])

    mean_distance = total_distance / n_rows
    print(mean_distance)
    # Returned as well as printed so that callers can collect the scores.
    return mean_distance

In [242]:
# Train an 'Orth' map on 250 randomly chosen aligned rows of the combined
# data and score it on the full combined data.
# NOTE: no random seed is set, so the selected rows differ between runs.
shuffle_idx = np.arange(ko_data.shape[0])
np.random.shuffle(shuffle_idx)
train_rows = shuffle_idx[:250]
wasserstein_error(ko_data[train_rows], en_data[train_rows], ko_data, en_data, 'Orth')

Orth
250
Orth error :  8.381572089633497
Generic error :  0.15655668510885243
Permute error :  26.80725248468353
Rotate error :  8.381573505125063
Symmetric error :  6.81333750291196
0.039071158976026935


In [213]:
# Score every Procrustes variant: first trained on each word category
# (verb, noun, adjective), then trained on the combined data. Evaluation is
# always on the full combined matrices.
_methods = ('Generic', 'Orth', 'Permute', 'Rotate', 'Symmetric')
_category_pairs = (
    (ko_verb_data, en_verb_data),
    (ko_noun_data, en_noun_data),
    (ko_adjective_data, en_adjective_data),
)

for _method in _methods:
    for _ko_train, _en_train in _category_pairs:
        wasserstein_error(_ko_train, _en_train, ko_data, en_data, _method)

for _method in _methods:
    wasserstein_error(ko_data, en_data, ko_data, en_data, _method)

Generic
500
Orth error :  11.802725513935554
Generic error :  4.5692517239023
Permute error :  22.817105655034137
Rotate error :  11.802730423093374
Symmetric error :  8.751049468630184
0.09668974538827706
Generic
500
Orth error :  11.919351581852068
Generic error :  4.803395273457398
Permute error :  33.12029620725288
Rotate error :  11.919365589315618
Symmetric error :  10.146755936244944
0.07078329789355613
Generic
500
Orth error :  13.160810667956364
Generic error :  5.2012871948298995
Permute error :  23.853912955438403
Rotate error :  13.160809360048894
Symmetric error :  9.266246675317001
0.08075118316309142
Orth
500
Orth error :  11.802725513935554
Generic error :  4.5692517239023
Permute error :  22.817105655034137
Rotate error :  11.802730423093374
Symmetric error :  8.751049468630184
0.03950092210793967
Orth
500
Orth error :  11.919351581852068
Generic error :  4.803395273457398
Permute error :  33.12029620725288
Rotate error :  11.919365589315618
Symmetric error :  10.14675

In [244]:
# Effect of training-set size for the 'Orth' map trained on shuffled noun
# pairs and evaluated on the full combined data.
# NOTE: no random seed is set, so the shuffle differs between runs.
shuffle_idx = np.arange(ko_noun_data.shape[0])
np.random.shuffle(shuffle_idx)

ko_noun_shuffled = ko_noun_data[shuffle_idx]
en_noun_shuffled = en_noun_data[shuffle_idx]
for _train_size in (250, 750, 1000, 1250):
    wasserstein_error(ko_noun_shuffled, en_noun_shuffled, ko_data, en_data, 'Orth', num_data=_train_size)

Orth
250
Orth error :  8.596305390822003
Generic error :  7.731300248035952e-08
Permute error :  33.540499557897064
Rotate error :  8.596305689728498
Symmetric error :  6.6392524256709295
0.03907896266774536
Orth
750
Orth error :  14.296673559051007
Generic error :  7.545054950261566
Permute error :  33.78439626734629
Rotate error :  14.296715371994683
Symmetric error :  11.761895884823575
0.03875548203644586
Orth
1000
Orth error :  15.562684326410293
Generic error :  8.923572110686452
Permute error :  33.7550937211826
Rotate error :  15.563136487565455
Symmetric error :  12.494260365799175
0.03873944353494868
Orth
1250
Orth error :  16.404043873074652
Generic error :  9.711637380413897
Permute error :  33.82008071566402
Rotate error :  16.404043472158925
Symmetric error :  12.906590739047228
0.03878355260166344


In [252]:
# Effect of training-set size for the 'Orth' map trained on shuffled verb
# pairs and evaluated on the full combined data.
# NOTE: no random seed is set, so the shuffle differs between runs.
shuffle_idx = np.arange(ko_verb_data.shape[0])
np.random.shuffle(shuffle_idx)

ko_verb_shuffled = ko_verb_data[shuffle_idx]
en_verb_shuffled = en_verb_data[shuffle_idx]
for _train_size in (250, 500, 750, 1000, 1200):
    wasserstein_error(ko_verb_shuffled, en_verb_shuffled, ko_data, en_data, 'Orth', num_data=_train_size)

Orth
250
Orth error :  7.956341380975733
Generic error :  0.7977347119334299
Permute error :  21.29492115772023
Rotate error :  7.956340785070989
Symmetric error :  8.931483205989153
0.03984820111757052
Orth
500
Orth error :  11.099990728497506
Generic error :  4.198672776122141
Permute error :  22.46823850142459
Rotate error :  11.099991015453368
Symmetric error :  8.043052324309274
0.03914070552406112
Orth
750
Orth error :  12.939353518948657
Generic error :  6.517930231951487
Permute error :  23.19541425860062
Rotate error :  12.939454165068145
Symmetric error :  9.350889763175935
0.03914251291861132
Orth
1000
Orth error :  13.960493759717851
Generic error :  7.722700822770654
Permute error :  23.217156759670974
Rotate error :  13.96049588961992
Symmetric error :  9.956670316126935
0.039021594220415276
Orth
1200
Orth error :  14.537508326496463
Generic error :  8.312890208072544
Permute error :  23.34554242055629
Rotate error :  14.537546116707658
Symmetric error :  10.2658239132454

In [254]:
# Effect of training-set size for the 'Orth' map trained on shuffled
# adjective pairs and evaluated on the full combined data.
# NOTE: no random seed is set, so the shuffle differs between runs.
shuffle_idx = np.arange(ko_adjective_data.shape[0])
np.random.shuffle(shuffle_idx)

ko_adjective_shuffled = ko_adjective_data[shuffle_idx]
en_adjective_shuffled = en_adjective_data[shuffle_idx]
for _train_size in (250, 500, 750):
    wasserstein_error(ko_adjective_shuffled, en_adjective_shuffled, ko_data, en_data, 'Orth', num_data=_train_size)

Orth
250
Orth error :  9.880508942822052
Generic error :  3.030197333109275e-07
Permute error :  23.255587231567162
Rotate error :  9.880509162863486
Symmetric error :  5.701788823613089
0.039743550982530566
Orth
500
Orth error :  13.554787633829866
Generic error :  5.265185862268438
Permute error :  23.918843844193578
Rotate error :  13.554792797280713
Symmetric error :  9.35087675008495
0.03977024229111824
Orth
750
Orth error :  15.520900084965009
Generic error :  7.860917868023117
Permute error :  24.416292635034775
Rotate error :  15.521068253150847
Symmetric error :  10.670033680391969
0.04000897907409044


In [248]:
# Compare all five Procrustes variants trained on the same 500 shuffled rows
# of the combined data, each evaluated on the full combined data.
# NOTE: no random seed is set, so the shuffle differs between runs.
shuffle_idx = np.arange(ko_data.shape[0])
np.random.shuffle(shuffle_idx)

ko_all_shuffled = ko_data[shuffle_idx]
en_all_shuffled = en_data[shuffle_idx]
for _method in ('Generic', 'Orth', 'Permute', 'Rotate', 'Symmetric'):
    wasserstein_error(ko_all_shuffled, en_all_shuffled, ko_data, en_data, _method, num_data=500)

Generic
500
Orth error :  12.012019886999507
Generic error :  5.032311402240564
Permute error :  27.852480576285927
Rotate error :  12.012252445023414
Symmetric error :  9.763196361930643
0.05826142695267305
Orth
500
Orth error :  12.012019886999507
Generic error :  5.032311402240564
Permute error :  27.852480576285927
Rotate error :  12.012252445023414
Symmetric error :  9.763196361930643
0.03915849824395097
Permute
500
Orth error :  12.012019886999507
Generic error :  5.032311402240564
Permute error :  27.852480576285927
Rotate error :  12.012252445023414
Symmetric error :  9.763196361930643
0.041528497505318805
Rotate
500
Orth error :  12.012019886999507
Generic error :  5.032311402240564
Permute error :  27.852480576285927
Rotate error :  12.012252445023414
Symmetric error :  9.763196361930643
0.039132406142971074
Symmetric
500
Orth error :  12.012019886999507
Generic error :  5.032311402240564
Permute error :  27.852480576285927
Rotate error :  12.012252445023414
Symmetric error :

In [178]:
# Mean Wasserstein distance between the English verb vectors mapped with the
# noun-trained transform (changee) and the Korean verb vectors.
# The accumulators are deliberately NOT named `wasserstein_error`: assigning
# to that name at module level would overwrite the function of the same name.
verb_distance_total = 0
for i in range(len(changee)):
    verb_distance_total += wasserstein_distance(np.ndarray.flatten(changee[i]), ko_verb_data[i])

print(verb_distance_total / len(changee))

# Same measurement for the English noun vectors mapped with the verb-trained
# transform (changee2) against the Korean noun vectors.
noun_distance_total = 0
for i in range(len(changee2)):
    noun_distance_total += wasserstein_distance(np.ndarray.flatten(changee2[i]), ko_noun_data[i])

# Average over changee2, the array that was actually summed; the earlier
# version divided by len(changee), which skews the mean whenever the two
# arrays differ in length.
print(noun_distance_total / len(changee2))

0.03664267950359913
0.26034835163647313


In [159]:
# Display the `t` attribute (the matrix used with np.dot elsewhere) of the noun en->ko fit.
noun_en2ko.t

array([[ 0.02532453,  0.01655369, -0.14944233, ..., -0.02394157,
         0.06333017, -0.00246632],
       [ 0.07032257, -0.06695946,  0.07871911, ..., -0.06899119,
         0.06208432, -0.00754868],
       [-0.06090844,  0.03375573, -0.01892716, ...,  0.00437703,
        -0.0258507 ,  0.10423156],
       ...,
       [-0.06986481, -0.02980642, -0.02814702, ..., -0.02388365,
        -0.01343068, -0.13722147],
       [ 0.00529884,  0.02941134,  0.00693259, ...,  0.04192578,
         0.04784838,  0.11237994],
       [ 0.00816763, -0.00347921, -0.03924865, ...,  0.13404962,
        -0.06358054,  0.02587356]], dtype=float32)

In [115]:
# sample_change_vec = np.dot(ko_model.vectors[:5000], result_generic.t)
# sample_change_vocab = list(ko_model.vocab)[:5000]
# Map every English embedding vector with the noun-trained and the
# verb-trained en->ko transforms respectively.
change_vec_noun = np.dot(en_model.vectors, noun_en2ko.t)
change_vec_verb = np.dot(en_model.vectors, verb_en2ko.t)

In [116]:
# Check that both transformed matrices have the same shape.
change_vec_noun.shape, change_vec_verb.shape

((2519370, 300), (2519370, 300))

In [130]:
from scipy.stats import wasserstein_distance

# Average Wasserstein distance between the noun-mapped and verb-mapped
# versions of the same English vector, over the first n_compared rows.
n_compared = 50000
distance = sum(
    wasserstein_distance(change_vec_noun[row], change_vec_verb[row])
    for row in range(n_compared)
)
print(distance / n_compared)

0.1432145169224416


In [122]:
# Display the English model's vocabulary mapping.
# NOTE: this dumps the entire mapping; consider showing only a small slice.
en_model.vocab

{',': <gensim.models.keyedvectors.Vocab at 0x7fe278aedb70>,
 '.': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7a90>,
 'the': <gensim.models.keyedvectors.Vocab at 0x7fe278aedba8>,
 '</s>': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7780>,
 'of': <gensim.models.keyedvectors.Vocab at 0x7fe278aedc18>,
 '-': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7710>,
 'in': <gensim.models.keyedvectors.Vocab at 0x7fe278aedc50>,
 'and': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7748>,
 "'": <gensim.models.keyedvectors.Vocab at 0x7fe278aedcc0>,
 ')': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7ef0>,
 '(': <gensim.models.keyedvectors.Vocab at 0x7fe278aedcf8>,
 'to': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7dd8>,
 'a': <gensim.models.keyedvectors.Vocab at 0x7fe278aedd68>,
 'is': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7c50>,
 'was': <gensim.models.keyedvectors.Vocab at 0x7fe278aeddd8>,
 'on': <gensim.models.keyedvectors.Vocab at 0x7fe278ca7828>,
 's': <gensim.models.keyed

In [114]:
# Shape of the Korean vector matrix before any English entries are added.
ko_model.vectors.shape

(879129, 300)

In [131]:
# NOTE(review): tmp_ko_model is not defined anywhere in the visible part of
# this notebook; this cell relies on state from another cell or session.
tmp_ko_model.add(list(en_model.vocab), change_vec_verb)

In [133]:
# Add the English vocabulary, with its verb-mapped vectors, to ko_model.
# NOTE: this mutates ko_model; re-running the cell calls add() again on the
# already-extended model.
ko_model.add(list(en_model.vocab), change_vec_verb)

In [134]:
# Shape of the Korean vector matrix after the add() above.
ko_model.vectors.shape

(3319279, 300)

In [135]:
# Persist the extended model (Korean vectors plus verb-mapped English vectors).
ko_model.save('./data/emb_en2ko_verb.w2v')

In [18]:
# NOTE(review): sample_change_vocab / sample_change_vec only appear in
# commented-out code in this notebook, so this cell fails on a fresh kernel.
en_model.add(sample_change_vocab, sample_change_vec, replace=True)

In [46]:
# Sanity check: list the 100 nearest neighbours of a Korean word in the
# English model after Korean entries were added to it.
en_model.wv.most_similar('관계는', topn=100)

  if __name__ == '__main__':


[('관계가', 0.8391407132148743),
 ('관계를', 0.7764248251914978),
 ('관계에', 0.7076321244239807),
 ('관련이', 0.6967226266860962),
 ('관계', 0.6891700625419617),
 ('직접적인', 0.6761817932128906),
 ('상황이', 0.6239739656448364),
 ('외교', 0.6234864592552185),
 ('태도를', 0.6218520998954773),
 ('사이가', 0.6161556243896484),
 ('입장을', 0.6150044202804565),
 ('그러나', 0.6109069585800171),
 ('서로', 0.6048915386199951),
 ('유지하고', 0.5998672246932983),
 ('관련하여', 0.5971484780311584),
 ('간의', 0.5947040319442749),
 ('만나', 0.5940428972244263),
 ('하면서', 0.5927482843399048),
 ('이는', 0.5892982482910156),
 ('일은', 0.5884603261947632),
 ('그리하여', 0.587111234664917),
 ('관계로', 0.5869655609130859),
 ('결과는', 0.5868374109268188),
 ('전혀', 0.586111307144165),
 ('하지만', 0.5860042572021484),
 ('관심이', 0.5847064852714539),
 ('반응을', 0.5808483362197876),
 ('노력을', 0.5787227153778076),
 ('상황을', 0.5786213278770447),
 ('관련된', 0.5782474875450134),
 ('상호', 0.5781601667404175),
 ('정부는', 0.5779614448547363),
 ('성격이', 0.5774530172348022),
 ('공식적인', 0.57712

In [47]:
# Map every Korean vector with the 'generic' transform fitted on the combined
# data, add the results to en_model under the Korean words, and time the step.
tic = time.time()
change_vec = np.dot(ko_model.vectors, result_generic.t)
change_vocab = list(ko_model.vocab)
# replace=True is passed so that entries already present are overwritten.
en_model.add(change_vocab, change_vec, replace=True)
tok = time.time()
# Format the elapsed seconds as H:MM:SS.ffffff for the message below.
tictok = str(datetime.timedelta(seconds=tok-tic))
print(f'Transform and Add process end. Time spend : {tictok}')

Transform and Add process end. Time spend : 0:00:08.790378


In [54]:
# Repeats the add() from the timing cell with the same vocabulary and vectors.
en_model.add(change_vocab, change_vec, replace=True)

In [57]:
# Look up the word stored at a high index of the extended English model.
en_model.index2word[2522956]

'프랑크'

In [None]:
# Number of entries in the English model's vocabulary.
len(en_model.vocab)

In [None]:
# Fetch the vector stored in en_model for a Korean word.
en_model.word_vec('사라졌다')

In [139]:
# Preview the first ten Korean noun vectors.
list(ko_noun_dict.values())[:10]

[array([-0.46317  ,  0.35326  , -0.61864  ,  0.10093  ,  0.0090473,
         0.33205  ,  0.58962  ,  0.016882 , -0.18672  ,  0.18466  ,
         0.32964  ,  0.0619   ,  0.32783  ,  0.18034  , -0.0039152,
         0.13652  , -0.15772  , -0.20662  , -0.20453  ,  0.47451  ,
        -0.36532  ,  0.33947  ,  0.039544 ,  0.11628  ,  0.0050244,
        -0.093996 ,  0.20793  , -0.27935  , -0.11046  ,  0.24383  ,
        -0.10405  ,  0.058623 ,  0.67488  , -0.6181   , -0.46859  ,
         0.050933 ,  0.43336  ,  0.18848  , -0.34381  ,  0.5788   ,
         0.21283  , -0.43836  ,  0.31755  ,  0.50705  ,  0.13347  ,
         0.58818  ,  0.12601  , -0.26849  ,  0.13414  , -0.058524 ,
        -0.092368 , -0.066345 , -0.39079  , -0.30403  , -0.0031219,
         0.43592  , -0.15187  ,  0.34796  , -0.47533  ,  0.099807 ,
         0.16039  , -0.9916   , -0.081359 , -0.34265  ,  0.35924  ,
        -0.33283  , -0.22422  ,  0.008091 , -0.038127 , -0.60255  ,
         0.27112  , -0.10279  , -0.083529 ,  0.0

In [141]:
# Interactive documentation lookup left over from development.
help(np.linalg.norm)

Help on function norm in module numpy.linalg:

norm(x, ord=None, axis=None, keepdims=False)
    Matrix or vector norm.
    
    This function is able to return one of eight different matrix norms,
    or one of an infinite number of vector norms (described below), depending
    on the value of the ``ord`` parameter.
    
    Parameters
    ----------
    x : array_like
        Input array.  If `axis` is None, `x` must be 1-D or 2-D, unless `ord`
        is None. If both `axis` and `ord` are None, the 2-norm of
        ``x.ravel`` will be returned.
    ord : {non-zero int, inf, -inf, 'fro', 'nuc'}, optional
        Order of the norm (see table under ``Notes``). inf means numpy's
        `inf` object. The default is None.
    axis : {None, int, 2-tuple of ints}, optional.
        If `axis` is an integer, it specifies the axis of `x` along which to
        compute the vector norms.  If `axis` is a 2-tuple, it specifies the
        axes that hold 2-D matrices, and the matrix norms of these

In [151]:
# Divide all Korean noun vectors by one scalar: np.linalg.norm is called
# without `axis`, so it returns a single norm for the whole stacked array.
# NOTE(review): if per-vector unit normalisation was intended, a row-wise
# norm (axis=1, keepdims=True) would be needed -- confirm the intent.
list(ko_noun_dict.values()) / np.linalg.norm(list(ko_noun_dict.values()))

array([[-2.3640203e-03,  1.8030396e-03, -3.1575395e-03, ...,
        -2.0729902e-03,  1.5588641e-03, -2.6703114e-03],
       [ 1.0887856e-03, -2.1845637e-05, -1.1447765e-03, ...,
        -1.9929593e-03,  3.9582042e-04, -2.0350164e-03],
       [-8.5502228e-04, -8.4047596e-04, -1.7954346e-03, ...,
        -7.7509362e-04, -9.0274472e-05, -3.1735660e-03],
       ...,
       [-5.8864447e-04,  3.4588797e-03, -6.1605298e-03, ...,
        -2.9158650e-03, -1.0209023e-03, -7.1741844e-04],
       [-7.3528243e-04,  1.7115248e-03, -8.8141002e-03, ...,
         2.4636504e-03, -1.3991089e-03,  4.8841210e-04],
       [ 1.4474437e-03,  5.9492240e-04, -2.3981151e-03, ...,
        -6.4208335e-04,  6.3580542e-04, -4.0979516e-03]], dtype=float32)

In [157]:
# Scale the first Korean noun vector by its own norm.
list(ko_noun_dict.values())[0] / np.linalg.norm(list(ko_noun_dict.values())[0])

array([-0.07849772,  0.05987025, -0.10484666,  0.01710554,  0.00153333,
        0.05627559,  0.09992837,  0.00286115, -0.03164517,  0.03129604,
        0.05586715,  0.01049077,  0.05556039,  0.03056389, -0.00066355,
        0.02313731, -0.02673027, -0.03501781, -0.0346636 ,  0.08041961,
       -0.06191417,  0.05753313,  0.00670189,  0.01970705,  0.00085153,
       -0.01593038,  0.03523983, -0.04734404, -0.01872068,  0.04132413,
       -0.01763432,  0.00993538,  0.11437818, -0.10475514, -0.07941629,
        0.00863209,  0.07344554,  0.03194346, -0.05826867,  0.09809461,
        0.03607028, -0.07429294,  0.05381814,  0.08593446,  0.0226204 ,
        0.09968432,  0.02135608, -0.04550349,  0.02273395, -0.00991861,
       -0.01565446, -0.0112441 , -0.0662308 , -0.05152678, -0.0005291 ,
        0.07387941, -0.02573882,  0.05897201, -0.08055858,  0.01691522,
        0.02718278, -0.16805565, -0.01378866, -0.05807208,  0.06088373,
       -0.05640779, -0.03800064,  0.00137126, -0.00646174, -0.10