ref: https://www.kaggle.com/hamishdickson/bidirectional-lstm-in-keras-with-glove-embeddings

ref: https://lovit.github.io/nlp/representation/2018/09/05/glove/

In [1]:
import pandas as pd
import time
import os, gc
import numpy as np
from tqdm import tqdm
import random

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import *
from keras.utils.np_utils import to_categorical
from keras.initializers import Constant
import re

import matplotlib.pyplot as plt
%matplotlib inline

import torch

Using TensorFlow backend.


In [2]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(42)

In [3]:
%%time
#train_df=pd.read_csv("../KB_NLP/morphs/komo_morphs_train.csv")
#test_df=pd.read_csv("../KB_NLP/morphs/komo_morphs_test.csv")
train_df=pd.read_csv("../KB_NLP/raw_data/train.csv")
test_df=pd.read_csv("../KB_NLP/raw_data/public_test.csv")

CPU times: user 938 ms, sys: 35 ms, total: 973 ms
Wall time: 973 ms


In [4]:
pd.set_option('display.max_colwidth',-1)
train_df.head()

Unnamed: 0,id,year_month,text,smishing
0,0,2017-01,XXX은행성산XXX팀장입니다.행복한주말되세요,0
1,1,2017-01,오늘도많이웃으시는하루시작하세요XXX은행 진월동VIP라운지 XXX올림,0
2,2,2017-01,안녕하십니까 고객님. XXX은행입니다.금일 납부하셔야 할 금액은 153600원 입니다.감사합니다. 새해 복 많이 받으십시오.XXX은행옥포XXX올림,0
3,4,2017-01,XXX 고객님안녕하세요XXX은행 XXX지점입니다지난 한 해 동안 저희 XXX지점에 보내주신 성원에 감사드립니다. 설렘으로 시작한 2017년소망하시는 일 모두 이XXX 고객님의 가정에 늘 건강과 행복이 함께하길 기원하겠습니다. 사랑하는 가족과 함께 정을 나누는 행복한 설 명절 보내세요 XXX은행 XXX지점직원일동,0
4,5,2017-01,1월은 새로움이 가득XXX입니다.올 한해 더 많이행복한 한해되시길바랍니다,0


In [5]:
from soynlp.hangle import decompose

doublespace_pattern = re.compile('\s+')

def jamo_sentence(sent):

    def transform(char):
        if char == ' ':
            return char
        cjj = decompose(char)
        try:
            len(cjj)
        except:
            return ' '
        if len(cjj) == 1:
            return cjj
        cjj_ = ''.join(c if c != ' ' else '' for c in cjj)
        return cjj_

    sent_ = ''.join(transform(char) for char in sent)
    sent_ = doublespace_pattern.sub(' ', sent_)
    return sent_
# 'ㅇㅓ-ㅇㅣ-ㄱㅗ- ㅋㅔㄱㅋㅔㄱ ㅇㅏ-ㅇㅣ-ㄱㅗ-ㅇㅗ-'

jamo_sentence(train_df.loc[0, 'text'])

' ㅇㅡㄴㅎㅐㅇㅅㅓㅇㅅㅏㄴ ㅌㅣㅁㅈㅏㅇㅇㅣㅂㄴㅣㄷㅏ ㅎㅐㅇㅂㅗㄱㅎㅏㄴㅈㅜㅁㅏㄹㄷㅚㅅㅔㅇㅛ'

In [6]:
%%time
test_df['jamo'] = test_df['text'].apply(lambda x: jamo_sentence(x))
train_df['jamo'] = train_df['text'].apply(lambda x: jamo_sentence(x))

CPU times: user 1min 2s, sys: 139 ms, total: 1min 2s
Wall time: 1min 2s


In [7]:
%%time
train_df.to_csv("../KB_NLP/jamo_data/jamo_train.csv", index=False)
test_df.to_csv("../KB_NLP/jamo_data/jamo_test.csv", index=False)

CPU times: user 2.84 s, sys: 216 ms, total: 3.05 s
Wall time: 5.91 s


In [8]:
raw_corpus_fname = '\n'.join(pd.concat([train_df['jamo'], test_df['jamo']]))
file=open('../KB_NLP/jamo_text.txt','w')
file.write(raw_corpus_fname)
file.close()

In [9]:
jamo_jumo = pd.concat([train_df['jamo'], test_df['jamo']])

In [10]:
input_sentences = list(jamo_jumo)

In [11]:
from soynlp.utils import DoublespaceLineCorpus
from soynlp.vectorizer import sent_to_word_contexts_matrix

#corpus_path = '2016-10-20_article_all_normed_ltokenize.txt'
#corpus = DoublespaceLineCorpus(corpus_path, iter_sent=True)

x, idx2vocab = sent_to_word_contexts_matrix(
    input_sentences,
    windows=1,
    min_tf=10,
    tokenizer=lambda x:x.split(), # (default) lambda x:x.split(),
    dynamic_weight=True,
    verbose=True)

print(x.shape) # (36002, 36002)

Create (word, contexts) matrix
  - counting word frequency from 297570 sents, mem=1.015 Gb
  - scanning (word, context) pairs from 297570 sents, mem=1.133 Gb
  - (word, context) matrix was constructed. shape = (38890, 38890)                    
  - done
(38890, 38890)


In [12]:
from glove import Corpus, Glove

In [13]:
glove = Glove(no_components=200, learning_rate=0.01, random_state=42)
glove.fit(x.tocoo(), epochs=10, no_threads=4, verbose=True)

Performing 10 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9


In [14]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)

In [15]:
glove.word_vectors.shape

(38890, 200)

In [16]:
glove.word_vectors[:1]

array([[-0.26504861, -0.19749828,  0.2253422 ,  0.31274439,  0.12714675,
        -0.40208698, -0.70234407,  0.23691935, -0.32004082, -0.29816814,
        -0.44003501,  0.40650774,  0.78600718, -0.34585916,  0.02659737,
        -0.29317514, -0.38033803, -0.08452949,  0.10737898, -0.23689778,
         0.29375821, -0.32402372, -0.54910766,  0.2707403 , -0.07759285,
         0.26242062, -0.14520058,  0.09714021, -0.19448539, -0.39819316,
        -0.15008974, -0.60583563, -0.58237357,  0.13608605,  0.78646772,
         0.36670834,  0.0966941 , -0.35796498,  0.02100134,  0.17540543,
         0.14057073, -0.32161751, -0.67802754,  0.26479479,  0.28361337,
         0.26312648, -0.15772616,  0.13191051,  0.04423378,  0.71645343,
        -0.01196593,  0.32287289,  0.72932142,  0.38288656,  0.21396925,
         0.31950716, -0.41658197, -0.3504529 , -0.30256063, -0.40048003,
        -0.13639626,  0.01508176,  0.29369455, -0.3292919 ,  0.18584617,
        -0.26536037, -0.39133735, -0.17996001, -0.5

In [20]:
glove.word_vectors[0]

array([-0.26504861, -0.19749828,  0.2253422 ,  0.31274439,  0.12714675,
       -0.40208698, -0.70234407,  0.23691935, -0.32004082, -0.29816814,
       -0.44003501,  0.40650774,  0.78600718, -0.34585916,  0.02659737,
       -0.29317514, -0.38033803, -0.08452949,  0.10737898, -0.23689778,
        0.29375821, -0.32402372, -0.54910766,  0.2707403 , -0.07759285,
        0.26242062, -0.14520058,  0.09714021, -0.19448539, -0.39819316,
       -0.15008974, -0.60583563, -0.58237357,  0.13608605,  0.78646772,
        0.36670834,  0.0966941 , -0.35796498,  0.02100134,  0.17540543,
        0.14057073, -0.32161751, -0.67802754,  0.26479479,  0.28361337,
        0.26312648, -0.15772616,  0.13191051,  0.04423378,  0.71645343,
       -0.01196593,  0.32287289,  0.72932142,  0.38288656,  0.21396925,
        0.31950716, -0.41658197, -0.3504529 , -0.30256063, -0.40048003,
       -0.13639626,  0.01508176,  0.29369455, -0.3292919 ,  0.18584617,
       -0.26536037, -0.39133735, -0.17996001, -0.51755435,  0.56

In [32]:
len(glove.word_vectors)

38890

In [35]:
with open("glove_test.pkl",'w') as f:
    for word in glove.dictionary:
        f.write(word)
        f.write(" ")
        for i in range(0, 200):
            f.write(str(glove.word_vectors[glove.dictionary[word]][i]))
            f.write(" ")
        f.write("\n")

In [27]:
gd = glove.dictionary

In [31]:
gd

{'ㅇㅗㄹㄹㅣㅁ': 0,
 'ㅇㅡㄴㅎㅐㅇ': 1,
 'ㄱㅗㄱㅐㄱㄴㅣㅁ': 2,
 'ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ': 3,
 'ㅇㅝㄹ': 4,
 'ㅂㅏㄹㅏㅂㄴㅣㄷㅏ': 5,
 'ㅅㅜ': 6,
 'ㅎㅐㅇㅂㅗㄱㅎㅏㄴ': 7,
 'ㅇㅣㅆㅅㅡㅂㄴㅣㄷㅏ': 8,
 'ㄱㅘㅇㄱㅗ': 9,
 'ㅈㅓㅎㅢ': 10,
 'ㅁㅣㅊ': 11,
 'ㄱㅗㄱㅐㄱㄴㅣㅁㅇㅢ': 12,
 'ㅎㅏㅇㅅㅏㅇ': 13,
 'ㅂㅗㄴㅐㅅㅔㅇㅛ': 14,
 'ㄷㅡㄹㅣㅂㄴㅣㄷㅏ': 15,
 'ㅎㅏㄹㅜ': 16,
 'ㄲㅗㄱ': 17,
 'ㅊㅚㄷㅐ': 18,
 'ㅇㅣㅂㄴㅣㄷㅏ': 19,
 'ㄴㅕㄴ': 20,
 'ㄱㅏㅁㅅㅏㄷㅡㄹㅣㅂㄴㅣㄷㅏ': 21,
 'ㅈㅜㅅㅕㅅㅓ': 22,
 'ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ': 23,
 'ㅈㅜㅁㅏㄹ': 24,
 'ㅇㅣㄹ': 25,
 'ㅂㅜㅌㅏㄱㄷㅡㄹㅣㅂㄴㅣㄷㅏ': 26,
 'ㄸㅏㄹㅏ': 27,
 'ㅇㅣㅆㄴㅡㄴ': 28,
 'ㅈㅣㅈㅓㅁ': 29,
 'ㅈㅜㅅㅣㅁㅕㄴ': 30,
 'ㅈㅡㄹㄱㅓㅇㅜㄴ': 31,
 'ㄱㅏㄴㅡㅇ': 32,
 'ㅁㅜㄹㅛㅅㅜㅅㅣㄴㄱㅓㅂㅜ': 33,
 'ㅎㅏㄴㄷㅗ': 34,
 'ㄷㅚㅅㅔㅇㅛ': 35,
 'ㅌㅣㅁㅈㅏㅇ': 36,
 'ㄱㅕㅇㅇㅜ': 37,
 'ㄲㅏㅈㅣ': 38,
 'ㅈㅗㅎㅇㅡㄴ': 39,
 'ㄷㅓ': 40,
 'ㅁㅏㄴㅇㅝㄴ': 41,
 'ㅇㅕㄴ': 42,
 'ㅇㅗㄴㅡㄹㄷㅗ': 43,
 'ㅎㅏㄴ': 44,
 'ㄸㅗㄴㅡㄴ': 45,
 'ㅈㅓㅁ': 46,
 'ㅎㅏㅂㄴㅣㄷㅏ': 47,
 'ㅅㅏㅇㄷㅏㅁ': 48,
 'ㅅㅏㅇㅍㅜㅁ': 49,
 'ㅂㅜㄴ': 50,
 'ㄷㅡㅇ': 51,
 'ㅅㅗㅈㅜㅇㅎㅏㄴ': 52,
 'ㄱㅡㅁㄹㅣ': 53,
 'ㄱㅗㄱㅐㄱㄴㅣㅁㄲㅔ': 54,
 'ㅁㅏㄶㅇㅣ': 55,
 'ㅁㅐㅇㅜ': 56,
 'ㅊㅚㅅㅓㄴㅇㅡㄹ': 57,
 'ㅇㅗㄴㅡㄹ': 58,
 'ㄱㅏㄷㅡㄱㅎㅏㄴ': 59,
 'ㄷㅏㅂㅈㅏㅇㅇㅡㄹ': 60,
 'ㅡ': 61,
 'ㄷㅡㄹㅣㅁ': 62,
 'ㅅㅣ': 63,
 'ㅊㅚㅈㅓ': 64,
 'ㅁㅏㄴ': 65,
 'ㄷㅐㅎㅏㄴ': 66,
 'ㅂㅏㄹㅗ

In [16]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42)
glove.fit(x.tocoo(), epochs=100, no_threads=4, verbose=True)

Performing 100 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
CPU times: user 5min 2s, sys: 47 ms, total: 5min 2s
Wall time: 1

In [17]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)

In [18]:
glove.word_vectors.shape

(38890, 200)

In [19]:
glove.word_vectors[:1]

array([[-0.04014644,  0.14591996,  0.21770952,  0.27505619, -0.21774962,
        -0.3299259 , -0.34875664,  0.15955163, -0.30599767, -0.09654434,
        -0.36265585,  0.36642118,  0.58957813, -0.25477741, -0.77548881,
        -0.16812614, -0.30386485,  0.3014517 ,  0.11541772,  0.23872045,
         0.23607387, -0.32231933,  0.65029271, -0.57035232,  0.3518528 ,
         0.2721458 ,  0.29503168, -0.24414353,  0.24229787, -0.27020786,
         0.02250393, -0.64149403, -0.18691672,  0.57878591,  0.04031945,
         0.25008333, -0.11378724, -0.24900752, -0.36675543, -0.16945304,
        -0.26521683,  0.47874288, -0.39373579,  0.35531035, -0.0364649 ,
         0.24267091,  0.28361149,  0.0727594 , -0.18305883,  0.18589462,
         0.01891854,  0.31339466,  0.2110837 ,  0.2616362 ,  0.1850178 ,
         0.30698852, -0.34319437, -0.33101811, -0.0366438 , -0.10888972,
        -0.33897783, -0.18377894,  0.22498183, -0.16396334,  0.17689501,
        -0.09917777, -0.23181384, -0.07816078, -0.3

In [21]:
glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,"[-0.040146439146532854, 0.1459199564025886, 0.21770951649967288, 0.27505618905214874, -0.21774961539982576, -0.32992590369903885, -0.34875663718111205, 0.15955162607922904, -0.305997672446965, -0.09654433939962898, -0.3626558497403411, 0.3664211784936428, 0.5895781267518013, -0.25477740553566475, -0.7754888061202576, -0.16812614305628376, -0.3038648472127392, 0.3014516981329455, 0.11541771856676092, 0.23872045098218483, 0.23607387493931112, -0.3223193297401064, 0.6502927086724973, -0.5703523150599951, 0.3518527951391747, 0.2721457953773931, 0.2950316778258804, -0.24414352851925353, 0.2422978729500584, -0.27020786486751874, 0.022503931170752046, -0.6414940339397746, -0.18691671529030046, 0.5787859053707188, 0.04031944855763691, 0.25008332771133435, -0.11378724404866228, -0.2490075170225759, -0.3667554252195786, -0.16945303784637283, -0.2652168322474747, 0.47874287732138326, -0.3937357934693531, 0.3553103520420614, -0.03646489812035994, 0.2426709065582876, 0.283611489751039, 0.07275939930438592, -0.18305883253701133, 0.18589461868555524, 0.018918544221754655, 0.3133946598397176, 0.2110837041232433, 0.2616362013122453, 0.1850177996830624, 0.3069885215742826, -0.3431943654946833, -0.33101811024047656, -0.036643801577073046, -0.10888971595732212, -0.3389778346535938, -0.18377894383819907, 0.22498182770753475, -0.16396333881447284, 0.17689500936933583, -0.09917777024020573, -0.2318138382632774, -0.07816078123507537, -0.32654060854510836, 0.3917461140143674, 0.09102456177488191, 0.0580177063672472, -0.5810236975096904, -0.15678636539390664, 0.12635572264744868, 0.003218629273790827, 0.0629406559152243, -0.3518278754445992, -0.04854144943131424, -0.12647429335202598, 0.3510986852746356, 0.4051464808002084, -0.21645253747254242, -0.19755681746718312, -0.3228944423688359, -0.3002770816990503, 0.3483795838100865, -0.046157589155479815, 0.3533725154836427, -0.06437188334279607, -0.30470854975854267, 0.7943502240375381, 0.25375599377636954, -0.3947189615781569, -0.04646276048762151, 0.28683884154846023, 0.5676365611630606, -0.3430480468733849, -0.13901352788172927, -0.2517575370388552, ...]"
1,ㅇㅡㄴㅎㅐㅇ,"[-0.15727920467170306, -0.08054372063028327, -0.2784167129656523, 0.21973003896583776, 0.1345708537280672, -0.2774422316098419, -0.400841878504431, 0.30184944983642836, -0.37914002232502925, -0.2627290278595458, -0.16365064306242355, 0.1128674486396726, 0.3849630509308168, -0.3393446132997946, -0.3736597817841855, -0.29523862168727905, -0.30348419564073675, -0.08834723476573174, -0.07348613400918769, 0.10211750359834441, 0.12998961298497563, -0.3081142786792763, -0.3924729227360429, 0.01906074899715463, -0.07931723237395684, 0.05273993815782015, 0.6745660958607178, -0.5018620313279732, 0.774314659482358, -0.23300255532277203, -0.07766038389033927, 0.1186569755027705, -0.09071159349706298, 0.12439919776911805, -0.03220682619672916, -0.3979169872948048, -0.1483416568066553, -0.3256488848834462, 0.3223875163201774, -0.1931382718666288, 0.18513157221738175, 0.3644950224471216, 0.6357152262965683, 0.0407551202152704, 0.08497422841423914, 0.3487012146468191, -0.08026315269369984, 0.24847819842610344, 0.15154242508973784, 0.14718998742413336, -0.21447080637046176, 0.12459898480373152, 0.7576997656965404, 0.25016582680075883, 0.09443410587745431, 0.23656590915416306, 0.13067120527321718, -0.2009705858236229, 0.20186887847241536, -0.3305009875026934, -0.03671781304236953, 0.18318971495928815, 0.07951070770111159, -0.21976908822922528, 0.295480608496454, -0.14395484916926138, -0.3052021083437517, -0.21667290341446233, -0.4315845363953309, -0.05338219720879526, -0.06413475303666524, 0.27059306781339815, 0.3096129080816439, 0.6633131691762129, 0.25816638223029886, -0.01067129402339933, 0.6825768944273405, -0.2737465782620136, -0.04536977002346369, 0.7442248768664693, 0.35760636471853746, -0.03790119244674228, -0.2420740079887751, -0.18475701439268913, 0.04828208745885672, -0.3008422504077641, 0.19306437500761695, 0.11154074328408649, 0.20647253598795856, 0.534368730459033, -0.29766159394229813, -0.27991689690682525, 0.2800728623230272, -0.265122355013227, -0.24766068357297613, -0.010539936501116596, 0.5284232951761, -0.3123785625498622, -0.05439603101590575, -0.3390824645447112, ...]"
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,"[-0.1597745349845917, 0.013560189386259674, -0.036222217487630284, 0.16858401082072869, -0.13178771353448737, -0.12429784435248659, 0.20022860847736337, 0.20678051209654785, -0.25414346289123085, -0.277011121609237, -0.15336784866142583, 0.15247255560619424, 0.24610975665063942, -0.26218046077170526, 0.01585455543886052, -0.0979784456366289, -0.27310723720629565, -0.5611986860976371, -0.3041036213773179, 0.005303895829400219, 0.1840903877893535, -0.24486879182734414, -0.042220532522990406, 0.05418442921451441, -0.03175354299722056, 0.12713579660698557, 0.07355974957073855, 0.08047764000431303, 0.08086453809430875, -0.2363719412628069, -0.1213375183758481, 0.29946568416393393, -0.45223092796210207, 0.11566639206620552, -0.1376854579062381, 0.27379161872919194, -0.2646110593944506, -0.2020742390778186, 0.7991442250422304, -0.14805839989634684, 0.09664752431191137, 0.23049289216606497, 0.19880699235603322, 0.08873660437553722, 0.15334223031474323, 0.25937443881667005, 0.1668620021693682, -0.007799174302246304, -0.04175073307312873, -0.17069775779660332, -0.202600688045644, 0.23318257259824932, -0.005910291045224786, 0.28585787573803123, 0.15025258394349109, 0.12324448287345571, -0.21556373878514518, -0.25588753994258195, 0.43675909949770136, 0.21138261803940486, -0.13694436103820826, 0.05717806652997615, 0.18103457758071706, -0.11565909185291977, 0.23859917065664102, -0.1765263325532361, -0.2668210168396667, -0.1577628018657883, -0.6524388074827608, 0.3034388731910627, -0.19044898862669904, 0.13669089135976112, 0.2565055569629603, 0.6267301141155762, 0.15534569038984272, 0.060432344052985525, 0.15482988135717068, 0.0622165558827621, 0.12659675940263515, 0.5713068524674801, -0.27130474671315186, -0.014286634587276355, -0.1891147761054346, -0.12038000720779386, -0.19175650985965798, 0.38210259965453947, 0.18351235843185562, -0.00011090582079147755, 0.2719753052910863, 0.8261419776977916, -0.2445744183986712, 0.0806388655398789, 0.21738811317767923, -0.26443543483340237, -0.13278625747572173, 0.01757632727717061, 0.29641537227845927, -0.28082663440955047, -0.037437816617066105, -0.23873736684455435, ...]"
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,"[-0.30949347873295674, 0.0594615992566237, 0.11194597925692495, 0.11139642275213629, -0.17014131074212177, -0.15888300492540267, -0.4096666500993302, 0.22541192395834797, -0.2554380102045625, -0.15795146870552554, -0.20567883285328262, 0.13147658010704735, 0.4210270419663094, -0.18973075006494328, 0.18204655288288596, -0.036991250023264755, -0.24011825292565214, -0.0525553324294379, 0.2616669316185929, 0.12850865935871206, 0.08905903559669452, -0.22420536857596707, -0.17233434815698478, 0.053232948129125636, 0.21798266315250908, 0.157703705793137, -0.0326094627680965, 0.028388492788175002, -0.05552751810978055, -0.22592167392359006, -0.010159552640996636, -0.04843556093306686, -0.2920025809574028, -0.042041898940513814, -0.005841533084421416, 0.03330714867425825, -0.12490175339650302, -0.15087394291503384, -0.13501676877838464, -0.1061449962640955, 0.01714646268186141, -0.045602909679866466, -0.05235640648244271, 0.10581848984264958, 0.2571343235754455, 0.26773632178675755, 0.15339908678755626, 0.1850629262505611, -0.34342185999808406, -0.13202190560450142, -0.2940714737329587, 0.17297318004562764, -0.1363389144319819, 0.23465562580226712, 0.11743852958841822, 0.10270518952271752, -0.110883037675004, -0.16768313632872414, 0.20003684109641642, -0.03324226219093526, -0.1275020332823655, 0.15542274394803524, 0.215150699435846, -0.22294748165646752, 0.17039818389839728, -0.21416785929245663, -0.26153097624705735, -0.20613320944180208, -0.4615966057501567, 0.047777563087544224, -0.18641703297635726, 0.19955281620823476, -0.34544143047914805, 0.5032585370903512, 0.18568477003978798, 0.18604688534284436, -0.43049770720128655, -0.08946114952792736, 0.252809571324861, 0.37529315267789437, 0.07450914063595485, 0.1710039061098485, -0.2060700554947558, -0.15807394087512391, -0.024352840754614278, -0.2576598502468381, 0.21608363873353056, 0.10182593956440272, 0.20819151780996079, 0.48343805416410296, -0.23996691473053222, 0.2423153228526975, 0.1922629302762512, -0.2073086982358169, -0.22774425145617888, 0.06022101469327037, -0.029318545381784314, -0.22229408784772325, -0.20699543365715883, -0.21102870624428377, ...]"
4,ㅇㅝㄹ,"[-0.207543173901811, -0.10126383302300776, 0.12663856357366154, 0.09365658558210072, -0.15214850439036295, 0.01676780091562886, 0.3859737993008759, 0.19293043170135735, -0.22034247314667205, -0.21563599723469665, -0.08367861975282788, 0.17239945110534535, 0.08195125940436142, -0.16636275757723964, 0.11375667711653784, -0.030959406602681733, -0.2230893703509022, -0.3500946822516038, 0.07070867499100883, 0.06948771038753616, 0.11308669809161774, -0.2409729831005407, -0.10408944384287033, 0.14562679945502807, -0.059844979354090236, 0.19178740430251415, -0.056580343321612034, -0.24345800706225665, 0.7718960094453498, -0.18734593250122483, -0.09988026749493022, -0.24724901208216818, -0.3256238521317649, -0.3632924520292515, 0.13953768198605496, 0.12192561539573758, 0.03905335929374656, -0.17771254590500515, 0.051473126048748506, -0.07436220513866879, 0.09447720147997668, 0.2170487146225706, 0.013121823645414112, 0.15833193814478774, -0.10010747029405971, 0.2104428041023523, -0.0612985360562104, 0.1868389540485035, 0.16412723741452218, 0.18669903273950023, -0.25180359910409184, 0.13927908919183282, 0.0031591823994847248, 0.24241876071193227, -0.03466834494759662, 0.013801621375471156, -0.053656089640121495, -0.1772203485553253, 0.11315805765760426, 0.5900458192396987, 0.06502170060165967, 0.057612496314408256, 0.16771610180036875, -0.2053302776898888, 0.20146013116573552, -0.19343916543341624, -0.23378806394494378, -0.19377025336124093, -0.036952202452224324, 0.452333271321022, -0.17164022473942298, 0.14440364766428324, 0.5469929189590842, -0.3768228026254455, 0.13713820356160875, -0.24236037264899263, 0.6272787601546045, 0.19246810349207702, -0.42590602299881963, -0.1831739402504948, -0.7220820042049925, -0.29295093782405357, -0.15435646969443942, -0.14926624058674834, -0.12151011553752243, 0.2499818589336264, 0.19048467384150045, -0.22470350785046095, 0.11017413474089753, 0.030529444776095906, -0.17441777436704667, 0.652179239406073, 0.2556127836670539, 0.16375474278529106, -0.14029826855014432, -0.06965292387763845, -0.11519729645520464, -0.11245718540758347, -0.1913606479399667, -0.22064478148069874, ...]"


In [22]:
glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']

In [23]:
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')

100%|██████████| 38890/38890 [00:07<00:00, 5394.15it/s]


In [24]:
glove_embedding.head()

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.04014644 0.14591996 0.21770952 0.27505619 -0.21774962 -0.3299259 -0.34875664 0.15955163 -0.30599767 -0.09654434 -0.36265585 0.36642118 0.58957813 -0.25477741 -0.77548881 -0.16812614 -0.30386485 0.3014517 0.11541772 0.23872045 0.23607387 -0.32231933 0.65029271 -0.57035232 0.3518528 0.2721458 0.29503168 -0.24414353 0.24229787 -0.27020786 0.02250393 -0.64149403 -0.18691672 0.57878591 0.04031945 0.25008333 -0.11378724 -0.24900752 -0.36675543 -0.16945304 -0.26521683 0.47874288 -0.39373579 0.35531035 -0.0364649 0.24267091 0.28361149 0.0727594 -0.18305883 0.18589462 0.01891854 0.31339466 0.2110837 0.2616362 0.1850178 0.30698852 -0.34319437 -0.33101811 -0.0366438 -0.10888972 -0.33897783 -0.18377894 0.22498183 -0.16396334 0.17689501 -0.09917777 -0.23181384 -0.07816078 -0.32654061 0.39174611 0.09102456 0.05801771 -0.5810237 -0.15678637 0.12635572 0.00321863 0.06294066 -0.35182788 -0.04854145 -0.12647429 0.35109869 0.40514648 -0.21645254 -0.19755682 -0.32289444 -0.30027708 0.34837958 -0.04615759 0.35337252 -0.06437188 -0.30470855 0.79435022 0.25375599 -0.39471896 -0.04646276 0.28683884 0.56763656 -0.34304805 -0.13901353 -0.25175754 -0.36208369 0.05205259 -0.22300126 0.2905658 -0.36620431 -0.54267439 -0.55635134 -0.13487925 0.12809894 -0.27533946 -0.09402707 -0.13418316 0.31926451 0.25038203 0.56734689 0.50725485 0.13491343 -0.09576212 0.56955517 0.04188273 -0.26275488 0.27024855 -0.4486555 -0.37352145 -0.07066747 0.19620842 -0.05598441 -0.13595502 0.37554276 -0.24108017 -0.14587224 -0.02038195 -0.28231163 0.27694228 0.03949041 -0.36484638 0.25809373 0.03587891 -0.25194114 0.38679681 0.3846123 -0.36218281 -0.54193586 0.05941491 -0.22653054 -0.19972402 0.68535539 -0.20749228 -0.20579772 0.10440364 0.28986191 -0.41898958 -0.41396729 -0.21446917 -0.01976362 -0.33942792 0.55116389 0.35188357 0.22111976 0.29210891 -0.15658744 -0.19720351 0.36857953 0.45014699 -0.14041504 0.18623169 0.14295688 -0.2884932 -0.26274614 -0.16985756 -0.02450327 -0.59542114 -0.22030671 -0.15137482 0.10464277 -0.2418877 0.27974102 0.15631665 0.33281475 -0.11659845 -0.13164111 -0.41712243 0.2061083 0.58005565 -0.50544492 0.27397353 0.23080702 0.3383612 0.28207726 0.05866299 0.24901911 0.72161691 0.10578238 -0.03012658 -0.1668963 0.1184556 0.24818298 0.11677119 0.49216082 -0.1081244
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.1572792 -0.08054372 -0.27841671 0.21973004 0.13457085 -0.27744223 -0.40084188 0.30184945 -0.37914002 -0.26272903 -0.16365064 0.11286745 0.38496305 -0.33934461 -0.37365978 -0.29523862 -0.3034842 -0.08834723 -0.07348613 0.1021175 0.12998961 -0.30811428 -0.39247292 0.01906075 -0.07931723 0.05273994 0.6745661 -0.50186203 0.77431466 -0.23300256 -0.07766038 0.11865698 -0.09071159 0.1243992 -0.03220683 -0.39791699 -0.14834166 -0.32564888 0.32238752 -0.19313827 0.18513157 0.36449502 0.63571523 0.04075512 0.08497423 0.34870121 -0.08026315 0.2484782 0.15154243 0.14718999 -0.21447081 0.12459898 0.75769977 0.25016583 0.09443411 0.23656591 0.13067121 -0.20097059 0.20186888 -0.33050099 -0.03671781 0.18318971 0.07951071 -0.21976909 0.29548061 -0.14395485 -0.30520211 -0.2166729 -0.43158454 -0.0533822 -0.06413475 0.27059307 0.30961291 0.66331317 0.25816638 -0.01067129 0.68257689 -0.27374658 -0.04536977 0.74422488 0.35760636 -0.03790119 -0.24207401 -0.18475701 0.04828209 -0.30084225 0.19306438 0.11154074 0.20647254 0.53436873 -0.29766159 -0.2799169 0.28007286 -0.26512236 -0.24766068 -0.01053994 0.5284233 -0.31237856 -0.05439603 -0.33908246 -0.20242762 0.00664636 -0.13741498 0.03119037 -0.39012542 0.31199905 0.10927213 0.19657683 0.20031728 0.01557362 0.17524775 -0.10757874 0.18476346 0.25980472 0.09886934 0.30814531 -0.30672309 -0.00927756 -0.60975119 -0.14668808 0.03265322 0.19673261 -0.03530028 -0.11285591 0.14772563 0.27864426 -0.10311523 -0.21172293 -0.35814708 0.1042695 -0.03706134 0.19600204 -0.24872752 -0.23623717 -0.27154828 -0.53309937 0.11644491 -0.108796 -0.11860072 -0.12478401 0.17179854 0.0179788 0.06609469 0.03399539 -0.16891539 0.65338818 -0.06689487 -0.41351475 -0.11247675 0.22247287 -0.05430595 0.15229692 -0.1267627 -0.33409057 -0.2822526 0.19294731 -0.09861426 -0.03568986 0.28677243 0.06149134 -0.08263764 -0.26492316 0.36749904 0.42704584 0.42509172 0.1097473 0.35410968 -0.12505456 -0.59345382 -0.27992716 -0.18356801 -0.32856005 -0.27886826 0.01089768 -0.1810331 -0.14253432 0.12018746 -0.2276067 0.25153082 -0.01316961 -0.2637732 -0.27916181 0.05193879 0.28711377 0.12331934 0.1486321 0.02538425 -0.15962544 0.22544001 0.1950349 0.28615124 0.35690848 -0.43954099 -0.11264198 -0.22533619 0.26111029 0.31871354 -0.00189822 0.3384831 0.29622957
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-1.59774535e-01 1.35601894e-02 -3.62222175e-02 1.68584011e-01 -1.31787714e-01 -1.24297844e-01 2.00228608e-01 2.06780512e-01 -2.54143463e-01 -2.77011122e-01 -1.53367849e-01 1.52472556e-01 2.46109757e-01 -2.62180461e-01 1.58545554e-02 -9.79784456e-02 -2.73107237e-01 -5.61198686e-01 -3.04103621e-01 5.30389583e-03 1.84090388e-01 -2.44868792e-01 -4.22205325e-02 5.41844292e-02 -3.17535430e-02 1.27135797e-01 7.35597496e-02 8.04776400e-02 8.08645381e-02 -2.36371941e-01 -1.21337518e-01 2.99465684e-01 -4.52230928e-01 1.15666392e-01 -1.37685458e-01 2.73791619e-01 -2.64611059e-01 -2.02074239e-01 7.99144225e-01 -1.48058400e-01 9.66475243e-02 2.30492892e-01 1.98806992e-01 8.87366044e-02 1.53342230e-01 2.59374439e-01 1.66862002e-01 -7.79917430e-03 -4.17507331e-02 -1.70697758e-01 -2.02600688e-01 2.33182573e-01 -5.91029105e-03 2.85857876e-01 1.50252584e-01 1.23244483e-01 -2.15563739e-01 -2.55887540e-01 4.36759099e-01 2.11382618e-01 -1.36944361e-01 5.71780665e-02 1.81034578e-01 -1.15659092e-01 2.38599171e-01 -1.76526333e-01 -2.66821017e-01 -1.57762802e-01 -6.52438807e-01 3.03438873e-01 -1.90448989e-01 1.36690891e-01 2.56505557e-01 6.26730114e-01 1.55345690e-01 6.04323441e-02 1.54829881e-01 6.22165559e-02 1.26596759e-01 5.71306852e-01 -2.71304747e-01 -1.42866346e-02 -1.89114776e-01 -1.20380007e-01 -1.91756510e-01 3.82102600e-01 1.83512358e-01 -1.10905821e-04 2.71975305e-01 8.26141978e-01 -2.44574418e-01 8.06388655e-02 2.17388113e-01 -2.64435435e-01 -1.32786257e-01 1.75763273e-02 2.96415372e-01 -2.80826634e-01 -3.74378166e-02 -2.38737367e-01 -1.64600439e-01 -6.46687298e-01 -1.18838267e-01 2.28615551e-01 -3.60456107e-02 1.02354349e-01 7.00208990e-02 -1.01846005e-01 1.71657818e-01 -1.16689624e-01 1.92315075e-01 3.67106983e-01 2.68170847e-01 2.47345018e-01 -5.67269297e-02 -1.67791377e-01 -8.80511671e-02 1.04767538e-01 -2.11712500e-01 -1.03736686e-01 -1.24706549e-02 9.00582777e-02 5.21616787e-01 8.85927143e-02 1.83159704e-01 6.41675789e-02 -9.95466735e-02 -2.24394985e-01 -5.15215033e-01 7.20427212e-02 -3.18454183e-02 1.56768460e-01 -2.37868608e-02 -1.85783353e-03 -1.55038712e-01 -3.04176675e-02 5.07301340e-02 -9.86613048e-02 -1.88483787e-01 1.40914133e-01 2.73056131e-01 -1.24619872e-01 -2.40882670e-01 2.14382587e-01 -6.91382651e-02 6.95400661e-01 -1.76571711e-01 1.00786554e-01 -3.70676595e-01 1.37287830e-01 -4.64829169e-01 8.10677371e-02 1.13221683e-01 4.59398949e-02 1.76689141e-01 1.28926183e-01 -1.22438741e-01 2.37550251e-02 1.84000557e-01 1.70612222e-01 -9.19960364e-02 -2.50505274e-01 7.83623162e-02 -3.67610080e-01 6.35533834e-01 1.96856852e-01 2.15016094e-01 -2.64781221e-01 1.41865599e-01 -1.47284667e-01 -5.94803556e-02 -7.75125804e-02 -2.53884475e-01 1.71920020e-02 -6.69925865e-02 -1.03723733e-01 -2.25550254e-02 -2.71748762e-01 2.05700156e-01 -4.98145754e-03 -2.16777901e-01 1.25821891e-01 -2.96438880e-02 -2.19661576e-01 1.45792651e-01 1.02842153e-01 2.64636296e-01 -7.05999342e-02 9.47407554e-02 1.78405903e-01 -4.22844709e-01 -3.02994701e-02 -6.79062536e-01 -3.02442348e-01 -1.65240712e-01 1.04986135e-01 1.99250358e-01 -2.22102909e-01 -1.86846348e-01 -3.62570916e-01
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-3.09493479e-01 5.94615993e-02 1.11945979e-01 1.11396423e-01 -1.70141311e-01 -1.58883005e-01 -4.09666650e-01 2.25411924e-01 -2.55438010e-01 -1.57951469e-01 -2.05678833e-01 1.31476580e-01 4.21027042e-01 -1.89730750e-01 1.82046553e-01 -3.69912500e-02 -2.40118253e-01 -5.25553324e-02 2.61666932e-01 1.28508659e-01 8.90590356e-02 -2.24205369e-01 -1.72334348e-01 5.32329481e-02 2.17982663e-01 1.57703706e-01 -3.26094628e-02 2.83884928e-02 -5.55275181e-02 -2.25921674e-01 -1.01595526e-02 -4.84355609e-02 -2.92002581e-01 -4.20418989e-02 -5.84153308e-03 3.33071487e-02 -1.24901753e-01 -1.50873943e-01 -1.35016769e-01 -1.06144996e-01 1.71464627e-02 -4.56029097e-02 -5.23564065e-02 1.05818490e-01 2.57134324e-01 2.67736322e-01 1.53399087e-01 1.85062926e-01 -3.43421860e-01 -1.32021906e-01 -2.94071474e-01 1.72973180e-01 -1.36338914e-01 2.34655626e-01 1.17438530e-01 1.02705190e-01 -1.10883038e-01 -1.67683136e-01 2.00036841e-01 -3.32422622e-02 -1.27502033e-01 1.55422744e-01 2.15150699e-01 -2.22947482e-01 1.70398184e-01 -2.14167859e-01 -2.61530976e-01 -2.06133209e-01 -4.61596606e-01 4.77775631e-02 -1.86417033e-01 1.99552816e-01 -3.45441430e-01 5.03258537e-01 1.85684770e-01 1.86046885e-01 -4.30497707e-01 -8.94611495e-02 2.52809571e-01 3.75293153e-01 7.45091406e-02 1.71003906e-01 -2.06070055e-01 -1.58073941e-01 -2.43528408e-02 -2.57659850e-01 2.16083639e-01 1.01825940e-01 2.08191518e-01 4.83438054e-01 -2.39966915e-01 2.42315323e-01 1.92262930e-01 -2.07308698e-01 -2.27744251e-01 6.02210147e-02 -2.93185454e-02 -2.22294088e-01 -2.06995434e-01 -2.11028706e-01 -1.66563052e-01 -4.85574919e-01 -1.82388095e-01 -5.42742746e-02 -8.23135886e-01 -2.21013840e-01 -4.40681069e-02 2.12629231e-01 1.36213261e-01 1.06658047e-02 1.04830263e-03 1.44372069e-01 5.94634059e-02 2.12221010e-01 -1.14599112e-01 3.95763335e-01 -7.42433721e-02 1.55815986e-01 -3.89856348e-01 7.38338280e-01 5.84981670e-02 2.23452731e-01 -3.63964271e-01 -1.46125066e-01 1.08509729e-01 2.44283535e-01 -1.38359246e-01 -1.26746461e-01 -9.85321453e-01 -1.34348087e-01 -2.61074940e-01 1.78749183e-01 -1.18572059e-01 4.70259617e-02 -2.36366936e-01 1.35290417e-01 -1.54890058e-01 9.80860852e-02 -1.27319184e-01 5.85905727e-02 1.38498984e-01 -1.18930202e-01 -1.83147297e-01 1.77572835e-01 -1.34141367e-01 1.27452504e+00 1.84194519e-01 -2.14419861e-01 -1.65492920e-01 1.36767898e-01 1.17405707e-01 2.65861717e-02 -3.84100396e-01 3.24110383e-02 -7.35314764e-02 1.75616971e-01 2.39121538e-02 1.37870375e-01 1.78037625e-01 1.68168970e-01 -1.22840706e-01 -2.20283540e-01 1.89091101e-01 2.09259033e-01 -1.22405056e+00 1.58515297e-01 2.17919184e-01 -1.99272248e-01 -1.80702019e-01 -1.04559933e-01 -2.07485549e-01 7.96664902e-02 -2.00619006e-01 -6.98733066e-01 2.63047121e-02 -1.58938573e-01 1.89042325e-02 1.38065401e-01 1.48210255e-01 9.43055024e-02 -2.46891821e-01 -2.45973843e-01 3.59452876e-01 1.71412638e-01 -4.48088575e-02 1.95162914e-01 -1.05858767e-01 1.85876795e-01 1.71215244e-01 3.10503064e-01 -9.07120405e-01 2.01275062e-01 -4.48620514e-01 9.75750336e-02 -1.92659790e-01 1.59513384e-01 1.60265219e-01 6.44474712e-02 -1.31309510e-01 2.44154373e-01
4,ㅇㅝㄹ,ㅇㅝㄹ-2.07543174e-01 -1.01263833e-01 1.26638564e-01 9.36565856e-02 -1.52148504e-01 1.67678009e-02 3.85973799e-01 1.92930432e-01 -2.20342473e-01 -2.15635997e-01 -8.36786198e-02 1.72399451e-01 8.19512594e-02 -1.66362758e-01 1.13756677e-01 -3.09594066e-02 -2.23089370e-01 -3.50094682e-01 7.07086750e-02 6.94877104e-02 1.13086698e-01 -2.40972983e-01 -1.04089444e-01 1.45626799e-01 -5.98449794e-02 1.91787404e-01 -5.65803433e-02 -2.43458007e-01 7.71896009e-01 -1.87345933e-01 -9.98802675e-02 -2.47249012e-01 -3.25623852e-01 -3.63292452e-01 1.39537682e-01 1.21925615e-01 3.90533593e-02 -1.77712546e-01 5.14731260e-02 -7.43622051e-02 9.44772015e-02 2.17048715e-01 1.31218236e-02 1.58331938e-01 -1.00107470e-01 2.10442804e-01 -6.12985361e-02 1.86838954e-01 1.64127237e-01 1.86699033e-01 -2.51803599e-01 1.39279089e-01 3.15918240e-03 2.42418761e-01 -3.46683449e-02 1.38016214e-02 -5.36560896e-02 -1.77220349e-01 1.13158058e-01 5.90045819e-01 6.50217006e-02 5.76124963e-02 1.67716102e-01 -2.05330278e-01 2.01460131e-01 -1.93439165e-01 -2.33788064e-01 -1.93770253e-01 -3.69522025e-02 4.52333271e-01 -1.71640225e-01 1.44403648e-01 5.46992919e-01 -3.76822803e-01 1.37138204e-01 -2.42360373e-01 6.27278760e-01 1.92468103e-01 -4.25906023e-01 -1.83173940e-01 -7.22082004e-01 -2.92950938e-01 -1.54356470e-01 -1.49266241e-01 -1.21510116e-01 2.49981859e-01 1.90484674e-01 -2.24703508e-01 1.10174135e-01 3.05294448e-02 -1.74417774e-01 6.52179239e-01 2.55612784e-01 1.63754743e-01 -1.40298269e-01 -6.96529239e-02 -1.15197296e-01 -1.12457185e-01 -1.91360648e-01 -2.20644781e-01 -1.13727032e-01 -5.69626472e-01 -1.33273956e-01 -6.59166633e-04 -2.45182579e-01 2.47954587e-01 9.37162451e-02 -3.30785260e-02 1.54434188e-01 1.05433836e-01 1.61869395e-01 -4.16800339e-01 1.78003039e-01 1.08066788e-01 4.87426430e-01 -1.34939466e-01 -1.14443743e-01 1.84902759e-01 -3.18457480e-01 2.15180926e-01 2.11916558e-01 8.92627609e-02 6.60362507e-01 3.37890619e-01 9.84967576e-02 1.13256852e-01 -1.67288098e-01 -1.60780883e-01 -5.24549544e-01 6.48892316e-02 -1.97777472e-01 1.83043419e-01 -1.57161905e-01 7.47437323e-02 -2.12751595e-01 3.97028490e-02 -1.29083648e-01 -4.11200696e-01 -1.43165755e-01 -4.87978000e-02 1.62505604e-01 -1.12123034e-01 2.17820616e-02 2.00909486e-01 -9.43656613e-02 1.01302617e-01 -3.64311214e-01 5.07613794e-01 3.94370917e-02 1.66529309e-01 1.51838193e-01 7.40891890e-02 -1.71141511e-01 1.81023410e-01 -6.92126992e-02 9.02600565e-02 -1.11472314e-01 1.13758591e-01 1.71081352e-01 1.41558394e-01 -1.81943980e-01 -2.15555957e-01 -1.14436321e-01 -4.30688901e-01 2.36485530e-02 1.75774379e-01 2.62191743e-01 -1.97027935e-01 -3.73835512e-01 -1.51544333e-01 -1.20699944e-01 2.44332789e-01 -1.84968239e-01 -8.70590447e-01 -1.63512853e-01 -1.19869269e-01 -2.95490953e-01 -2.68816005e-03 1.48533264e-01 -6.81354214e-02 -1.86784827e-01 1.60469802e-01 -1.42405910e-01 -3.10615624e-01 3.25235341e-02 1.48908569e-01 7.32271722e-02 2.47296963e-02 6.81094160e-02 2.27964696e-01 -3.24181395e-01 1.42529717e-01 -1.13509342e-02 8.09770566e-02 -1.74362306e-01 1.85492757e-01 2.23699225e-01 -6.10087039e-01 7.63579517e-01 6.01198398e-02


In [25]:
np.savetxt('../KB_NLP/glove.200D.100E.txt',glove_embedding['vectors'],fmt='%s')

---
## 1000 epochs, max loss

In [31]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42,max_loss=0.0002)
glove.fit(x.tocoo(), epochs=1000, no_threads=4, verbose=True)

Performing 1000 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
Epoch 100
Epoch 101
Epoch 102
Epoch 103
Epoch 104
Epoch 105
Epo

---
# make every 10 th embeddings with 200dim

In [34]:
%%time
for k in range(10,210,10):
    glove = Glove(no_components=200, learning_rate=0.01, random_state=42,)
    glove.fit(x.tocoo(), epochs=k, no_threads=4, verbose=False)
    dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
    glove.add_dictionary(dictionary)
    
    glove_embedding = pd.DataFrame()
    glove_embedding['key'] = list(glove.dictionary.keys())
    glove_embedding['vectors'] = list(glove.word_vectors)

    glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
    glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']
    for i in tqdm(range(len(glove_embedding))):
        glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')
    np.savetxt('../KB_NLP/glove_embedding/glove.200D.{}E.txt'.format(k),glove_embedding['vectors'],fmt='%s')
    print("{}th embedding DONE".format(k))

100%|██████████| 38890/38890 [00:07<00:00, 5170.59it/s]


10th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5178.05it/s]


20th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5293.16it/s]


30th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5151.79it/s]


40th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5480.20it/s]


50th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5184.51it/s]


60th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5387.93it/s]


70th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5193.58it/s]


80th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5534.20it/s]


90th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5108.40it/s]


100th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5220.91it/s]


110th embedding DONE


100%|██████████| 38890/38890 [00:06<00:00, 5618.49it/s]


120th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5166.17it/s]


130th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5455.86it/s]


140th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5236.72it/s]


150th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5522.14it/s]


160th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5152.22it/s]


170th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5440.41it/s]


180th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5276.32it/s]


190th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5478.10it/s]


200th embedding DONE
CPU times: user 1h 57min 17s, sys: 8.18 s, total: 1h 57min 26s
Wall time: 41min 18s


---
## 1000 epochs

In [16]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42,)
glove.fit(x.tocoo(), epochs=1000, no_threads=4, verbose=True)

Performing 100 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
CPU times: user 5min 2s, sys: 47 ms, total: 5min 2s
Wall time: 1

In [22]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)
print("shape: {}".format(glove.word_vectors.shape))

glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)

glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']

In [23]:
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')

100%|██████████| 38890/38890 [00:07<00:00, 5394.15it/s]


In [24]:
glove_embedding.head()

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.04014644 0.14591996 0.21770952 0.27505619 -0.21774962 -0.3299259 -0.34875664 0.15955163 -0.30599767 -0.09654434 -0.36265585 0.36642118 0.58957813 -0.25477741 -0.77548881 -0.16812614 -0.30386485 0.3014517 0.11541772 0.23872045 0.23607387 -0.32231933 0.65029271 -0.57035232 0.3518528 0.2721458 0.29503168 -0.24414353 0.24229787 -0.27020786 0.02250393 -0.64149403 -0.18691672 0.57878591 0.04031945 0.25008333 -0.11378724 -0.24900752 -0.36675543 -0.16945304 -0.26521683 0.47874288 -0.39373579 0.35531035 -0.0364649 0.24267091 0.28361149 0.0727594 -0.18305883 0.18589462 0.01891854 0.31339466 0.2110837 0.2616362 0.1850178 0.30698852 -0.34319437 -0.33101811 -0.0366438 -0.10888972 -0.33897783 -0.18377894 0.22498183 -0.16396334 0.17689501 -0.09917777 -0.23181384 -0.07816078 -0.32654061 0.39174611 0.09102456 0.05801771 -0.5810237 -0.15678637 0.12635572 0.00321863 0.06294066 -0.35182788 -0.04854145 -0.12647429 0.35109869 0.40514648 -0.21645254 -0.19755682 -0.32289444 -0.30027708 0.34837958 -0.04615759 0.35337252 -0.06437188 -0.30470855 0.79435022 0.25375599 -0.39471896 -0.04646276 0.28683884 0.56763656 -0.34304805 -0.13901353 -0.25175754 -0.36208369 0.05205259 -0.22300126 0.2905658 -0.36620431 -0.54267439 -0.55635134 -0.13487925 0.12809894 -0.27533946 -0.09402707 -0.13418316 0.31926451 0.25038203 0.56734689 0.50725485 0.13491343 -0.09576212 0.56955517 0.04188273 -0.26275488 0.27024855 -0.4486555 -0.37352145 -0.07066747 0.19620842 -0.05598441 -0.13595502 0.37554276 -0.24108017 -0.14587224 -0.02038195 -0.28231163 0.27694228 0.03949041 -0.36484638 0.25809373 0.03587891 -0.25194114 0.38679681 0.3846123 -0.36218281 -0.54193586 0.05941491 -0.22653054 -0.19972402 0.68535539 -0.20749228 -0.20579772 0.10440364 0.28986191 -0.41898958 -0.41396729 -0.21446917 -0.01976362 -0.33942792 0.55116389 0.35188357 0.22111976 0.29210891 -0.15658744 -0.19720351 0.36857953 0.45014699 -0.14041504 0.18623169 0.14295688 -0.2884932 -0.26274614 -0.16985756 -0.02450327 -0.59542114 -0.22030671 -0.15137482 0.10464277 -0.2418877 0.27974102 0.15631665 0.33281475 -0.11659845 -0.13164111 -0.41712243 0.2061083 0.58005565 -0.50544492 0.27397353 0.23080702 0.3383612 0.28207726 0.05866299 0.24901911 0.72161691 0.10578238 -0.03012658 -0.1668963 0.1184556 0.24818298 0.11677119 0.49216082 -0.1081244
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.1572792 -0.08054372 -0.27841671 0.21973004 0.13457085 -0.27744223 -0.40084188 0.30184945 -0.37914002 -0.26272903 -0.16365064 0.11286745 0.38496305 -0.33934461 -0.37365978 -0.29523862 -0.3034842 -0.08834723 -0.07348613 0.1021175 0.12998961 -0.30811428 -0.39247292 0.01906075 -0.07931723 0.05273994 0.6745661 -0.50186203 0.77431466 -0.23300256 -0.07766038 0.11865698 -0.09071159 0.1243992 -0.03220683 -0.39791699 -0.14834166 -0.32564888 0.32238752 -0.19313827 0.18513157 0.36449502 0.63571523 0.04075512 0.08497423 0.34870121 -0.08026315 0.2484782 0.15154243 0.14718999 -0.21447081 0.12459898 0.75769977 0.25016583 0.09443411 0.23656591 0.13067121 -0.20097059 0.20186888 -0.33050099 -0.03671781 0.18318971 0.07951071 -0.21976909 0.29548061 -0.14395485 -0.30520211 -0.2166729 -0.43158454 -0.0533822 -0.06413475 0.27059307 0.30961291 0.66331317 0.25816638 -0.01067129 0.68257689 -0.27374658 -0.04536977 0.74422488 0.35760636 -0.03790119 -0.24207401 -0.18475701 0.04828209 -0.30084225 0.19306438 0.11154074 0.20647254 0.53436873 -0.29766159 -0.2799169 0.28007286 -0.26512236 -0.24766068 -0.01053994 0.5284233 -0.31237856 -0.05439603 -0.33908246 -0.20242762 0.00664636 -0.13741498 0.03119037 -0.39012542 0.31199905 0.10927213 0.19657683 0.20031728 0.01557362 0.17524775 -0.10757874 0.18476346 0.25980472 0.09886934 0.30814531 -0.30672309 -0.00927756 -0.60975119 -0.14668808 0.03265322 0.19673261 -0.03530028 -0.11285591 0.14772563 0.27864426 -0.10311523 -0.21172293 -0.35814708 0.1042695 -0.03706134 0.19600204 -0.24872752 -0.23623717 -0.27154828 -0.53309937 0.11644491 -0.108796 -0.11860072 -0.12478401 0.17179854 0.0179788 0.06609469 0.03399539 -0.16891539 0.65338818 -0.06689487 -0.41351475 -0.11247675 0.22247287 -0.05430595 0.15229692 -0.1267627 -0.33409057 -0.2822526 0.19294731 -0.09861426 -0.03568986 0.28677243 0.06149134 -0.08263764 -0.26492316 0.36749904 0.42704584 0.42509172 0.1097473 0.35410968 -0.12505456 -0.59345382 -0.27992716 -0.18356801 -0.32856005 -0.27886826 0.01089768 -0.1810331 -0.14253432 0.12018746 -0.2276067 0.25153082 -0.01316961 -0.2637732 -0.27916181 0.05193879 0.28711377 0.12331934 0.1486321 0.02538425 -0.15962544 0.22544001 0.1950349 0.28615124 0.35690848 -0.43954099 -0.11264198 -0.22533619 0.26111029 0.31871354 -0.00189822 0.3384831 0.29622957
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-1.59774535e-01 1.35601894e-02 -3.62222175e-02 1.68584011e-01 -1.31787714e-01 -1.24297844e-01 2.00228608e-01 2.06780512e-01 -2.54143463e-01 -2.77011122e-01 -1.53367849e-01 1.52472556e-01 2.46109757e-01 -2.62180461e-01 1.58545554e-02 -9.79784456e-02 -2.73107237e-01 -5.61198686e-01 -3.04103621e-01 5.30389583e-03 1.84090388e-01 -2.44868792e-01 -4.22205325e-02 5.41844292e-02 -3.17535430e-02 1.27135797e-01 7.35597496e-02 8.04776400e-02 8.08645381e-02 -2.36371941e-01 -1.21337518e-01 2.99465684e-01 -4.52230928e-01 1.15666392e-01 -1.37685458e-01 2.73791619e-01 -2.64611059e-01 -2.02074239e-01 7.99144225e-01 -1.48058400e-01 9.66475243e-02 2.30492892e-01 1.98806992e-01 8.87366044e-02 1.53342230e-01 2.59374439e-01 1.66862002e-01 -7.79917430e-03 -4.17507331e-02 -1.70697758e-01 -2.02600688e-01 2.33182573e-01 -5.91029105e-03 2.85857876e-01 1.50252584e-01 1.23244483e-01 -2.15563739e-01 -2.55887540e-01 4.36759099e-01 2.11382618e-01 -1.36944361e-01 5.71780665e-02 1.81034578e-01 -1.15659092e-01 2.38599171e-01 -1.76526333e-01 -2.66821017e-01 -1.57762802e-01 -6.52438807e-01 3.03438873e-01 -1.90448989e-01 1.36690891e-01 2.56505557e-01 6.26730114e-01 1.55345690e-01 6.04323441e-02 1.54829881e-01 6.22165559e-02 1.26596759e-01 5.71306852e-01 -2.71304747e-01 -1.42866346e-02 -1.89114776e-01 -1.20380007e-01 -1.91756510e-01 3.82102600e-01 1.83512358e-01 -1.10905821e-04 2.71975305e-01 8.26141978e-01 -2.44574418e-01 8.06388655e-02 2.17388113e-01 -2.64435435e-01 -1.32786257e-01 1.75763273e-02 2.96415372e-01 -2.80826634e-01 -3.74378166e-02 -2.38737367e-01 -1.64600439e-01 -6.46687298e-01 -1.18838267e-01 2.28615551e-01 -3.60456107e-02 1.02354349e-01 7.00208990e-02 -1.01846005e-01 1.71657818e-01 -1.16689624e-01 1.92315075e-01 3.67106983e-01 2.68170847e-01 2.47345018e-01 -5.67269297e-02 -1.67791377e-01 -8.80511671e-02 1.04767538e-01 -2.11712500e-01 -1.03736686e-01 -1.24706549e-02 9.00582777e-02 5.21616787e-01 8.85927143e-02 1.83159704e-01 6.41675789e-02 -9.95466735e-02 -2.24394985e-01 -5.15215033e-01 7.20427212e-02 -3.18454183e-02 1.56768460e-01 -2.37868608e-02 -1.85783353e-03 -1.55038712e-01 -3.04176675e-02 5.07301340e-02 -9.86613048e-02 -1.88483787e-01 1.40914133e-01 2.73056131e-01 -1.24619872e-01 -2.40882670e-01 2.14382587e-01 -6.91382651e-02 6.95400661e-01 -1.76571711e-01 1.00786554e-01 -3.70676595e-01 1.37287830e-01 -4.64829169e-01 8.10677371e-02 1.13221683e-01 4.59398949e-02 1.76689141e-01 1.28926183e-01 -1.22438741e-01 2.37550251e-02 1.84000557e-01 1.70612222e-01 -9.19960364e-02 -2.50505274e-01 7.83623162e-02 -3.67610080e-01 6.35533834e-01 1.96856852e-01 2.15016094e-01 -2.64781221e-01 1.41865599e-01 -1.47284667e-01 -5.94803556e-02 -7.75125804e-02 -2.53884475e-01 1.71920020e-02 -6.69925865e-02 -1.03723733e-01 -2.25550254e-02 -2.71748762e-01 2.05700156e-01 -4.98145754e-03 -2.16777901e-01 1.25821891e-01 -2.96438880e-02 -2.19661576e-01 1.45792651e-01 1.02842153e-01 2.64636296e-01 -7.05999342e-02 9.47407554e-02 1.78405903e-01 -4.22844709e-01 -3.02994701e-02 -6.79062536e-01 -3.02442348e-01 -1.65240712e-01 1.04986135e-01 1.99250358e-01 -2.22102909e-01 -1.86846348e-01 -3.62570916e-01
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-3.09493479e-01 5.94615993e-02 1.11945979e-01 1.11396423e-01 -1.70141311e-01 -1.58883005e-01 -4.09666650e-01 2.25411924e-01 -2.55438010e-01 -1.57951469e-01 -2.05678833e-01 1.31476580e-01 4.21027042e-01 -1.89730750e-01 1.82046553e-01 -3.69912500e-02 -2.40118253e-01 -5.25553324e-02 2.61666932e-01 1.28508659e-01 8.90590356e-02 -2.24205369e-01 -1.72334348e-01 5.32329481e-02 2.17982663e-01 1.57703706e-01 -3.26094628e-02 2.83884928e-02 -5.55275181e-02 -2.25921674e-01 -1.01595526e-02 -4.84355609e-02 -2.92002581e-01 -4.20418989e-02 -5.84153308e-03 3.33071487e-02 -1.24901753e-01 -1.50873943e-01 -1.35016769e-01 -1.06144996e-01 1.71464627e-02 -4.56029097e-02 -5.23564065e-02 1.05818490e-01 2.57134324e-01 2.67736322e-01 1.53399087e-01 1.85062926e-01 -3.43421860e-01 -1.32021906e-01 -2.94071474e-01 1.72973180e-01 -1.36338914e-01 2.34655626e-01 1.17438530e-01 1.02705190e-01 -1.10883038e-01 -1.67683136e-01 2.00036841e-01 -3.32422622e-02 -1.27502033e-01 1.55422744e-01 2.15150699e-01 -2.22947482e-01 1.70398184e-01 -2.14167859e-01 -2.61530976e-01 -2.06133209e-01 -4.61596606e-01 4.77775631e-02 -1.86417033e-01 1.99552816e-01 -3.45441430e-01 5.03258537e-01 1.85684770e-01 1.86046885e-01 -4.30497707e-01 -8.94611495e-02 2.52809571e-01 3.75293153e-01 7.45091406e-02 1.71003906e-01 -2.06070055e-01 -1.58073941e-01 -2.43528408e-02 -2.57659850e-01 2.16083639e-01 1.01825940e-01 2.08191518e-01 4.83438054e-01 -2.39966915e-01 2.42315323e-01 1.92262930e-01 -2.07308698e-01 -2.27744251e-01 6.02210147e-02 -2.93185454e-02 -2.22294088e-01 -2.06995434e-01 -2.11028706e-01 -1.66563052e-01 -4.85574919e-01 -1.82388095e-01 -5.42742746e-02 -8.23135886e-01 -2.21013840e-01 -4.40681069e-02 2.12629231e-01 1.36213261e-01 1.06658047e-02 1.04830263e-03 1.44372069e-01 5.94634059e-02 2.12221010e-01 -1.14599112e-01 3.95763335e-01 -7.42433721e-02 1.55815986e-01 -3.89856348e-01 7.38338280e-01 5.84981670e-02 2.23452731e-01 -3.63964271e-01 -1.46125066e-01 1.08509729e-01 2.44283535e-01 -1.38359246e-01 -1.26746461e-01 -9.85321453e-01 -1.34348087e-01 -2.61074940e-01 1.78749183e-01 -1.18572059e-01 4.70259617e-02 -2.36366936e-01 1.35290417e-01 -1.54890058e-01 9.80860852e-02 -1.27319184e-01 5.85905727e-02 1.38498984e-01 -1.18930202e-01 -1.83147297e-01 1.77572835e-01 -1.34141367e-01 1.27452504e+00 1.84194519e-01 -2.14419861e-01 -1.65492920e-01 1.36767898e-01 1.17405707e-01 2.65861717e-02 -3.84100396e-01 3.24110383e-02 -7.35314764e-02 1.75616971e-01 2.39121538e-02 1.37870375e-01 1.78037625e-01 1.68168970e-01 -1.22840706e-01 -2.20283540e-01 1.89091101e-01 2.09259033e-01 -1.22405056e+00 1.58515297e-01 2.17919184e-01 -1.99272248e-01 -1.80702019e-01 -1.04559933e-01 -2.07485549e-01 7.96664902e-02 -2.00619006e-01 -6.98733066e-01 2.63047121e-02 -1.58938573e-01 1.89042325e-02 1.38065401e-01 1.48210255e-01 9.43055024e-02 -2.46891821e-01 -2.45973843e-01 3.59452876e-01 1.71412638e-01 -4.48088575e-02 1.95162914e-01 -1.05858767e-01 1.85876795e-01 1.71215244e-01 3.10503064e-01 -9.07120405e-01 2.01275062e-01 -4.48620514e-01 9.75750336e-02 -1.92659790e-01 1.59513384e-01 1.60265219e-01 6.44474712e-02 -1.31309510e-01 2.44154373e-01
4,ㅇㅝㄹ,ㅇㅝㄹ-2.07543174e-01 -1.01263833e-01 1.26638564e-01 9.36565856e-02 -1.52148504e-01 1.67678009e-02 3.85973799e-01 1.92930432e-01 -2.20342473e-01 -2.15635997e-01 -8.36786198e-02 1.72399451e-01 8.19512594e-02 -1.66362758e-01 1.13756677e-01 -3.09594066e-02 -2.23089370e-01 -3.50094682e-01 7.07086750e-02 6.94877104e-02 1.13086698e-01 -2.40972983e-01 -1.04089444e-01 1.45626799e-01 -5.98449794e-02 1.91787404e-01 -5.65803433e-02 -2.43458007e-01 7.71896009e-01 -1.87345933e-01 -9.98802675e-02 -2.47249012e-01 -3.25623852e-01 -3.63292452e-01 1.39537682e-01 1.21925615e-01 3.90533593e-02 -1.77712546e-01 5.14731260e-02 -7.43622051e-02 9.44772015e-02 2.17048715e-01 1.31218236e-02 1.58331938e-01 -1.00107470e-01 2.10442804e-01 -6.12985361e-02 1.86838954e-01 1.64127237e-01 1.86699033e-01 -2.51803599e-01 1.39279089e-01 3.15918240e-03 2.42418761e-01 -3.46683449e-02 1.38016214e-02 -5.36560896e-02 -1.77220349e-01 1.13158058e-01 5.90045819e-01 6.50217006e-02 5.76124963e-02 1.67716102e-01 -2.05330278e-01 2.01460131e-01 -1.93439165e-01 -2.33788064e-01 -1.93770253e-01 -3.69522025e-02 4.52333271e-01 -1.71640225e-01 1.44403648e-01 5.46992919e-01 -3.76822803e-01 1.37138204e-01 -2.42360373e-01 6.27278760e-01 1.92468103e-01 -4.25906023e-01 -1.83173940e-01 -7.22082004e-01 -2.92950938e-01 -1.54356470e-01 -1.49266241e-01 -1.21510116e-01 2.49981859e-01 1.90484674e-01 -2.24703508e-01 1.10174135e-01 3.05294448e-02 -1.74417774e-01 6.52179239e-01 2.55612784e-01 1.63754743e-01 -1.40298269e-01 -6.96529239e-02 -1.15197296e-01 -1.12457185e-01 -1.91360648e-01 -2.20644781e-01 -1.13727032e-01 -5.69626472e-01 -1.33273956e-01 -6.59166633e-04 -2.45182579e-01 2.47954587e-01 9.37162451e-02 -3.30785260e-02 1.54434188e-01 1.05433836e-01 1.61869395e-01 -4.16800339e-01 1.78003039e-01 1.08066788e-01 4.87426430e-01 -1.34939466e-01 -1.14443743e-01 1.84902759e-01 -3.18457480e-01 2.15180926e-01 2.11916558e-01 8.92627609e-02 6.60362507e-01 3.37890619e-01 9.84967576e-02 1.13256852e-01 -1.67288098e-01 -1.60780883e-01 -5.24549544e-01 6.48892316e-02 -1.97777472e-01 1.83043419e-01 -1.57161905e-01 7.47437323e-02 -2.12751595e-01 3.97028490e-02 -1.29083648e-01 -4.11200696e-01 -1.43165755e-01 -4.87978000e-02 1.62505604e-01 -1.12123034e-01 2.17820616e-02 2.00909486e-01 -9.43656613e-02 1.01302617e-01 -3.64311214e-01 5.07613794e-01 3.94370917e-02 1.66529309e-01 1.51838193e-01 7.40891890e-02 -1.71141511e-01 1.81023410e-01 -6.92126992e-02 9.02600565e-02 -1.11472314e-01 1.13758591e-01 1.71081352e-01 1.41558394e-01 -1.81943980e-01 -2.15555957e-01 -1.14436321e-01 -4.30688901e-01 2.36485530e-02 1.75774379e-01 2.62191743e-01 -1.97027935e-01 -3.73835512e-01 -1.51544333e-01 -1.20699944e-01 2.44332789e-01 -1.84968239e-01 -8.70590447e-01 -1.63512853e-01 -1.19869269e-01 -2.95490953e-01 -2.68816005e-03 1.48533264e-01 -6.81354214e-02 -1.86784827e-01 1.60469802e-01 -1.42405910e-01 -3.10615624e-01 3.25235341e-02 1.48908569e-01 7.32271722e-02 2.47296963e-02 6.81094160e-02 2.27964696e-01 -3.24181395e-01 1.42529717e-01 -1.13509342e-02 8.09770566e-02 -1.74362306e-01 1.85492757e-01 2.23699225e-01 -6.10087039e-01 7.63579517e-01 6.01198398e-02


In [25]:
np.savetxt('../KB_NLP/glove.200D.1000E.txt',glove_embedding['vectors'],fmt='%s')