ref: https://www.kaggle.com/hamishdickson/bidirectional-lstm-in-keras-with-glove-embeddings

ref: https://lovit.github.io/nlp/representation/2018/09/05/glove/

In [1]:
import pandas as pd
import time
import os, gc
import numpy as np
from tqdm import tqdm
import random

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import *
from keras.utils.np_utils import to_categorical
from keras.initializers import Constant
import re

import matplotlib.pyplot as plt
%matplotlib inline

import torch

Using TensorFlow backend.


In [2]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(42)

In [3]:
%%time
#train_df=pd.read_csv("../KB_NLP/morphs/komo_morphs_train.csv")
#test_df=pd.read_csv("../KB_NLP/morphs/komo_morphs_test.csv")
train_df=pd.read_csv("../KB_NLP/raw_data/train.csv")
test_df=pd.read_csv("../KB_NLP/raw_data/public_test.csv")

CPU times: user 906 ms, sys: 61.3 ms, total: 968 ms
Wall time: 967 ms


In [4]:
pd.set_option('display.max_colwidth',-1)
train_df.head()

Unnamed: 0,id,year_month,text,smishing
0,0,2017-01,XXX은행성산XXX팀장입니다.행복한주말되세요,0
1,1,2017-01,오늘도많이웃으시는하루시작하세요XXX은행 진월동VIP라운지 XXX올림,0
2,2,2017-01,안녕하십니까 고객님. XXX은행입니다.금일 납부하셔야 할 금액은 153600원 입니다.감사합니다. 새해 복 많이 받으십시오.XXX은행옥포XXX올림,0
3,4,2017-01,XXX 고객님안녕하세요XXX은행 XXX지점입니다지난 한 해 동안 저희 XXX지점에 보내주신 성원에 감사드립니다. 설렘으로 시작한 2017년소망하시는 일 모두 이XXX 고객님의 가정에 늘 건강과 행복이 함께하길 기원하겠습니다. 사랑하는 가족과 함께 정을 나누는 행복한 설 명절 보내세요 XXX은행 XXX지점직원일동,0
4,5,2017-01,1월은 새로움이 가득XXX입니다.올 한해 더 많이행복한 한해되시길바랍니다,0


In [5]:
from soynlp.hangle import decompose

doublespace_pattern = re.compile('\s+')

def jamo_sentence(sent):

    def transform(char):
        if char == ' ':
            return char
        cjj = decompose(char)
        try:
            len(cjj)
        except:
            return ' '
        if len(cjj) == 1:
            return cjj
        cjj_ = ''.join(c if c != ' ' else '' for c in cjj)
        return cjj_

    sent_ = ''.join(transform(char) for char in sent)
    sent_ = doublespace_pattern.sub(' ', sent_)
    return sent_
# 'ㅇㅓ-ㅇㅣ-ㄱㅗ- ㅋㅔㄱㅋㅔㄱ ㅇㅏ-ㅇㅣ-ㄱㅗ-ㅇㅗ-'

jamo_sentence(train_df.loc[0, 'text'])

' ㅇㅡㄴㅎㅐㅇㅅㅓㅇㅅㅏㄴ ㅌㅣㅁㅈㅏㅇㅇㅣㅂㄴㅣㄷㅏ ㅎㅐㅇㅂㅗㄱㅎㅏㄴㅈㅜㅁㅏㄹㄷㅚㅅㅔㅇㅛ'

In [6]:
%%time
test_df['jamo'] = test_df['text'].apply(lambda x: jamo_sentence(x))
train_df['jamo'] = train_df['text'].apply(lambda x: jamo_sentence(x))

CPU times: user 1min, sys: 276 ms, total: 1min
Wall time: 1min


In [7]:
%%time
train_df.to_csv("../KB_NLP/jamo_data/jamo_train.csv", index=False)
test_df.to_csv("../KB_NLP/jamo_data/jamo_test.csv", index=False)

CPU times: user 2.83 s, sys: 196 ms, total: 3.02 s
Wall time: 5.74 s


In [8]:
raw_corpus_fname = '\n'.join(pd.concat([train_df['jamo'], test_df['jamo']]))
file=open('../KB_NLP/jamo_text.txt','w')
file.write(raw_corpus_fname)
file.close()

In [9]:
jamo_jumo = pd.concat([train_df['jamo'], test_df['jamo']])

In [10]:
input_sentences = list(jamo_jumo)

### window 1

In [11]:
from soynlp.utils import DoublespaceLineCorpus
from soynlp.vectorizer import sent_to_word_contexts_matrix

#corpus_path = '2016-10-20_article_all_normed_ltokenize.txt'
#corpus = DoublespaceLineCorpus(corpus_path, iter_sent=True)

x, idx2vocab = sent_to_word_contexts_matrix(
    input_sentences,
    windows=1,
    min_tf=10,
    tokenizer=lambda x:x.split(), # (default) lambda x:x.split(),
    dynamic_weight=True,
    verbose=True)

print(x.shape) # (36002, 36002)

Create (word, contexts) matrix
  - counting word frequency from 297570 sents, mem=1.015 Gb
  - scanning (word, context) pairs from 297570 sents, mem=1.133 Gb
  - (word, context) matrix was constructed. shape = (38890, 38890)                    
  - done
(38890, 38890)


In [12]:
from glove import Corpus, Glove

In [13]:
glove = Glove(no_components=200, learning_rate=0.01, random_state=42)
glove.fit(x.tocoo(), epochs=5, no_threads=4, verbose=True)

Performing 5 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4


In [14]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)

In [15]:
glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)
glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')
glove_embedding.head()

100%|██████████| 38890/38890 [00:07<00:00, 5092.39it/s]


Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.26324613 -0.02368496 0.17138936 0.31919709 -0.00689488 -0.44605252 -0.48537465 0.31311969 -0.27306892 -0.3357058 -0.45830763 0.43211633 0.47088084 -0.39937638 0.08947951 -0.40416604 -0.45353923 0.32876612 0.36043311 0.19995081 0.38421529 -0.39928572 0.15184268 0.01312085 -0.06739303 0.3669841 -0.3326004 0.20394546 -0.3291216 -0.30769685 -0.19931577 -0.2018717 -0.39794443 0.18234525 0.34545889 0.22885314 0.14080159 -0.42226852 0.44131197 -0.27124807 0.26018056 0.48561258 -0.44012502 0.40871661 -0.21198955 0.32536252 0.20979982 0.40596122 -0.2680429 -0.35655377 -0.16324139 0.35765051 0.63067254 0.33770794 -0.40297928 0.40558892 -0.42941914 -0.42061108 -0.32547986 -0.37210191 -0.35845763 0.32935819 0.0761545 -0.31737562 0.3506474 -0.25521387 -0.3401427 -0.30469031 -0.47357609 0.28129283 0.25701576 -0.38337743 0.20355264 0.35353469 0.35608287 -0.18183028 0.58787545 -0.11258498 0.1264746 -0.44819501 0.30213598 0.23646804 -0.34526207 -0.41680279 -0.32883283 -0.48248721 0.26442236 -0.07552682 0.36177159 0.10534863 -0.38082651 -0.08936218 0.32863151 -0.4269309 -0.28959362 0.2240692 -0.23371203 -0.4738351 -0.31996831 -0.43013864 -0.45742153 0.49453867 0.10599406 0.22118925 0.39092379 -0.45031061 -0.52173794 0.35475314 0.32576598 -0.14111253 0.30286463 -0.43283955 0.32750013 0.36729262 0.47768621 0.39781853 -0.1007444 0.09847619 0.44335116 -0.34536784 0.29290384 0.44129907 0.2028668 -0.24157026 -0.02368141 0.30631031 0.20524209 -0.1190077 -0.52472569 0.25885506 -0.3380466 0.37261 -0.42373552 0.28293189 -0.15180832 -0.02618632 -0.21165631 0.43343501 -0.22762538 -0.13681001 0.48857164 -0.3827457 0.40211896 0.26774224 -0.24846846 -0.32445273 0.5817532 -0.43216049 -0.36291881 0.47065761 0.37897837 0.25206909 -0.11672433 0.11441597 -0.22376758 0.16660629 0.64783438 0.30849193 0.09992503 0.34579599 -0.23138357 -0.23267252 -0.31017169 -0.10552328 -0.47186511 0.37932072 0.3544638 -0.3718918 -0.47914602 -0.20357382 -0.07177603 -0.08657376 -0.37860146 -0.28813909 -0.22686767 -0.38728035 0.10554038 -0.31160923 0.36114391 -0.29160718 -0.42616864 -0.03577871 -0.21910479 0.50787824 -0.12274066 0.27736672 0.4060941 -0.11171136 0.16975818 0.24908089 -0.43790707 0.61148535 0.49996092 0.32663876 -0.32662898 0.2597609 0.36299223 0.37074482 0.4155068 0.35441925
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.17145332 -0.20260989 0.11733715 0.28826444 0.21289365 -0.27726599 -0.29386979 0.30689655 -0.35514082 -0.3096952 -0.21365526 0.25867431 0.10199709 -0.37072184 0.15710515 -0.28837153 -0.27488793 0.22558779 0.23741683 0.15106138 0.12315886 -0.32122207 0.03742321 0.11340612 -0.24253265 0.21370225 -0.18026904 0.09764091 -0.1209306 -0.17731586 -0.06910689 -0.02806085 -0.1393988 0.12302363 -0.09050706 0.16008655 -0.17265485 -0.31220133 0.17969053 -0.14110015 0.21758027 0.10329363 -0.12288101 0.12494619 -0.17842359 0.32303197 0.0910653 0.28906552 0.0307323 -0.23746819 -0.27590243 0.23001134 0.22142613 0.28695002 0.12641401 0.25293844 -0.05722432 -0.33271881 -0.12802003 -0.24565919 -0.17858802 0.2689243 0.06282523 -0.16481707 0.36932831 -0.22648502 -0.31342403 -0.18730119 -0.13730998 -0.10937504 -0.12669457 0.02423994 0.23199514 0.27702759 0.27204582 0.00619051 0.05907741 0.11715736 0.0679795 -0.10779382 0.09219567 0.21890166 -0.25774489 -0.35416836 -0.20610571 -0.17342196 0.18165003 -0.1177088 0.19645612 0.12174515 -0.29548975 -0.1573005 0.2309999 -0.04933441 -0.22904556 0.19482473 -0.06454602 -0.28815669 -0.10041897 -0.32849347 -0.20360489 0.03762246 -0.04481123 0.1481647 0.24682075 -0.07884117 -0.09849821 0.05206405 0.26368936 -0.09566898 0.23993819 -0.21160782 0.15622702 0.22002858 0.19215324 0.21632498 -0.21706278 0.25877331 0.12869082 -0.18418742 0.11543104 0.19500451 0.13231774 -0.2090333 0.17654631 0.27724033 -0.13496149 -0.21652007 -0.06255342 0.1992889 -0.18175081 0.24989826 -0.29987899 -0.12544393 -0.30784739 -0.22598934 -0.1710232 0.14685071 -0.18917296 -0.25185269 0.25686527 -0.2184126 0.17525151 0.26052604 -0.16317637 -0.17620178 0.19003763 -0.18814974 -0.10239305 0.20730792 0.04771089 0.23857351 0.09785246 0.1572621 -0.17258684 0.19735532 0.02217344 0.14823813 0.13418688 0.19021413 -0.13435891 -0.28203366 -0.17931479 -0.13533593 -0.08740907 0.25516609 0.37633948 -0.24324664 -0.06832395 -0.23715256 -0.15883899 -0.01688594 -0.32035701 -0.07739023 -0.18418824 -0.22344438 -0.08728018 -0.24519748 0.17541857 -0.13310876 -0.35043493 0.03039741 -0.19006063 0.18292318 0.12996435 0.27914331 0.17265499 -0.0480151 0.12370499 0.22934302 -0.10676347 0.06333509 0.0292857 0.19234541 -0.27712132 0.26996036 0.27293942 0.12452954 -0.08144105 0.25751979
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-0.19711473 -0.15685196 0.14675567 0.18522787 -0.07251143 -0.12086552 -0.03566327 0.20581509 -0.2265857 -0.24614421 -0.16614977 0.16128926 0.06115056 -0.247925 0.16706373 -0.12995194 -0.20943487 -0.1553959 0.14552586 0.10597462 0.10809735 -0.21652429 0.04154649 0.17362894 -0.17551466 0.18241562 -0.17320933 0.15604515 -0.10885653 -0.17502993 -0.12849105 0.12024588 -0.09854627 0.02638896 -0.16946104 0.08675677 -0.02280882 -0.15004523 0.10501312 -0.1391208 0.14858632 0.04250946 -0.09782802 0.11952316 -0.11660619 0.21782301 0.15018493 0.16755575 0.05645631 -0.21169199 -0.22346208 0.13225025 -0.11671326 0.22142649 0.17691024 0.18220139 -0.0342423 -0.21929658 -0.08278642 0.04605617 -0.10990707 0.16085833 0.14296336 -0.10719035 0.23886036 -0.18200083 -0.2413008 -0.1610979 -0.06293719 -0.05484051 -0.19638071 0.14427161 0.15255253 0.16350203 0.17295031 0.09300142 -0.14520714 0.09389342 0.02634006 -0.08307828 0.00506017 0.13190105 -0.17583118 -0.21364485 -0.16677637 0.02593512 0.13979314 -0.10636901 0.16277148 -0.02491427 -0.20434812 -0.12823588 0.16700889 -0.02032577 -0.17971615 0.1097937 -0.09092894 -0.21687376 -0.08211326 -0.20137008 -0.07614282 -0.00416991 -0.12239826 0.15159457 0.17045369 -0.11826507 0.16599087 -0.04395105 0.17774665 -0.06863496 0.2026472 0.00790946 0.14768235 0.17800253 -0.06969423 -0.07160644 -0.18216774 0.21618479 0.07515537 -0.09910635 0.07743201 0.11426842 0.08647166 -0.04212301 0.20793375 0.1177222 -0.20086674 -0.20430352 -0.02031121 0.17374232 -0.13612661 0.2015606 -0.17237436 -0.043516 -0.2008721 0.02540192 -0.15708805 0.09567654 -0.15931822 -0.16532445 0.20337457 -0.15641832 0.11982888 0.2194372 -0.09826726 -0.03483229 -0.13099524 0.04179335 -0.05667275 0.17017843 0.01127302 0.17524342 0.13227134 0.18508527 -0.06217227 0.189266 -0.18379495 0.12008732 0.14123432 0.14622571 -0.0793059 -0.23465079 -0.14801446 -0.14941387 -0.07441821 0.19122188 0.23575173 -0.21793787 0.01251672 -0.1924305 -0.15277377 0.14131818 -0.21799403 0.01873796 -0.0865422 -0.12569851 -0.02774587 -0.05906566 0.06081243 -0.13728987 -0.24071345 0.13544047 -0.14407588 -0.00359999 0.17087897 0.15974346 0.0595134 -0.12555912 0.07481879 0.20547827 -0.11874975 -0.06934711 -0.01426847 0.09765574 -0.19920432 0.18492426 0.17014705 0.02760224 -0.2106648 0.14815665
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-0.20379878 -0.12089045 0.15687387 0.19867295 -0.07307875 -0.19487704 -0.11878828 0.22154134 -0.22689891 -0.24129295 -0.21121445 0.20394334 0.15074092 -0.2589028 0.17261487 -0.159314 -0.24400658 -0.08713579 0.19276928 0.11462024 0.13537659 -0.24201089 0.04832619 0.13643689 -0.15157758 0.20508975 -0.1906767 0.16954319 -0.14981122 -0.18975434 -0.12710396 0.08961673 -0.16770907 0.02152015 -0.07768399 0.09044737 -0.02159572 -0.17779025 0.12210749 -0.12509444 0.16229182 0.09507222 -0.13972536 0.1606637 -0.12723997 0.23037439 0.16155376 0.18921952 -0.09356552 -0.23013892 -0.21802789 0.14811406 0.03195109 0.23095243 0.0410744 0.20903613 -0.10019431 -0.22778162 -0.10051301 -0.02136175 -0.1478807 0.19466657 0.12667041 -0.16833011 0.22717892 -0.18156278 -0.24730807 -0.20270037 -0.12831363 -0.03438875 -0.11392102 0.05479974 0.14180159 0.19729523 0.20495329 0.10094704 -0.03211071 0.06666422 0.07149207 -0.16076222 0.11659976 0.13922108 -0.21050106 -0.23778294 -0.16580508 -0.124822 0.1705927 -0.0795621 0.18248735 0.0325956 -0.22085297 -0.12370215 0.17836735 -0.12146729 -0.19215869 0.11944296 -0.11119389 -0.24977961 -0.12148157 -0.23489769 -0.14493504 0.1009634 -0.08593777 0.12854965 0.18179743 -0.1752086 0.01114613 0.0441498 0.19199642 -0.08213017 0.19985016 -0.08851117 0.15294062 0.20234113 0.01255503 0.10768114 -0.15275335 0.18208662 0.13201351 -0.11034368 0.14459325 0.19065223 0.09677075 -0.04934213 0.14431167 0.17468614 -0.131853 -0.16737004 -0.12814961 0.16790931 -0.19229355 0.22408362 -0.22726745 0.02836744 -0.19469766 0.07855155 -0.16379061 0.19056759 -0.14492499 -0.16037033 0.23339814 -0.18928666 0.18817544 0.21182814 -0.13302822 -0.03744866 0.07184573 -0.0804945 -0.09293932 0.21022004 0.090818 0.18457402 0.04255208 0.16996085 -0.10776047 0.19024125 0.00383966 0.16522205 0.13220138 0.16301269 -0.10203851 -0.22660458 -0.17674961 -0.08130155 -0.17434356 0.2131733 0.24427285 -0.22215517 -0.11536155 -0.1739401 -0.14550837 0.11413324 -0.23727342 -0.04648682 -0.08584432 -0.18719559 -0.00643025 -0.10690957 0.11016459 -0.13901732 -0.26317776 0.05900019 -0.12899251 0.12419839 0.10036794 0.1814263 0.12393999 -0.11579129 0.09775069 0.22141022 -0.19177795 0.06932093 0.07533786 0.14363564 -0.21793854 0.18474596 0.20250122 0.12264677 -0.07723263 0.2002238
4,ㅇㅝㄹ,ㅇㅝㄹ-0.23121086 -0.20385177 0.17842647 0.17881503 -0.16552629 -0.07495665 0.1232057 0.24154268 -0.26636426 -0.26416249 -0.19919346 0.21054016 0.04159113 -0.247536 0.17783472 -0.11902066 -0.23049192 -0.11798003 0.1263924 0.04630408 0.1688013 -0.2508919 0.0253707 0.16402109 -0.20632688 0.22634635 -0.18755805 0.11412771 -0.05247742 -0.22564703 -0.16318369 0.09415373 -0.14849037 0.01951936 -0.15795306 0.06037405 0.25493194 -0.17104904 0.03637917 -0.17558575 0.19155701 0.00381876 -0.10508751 0.19331401 -0.14915353 0.21108535 0.12982734 0.21449344 0.08213003 -0.17359931 -0.22222552 0.159629 -0.11863649 0.24369706 0.05730076 0.17608265 -0.11009272 -0.23168519 -0.03716373 0.20222198 -0.12108146 0.13996469 0.17559889 -0.11517725 0.25803136 -0.20896793 -0.26890984 -0.21361812 -0.03181498 0.15426054 -0.15610411 0.06589303 0.17786993 0.11787914 0.20262873 -0.06045343 0.04593742 0.11208238 -0.06993538 -0.1230904 -0.03603872 0.05615955 -0.19490322 -0.24478631 -0.19306538 -0.01925264 0.1533987 -0.14976048 0.10667551 -0.08733119 -0.21501837 -0.14172231 0.24027313 0.00482143 -0.20688346 0.14612273 -0.10596504 -0.21615843 -0.20924403 -0.23341708 -0.12740153 0.03008634 -0.12011274 0.09905868 0.17109782 -0.05982206 0.09294917 -0.0241457 0.18657359 -0.03966101 0.19877938 -0.03326434 0.17836642 0.15236855 -0.08499327 -0.17588226 -0.17585999 0.24783104 0.10522697 -0.13336913 0.06331244 0.07575497 0.11395324 0.05204654 0.20588343 0.10689449 -0.2157941 -0.21164723 -0.07592213 0.15466974 -0.16327672 0.23399323 -0.1959588 0.04084362 -0.22776046 0.07485619 -0.20449096 0.08024204 -0.19176686 -0.15702549 0.20855016 -0.18253547 0.15049225 0.23953501 -0.07490284 -0.01291149 -0.17682702 0.18658603 0.02025488 0.17651097 0.08420814 0.16565754 0.04638587 0.18486767 -0.12183347 0.18173729 -0.09382131 0.14889256 0.14098782 0.17855688 -0.10931576 -0.25809361 -0.1933533 -0.1733561 0.03791768 0.18455322 0.28180927 -0.23544456 -0.03648317 -0.20353816 -0.15827936 0.23620022 -0.2384922 -0.0867409 -0.15522626 -0.13571793 -0.16059142 -0.05181946 0.10582112 -0.15041583 -0.25813393 0.14335328 -0.20675271 -0.17474539 0.12000686 0.20522446 -0.00157802 -0.09960923 0.05370231 0.23924612 -0.13008455 -0.1262925 0.00370456 0.14213129 -0.20514674 0.2299408 0.21328033 -0.07632942 -0.05825373 0.18551781


### window 3

In [16]:
from soynlp.utils import DoublespaceLineCorpus
from soynlp.vectorizer import sent_to_word_contexts_matrix

#corpus_path = '2016-10-20_article_all_normed_ltokenize.txt'
#corpus = DoublespaceLineCorpus(corpus_path, iter_sent=True)

x, idx2vocab = sent_to_word_contexts_matrix(
    input_sentences,
    windows=3,
    min_tf=10,
    tokenizer=lambda x:x.split(), # (default) lambda x:x.split(),
    dynamic_weight=True,
    verbose=True)

print(x.shape) # (36002, 36002)

Create (word, contexts) matrix
  - counting word frequency from 297570 sents, mem=1.432 Gb
  - scanning (word, context) pairs from 297570 sents, mem=1.732 Gb
  - (word, context) matrix was constructed. shape = (38890, 38890)                    
  - done
(38890, 38890)


In [17]:
from glove import Corpus, Glove

In [18]:
glove = Glove(no_components=200, learning_rate=0.01, random_state=42)
glove.fit(x.tocoo(), epochs=5, no_threads=4, verbose=True)

Performing 5 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4


In [19]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)

In [20]:
glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)
glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')
glove_embedding.head()

100%|██████████| 38890/38890 [00:07<00:00, 5517.08it/s]


Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.17762808 -0.18490076 0.08588548 0.24284263 -0.16239751 -0.27436715 -0.74741488 0.27707478 -0.25259406 -0.23154107 -0.33708273 0.22697088 0.64465582 -0.33283018 0.22061267 -0.00110232 -0.32557432 -0.18232905 0.23525859 -0.09731015 0.23999817 -0.32069043 0.0236779 -0.28442625 -0.21153094 0.27907981 -0.1574479 0.07752668 0.00385965 -0.21277331 -0.12128502 -0.13509534 -0.26911675 -0.49661489 -0.09681506 0.4846365 0.2365633 -0.19005776 0.65949728 0.14886284 0.19654134 0.15389083 -0.45425171 0.25612321 0.14431148 0.22433817 0.24232983 0.17212922 -0.51305458 -0.16731921 -0.24546143 -0.0804945 0.06065588 0.20904637 -0.19280202 0.24300949 0.28010348 -0.30338969 -0.03123301 0.07157973 -0.05354185 0.21945012 0.11181493 -0.15549045 0.25489995 -0.21970318 -0.27870176 -0.20826254 -0.01117263 0.27930547 0.17158264 0.14082292 0.13147954 0.1849325 0.29558346 0.56883546 -0.0227893 0.08927379 0.09878307 -0.37594125 0.26457799 0.14444631 -0.28795017 -0.26844787 -0.21988838 -0.21435549 0.06801286 -0.01953159 0.29221798 -0.11215994 -0.28065159 -0.12377167 0.26491115 0.09103644 -0.21920181 0.30420816 0.20697315 -0.31834055 0.05210666 -0.24388458 -0.26418875 0.23847078 -0.11318378 -0.18057553 0.21161032 -0.34224401 0.09544777 -0.03980369 0.24241325 -0.2171742 0.20423383 -0.76855712 0.19273149 0.26920546 -0.01427855 0.39934365 -0.10466226 0.03485697 0.51745231 0.21565218 0.27542995 0.18734969 0.23824979 0.10376377 0.15228127 0.22111061 -0.11053208 -0.20144308 -0.33583225 0.24846504 -0.12304139 0.2279901 -0.31229923 -0.39185089 -0.01723182 -0.43611997 0.50799385 0.2643226 -0.20321429 -0.16495067 0.27555359 -0.22156065 0.24016995 0.22833714 -0.1071268 -0.08148818 0.19848486 -0.22435449 -0.1045391 0.27179609 0.09831652 0.25171847 0.04511269 0.1662873 -0.18367904 0.21315035 0.78707128 0.27195917 0.17559253 0.24040041 -0.15316599 -0.23114456 -0.23123284 -0.16216958 -0.33669661 0.25399899 0.27594369 -0.2417251 -0.25129025 -0.13803024 -0.05740072 0.20420629 -0.29767144 -0.75723891 -0.04349043 -0.24188759 0.17401016 -0.15281127 0.13742357 -0.14885555 -0.26885002 0.06463839 0.56806788 0.18415101 0.05845328 0.2345564 0.7332153 0.22757088 0.20190615 0.16595557 -0.21651237 0.80447485 0.1661763 -0.03948209 -0.28126886 0.19478424 0.22551864 0.09433982 0.41178246 0.22315449
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.16453087 -0.26516197 0.04300108 0.21623661 0.32732233 -0.18126474 -0.56323844 0.28043292 -0.31980237 -0.25807859 -0.29012107 0.15741141 0.31911365 -0.34843289 0.26862883 -0.10760345 -0.30689119 -0.15652791 0.2239783 0.00326467 0.14461368 -0.32033049 -0.20757623 -0.05716952 -0.2671825 0.23523486 -0.15048179 0.09631036 -0.00559792 -0.23238276 -0.04263311 0.13703183 -0.14520505 -0.25156698 -0.19270911 0.48297001 0.07315754 -0.15967866 0.51416661 0.13045074 0.25031876 0.11132543 0.04368249 0.23119788 0.01156637 0.2950039 0.16231588 0.18430469 -0.10211864 -0.16909193 -0.28898996 -0.04575224 -0.07276394 0.23463 0.06735839 0.21228657 0.51122365 -0.31910732 0.28320037 0.02553759 -0.05598385 0.2634646 0.14276917 -0.17729745 0.31753387 -0.2573959 -0.30861766 -0.15715178 0.08164875 0.08651492 -0.03669455 0.16815502 0.19832007 0.15225004 0.3014319 0.38917697 -0.12903879 0.06920402 0.04116963 0.13911576 -0.01487776 0.16264057 -0.25780852 -0.25753512 -0.20985902 -0.18992802 0.0622322 -0.06275048 0.23737632 -0.06381908 -0.2807851 -0.2059566 0.25786982 0.15717269 -0.19918899 0.51231644 0.07148164 -0.29569274 0.10124513 -0.25240034 -0.23518909 0.11731936 -0.16521177 0.41369881 0.15569699 -0.09872513 0.17704693 -0.03653351 0.26531369 -0.24031066 0.14539152 -0.47667134 0.20120067 0.26080958 0.00652634 0.25945315 -0.18929608 0.14036646 0.26666941 0.21742156 0.24451342 0.11021287 0.25799339 0.11037641 0.24183836 0.2304094 -0.19287689 -0.26653333 -0.18277262 0.22896443 -0.13014098 0.21826324 -0.25475997 -0.17116112 -0.15806132 -0.62726317 0.42995061 0.21922358 -0.25783081 -0.23042489 0.27291364 -0.2224107 0.21040032 0.26385077 -0.10036656 -0.05226875 0.11469902 -0.17613546 -0.03513902 0.21945839 0.06894856 0.28173886 0.09347622 0.19502543 -0.24416241 0.25537763 0.26726128 0.18027583 0.20495616 0.24958275 -0.13326244 -0.25641661 -0.2237628 -0.15257399 -0.33030363 0.22925741 0.33267909 -0.23889674 -0.18267034 -0.22838895 -0.0363558 0.19680248 -0.3226219 -0.49693836 -0.35531822 -0.10382746 0.19748495 -0.07793752 0.1760736 -0.16426642 -0.29193899 0.05641252 0.27610348 0.11879097 0.13972812 0.21857363 0.5546561 0.04819196 0.22503645 0.19904503 0.05868125 0.49785286 0.0396371 -0.30741197 -0.2881185 0.21956543 0.21026317 0.01829323 0.09371745 0.20412687
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-0.18035706 -0.20593823 0.17755041 0.17803493 0.25062521 -0.07576707 -0.09917296 0.21842696 -0.24794895 -0.2346613 -0.21753052 0.12922315 0.10584182 -0.26260067 0.21865643 0.09279828 -0.23837307 -0.15070932 0.17729846 0.10689006 0.1181682 -0.24158278 -0.12781809 0.04820222 -0.19759451 0.19936368 -0.16125348 0.13052207 -0.05887559 -0.19553698 -0.03215755 0.2486424 -0.12918871 -0.21621816 -0.1715472 0.22945212 -0.0754623 -0.09082054 0.31725489 0.03224331 0.19676624 0.12887893 0.12455159 0.19494276 -0.01683416 0.2364831 0.16870693 0.14742272 -0.02930996 -0.16334583 -0.23026848 -0.06531601 -0.12978886 0.20450427 0.10699932 0.1796191 0.37711041 -0.25273701 0.16696117 0.0683883 -0.08793111 0.19243434 0.149335 -0.14804219 0.24469076 -0.20568907 -0.25946919 -0.12843315 0.10490983 0.05882324 -0.11928425 0.15407838 0.16193477 0.1193954 0.22382923 0.24719449 -0.14203422 0.04577956 0.05966363 0.14121762 -0.03605177 0.13212635 -0.19611982 -0.19046978 -0.1697692 -0.13862569 0.08736167 -0.07406146 0.20252085 -0.08285514 -0.22676321 -0.15320365 0.19511206 0.12810995 -0.16865106 -0.04460942 0.01938777 -0.23883083 0.01006474 -0.1809386 -0.18572723 0.10648679 -0.13538473 0.35512237 0.1374019 -0.1536288 0.17325484 -0.07154829 0.20594833 -0.17963631 0.17638725 0.0761555 0.18460028 0.21327608 -0.06832748 0.01607997 -0.15615766 0.15502635 0.11857959 0.14069448 0.18677813 0.09609222 0.19349246 0.1744069 0.21567542 0.14607019 -0.18055515 -0.21372947 -0.20604088 0.19929059 -0.12579817 0.18935691 -0.16495684 0.13819165 -0.13900369 -0.12991379 -0.15985432 0.16529805 -0.21093342 -0.17927216 0.21420765 -0.17260684 0.15574134 0.22934248 -0.09826909 0.03354651 0.04794499 -0.10589077 -0.00970113 0.18707734 0.03938216 0.21702011 0.1040313 0.18701282 -0.17067047 0.21353894 -0.12735266 0.13773477 0.17437356 0.20120789 -0.10264575 -0.23547795 -0.18600799 -0.14564218 -0.12545528 0.18498822 0.2439428 -0.22468462 -0.15161036 -0.20208853 -0.06463129 0.17400094 -0.24242715 -0.15945974 -0.27212888 -0.08052513 0.17751351 -0.02410365 0.11540534 -0.14974125 -0.23914478 0.10250906 0.12856503 0.06921563 0.13818207 0.14904047 0.08695182 -0.05385525 0.16693998 0.18605179 0.03992989 0.24623748 0.04540073 -0.31505579 -0.22062541 0.16301694 0.15287956 0.03251879 -0.12394095 0.1525898
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-0.16670579 -0.20268288 0.32278291 0.18936851 0.09903912 -0.16137362 -0.27610925 0.23071023 -0.24413627 -0.21446982 -0.24264446 0.15518183 0.33610703 -0.27969201 0.22159064 0.10315659 -0.25816527 -0.15963464 0.18634333 0.10680027 0.12375856 -0.25809744 -0.05937149 -0.03760895 -0.19954073 0.20782631 -0.15444609 0.13460951 -0.05372946 -0.19520452 -0.04191618 0.15341585 -0.18550318 -0.34701981 -0.15445546 0.30568236 -0.04066619 -0.09880088 0.48684374 0.04938683 0.19791583 0.10869059 -0.065257 0.20184862 0.01973742 0.22809418 0.19166909 0.13986937 -0.18566534 -0.16657389 -0.24274502 -0.09154871 -0.07986446 0.20625854 0.03470756 0.17958924 0.3577628 -0.25163597 0.08011517 0.086464 -0.07559482 0.19612136 0.14274199 -0.15986294 0.23069953 -0.21079802 -0.25468011 -0.17943777 0.07736727 0.02564522 -0.03861708 0.15136085 0.15491666 0.1573109 0.23772273 0.43226779 -0.12483196 0.08392622 0.09856732 -0.11061591 0.12784884 0.14097329 -0.21435489 -0.19926662 -0.17052206 -0.1759137 0.1016936 -0.06065276 0.20988041 -0.09590115 -0.22453272 -0.14445229 0.20161719 0.11553667 -0.17810173 -0.02652597 0.11049067 -0.24833855 0.00624475 -0.19907047 -0.2004281 0.14731284 -0.13117912 -0.0026914 0.13674673 -0.21289776 0.14781445 -0.06653613 0.20457186 -0.19472877 0.16467892 -0.24404329 0.17599035 0.21272925 -0.06750164 0.13674865 -0.13135125 0.10976535 0.11705041 0.23204939 0.20475099 0.16737674 0.19042119 0.13360359 0.19255362 0.19513525 -0.16206015 -0.19256867 -0.27525893 0.20250498 -0.15667344 0.18577248 -0.21074662 -0.21858685 -0.1112316 -0.07055822 0.11206731 0.2088078 -0.19106494 -0.17680662 0.2151904 -0.17682001 0.18848659 0.2181409 -0.11312432 0.0470312 0.13791709 -0.16140907 0.01164641 0.19040713 0.0675589 0.21195128 0.09295963 0.1887437 -0.18676243 0.21041496 0.3894178 0.16958556 0.17873109 0.19929962 -0.11424839 -0.22679111 -0.18893964 -0.11909476 -0.21965992 0.20318051 0.24894536 -0.21595261 -0.17438532 -0.17185176 -0.09043979 0.18441916 -0.2515595 -0.43870997 -0.03422848 -0.13895418 0.1705672 -0.09519601 0.11650014 -0.14094698 -0.23571956 0.08220766 0.33363742 0.11676144 0.10029591 0.17862466 0.31965009 0.00152053 0.17633728 0.19688331 -0.08144002 0.50977196 0.06015155 -0.24369461 -0.24063239 0.17323042 0.16795147 0.08424088 0.02469442 0.18189764
4,ㅇㅝㄹ,ㅇㅝㄹ-1.80677734e-01 -2.27168695e-01 1.08105535e-01 1.26421222e-01 -2.20596310e-01 6.34214915e-02 1.06700488e-01 2.26468329e-01 -2.64341479e-01 -2.33596839e-01 -2.17177609e-01 1.44191938e-01 3.91820551e-02 -2.44670072e-01 2.26875374e-01 4.00463418e-02 -2.44860924e-01 -1.11865712e-01 1.66731830e-01 -1.61872683e-01 1.28663014e-01 -2.53164090e-01 -5.12256428e-02 7.98186015e-02 -1.99305381e-01 2.09674190e-01 -1.73232231e-01 4.10615111e-02 -3.98498151e-02 -2.20072916e-01 -5.87177295e-03 3.86258813e-02 -1.15388506e-01 -1.30895504e-03 -1.42106409e-01 -1.48873444e-01 3.72398414e-01 -1.49468537e-01 -1.80539165e-01 -7.23423068e-02 2.15154256e-01 9.41083554e-02 1.01883541e-01 2.25685210e-01 -1.01021601e-01 2.26587927e-01 1.17397530e-01 1.69145162e-01 1.09687093e-01 -1.20499761e-01 -2.23637725e-01 -8.25401617e-03 -1.31373121e-01 1.96317982e-01 5.34008959e-02 1.76499811e-01 -6.47324167e-02 -2.44870138e-01 -1.89105741e-02 8.92512968e-02 -1.16010191e-01 1.86827873e-01 1.44700824e-01 -1.50681889e-01 2.51774849e-01 -2.08327989e-01 -2.62985940e-01 -1.38611487e-01 7.82000727e-02 2.26239296e-01 -9.24719438e-02 1.39549342e-01 1.62431711e-01 3.12787322e-02 2.37811171e-01 -8.28141286e-02 -1.07950092e-01 5.13163603e-02 -8.10803918e-02 1.23753818e-03 -1.49548004e-01 1.04637626e-01 -1.90565710e-01 -2.01295100e-01 -1.78377728e-01 -1.28227670e-01 6.47859185e-02 -1.31316904e-01 1.45915555e-01 -7.70001657e-02 -2.29042919e-01 -1.63218153e-01 2.32472403e-01 1.23702102e-01 -1.86751934e-01 8.73323927e-02 -2.27248088e-02 -2.20040566e-01 -1.26035883e-01 -2.02012478e-01 -1.84969817e-01 9.25505435e-02 -1.19725081e-01 1.52543605e-01 1.09028544e-01 3.36107286e-04 1.65545238e-01 -7.12447945e-02 2.05450235e-01 -1.67241967e-01 7.87788213e-02 -2.40026588e-01 1.99777833e-01 1.99363139e-01 -8.51788654e-02 -8.91882758e-02 -1.62364500e-01 1.80836504e-01 1.58818612e-01 -1.02523290e-01 1.74028880e-01 -4.30178215e-02 2.08912312e-01 1.35755610e-01 2.16641091e-01 1.19908389e-01 -2.16549303e-01 -2.24598749e-01 -4.70491961e-02 1.83177945e-01 -1.24887323e-01 1.84657720e-01 -1.64537203e-01 4.04566938e-01 -1.31016542e-01 -5.13590926e-01 -3.88858583e-01 1.51758527e-01 -2.28623171e-01 -1.55966183e-01 2.04434683e-01 -1.75190723e-01 1.77357966e-01 2.24257702e-01 -1.00991301e-01 4.21612869e-02 1.26673644e-02 -7.54786172e-02 7.39388967e-02 1.75327811e-01 6.70072469e-02 2.06584197e-01 6.11155601e-02 1.74311537e-01 -1.79554533e-01 2.09875807e-01 -3.15470582e-02 1.39824752e-01 1.74402762e-01 1.96972397e-01 -1.22400467e-01 -2.45061858e-01 -2.01867915e-01 -1.71485162e-01 1.56555212e-01 1.45888587e-01 2.61947885e-01 -2.12451898e-01 -1.28449550e-01 -2.17291923e-01 -4.68632099e-02 2.13745635e-01 -2.19491726e-01 -9.05490919e-02 -4.58993316e-01 -5.05081491e-02 1.68512493e-01 8.27610488e-02 1.50739998e-01 -1.60697711e-01 -2.30327652e-01 1.19538184e-01 -1.45112081e-01 2.90380945e-02 1.17160636e-01 1.79648010e-01 -1.11463981e-01 -5.23464867e-03 1.61123003e-01 1.96206426e-01 4.40037153e-02 -9.65171255e-02 7.30779732e-02 6.00416101e-02 -2.12749898e-01 1.77912028e-01 1.78968009e-01 -9.92933215e-03 7.46217918e-02 1.73410524e-01


In [16]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42)
glove.fit(x.tocoo(), epochs=100, no_threads=4, verbose=True)

Performing 100 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
CPU times: user 5min 2s, sys: 47 ms, total: 5min 2s
Wall time: 1

In [17]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)

In [18]:
glove.word_vectors.shape

(38890, 200)

In [19]:
glove.word_vectors[:1]

array([[-0.04014644,  0.14591996,  0.21770952,  0.27505619, -0.21774962,
        -0.3299259 , -0.34875664,  0.15955163, -0.30599767, -0.09654434,
        -0.36265585,  0.36642118,  0.58957813, -0.25477741, -0.77548881,
        -0.16812614, -0.30386485,  0.3014517 ,  0.11541772,  0.23872045,
         0.23607387, -0.32231933,  0.65029271, -0.57035232,  0.3518528 ,
         0.2721458 ,  0.29503168, -0.24414353,  0.24229787, -0.27020786,
         0.02250393, -0.64149403, -0.18691672,  0.57878591,  0.04031945,
         0.25008333, -0.11378724, -0.24900752, -0.36675543, -0.16945304,
        -0.26521683,  0.47874288, -0.39373579,  0.35531035, -0.0364649 ,
         0.24267091,  0.28361149,  0.0727594 , -0.18305883,  0.18589462,
         0.01891854,  0.31339466,  0.2110837 ,  0.2616362 ,  0.1850178 ,
         0.30698852, -0.34319437, -0.33101811, -0.0366438 , -0.10888972,
        -0.33897783, -0.18377894,  0.22498183, -0.16396334,  0.17689501,
        -0.09917777, -0.23181384, -0.07816078, -0.3

In [21]:
glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,"[-0.040146439146532854, 0.1459199564025886, 0.21770951649967288, 0.27505618905214874, -0.21774961539982576, -0.32992590369903885, -0.34875663718111205, 0.15955162607922904, -0.305997672446965, -0.09654433939962898, -0.3626558497403411, 0.3664211784936428, 0.5895781267518013, -0.25477740553566475, -0.7754888061202576, -0.16812614305628376, -0.3038648472127392, 0.3014516981329455, 0.11541771856676092, 0.23872045098218483, 0.23607387493931112, -0.3223193297401064, 0.6502927086724973, -0.5703523150599951, 0.3518527951391747, 0.2721457953773931, 0.2950316778258804, -0.24414352851925353, 0.2422978729500584, -0.27020786486751874, 0.022503931170752046, -0.6414940339397746, -0.18691671529030046, 0.5787859053707188, 0.04031944855763691, 0.25008332771133435, -0.11378724404866228, -0.2490075170225759, -0.3667554252195786, -0.16945303784637283, -0.2652168322474747, 0.47874287732138326, -0.3937357934693531, 0.3553103520420614, -0.03646489812035994, 0.2426709065582876, 0.283611489751039, 0.07275939930438592, -0.18305883253701133, 0.18589461868555524, 0.018918544221754655, 0.3133946598397176, 0.2110837041232433, 0.2616362013122453, 0.1850177996830624, 0.3069885215742826, -0.3431943654946833, -0.33101811024047656, -0.036643801577073046, -0.10888971595732212, -0.3389778346535938, -0.18377894383819907, 0.22498182770753475, -0.16396333881447284, 0.17689500936933583, -0.09917777024020573, -0.2318138382632774, -0.07816078123507537, -0.32654060854510836, 0.3917461140143674, 0.09102456177488191, 0.0580177063672472, -0.5810236975096904, -0.15678636539390664, 0.12635572264744868, 0.003218629273790827, 0.0629406559152243, -0.3518278754445992, -0.04854144943131424, -0.12647429335202598, 0.3510986852746356, 0.4051464808002084, -0.21645253747254242, -0.19755681746718312, -0.3228944423688359, -0.3002770816990503, 0.3483795838100865, -0.046157589155479815, 0.3533725154836427, -0.06437188334279607, -0.30470854975854267, 0.7943502240375381, 0.25375599377636954, -0.3947189615781569, -0.04646276048762151, 0.28683884154846023, 0.5676365611630606, -0.3430480468733849, -0.13901352788172927, -0.2517575370388552, ...]"
1,ㅇㅡㄴㅎㅐㅇ,"[-0.15727920467170306, -0.08054372063028327, -0.2784167129656523, 0.21973003896583776, 0.1345708537280672, -0.2774422316098419, -0.400841878504431, 0.30184944983642836, -0.37914002232502925, -0.2627290278595458, -0.16365064306242355, 0.1128674486396726, 0.3849630509308168, -0.3393446132997946, -0.3736597817841855, -0.29523862168727905, -0.30348419564073675, -0.08834723476573174, -0.07348613400918769, 0.10211750359834441, 0.12998961298497563, -0.3081142786792763, -0.3924729227360429, 0.01906074899715463, -0.07931723237395684, 0.05273993815782015, 0.6745660958607178, -0.5018620313279732, 0.774314659482358, -0.23300255532277203, -0.07766038389033927, 0.1186569755027705, -0.09071159349706298, 0.12439919776911805, -0.03220682619672916, -0.3979169872948048, -0.1483416568066553, -0.3256488848834462, 0.3223875163201774, -0.1931382718666288, 0.18513157221738175, 0.3644950224471216, 0.6357152262965683, 0.0407551202152704, 0.08497422841423914, 0.3487012146468191, -0.08026315269369984, 0.24847819842610344, 0.15154242508973784, 0.14718998742413336, -0.21447080637046176, 0.12459898480373152, 0.7576997656965404, 0.25016582680075883, 0.09443410587745431, 0.23656590915416306, 0.13067120527321718, -0.2009705858236229, 0.20186887847241536, -0.3305009875026934, -0.03671781304236953, 0.18318971495928815, 0.07951070770111159, -0.21976908822922528, 0.295480608496454, -0.14395484916926138, -0.3052021083437517, -0.21667290341446233, -0.4315845363953309, -0.05338219720879526, -0.06413475303666524, 0.27059306781339815, 0.3096129080816439, 0.6633131691762129, 0.25816638223029886, -0.01067129402339933, 0.6825768944273405, -0.2737465782620136, -0.04536977002346369, 0.7442248768664693, 0.35760636471853746, -0.03790119244674228, -0.2420740079887751, -0.18475701439268913, 0.04828208745885672, -0.3008422504077641, 0.19306437500761695, 0.11154074328408649, 0.20647253598795856, 0.534368730459033, -0.29766159394229813, -0.27991689690682525, 0.2800728623230272, -0.265122355013227, -0.24766068357297613, -0.010539936501116596, 0.5284232951761, -0.3123785625498622, -0.05439603101590575, -0.3390824645447112, ...]"
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,"[-0.1597745349845917, 0.013560189386259674, -0.036222217487630284, 0.16858401082072869, -0.13178771353448737, -0.12429784435248659, 0.20022860847736337, 0.20678051209654785, -0.25414346289123085, -0.277011121609237, -0.15336784866142583, 0.15247255560619424, 0.24610975665063942, -0.26218046077170526, 0.01585455543886052, -0.0979784456366289, -0.27310723720629565, -0.5611986860976371, -0.3041036213773179, 0.005303895829400219, 0.1840903877893535, -0.24486879182734414, -0.042220532522990406, 0.05418442921451441, -0.03175354299722056, 0.12713579660698557, 0.07355974957073855, 0.08047764000431303, 0.08086453809430875, -0.2363719412628069, -0.1213375183758481, 0.29946568416393393, -0.45223092796210207, 0.11566639206620552, -0.1376854579062381, 0.27379161872919194, -0.2646110593944506, -0.2020742390778186, 0.7991442250422304, -0.14805839989634684, 0.09664752431191137, 0.23049289216606497, 0.19880699235603322, 0.08873660437553722, 0.15334223031474323, 0.25937443881667005, 0.1668620021693682, -0.007799174302246304, -0.04175073307312873, -0.17069775779660332, -0.202600688045644, 0.23318257259824932, -0.005910291045224786, 0.28585787573803123, 0.15025258394349109, 0.12324448287345571, -0.21556373878514518, -0.25588753994258195, 0.43675909949770136, 0.21138261803940486, -0.13694436103820826, 0.05717806652997615, 0.18103457758071706, -0.11565909185291977, 0.23859917065664102, -0.1765263325532361, -0.2668210168396667, -0.1577628018657883, -0.6524388074827608, 0.3034388731910627, -0.19044898862669904, 0.13669089135976112, 0.2565055569629603, 0.6267301141155762, 0.15534569038984272, 0.060432344052985525, 0.15482988135717068, 0.0622165558827621, 0.12659675940263515, 0.5713068524674801, -0.27130474671315186, -0.014286634587276355, -0.1891147761054346, -0.12038000720779386, -0.19175650985965798, 0.38210259965453947, 0.18351235843185562, -0.00011090582079147755, 0.2719753052910863, 0.8261419776977916, -0.2445744183986712, 0.0806388655398789, 0.21738811317767923, -0.26443543483340237, -0.13278625747572173, 0.01757632727717061, 0.29641537227845927, -0.28082663440955047, -0.037437816617066105, -0.23873736684455435, ...]"
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,"[-0.30949347873295674, 0.0594615992566237, 0.11194597925692495, 0.11139642275213629, -0.17014131074212177, -0.15888300492540267, -0.4096666500993302, 0.22541192395834797, -0.2554380102045625, -0.15795146870552554, -0.20567883285328262, 0.13147658010704735, 0.4210270419663094, -0.18973075006494328, 0.18204655288288596, -0.036991250023264755, -0.24011825292565214, -0.0525553324294379, 0.2616669316185929, 0.12850865935871206, 0.08905903559669452, -0.22420536857596707, -0.17233434815698478, 0.053232948129125636, 0.21798266315250908, 0.157703705793137, -0.0326094627680965, 0.028388492788175002, -0.05552751810978055, -0.22592167392359006, -0.010159552640996636, -0.04843556093306686, -0.2920025809574028, -0.042041898940513814, -0.005841533084421416, 0.03330714867425825, -0.12490175339650302, -0.15087394291503384, -0.13501676877838464, -0.1061449962640955, 0.01714646268186141, -0.045602909679866466, -0.05235640648244271, 0.10581848984264958, 0.2571343235754455, 0.26773632178675755, 0.15339908678755626, 0.1850629262505611, -0.34342185999808406, -0.13202190560450142, -0.2940714737329587, 0.17297318004562764, -0.1363389144319819, 0.23465562580226712, 0.11743852958841822, 0.10270518952271752, -0.110883037675004, -0.16768313632872414, 0.20003684109641642, -0.03324226219093526, -0.1275020332823655, 0.15542274394803524, 0.215150699435846, -0.22294748165646752, 0.17039818389839728, -0.21416785929245663, -0.26153097624705735, -0.20613320944180208, -0.4615966057501567, 0.047777563087544224, -0.18641703297635726, 0.19955281620823476, -0.34544143047914805, 0.5032585370903512, 0.18568477003978798, 0.18604688534284436, -0.43049770720128655, -0.08946114952792736, 0.252809571324861, 0.37529315267789437, 0.07450914063595485, 0.1710039061098485, -0.2060700554947558, -0.15807394087512391, -0.024352840754614278, -0.2576598502468381, 0.21608363873353056, 0.10182593956440272, 0.20819151780996079, 0.48343805416410296, -0.23996691473053222, 0.2423153228526975, 0.1922629302762512, -0.2073086982358169, -0.22774425145617888, 0.06022101469327037, -0.029318545381784314, -0.22229408784772325, -0.20699543365715883, -0.21102870624428377, ...]"
4,ㅇㅝㄹ,"[-0.207543173901811, -0.10126383302300776, 0.12663856357366154, 0.09365658558210072, -0.15214850439036295, 0.01676780091562886, 0.3859737993008759, 0.19293043170135735, -0.22034247314667205, -0.21563599723469665, -0.08367861975282788, 0.17239945110534535, 0.08195125940436142, -0.16636275757723964, 0.11375667711653784, -0.030959406602681733, -0.2230893703509022, -0.3500946822516038, 0.07070867499100883, 0.06948771038753616, 0.11308669809161774, -0.2409729831005407, -0.10408944384287033, 0.14562679945502807, -0.059844979354090236, 0.19178740430251415, -0.056580343321612034, -0.24345800706225665, 0.7718960094453498, -0.18734593250122483, -0.09988026749493022, -0.24724901208216818, -0.3256238521317649, -0.3632924520292515, 0.13953768198605496, 0.12192561539573758, 0.03905335929374656, -0.17771254590500515, 0.051473126048748506, -0.07436220513866879, 0.09447720147997668, 0.2170487146225706, 0.013121823645414112, 0.15833193814478774, -0.10010747029405971, 0.2104428041023523, -0.0612985360562104, 0.1868389540485035, 0.16412723741452218, 0.18669903273950023, -0.25180359910409184, 0.13927908919183282, 0.0031591823994847248, 0.24241876071193227, -0.03466834494759662, 0.013801621375471156, -0.053656089640121495, -0.1772203485553253, 0.11315805765760426, 0.5900458192396987, 0.06502170060165967, 0.057612496314408256, 0.16771610180036875, -0.2053302776898888, 0.20146013116573552, -0.19343916543341624, -0.23378806394494378, -0.19377025336124093, -0.036952202452224324, 0.452333271321022, -0.17164022473942298, 0.14440364766428324, 0.5469929189590842, -0.3768228026254455, 0.13713820356160875, -0.24236037264899263, 0.6272787601546045, 0.19246810349207702, -0.42590602299881963, -0.1831739402504948, -0.7220820042049925, -0.29295093782405357, -0.15435646969443942, -0.14926624058674834, -0.12151011553752243, 0.2499818589336264, 0.19048467384150045, -0.22470350785046095, 0.11017413474089753, 0.030529444776095906, -0.17441777436704667, 0.652179239406073, 0.2556127836670539, 0.16375474278529106, -0.14029826855014432, -0.06965292387763845, -0.11519729645520464, -0.11245718540758347, -0.1913606479399667, -0.22064478148069874, ...]"


In [22]:
glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']

In [23]:
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')

100%|██████████| 38890/38890 [00:07<00:00, 5394.15it/s]


In [24]:
glove_embedding.head()

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.04014644 0.14591996 0.21770952 0.27505619 -0.21774962 -0.3299259 -0.34875664 0.15955163 -0.30599767 -0.09654434 -0.36265585 0.36642118 0.58957813 -0.25477741 -0.77548881 -0.16812614 -0.30386485 0.3014517 0.11541772 0.23872045 0.23607387 -0.32231933 0.65029271 -0.57035232 0.3518528 0.2721458 0.29503168 -0.24414353 0.24229787 -0.27020786 0.02250393 -0.64149403 -0.18691672 0.57878591 0.04031945 0.25008333 -0.11378724 -0.24900752 -0.36675543 -0.16945304 -0.26521683 0.47874288 -0.39373579 0.35531035 -0.0364649 0.24267091 0.28361149 0.0727594 -0.18305883 0.18589462 0.01891854 0.31339466 0.2110837 0.2616362 0.1850178 0.30698852 -0.34319437 -0.33101811 -0.0366438 -0.10888972 -0.33897783 -0.18377894 0.22498183 -0.16396334 0.17689501 -0.09917777 -0.23181384 -0.07816078 -0.32654061 0.39174611 0.09102456 0.05801771 -0.5810237 -0.15678637 0.12635572 0.00321863 0.06294066 -0.35182788 -0.04854145 -0.12647429 0.35109869 0.40514648 -0.21645254 -0.19755682 -0.32289444 -0.30027708 0.34837958 -0.04615759 0.35337252 -0.06437188 -0.30470855 0.79435022 0.25375599 -0.39471896 -0.04646276 0.28683884 0.56763656 -0.34304805 -0.13901353 -0.25175754 -0.36208369 0.05205259 -0.22300126 0.2905658 -0.36620431 -0.54267439 -0.55635134 -0.13487925 0.12809894 -0.27533946 -0.09402707 -0.13418316 0.31926451 0.25038203 0.56734689 0.50725485 0.13491343 -0.09576212 0.56955517 0.04188273 -0.26275488 0.27024855 -0.4486555 -0.37352145 -0.07066747 0.19620842 -0.05598441 -0.13595502 0.37554276 -0.24108017 -0.14587224 -0.02038195 -0.28231163 0.27694228 0.03949041 -0.36484638 0.25809373 0.03587891 -0.25194114 0.38679681 0.3846123 -0.36218281 -0.54193586 0.05941491 -0.22653054 -0.19972402 0.68535539 -0.20749228 -0.20579772 0.10440364 0.28986191 -0.41898958 -0.41396729 -0.21446917 -0.01976362 -0.33942792 0.55116389 0.35188357 0.22111976 0.29210891 -0.15658744 -0.19720351 0.36857953 0.45014699 -0.14041504 0.18623169 0.14295688 -0.2884932 -0.26274614 -0.16985756 -0.02450327 -0.59542114 -0.22030671 -0.15137482 0.10464277 -0.2418877 0.27974102 0.15631665 0.33281475 -0.11659845 -0.13164111 -0.41712243 0.2061083 0.58005565 -0.50544492 0.27397353 0.23080702 0.3383612 0.28207726 0.05866299 0.24901911 0.72161691 0.10578238 -0.03012658 -0.1668963 0.1184556 0.24818298 0.11677119 0.49216082 -0.1081244
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.1572792 -0.08054372 -0.27841671 0.21973004 0.13457085 -0.27744223 -0.40084188 0.30184945 -0.37914002 -0.26272903 -0.16365064 0.11286745 0.38496305 -0.33934461 -0.37365978 -0.29523862 -0.3034842 -0.08834723 -0.07348613 0.1021175 0.12998961 -0.30811428 -0.39247292 0.01906075 -0.07931723 0.05273994 0.6745661 -0.50186203 0.77431466 -0.23300256 -0.07766038 0.11865698 -0.09071159 0.1243992 -0.03220683 -0.39791699 -0.14834166 -0.32564888 0.32238752 -0.19313827 0.18513157 0.36449502 0.63571523 0.04075512 0.08497423 0.34870121 -0.08026315 0.2484782 0.15154243 0.14718999 -0.21447081 0.12459898 0.75769977 0.25016583 0.09443411 0.23656591 0.13067121 -0.20097059 0.20186888 -0.33050099 -0.03671781 0.18318971 0.07951071 -0.21976909 0.29548061 -0.14395485 -0.30520211 -0.2166729 -0.43158454 -0.0533822 -0.06413475 0.27059307 0.30961291 0.66331317 0.25816638 -0.01067129 0.68257689 -0.27374658 -0.04536977 0.74422488 0.35760636 -0.03790119 -0.24207401 -0.18475701 0.04828209 -0.30084225 0.19306438 0.11154074 0.20647254 0.53436873 -0.29766159 -0.2799169 0.28007286 -0.26512236 -0.24766068 -0.01053994 0.5284233 -0.31237856 -0.05439603 -0.33908246 -0.20242762 0.00664636 -0.13741498 0.03119037 -0.39012542 0.31199905 0.10927213 0.19657683 0.20031728 0.01557362 0.17524775 -0.10757874 0.18476346 0.25980472 0.09886934 0.30814531 -0.30672309 -0.00927756 -0.60975119 -0.14668808 0.03265322 0.19673261 -0.03530028 -0.11285591 0.14772563 0.27864426 -0.10311523 -0.21172293 -0.35814708 0.1042695 -0.03706134 0.19600204 -0.24872752 -0.23623717 -0.27154828 -0.53309937 0.11644491 -0.108796 -0.11860072 -0.12478401 0.17179854 0.0179788 0.06609469 0.03399539 -0.16891539 0.65338818 -0.06689487 -0.41351475 -0.11247675 0.22247287 -0.05430595 0.15229692 -0.1267627 -0.33409057 -0.2822526 0.19294731 -0.09861426 -0.03568986 0.28677243 0.06149134 -0.08263764 -0.26492316 0.36749904 0.42704584 0.42509172 0.1097473 0.35410968 -0.12505456 -0.59345382 -0.27992716 -0.18356801 -0.32856005 -0.27886826 0.01089768 -0.1810331 -0.14253432 0.12018746 -0.2276067 0.25153082 -0.01316961 -0.2637732 -0.27916181 0.05193879 0.28711377 0.12331934 0.1486321 0.02538425 -0.15962544 0.22544001 0.1950349 0.28615124 0.35690848 -0.43954099 -0.11264198 -0.22533619 0.26111029 0.31871354 -0.00189822 0.3384831 0.29622957
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-1.59774535e-01 1.35601894e-02 -3.62222175e-02 1.68584011e-01 -1.31787714e-01 -1.24297844e-01 2.00228608e-01 2.06780512e-01 -2.54143463e-01 -2.77011122e-01 -1.53367849e-01 1.52472556e-01 2.46109757e-01 -2.62180461e-01 1.58545554e-02 -9.79784456e-02 -2.73107237e-01 -5.61198686e-01 -3.04103621e-01 5.30389583e-03 1.84090388e-01 -2.44868792e-01 -4.22205325e-02 5.41844292e-02 -3.17535430e-02 1.27135797e-01 7.35597496e-02 8.04776400e-02 8.08645381e-02 -2.36371941e-01 -1.21337518e-01 2.99465684e-01 -4.52230928e-01 1.15666392e-01 -1.37685458e-01 2.73791619e-01 -2.64611059e-01 -2.02074239e-01 7.99144225e-01 -1.48058400e-01 9.66475243e-02 2.30492892e-01 1.98806992e-01 8.87366044e-02 1.53342230e-01 2.59374439e-01 1.66862002e-01 -7.79917430e-03 -4.17507331e-02 -1.70697758e-01 -2.02600688e-01 2.33182573e-01 -5.91029105e-03 2.85857876e-01 1.50252584e-01 1.23244483e-01 -2.15563739e-01 -2.55887540e-01 4.36759099e-01 2.11382618e-01 -1.36944361e-01 5.71780665e-02 1.81034578e-01 -1.15659092e-01 2.38599171e-01 -1.76526333e-01 -2.66821017e-01 -1.57762802e-01 -6.52438807e-01 3.03438873e-01 -1.90448989e-01 1.36690891e-01 2.56505557e-01 6.26730114e-01 1.55345690e-01 6.04323441e-02 1.54829881e-01 6.22165559e-02 1.26596759e-01 5.71306852e-01 -2.71304747e-01 -1.42866346e-02 -1.89114776e-01 -1.20380007e-01 -1.91756510e-01 3.82102600e-01 1.83512358e-01 -1.10905821e-04 2.71975305e-01 8.26141978e-01 -2.44574418e-01 8.06388655e-02 2.17388113e-01 -2.64435435e-01 -1.32786257e-01 1.75763273e-02 2.96415372e-01 -2.80826634e-01 -3.74378166e-02 -2.38737367e-01 -1.64600439e-01 -6.46687298e-01 -1.18838267e-01 2.28615551e-01 -3.60456107e-02 1.02354349e-01 7.00208990e-02 -1.01846005e-01 1.71657818e-01 -1.16689624e-01 1.92315075e-01 3.67106983e-01 2.68170847e-01 2.47345018e-01 -5.67269297e-02 -1.67791377e-01 -8.80511671e-02 1.04767538e-01 -2.11712500e-01 -1.03736686e-01 -1.24706549e-02 9.00582777e-02 5.21616787e-01 8.85927143e-02 1.83159704e-01 6.41675789e-02 -9.95466735e-02 -2.24394985e-01 -5.15215033e-01 7.20427212e-02 -3.18454183e-02 1.56768460e-01 -2.37868608e-02 -1.85783353e-03 -1.55038712e-01 -3.04176675e-02 5.07301340e-02 -9.86613048e-02 -1.88483787e-01 1.40914133e-01 2.73056131e-01 -1.24619872e-01 -2.40882670e-01 2.14382587e-01 -6.91382651e-02 6.95400661e-01 -1.76571711e-01 1.00786554e-01 -3.70676595e-01 1.37287830e-01 -4.64829169e-01 8.10677371e-02 1.13221683e-01 4.59398949e-02 1.76689141e-01 1.28926183e-01 -1.22438741e-01 2.37550251e-02 1.84000557e-01 1.70612222e-01 -9.19960364e-02 -2.50505274e-01 7.83623162e-02 -3.67610080e-01 6.35533834e-01 1.96856852e-01 2.15016094e-01 -2.64781221e-01 1.41865599e-01 -1.47284667e-01 -5.94803556e-02 -7.75125804e-02 -2.53884475e-01 1.71920020e-02 -6.69925865e-02 -1.03723733e-01 -2.25550254e-02 -2.71748762e-01 2.05700156e-01 -4.98145754e-03 -2.16777901e-01 1.25821891e-01 -2.96438880e-02 -2.19661576e-01 1.45792651e-01 1.02842153e-01 2.64636296e-01 -7.05999342e-02 9.47407554e-02 1.78405903e-01 -4.22844709e-01 -3.02994701e-02 -6.79062536e-01 -3.02442348e-01 -1.65240712e-01 1.04986135e-01 1.99250358e-01 -2.22102909e-01 -1.86846348e-01 -3.62570916e-01
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-3.09493479e-01 5.94615993e-02 1.11945979e-01 1.11396423e-01 -1.70141311e-01 -1.58883005e-01 -4.09666650e-01 2.25411924e-01 -2.55438010e-01 -1.57951469e-01 -2.05678833e-01 1.31476580e-01 4.21027042e-01 -1.89730750e-01 1.82046553e-01 -3.69912500e-02 -2.40118253e-01 -5.25553324e-02 2.61666932e-01 1.28508659e-01 8.90590356e-02 -2.24205369e-01 -1.72334348e-01 5.32329481e-02 2.17982663e-01 1.57703706e-01 -3.26094628e-02 2.83884928e-02 -5.55275181e-02 -2.25921674e-01 -1.01595526e-02 -4.84355609e-02 -2.92002581e-01 -4.20418989e-02 -5.84153308e-03 3.33071487e-02 -1.24901753e-01 -1.50873943e-01 -1.35016769e-01 -1.06144996e-01 1.71464627e-02 -4.56029097e-02 -5.23564065e-02 1.05818490e-01 2.57134324e-01 2.67736322e-01 1.53399087e-01 1.85062926e-01 -3.43421860e-01 -1.32021906e-01 -2.94071474e-01 1.72973180e-01 -1.36338914e-01 2.34655626e-01 1.17438530e-01 1.02705190e-01 -1.10883038e-01 -1.67683136e-01 2.00036841e-01 -3.32422622e-02 -1.27502033e-01 1.55422744e-01 2.15150699e-01 -2.22947482e-01 1.70398184e-01 -2.14167859e-01 -2.61530976e-01 -2.06133209e-01 -4.61596606e-01 4.77775631e-02 -1.86417033e-01 1.99552816e-01 -3.45441430e-01 5.03258537e-01 1.85684770e-01 1.86046885e-01 -4.30497707e-01 -8.94611495e-02 2.52809571e-01 3.75293153e-01 7.45091406e-02 1.71003906e-01 -2.06070055e-01 -1.58073941e-01 -2.43528408e-02 -2.57659850e-01 2.16083639e-01 1.01825940e-01 2.08191518e-01 4.83438054e-01 -2.39966915e-01 2.42315323e-01 1.92262930e-01 -2.07308698e-01 -2.27744251e-01 6.02210147e-02 -2.93185454e-02 -2.22294088e-01 -2.06995434e-01 -2.11028706e-01 -1.66563052e-01 -4.85574919e-01 -1.82388095e-01 -5.42742746e-02 -8.23135886e-01 -2.21013840e-01 -4.40681069e-02 2.12629231e-01 1.36213261e-01 1.06658047e-02 1.04830263e-03 1.44372069e-01 5.94634059e-02 2.12221010e-01 -1.14599112e-01 3.95763335e-01 -7.42433721e-02 1.55815986e-01 -3.89856348e-01 7.38338280e-01 5.84981670e-02 2.23452731e-01 -3.63964271e-01 -1.46125066e-01 1.08509729e-01 2.44283535e-01 -1.38359246e-01 -1.26746461e-01 -9.85321453e-01 -1.34348087e-01 -2.61074940e-01 1.78749183e-01 -1.18572059e-01 4.70259617e-02 -2.36366936e-01 1.35290417e-01 -1.54890058e-01 9.80860852e-02 -1.27319184e-01 5.85905727e-02 1.38498984e-01 -1.18930202e-01 -1.83147297e-01 1.77572835e-01 -1.34141367e-01 1.27452504e+00 1.84194519e-01 -2.14419861e-01 -1.65492920e-01 1.36767898e-01 1.17405707e-01 2.65861717e-02 -3.84100396e-01 3.24110383e-02 -7.35314764e-02 1.75616971e-01 2.39121538e-02 1.37870375e-01 1.78037625e-01 1.68168970e-01 -1.22840706e-01 -2.20283540e-01 1.89091101e-01 2.09259033e-01 -1.22405056e+00 1.58515297e-01 2.17919184e-01 -1.99272248e-01 -1.80702019e-01 -1.04559933e-01 -2.07485549e-01 7.96664902e-02 -2.00619006e-01 -6.98733066e-01 2.63047121e-02 -1.58938573e-01 1.89042325e-02 1.38065401e-01 1.48210255e-01 9.43055024e-02 -2.46891821e-01 -2.45973843e-01 3.59452876e-01 1.71412638e-01 -4.48088575e-02 1.95162914e-01 -1.05858767e-01 1.85876795e-01 1.71215244e-01 3.10503064e-01 -9.07120405e-01 2.01275062e-01 -4.48620514e-01 9.75750336e-02 -1.92659790e-01 1.59513384e-01 1.60265219e-01 6.44474712e-02 -1.31309510e-01 2.44154373e-01
4,ㅇㅝㄹ,ㅇㅝㄹ-2.07543174e-01 -1.01263833e-01 1.26638564e-01 9.36565856e-02 -1.52148504e-01 1.67678009e-02 3.85973799e-01 1.92930432e-01 -2.20342473e-01 -2.15635997e-01 -8.36786198e-02 1.72399451e-01 8.19512594e-02 -1.66362758e-01 1.13756677e-01 -3.09594066e-02 -2.23089370e-01 -3.50094682e-01 7.07086750e-02 6.94877104e-02 1.13086698e-01 -2.40972983e-01 -1.04089444e-01 1.45626799e-01 -5.98449794e-02 1.91787404e-01 -5.65803433e-02 -2.43458007e-01 7.71896009e-01 -1.87345933e-01 -9.98802675e-02 -2.47249012e-01 -3.25623852e-01 -3.63292452e-01 1.39537682e-01 1.21925615e-01 3.90533593e-02 -1.77712546e-01 5.14731260e-02 -7.43622051e-02 9.44772015e-02 2.17048715e-01 1.31218236e-02 1.58331938e-01 -1.00107470e-01 2.10442804e-01 -6.12985361e-02 1.86838954e-01 1.64127237e-01 1.86699033e-01 -2.51803599e-01 1.39279089e-01 3.15918240e-03 2.42418761e-01 -3.46683449e-02 1.38016214e-02 -5.36560896e-02 -1.77220349e-01 1.13158058e-01 5.90045819e-01 6.50217006e-02 5.76124963e-02 1.67716102e-01 -2.05330278e-01 2.01460131e-01 -1.93439165e-01 -2.33788064e-01 -1.93770253e-01 -3.69522025e-02 4.52333271e-01 -1.71640225e-01 1.44403648e-01 5.46992919e-01 -3.76822803e-01 1.37138204e-01 -2.42360373e-01 6.27278760e-01 1.92468103e-01 -4.25906023e-01 -1.83173940e-01 -7.22082004e-01 -2.92950938e-01 -1.54356470e-01 -1.49266241e-01 -1.21510116e-01 2.49981859e-01 1.90484674e-01 -2.24703508e-01 1.10174135e-01 3.05294448e-02 -1.74417774e-01 6.52179239e-01 2.55612784e-01 1.63754743e-01 -1.40298269e-01 -6.96529239e-02 -1.15197296e-01 -1.12457185e-01 -1.91360648e-01 -2.20644781e-01 -1.13727032e-01 -5.69626472e-01 -1.33273956e-01 -6.59166633e-04 -2.45182579e-01 2.47954587e-01 9.37162451e-02 -3.30785260e-02 1.54434188e-01 1.05433836e-01 1.61869395e-01 -4.16800339e-01 1.78003039e-01 1.08066788e-01 4.87426430e-01 -1.34939466e-01 -1.14443743e-01 1.84902759e-01 -3.18457480e-01 2.15180926e-01 2.11916558e-01 8.92627609e-02 6.60362507e-01 3.37890619e-01 9.84967576e-02 1.13256852e-01 -1.67288098e-01 -1.60780883e-01 -5.24549544e-01 6.48892316e-02 -1.97777472e-01 1.83043419e-01 -1.57161905e-01 7.47437323e-02 -2.12751595e-01 3.97028490e-02 -1.29083648e-01 -4.11200696e-01 -1.43165755e-01 -4.87978000e-02 1.62505604e-01 -1.12123034e-01 2.17820616e-02 2.00909486e-01 -9.43656613e-02 1.01302617e-01 -3.64311214e-01 5.07613794e-01 3.94370917e-02 1.66529309e-01 1.51838193e-01 7.40891890e-02 -1.71141511e-01 1.81023410e-01 -6.92126992e-02 9.02600565e-02 -1.11472314e-01 1.13758591e-01 1.71081352e-01 1.41558394e-01 -1.81943980e-01 -2.15555957e-01 -1.14436321e-01 -4.30688901e-01 2.36485530e-02 1.75774379e-01 2.62191743e-01 -1.97027935e-01 -3.73835512e-01 -1.51544333e-01 -1.20699944e-01 2.44332789e-01 -1.84968239e-01 -8.70590447e-01 -1.63512853e-01 -1.19869269e-01 -2.95490953e-01 -2.68816005e-03 1.48533264e-01 -6.81354214e-02 -1.86784827e-01 1.60469802e-01 -1.42405910e-01 -3.10615624e-01 3.25235341e-02 1.48908569e-01 7.32271722e-02 2.47296963e-02 6.81094160e-02 2.27964696e-01 -3.24181395e-01 1.42529717e-01 -1.13509342e-02 8.09770566e-02 -1.74362306e-01 1.85492757e-01 2.23699225e-01 -6.10087039e-01 7.63579517e-01 6.01198398e-02


In [25]:
np.savetxt('../KB_NLP/glove.200D.100E.txt',glove_embedding['vectors'],fmt='%s')

---
## 1000 epochs, max loss

In [31]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42,max_loss=0.0002)
glove.fit(x.tocoo(), epochs=1000, no_threads=4, verbose=True)

Performing 1000 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
Epoch 100
Epoch 101
Epoch 102
Epoch 103
Epoch 104
Epoch 105
Epo

---
# make every 10 th embeddings with 200dim

In [34]:
%%time
for k in range(10,210,10):
    glove = Glove(no_components=200, learning_rate=0.01, random_state=42,)
    glove.fit(x.tocoo(), epochs=k, no_threads=4, verbose=False)
    dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
    glove.add_dictionary(dictionary)
    
    glove_embedding = pd.DataFrame()
    glove_embedding['key'] = list(glove.dictionary.keys())
    glove_embedding['vectors'] = list(glove.word_vectors)

    glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
    glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']
    for i in tqdm(range(len(glove_embedding))):
        glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')
    np.savetxt('../KB_NLP/glove_embedding/glove.200D.{}E.txt'.format(k),glove_embedding['vectors'],fmt='%s')
    print("{}th embedding DONE".format(k))

100%|██████████| 38890/38890 [00:07<00:00, 5170.59it/s]


10th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5178.05it/s]


20th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5293.16it/s]


30th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5151.79it/s]


40th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5480.20it/s]


50th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5184.51it/s]


60th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5387.93it/s]


70th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5193.58it/s]


80th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5534.20it/s]


90th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5108.40it/s]


100th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5220.91it/s]


110th embedding DONE


100%|██████████| 38890/38890 [00:06<00:00, 5618.49it/s]


120th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5166.17it/s]


130th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5455.86it/s]


140th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5236.72it/s]


150th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5522.14it/s]


160th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5152.22it/s]


170th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5440.41it/s]


180th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5276.32it/s]


190th embedding DONE


100%|██████████| 38890/38890 [00:07<00:00, 5478.10it/s]


200th embedding DONE
CPU times: user 1h 57min 17s, sys: 8.18 s, total: 1h 57min 26s
Wall time: 41min 18s


---
## 1000 epochs

In [16]:
%%time
glove = Glove(no_components=200, learning_rate=0.01, random_state=42,)
glove.fit(x.tocoo(), epochs=1000, no_threads=4, verbose=True)

Performing 100 training epochs with 4 threads
Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
CPU times: user 5min 2s, sys: 47 ms, total: 5min 2s
Wall time: 1

In [22]:
dictionary = {vocab:idx for idx, vocab in enumerate(idx2vocab)}
glove.add_dictionary(dictionary)
print("shape: {}".format(glove.word_vectors.shape))

glove_embedding = pd.DataFrame()
glove_embedding['key'] = list(glove.dictionary.keys())
glove_embedding['vectors'] = list(glove.word_vectors)

glove_embedding['vectors'] = glove_embedding['vectors'].map(str)
glove_embedding['vectors'] = glove_embedding['key'] + glove_embedding['vectors']

In [23]:
for i in tqdm(range(len(glove_embedding))):
    glove_embedding['vectors'][i] = glove_embedding['vectors'][i].replace('[','').replace(']','').replace('\n','')

100%|██████████| 38890/38890 [00:07<00:00, 5394.15it/s]


In [24]:
glove_embedding.head()

Unnamed: 0,key,vectors
0,ㅇㅗㄹㄹㅣㅁ,ㅇㅗㄹㄹㅣㅁ-0.04014644 0.14591996 0.21770952 0.27505619 -0.21774962 -0.3299259 -0.34875664 0.15955163 -0.30599767 -0.09654434 -0.36265585 0.36642118 0.58957813 -0.25477741 -0.77548881 -0.16812614 -0.30386485 0.3014517 0.11541772 0.23872045 0.23607387 -0.32231933 0.65029271 -0.57035232 0.3518528 0.2721458 0.29503168 -0.24414353 0.24229787 -0.27020786 0.02250393 -0.64149403 -0.18691672 0.57878591 0.04031945 0.25008333 -0.11378724 -0.24900752 -0.36675543 -0.16945304 -0.26521683 0.47874288 -0.39373579 0.35531035 -0.0364649 0.24267091 0.28361149 0.0727594 -0.18305883 0.18589462 0.01891854 0.31339466 0.2110837 0.2616362 0.1850178 0.30698852 -0.34319437 -0.33101811 -0.0366438 -0.10888972 -0.33897783 -0.18377894 0.22498183 -0.16396334 0.17689501 -0.09917777 -0.23181384 -0.07816078 -0.32654061 0.39174611 0.09102456 0.05801771 -0.5810237 -0.15678637 0.12635572 0.00321863 0.06294066 -0.35182788 -0.04854145 -0.12647429 0.35109869 0.40514648 -0.21645254 -0.19755682 -0.32289444 -0.30027708 0.34837958 -0.04615759 0.35337252 -0.06437188 -0.30470855 0.79435022 0.25375599 -0.39471896 -0.04646276 0.28683884 0.56763656 -0.34304805 -0.13901353 -0.25175754 -0.36208369 0.05205259 -0.22300126 0.2905658 -0.36620431 -0.54267439 -0.55635134 -0.13487925 0.12809894 -0.27533946 -0.09402707 -0.13418316 0.31926451 0.25038203 0.56734689 0.50725485 0.13491343 -0.09576212 0.56955517 0.04188273 -0.26275488 0.27024855 -0.4486555 -0.37352145 -0.07066747 0.19620842 -0.05598441 -0.13595502 0.37554276 -0.24108017 -0.14587224 -0.02038195 -0.28231163 0.27694228 0.03949041 -0.36484638 0.25809373 0.03587891 -0.25194114 0.38679681 0.3846123 -0.36218281 -0.54193586 0.05941491 -0.22653054 -0.19972402 0.68535539 -0.20749228 -0.20579772 0.10440364 0.28986191 -0.41898958 -0.41396729 -0.21446917 -0.01976362 -0.33942792 0.55116389 0.35188357 0.22111976 0.29210891 -0.15658744 -0.19720351 0.36857953 0.45014699 -0.14041504 0.18623169 0.14295688 -0.2884932 -0.26274614 -0.16985756 -0.02450327 -0.59542114 -0.22030671 -0.15137482 0.10464277 -0.2418877 0.27974102 0.15631665 0.33281475 -0.11659845 -0.13164111 -0.41712243 0.2061083 0.58005565 -0.50544492 0.27397353 0.23080702 0.3383612 0.28207726 0.05866299 0.24901911 0.72161691 0.10578238 -0.03012658 -0.1668963 0.1184556 0.24818298 0.11677119 0.49216082 -0.1081244
1,ㅇㅡㄴㅎㅐㅇ,ㅇㅡㄴㅎㅐㅇ-0.1572792 -0.08054372 -0.27841671 0.21973004 0.13457085 -0.27744223 -0.40084188 0.30184945 -0.37914002 -0.26272903 -0.16365064 0.11286745 0.38496305 -0.33934461 -0.37365978 -0.29523862 -0.3034842 -0.08834723 -0.07348613 0.1021175 0.12998961 -0.30811428 -0.39247292 0.01906075 -0.07931723 0.05273994 0.6745661 -0.50186203 0.77431466 -0.23300256 -0.07766038 0.11865698 -0.09071159 0.1243992 -0.03220683 -0.39791699 -0.14834166 -0.32564888 0.32238752 -0.19313827 0.18513157 0.36449502 0.63571523 0.04075512 0.08497423 0.34870121 -0.08026315 0.2484782 0.15154243 0.14718999 -0.21447081 0.12459898 0.75769977 0.25016583 0.09443411 0.23656591 0.13067121 -0.20097059 0.20186888 -0.33050099 -0.03671781 0.18318971 0.07951071 -0.21976909 0.29548061 -0.14395485 -0.30520211 -0.2166729 -0.43158454 -0.0533822 -0.06413475 0.27059307 0.30961291 0.66331317 0.25816638 -0.01067129 0.68257689 -0.27374658 -0.04536977 0.74422488 0.35760636 -0.03790119 -0.24207401 -0.18475701 0.04828209 -0.30084225 0.19306438 0.11154074 0.20647254 0.53436873 -0.29766159 -0.2799169 0.28007286 -0.26512236 -0.24766068 -0.01053994 0.5284233 -0.31237856 -0.05439603 -0.33908246 -0.20242762 0.00664636 -0.13741498 0.03119037 -0.39012542 0.31199905 0.10927213 0.19657683 0.20031728 0.01557362 0.17524775 -0.10757874 0.18476346 0.25980472 0.09886934 0.30814531 -0.30672309 -0.00927756 -0.60975119 -0.14668808 0.03265322 0.19673261 -0.03530028 -0.11285591 0.14772563 0.27864426 -0.10311523 -0.21172293 -0.35814708 0.1042695 -0.03706134 0.19600204 -0.24872752 -0.23623717 -0.27154828 -0.53309937 0.11644491 -0.108796 -0.11860072 -0.12478401 0.17179854 0.0179788 0.06609469 0.03399539 -0.16891539 0.65338818 -0.06689487 -0.41351475 -0.11247675 0.22247287 -0.05430595 0.15229692 -0.1267627 -0.33409057 -0.2822526 0.19294731 -0.09861426 -0.03568986 0.28677243 0.06149134 -0.08263764 -0.26492316 0.36749904 0.42704584 0.42509172 0.1097473 0.35410968 -0.12505456 -0.59345382 -0.27992716 -0.18356801 -0.32856005 -0.27886826 0.01089768 -0.1810331 -0.14253432 0.12018746 -0.2276067 0.25153082 -0.01316961 -0.2637732 -0.27916181 0.05193879 0.28711377 0.12331934 0.1486321 0.02538425 -0.15962544 0.22544001 0.1950349 0.28615124 0.35690848 -0.43954099 -0.11264198 -0.22533619 0.26111029 0.31871354 -0.00189822 0.3384831 0.29622957
2,ㄱㅗㄱㅐㄱㄴㅣㅁ,ㄱㅗㄱㅐㄱㄴㅣㅁ-1.59774535e-01 1.35601894e-02 -3.62222175e-02 1.68584011e-01 -1.31787714e-01 -1.24297844e-01 2.00228608e-01 2.06780512e-01 -2.54143463e-01 -2.77011122e-01 -1.53367849e-01 1.52472556e-01 2.46109757e-01 -2.62180461e-01 1.58545554e-02 -9.79784456e-02 -2.73107237e-01 -5.61198686e-01 -3.04103621e-01 5.30389583e-03 1.84090388e-01 -2.44868792e-01 -4.22205325e-02 5.41844292e-02 -3.17535430e-02 1.27135797e-01 7.35597496e-02 8.04776400e-02 8.08645381e-02 -2.36371941e-01 -1.21337518e-01 2.99465684e-01 -4.52230928e-01 1.15666392e-01 -1.37685458e-01 2.73791619e-01 -2.64611059e-01 -2.02074239e-01 7.99144225e-01 -1.48058400e-01 9.66475243e-02 2.30492892e-01 1.98806992e-01 8.87366044e-02 1.53342230e-01 2.59374439e-01 1.66862002e-01 -7.79917430e-03 -4.17507331e-02 -1.70697758e-01 -2.02600688e-01 2.33182573e-01 -5.91029105e-03 2.85857876e-01 1.50252584e-01 1.23244483e-01 -2.15563739e-01 -2.55887540e-01 4.36759099e-01 2.11382618e-01 -1.36944361e-01 5.71780665e-02 1.81034578e-01 -1.15659092e-01 2.38599171e-01 -1.76526333e-01 -2.66821017e-01 -1.57762802e-01 -6.52438807e-01 3.03438873e-01 -1.90448989e-01 1.36690891e-01 2.56505557e-01 6.26730114e-01 1.55345690e-01 6.04323441e-02 1.54829881e-01 6.22165559e-02 1.26596759e-01 5.71306852e-01 -2.71304747e-01 -1.42866346e-02 -1.89114776e-01 -1.20380007e-01 -1.91756510e-01 3.82102600e-01 1.83512358e-01 -1.10905821e-04 2.71975305e-01 8.26141978e-01 -2.44574418e-01 8.06388655e-02 2.17388113e-01 -2.64435435e-01 -1.32786257e-01 1.75763273e-02 2.96415372e-01 -2.80826634e-01 -3.74378166e-02 -2.38737367e-01 -1.64600439e-01 -6.46687298e-01 -1.18838267e-01 2.28615551e-01 -3.60456107e-02 1.02354349e-01 7.00208990e-02 -1.01846005e-01 1.71657818e-01 -1.16689624e-01 1.92315075e-01 3.67106983e-01 2.68170847e-01 2.47345018e-01 -5.67269297e-02 -1.67791377e-01 -8.80511671e-02 1.04767538e-01 -2.11712500e-01 -1.03736686e-01 -1.24706549e-02 9.00582777e-02 5.21616787e-01 8.85927143e-02 1.83159704e-01 6.41675789e-02 -9.95466735e-02 -2.24394985e-01 -5.15215033e-01 7.20427212e-02 -3.18454183e-02 1.56768460e-01 -2.37868608e-02 -1.85783353e-03 -1.55038712e-01 -3.04176675e-02 5.07301340e-02 -9.86613048e-02 -1.88483787e-01 1.40914133e-01 2.73056131e-01 -1.24619872e-01 -2.40882670e-01 2.14382587e-01 -6.91382651e-02 6.95400661e-01 -1.76571711e-01 1.00786554e-01 -3.70676595e-01 1.37287830e-01 -4.64829169e-01 8.10677371e-02 1.13221683e-01 4.59398949e-02 1.76689141e-01 1.28926183e-01 -1.22438741e-01 2.37550251e-02 1.84000557e-01 1.70612222e-01 -9.19960364e-02 -2.50505274e-01 7.83623162e-02 -3.67610080e-01 6.35533834e-01 1.96856852e-01 2.15016094e-01 -2.64781221e-01 1.41865599e-01 -1.47284667e-01 -5.94803556e-02 -7.75125804e-02 -2.53884475e-01 1.71920020e-02 -6.69925865e-02 -1.03723733e-01 -2.25550254e-02 -2.71748762e-01 2.05700156e-01 -4.98145754e-03 -2.16777901e-01 1.25821891e-01 -2.96438880e-02 -2.19661576e-01 1.45792651e-01 1.02842153e-01 2.64636296e-01 -7.05999342e-02 9.47407554e-02 1.78405903e-01 -4.22844709e-01 -3.02994701e-02 -6.79062536e-01 -3.02442348e-01 -1.65240712e-01 1.04986135e-01 1.99250358e-01 -2.22102909e-01 -1.86846348e-01 -3.62570916e-01
3,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ,ㄱㅏㅁㅅㅏㅎㅏㅂㄴㅣㄷㅏ-3.09493479e-01 5.94615993e-02 1.11945979e-01 1.11396423e-01 -1.70141311e-01 -1.58883005e-01 -4.09666650e-01 2.25411924e-01 -2.55438010e-01 -1.57951469e-01 -2.05678833e-01 1.31476580e-01 4.21027042e-01 -1.89730750e-01 1.82046553e-01 -3.69912500e-02 -2.40118253e-01 -5.25553324e-02 2.61666932e-01 1.28508659e-01 8.90590356e-02 -2.24205369e-01 -1.72334348e-01 5.32329481e-02 2.17982663e-01 1.57703706e-01 -3.26094628e-02 2.83884928e-02 -5.55275181e-02 -2.25921674e-01 -1.01595526e-02 -4.84355609e-02 -2.92002581e-01 -4.20418989e-02 -5.84153308e-03 3.33071487e-02 -1.24901753e-01 -1.50873943e-01 -1.35016769e-01 -1.06144996e-01 1.71464627e-02 -4.56029097e-02 -5.23564065e-02 1.05818490e-01 2.57134324e-01 2.67736322e-01 1.53399087e-01 1.85062926e-01 -3.43421860e-01 -1.32021906e-01 -2.94071474e-01 1.72973180e-01 -1.36338914e-01 2.34655626e-01 1.17438530e-01 1.02705190e-01 -1.10883038e-01 -1.67683136e-01 2.00036841e-01 -3.32422622e-02 -1.27502033e-01 1.55422744e-01 2.15150699e-01 -2.22947482e-01 1.70398184e-01 -2.14167859e-01 -2.61530976e-01 -2.06133209e-01 -4.61596606e-01 4.77775631e-02 -1.86417033e-01 1.99552816e-01 -3.45441430e-01 5.03258537e-01 1.85684770e-01 1.86046885e-01 -4.30497707e-01 -8.94611495e-02 2.52809571e-01 3.75293153e-01 7.45091406e-02 1.71003906e-01 -2.06070055e-01 -1.58073941e-01 -2.43528408e-02 -2.57659850e-01 2.16083639e-01 1.01825940e-01 2.08191518e-01 4.83438054e-01 -2.39966915e-01 2.42315323e-01 1.92262930e-01 -2.07308698e-01 -2.27744251e-01 6.02210147e-02 -2.93185454e-02 -2.22294088e-01 -2.06995434e-01 -2.11028706e-01 -1.66563052e-01 -4.85574919e-01 -1.82388095e-01 -5.42742746e-02 -8.23135886e-01 -2.21013840e-01 -4.40681069e-02 2.12629231e-01 1.36213261e-01 1.06658047e-02 1.04830263e-03 1.44372069e-01 5.94634059e-02 2.12221010e-01 -1.14599112e-01 3.95763335e-01 -7.42433721e-02 1.55815986e-01 -3.89856348e-01 7.38338280e-01 5.84981670e-02 2.23452731e-01 -3.63964271e-01 -1.46125066e-01 1.08509729e-01 2.44283535e-01 -1.38359246e-01 -1.26746461e-01 -9.85321453e-01 -1.34348087e-01 -2.61074940e-01 1.78749183e-01 -1.18572059e-01 4.70259617e-02 -2.36366936e-01 1.35290417e-01 -1.54890058e-01 9.80860852e-02 -1.27319184e-01 5.85905727e-02 1.38498984e-01 -1.18930202e-01 -1.83147297e-01 1.77572835e-01 -1.34141367e-01 1.27452504e+00 1.84194519e-01 -2.14419861e-01 -1.65492920e-01 1.36767898e-01 1.17405707e-01 2.65861717e-02 -3.84100396e-01 3.24110383e-02 -7.35314764e-02 1.75616971e-01 2.39121538e-02 1.37870375e-01 1.78037625e-01 1.68168970e-01 -1.22840706e-01 -2.20283540e-01 1.89091101e-01 2.09259033e-01 -1.22405056e+00 1.58515297e-01 2.17919184e-01 -1.99272248e-01 -1.80702019e-01 -1.04559933e-01 -2.07485549e-01 7.96664902e-02 -2.00619006e-01 -6.98733066e-01 2.63047121e-02 -1.58938573e-01 1.89042325e-02 1.38065401e-01 1.48210255e-01 9.43055024e-02 -2.46891821e-01 -2.45973843e-01 3.59452876e-01 1.71412638e-01 -4.48088575e-02 1.95162914e-01 -1.05858767e-01 1.85876795e-01 1.71215244e-01 3.10503064e-01 -9.07120405e-01 2.01275062e-01 -4.48620514e-01 9.75750336e-02 -1.92659790e-01 1.59513384e-01 1.60265219e-01 6.44474712e-02 -1.31309510e-01 2.44154373e-01
4,ㅇㅝㄹ,ㅇㅝㄹ-2.07543174e-01 -1.01263833e-01 1.26638564e-01 9.36565856e-02 -1.52148504e-01 1.67678009e-02 3.85973799e-01 1.92930432e-01 -2.20342473e-01 -2.15635997e-01 -8.36786198e-02 1.72399451e-01 8.19512594e-02 -1.66362758e-01 1.13756677e-01 -3.09594066e-02 -2.23089370e-01 -3.50094682e-01 7.07086750e-02 6.94877104e-02 1.13086698e-01 -2.40972983e-01 -1.04089444e-01 1.45626799e-01 -5.98449794e-02 1.91787404e-01 -5.65803433e-02 -2.43458007e-01 7.71896009e-01 -1.87345933e-01 -9.98802675e-02 -2.47249012e-01 -3.25623852e-01 -3.63292452e-01 1.39537682e-01 1.21925615e-01 3.90533593e-02 -1.77712546e-01 5.14731260e-02 -7.43622051e-02 9.44772015e-02 2.17048715e-01 1.31218236e-02 1.58331938e-01 -1.00107470e-01 2.10442804e-01 -6.12985361e-02 1.86838954e-01 1.64127237e-01 1.86699033e-01 -2.51803599e-01 1.39279089e-01 3.15918240e-03 2.42418761e-01 -3.46683449e-02 1.38016214e-02 -5.36560896e-02 -1.77220349e-01 1.13158058e-01 5.90045819e-01 6.50217006e-02 5.76124963e-02 1.67716102e-01 -2.05330278e-01 2.01460131e-01 -1.93439165e-01 -2.33788064e-01 -1.93770253e-01 -3.69522025e-02 4.52333271e-01 -1.71640225e-01 1.44403648e-01 5.46992919e-01 -3.76822803e-01 1.37138204e-01 -2.42360373e-01 6.27278760e-01 1.92468103e-01 -4.25906023e-01 -1.83173940e-01 -7.22082004e-01 -2.92950938e-01 -1.54356470e-01 -1.49266241e-01 -1.21510116e-01 2.49981859e-01 1.90484674e-01 -2.24703508e-01 1.10174135e-01 3.05294448e-02 -1.74417774e-01 6.52179239e-01 2.55612784e-01 1.63754743e-01 -1.40298269e-01 -6.96529239e-02 -1.15197296e-01 -1.12457185e-01 -1.91360648e-01 -2.20644781e-01 -1.13727032e-01 -5.69626472e-01 -1.33273956e-01 -6.59166633e-04 -2.45182579e-01 2.47954587e-01 9.37162451e-02 -3.30785260e-02 1.54434188e-01 1.05433836e-01 1.61869395e-01 -4.16800339e-01 1.78003039e-01 1.08066788e-01 4.87426430e-01 -1.34939466e-01 -1.14443743e-01 1.84902759e-01 -3.18457480e-01 2.15180926e-01 2.11916558e-01 8.92627609e-02 6.60362507e-01 3.37890619e-01 9.84967576e-02 1.13256852e-01 -1.67288098e-01 -1.60780883e-01 -5.24549544e-01 6.48892316e-02 -1.97777472e-01 1.83043419e-01 -1.57161905e-01 7.47437323e-02 -2.12751595e-01 3.97028490e-02 -1.29083648e-01 -4.11200696e-01 -1.43165755e-01 -4.87978000e-02 1.62505604e-01 -1.12123034e-01 2.17820616e-02 2.00909486e-01 -9.43656613e-02 1.01302617e-01 -3.64311214e-01 5.07613794e-01 3.94370917e-02 1.66529309e-01 1.51838193e-01 7.40891890e-02 -1.71141511e-01 1.81023410e-01 -6.92126992e-02 9.02600565e-02 -1.11472314e-01 1.13758591e-01 1.71081352e-01 1.41558394e-01 -1.81943980e-01 -2.15555957e-01 -1.14436321e-01 -4.30688901e-01 2.36485530e-02 1.75774379e-01 2.62191743e-01 -1.97027935e-01 -3.73835512e-01 -1.51544333e-01 -1.20699944e-01 2.44332789e-01 -1.84968239e-01 -8.70590447e-01 -1.63512853e-01 -1.19869269e-01 -2.95490953e-01 -2.68816005e-03 1.48533264e-01 -6.81354214e-02 -1.86784827e-01 1.60469802e-01 -1.42405910e-01 -3.10615624e-01 3.25235341e-02 1.48908569e-01 7.32271722e-02 2.47296963e-02 6.81094160e-02 2.27964696e-01 -3.24181395e-01 1.42529717e-01 -1.13509342e-02 8.09770566e-02 -1.74362306e-01 1.85492757e-01 2.23699225e-01 -6.10087039e-01 7.63579517e-01 6.01198398e-02


In [25]:
np.savetxt('../KB_NLP/glove.200D.1000E.txt',glove_embedding['vectors'],fmt='%s')