In [1]:
from konlpy.tag import Kkma
from konlpy.tag import Mecab
from konlpy.tag import Twitter

# Simple Approach

In [2]:
#Line 구분자가 있는 경우
test_set_2 = "첫 번째 문장입니다. \n" + \
             "두 번째 문장입니다. \n" + \
             "김선호 입니다.\n" 
print("\x1b[1;31m[개행 문자가 있는 문장]\x1b[0m \n{0}".format(test_set_2))

# # #Line 구분자가 없는 경우
# test_set_2 = "첫번째문장입니다 ." + \
#              "두번째문장입니다.... " + \
#              "김승우 ." 
# print("\x1b[1;31m[문장 구분자가 없는 문장]\x1b[0m \n{0}\n".format(test_set_2))

sent_split = test_set_2.split('\n')
print("\x1b[1;31m[개행 문자로 분리]\x1b[0m \n{0}\n".format(sent_split))

sent_token = list(map(lambda x:x.split(' '), sent_split))
print("\x1b[1;31m[공백으로 문자 분리]\x1b[0m \n{0}\n".format(sent_token))

[1;31m[개행 문자가 있는 문장][0m 
첫 번째 문장입니다. 
두 번째 문장입니다. 
김선호 입니다.

[1;31m[개행 문자로 분리][0m 
['첫 번째 문장입니다. ', '두 번째 문장입니다. ', '김선호 입니다.', '']

[1;31m[공백으로 문자 분리][0m 
[['첫', '번째', '문장입니다.', ''], ['두', '번째', '문장입니다.', ''], ['김선호', '입니다.'], ['']]



# Mecab

In [3]:
mecab = Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')
pos1 = mecab.pos(test_set_2)
print("\x1b[1;31m[Mecab POS]\x1b[0m \n{0}\n".format(pos1))

pos2 = list(filter(lambda x :  x[1] in ['MM', 'NNG', 'NNP', 'SF','XSN','NNBC'], pos1 ))
print("\x1b[1;31m[Mecab Extract Some Tag]\x1b[0m \n{0}\n".format(pos2))

pos3 = ''.join(list(map(lambda x : '\n' if x[1] in ['SF'] else x[0], pos2))).split('\n')
print("\x1b[1;31m[Mecab Sentence Splitting]\x1b[0m \n{0}\n".format(pos3))

morphs = list(map(lambda x : mecab.morphs(x) , pos3))
print("\x1b[1;31m[Mecab Morph Sentence]\x1b[0m \n{0}\n".format(morphs))


[1;31m[Mecab POS][0m 
[('첫', 'MM'), ('번', 'NNBC'), ('째', 'XSN'), ('문장', 'NNG'), ('입니다', 'VCP+EF'), ('.', 'SF'), ('두', 'MM'), ('번', 'NNBC'), ('째', 'XSN'), ('문장', 'NNG'), ('입니다', 'VCP+EF'), ('.', 'SF'), ('김선호', 'NNP'), ('입니다', 'VCP+EF'), ('.', 'SF')]

[1;31m[Mecab Extract Some Tag][0m 
[('첫', 'MM'), ('번', 'NNBC'), ('째', 'XSN'), ('문장', 'NNG'), ('.', 'SF'), ('두', 'MM'), ('번', 'NNBC'), ('째', 'XSN'), ('문장', 'NNG'), ('.', 'SF'), ('김선호', 'NNP'), ('.', 'SF')]

[1;31m[Mecab Sentence Splitting][0m 
['첫번째문장', '두번째문장', '김선호', '']

[1;31m[Mecab Morph Sentence][0m 
[['첫', '번', '째', '문장'], ['두', '번', '째', '문장'], ['김선호'], []]



# Twitter

In [4]:
twitter = Twitter(jvmpath=None)
pos1 = twitter.pos(test_set_2)
print("\x1b[1;31m[Twitter POS]\x1b[0m \n{0}\n".format(pos1))

pos2 = list(filter(lambda x :  x[1] in ['Noun', 'Suffix', 'Punctuation'], pos1 ))
print("\x1b[1;31m[Twitter Extract Some Tag]\x1b[0m \n{0}\n".format(pos2))

pos3 = ''.join(list(map(lambda x : '\n' if x[1] in ['Punctuation'] else x[0], pos2))).split('\n')
print("\x1b[1;31m[Twitter Sentence Splitting]\x1b[0m \n{0}\n".format(pos3))

morphs = list(map(lambda x : twitter.morphs(x) , pos3))
print("\x1b[1;31m[Twitter Morph Sentence]\x1b[0m \n{0}\n".format(morphs))


[1;31m[Twitter POS][0m 
[('첫', 'Noun'), ('번째', 'Suffix'), ('문장', 'Noun'), ('입니', 'Adjective'), ('다', 'Eomi'), ('.', 'Punctuation'), ('두', 'Noun'), ('번째', 'Suffix'), ('문장', 'Noun'), ('입니', 'Adjective'), ('다', 'Eomi'), ('.', 'Punctuation'), ('김', 'Noun'), ('선호', 'Noun'), ('입니', 'Adjective'), ('다', 'Eomi'), ('.', 'Punctuation')]

[1;31m[Twitter Extract Some Tag][0m 
[('첫', 'Noun'), ('번째', 'Suffix'), ('문장', 'Noun'), ('.', 'Punctuation'), ('두', 'Noun'), ('번째', 'Suffix'), ('문장', 'Noun'), ('.', 'Punctuation'), ('김', 'Noun'), ('선호', 'Noun'), ('.', 'Punctuation')]

[1;31m[Twitter Sentence Splitting][0m 
['첫번째문장', '두번째문장', '김선호', '']

[1;31m[Twitter Morph Sentence][0m 
[['첫', '번째', '문장'], ['두번째', '문장'], ['김', '선호'], []]



# KKMA

In [5]:
kkma = Kkma()
pos1 = kkma.pos(test_set_2)
print("\x1b[1;31m[Kkma POS]\x1b[0m \n{0}\n".format(pos1))

pos2 = list(filter(lambda x :  x[1] in ['MDT', 'NNB', 'NNG', 'SF', 'MDN'], pos1 ))
print("\x1b[1;31m[Kkma Extract Some Tag]\x1b[0m \n{0}\n".format(pos2))

pos3 = ''.join(list(map(lambda x : '\n' if x[1] in ['Punctuation'] else x[0], pos2))).split('\n')
print("\x1b[1;31m[Kkma Sentence Splitting]\x1b[0m \n{0}\n".format(pos3))

morphs = list(map(lambda x : kkma.morphs(x) , pos3))
print("\x1b[1;31m[Kkma Morph Sentence]\x1b[0m \n{0}\n".format(morphs))


[1;31m[Kkma POS][0m 
[('첫', 'MDT'), ('번째', 'NNB'), ('문장', 'NNG'), ('이', 'VCP'), ('ㅂ니다', 'EFN'), ('.', 'SF'), ('두', 'MDN'), ('번째', 'NNB'), ('문장', 'NNG'), ('이', 'VCP'), ('ㅂ니다', 'EFN'), ('.', 'SF'), ('김', 'NNG'), ('선호', 'NNG'), ('이', 'VCP'), ('ㅂ니다', 'EFN'), ('.', 'SF')]

[1;31m[Kkma Extract Some Tag][0m 
[('첫', 'MDT'), ('번째', 'NNB'), ('문장', 'NNG'), ('.', 'SF'), ('두', 'MDN'), ('번째', 'NNB'), ('문장', 'NNG'), ('.', 'SF'), ('김', 'NNG'), ('선호', 'NNG'), ('.', 'SF')]

[1;31m[Kkma Sentence Splitting][0m 
['첫번째문장.두번째문장.김선호.']

[1;31m[Kkma Morph Sentence][0m 
[['첫', '번째', '문장', '.', '두', '번째', '문장', '.', '김', '선호', '.']]

