## 한글 형태소 분석기
- 참고 : https://konlpy-ko.readthedocs.io/ko/v0.6.0/

In [2]:
!pip install konlpy

Collecting konlpy
  Downloading konlpy-0.6.0-py2.py3-none-any.whl (19.4 MB)
                                              0.0/19.4 MB ? eta -:--:--
                                              0.4/19.4 MB 8.1 MB/s eta 0:00:03
     -                                        0.7/19.4 MB 7.6 MB/s eta 0:00:03
     --                                       1.2/19.4 MB 8.1 MB/s eta 0:00:03
     ---                                      1.6/19.4 MB 8.6 MB/s eta 0:00:03
     ----                                     2.1/19.4 MB 9.0 MB/s eta 0:00:02
     -----                                    2.6/19.4 MB 9.3 MB/s eta 0:00:02
     ------                                   3.1/19.4 MB 9.5 MB/s eta 0:00:02
     -------                                  3.6/19.4 MB 9.7 MB/s eta 0:00:02
     --------                                 4.1/19.4 MB 10.1 MB/s eta 0:00:02
     ---------                                4.6/19.4 MB 10.1 MB/s eta 0:00:02
     ----------                               5.1/19.4 MB 10

### Okt : 한글 형태소 분석기 중 하나

In [3]:
from konlpy.tag import Okt

In [4]:
okt = Okt()  # Choose the morphological analyzer: KoNLPy's Okt tagger

In [5]:
# Sample inputs: an unspaced sentence, two ordinary sentences, and a noisy
# informal sentence mixing digits, symbols and slang.
sentences_list = [
    '아버지가방에들어가신다',
    '나는 밥을 먹는다.',
    '하늘을 나는 자동차',
    '아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ',
]

In [6]:
# Tokenization: split the second sample sentence into its morphemes.
morphs_list = okt.morphs(sentences_list[1])
# Bare expression so the notebook displays the resulting token list.
morphs_list

['나', '는', '밥', '을', '먹는다', '.']

In [7]:
# Part-of-speech tagging: returns (token, tag) pairs for the same sentence.
okt.pos(sentences_list[1])

[('나', 'Noun'),
 ('는', 'Josa'),
 ('밥', 'Noun'),
 ('을', 'Josa'),
 ('먹는다', 'Verb'),
 ('.', 'Punctuation')]

In [8]:
# Extract only the nouns from the sentence.
okt.nouns(sentences_list[1])

['나', '밥']

In [9]:
# Phrase extraction: compound nouns plus their component words, for the noisy
# fourth sentence and for a single compound word. The tuple makes the notebook
# display both results side by side.
okt.phrases(sentences_list[3]), okt.phrases('애플공홈')

(['아이폰', '애플공홈', '언락폰', '128기', '실버', '애플', '공홈', '128'], ['애플공홈', '애플', '공홈'])

### Mecab

In [10]:
!pip install python-mecab-ko



In [12]:
from mecab import MeCab

In [14]:
# Create the MeCab analyzer provided by the `mecab` (python-mecab-ko) package.
mecab = MeCab()

In [15]:
# Morpheme tokens for the first sample sentence, which is written without spaces.
mecab.morphs(sentences_list[0])

['아버지', '가', '방', '에', '들어가', '신다']

In [16]:
# Same sentence as (token, tag) pairs; a combined tag such as 'EP+EC' marks a
# token carrying more than one tag.
mecab.pos(sentences_list[0])

[('아버지', 'NNG'),
 ('가', 'JKS'),
 ('방', 'NNG'),
 ('에', 'JKB'),
 ('들어가', 'VV'),
 ('신다', 'EP+EC')]

In [17]:
# Full parse: returns Morpheme objects carrying each token's character span,
# surface form and feature details (POS, reading, has_jongseong, ...).
mecab.parse(sentences_list[0])

[Morpheme(span=Span(start=0, end=3), surface='아버지', feature=Feature(pos='NNG', semantic=None, has_jongseong=False, reading='아버지', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=3, end=4), surface='가', feature=Feature(pos='JKS', semantic=None, has_jongseong=False, reading='가', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=4, end=5), surface='방', feature=Feature(pos='NNG', semantic='장소', has_jongseong=True, reading='방', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=5, end=6), surface='에', feature=Feature(pos='JKB', semantic=None, has_jongseong=False, reading='에', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=6, end=9), surface='들어가', feature=Feature(pos='VV', semantic=None, has_jongseong=False, reading='들어가', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=9, end=11), surface='신다', feature=Feat

In [18]:
# Run MeCab over every sample sentence and print one morpheme list per line.
for sentence in sentences_list :
    # Rebinds the notebook-level `morphs_list` on every iteration.
    morphs_list = mecab.morphs(sentence)
    print(morphs_list)

['아버지', '가', '방', '에', '들어가', '신다']
['나', '는', '밥', '을', '먹', '는다', '.']
['하늘', '을', '나', '는', '자동차']
['아이폰', '기다리', '다', '지쳐', '애플', '공홈', '에서', '언락', '폰', '질러', '버렸', '다', '6', '+', '128', '기', '가', '실버', 'ㅋ']
