## 자연어 처리, NLP(Natural Language Processing) 
### KoNLPy 및 필요 모듈의 설치 
- KoNLPy : pip install konlpy
- JPype1 : conda install -c conda-forge jpype1
- 이후 Jupyter Notebook 재실행 필요

In [1]:
from IPython.display import Image as Show

In [2]:
! pip install konlpy

Collecting konlpy
  Downloading https://files.pythonhosted.org/packages/e5/3d/4e983cd98d87b50b2ab0387d73fa946f745aa8164e8888a714d5129f9765/konlpy-0.5.1-py2.py3-none-any.whl (19.4MB)
Collecting JPype1>=0.5.7 (from konlpy)
  Downloading https://files.pythonhosted.org/packages/d3/08/f4bb58c1c0dff93e9628cd0e1025f80fcb5a4551310455feb96b96e58ad1/JPype1-0.7.0-cp37-cp37m-win_amd64.whl (1.2MB)
Installing collected packages: JPype1, konlpy
Successfully installed JPype1-0.7.0 konlpy-0.5.1


In [3]:
import platform

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# sns.set()

from matplotlib import font_manager, rc
if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname="C:/Windows/Fonts/malgun.ttf").get_name()
    rc('font', family=font_name)
else:
    print("It's unknown system. Hangul fonts are not supported!")
    
# plt.rcParams['axes.unicode_minus'] = False
plt.rcParams["figure.figsize"] = [12,6]

%matplotlib inline

In [4]:
### 한글 자연어 처리 기초 

In [5]:
from konlpy.tag import Kkma
kkma = Kkma()

-------------------------------------------------------------------------------
Deprecated: convertStrings was not specified when starting the JVM. The default
behavior in JPype will be False starting in JPype 0.8. The recommended setting
for new code is convertStrings=False.  The legacy value of True was assumed for
please file a ticket with the developer.
-------------------------------------------------------------------------------

  """)


In [6]:
kkma.sentences('한국어 분석을 시작합니다 재미있어요~~')

['한국어 분석을 시작합니다', '재미있어요~~']

In [7]:
kkma.nouns('한국어 분석을 시작합니다 재미있어요~~')

['한국어', '분석']

In [8]:
kkma.pos("한국어 분석을 시작합니다 재미있어요~~")

[('한국어', 'NNG'),
 ('분석', 'NNG'),
 ('을', 'JKO'),
 ('시작하', 'VV'),
 ('ㅂ니다', 'EFN'),
 ('재미있', 'VA'),
 ('어요', 'EFN'),
 ('~~', 'SW')]

In [9]:
from konlpy.tag import Hannanum
hannanum = Hannanum()

In [10]:
hannanum.nouns("한국어 분석을 시작합니다 재미있어요~~")

['한국어', '분석', '시작']

In [11]:
hannanum.morphs('한국어 분석을 시작합니다 재미있어요~~')

['한국어', '분석', '을', '시작', '하', 'ㅂ니다', '재미있', '어요', '~~']

In [12]:
hannanum.pos('한국어 분석을 시작합니다 재미있어요~~')

[('한국어', 'N'),
 ('분석', 'N'),
 ('을', 'J'),
 ('시작', 'N'),
 ('하', 'X'),
 ('ㅂ니다', 'E'),
 ('재미있', 'P'),
 ('어요', 'E'),
 ('~~', 'S')]

In [13]:
from konlpy.tag import Twitter
t = Twitter()

  warn('"Twitter" has changed to "Okt" since KoNLPy v0.4.5.')


In [14]:
t.nouns('한국어 분석을 시작합니다 재미있어요~~')

['한국어', '분석', '시작']

In [15]:
t.morphs('한국어 분석을 시작합니다 재미있어요~~')

['한국어', '분석', '을', '시작', '합니다', '재미있어요', '~~']

In [16]:
t.pos('한국어 분석을 시작합니다 재미있어요~~')

[('한국어', 'Noun'),
 ('분석', 'Noun'),
 ('을', 'Josa'),
 ('시작', 'Noun'),
 ('합니다', 'Verb'),
 ('재미있어요', 'Adjective'),
 ('~~', 'Punctuation')]