Skip to content

Commit

Permalink
Merge 735d721 into 7b9281a
Browse files Browse the repository at this point in the history
  • Loading branch information
Jung Daun committed Jul 28, 2020
2 parents 7b9281a + 735d721 commit c2a644b
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 82 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ testall:
python3 -m pytest --cov=konlpy test/

init_i18n:
pip install mock sphinx sphinx-intl tweepy colorama bs4
pip install mock sphinx sphinx-intl
git submodule init
git submodule update

Expand Down
12 changes: 6 additions & 6 deletions konlpy/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ class CorpusLoader():
유구한 역사와 전통에 빛나는 우리 대한국민은 3·1운동으로 건립된 대한민국임시정부의 법통과 불의에 항거한 4·19민주이념을 계승하고, 조국의 민주개혁과 평화적 통일의 사명에 입각하여 정의·인도와 동포애로써 민족의 단결을 공고히 하고, 모든 사회적 폐습과 불의를 타파하며, 자율과 조화를 바 바
"""

def __init__(self, name=None):
    """Bind this loader to the corpus called ``name``.

    :param name: Name of the corpus. Must be truthy.
    :raises Exception: If ``name`` is *None* or otherwise falsy.
    """
    # Guard clause: reject a missing name before touching any state.
    if not name:
        raise Exception("You need to input the name of the corpus")

    self.name = name

def abspath(self, filename=None):
"""Absolute path of corpus file.
If ``filename`` is *None*, returns absolute path of corpus.
Expand All @@ -45,12 +51,6 @@ def open(self, filename):
"""
return utils.load_txt(self.abspath(filename))

def __init__(self, name=None):
    """Bind this loader to the corpus called ``name``.

    :param name: Name of the corpus. Must be truthy.
    :raises Exception: If ``name`` is *None* or otherwise falsy.
    """
    # Guard clause: reject a missing name before touching any state.
    if not name:
        raise Exception("You need to input the name of the corpus")

    self.name = name


kolaw = CorpusLoader('kolaw')
kobill = CorpusLoader('kobill')
5 changes: 3 additions & 2 deletions konlpy/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class Downloader(object):
NOT_INSTALLED = 'not installed'
STALE = 'corrupt or out of date'

def __init__(self, download_dir=None):
    """Remember the directory this downloader was configured with.

    :param download_dir: Value stored as ``self._download_dir``; defaults to *None*.
    """
    self._download_dir = download_dir

def download(self, id=None, download_dir=None):
"""The KoNLPy data downloader.
With this module you can download corpora, models and other data packages
Expand Down Expand Up @@ -220,8 +223,6 @@ def _get_info(self, id):
else:
raise ValueError("Could not find a matching item to download")

def __init__(self, download_dir=None):
    """Remember the directory this downloader was configured with.

    :param download_dir: Value stored as ``self._download_dir``; defaults to *None*.
    """
    self._download_dir = download_dir

# Aliases
_downloader = Downloader(default_download_dir())
Expand Down
8 changes: 7 additions & 1 deletion konlpy/tag/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
""" Common utility function for tagger classes """
from __future__ import absolute_import
from __future__ import unicode_literals
import sys


# Python 2/3 compatibility: on Python 3 the name ``basestring`` is bound
# to ``str`` so the checks below can use one name on both major versions.
if sys.version_info >= (3,):
    basestring = str


def validate_phrase_inputs(phrase):
Expand All @@ -11,4 +17,4 @@ def validate_phrase_inputs(phrase):
phrase (str): phrase input
"""
msg = "phrase input should be string, not %s" % type(phrase)
assert isinstance(phrase, str), msg
assert isinstance(phrase, basestring), msg
18 changes: 9 additions & 9 deletions konlpy/tag/_hannanum.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ class Hannanum():
:param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
"""

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Hannanum Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.jhannanum.comm')
    self.jhi = package.HannanumInterface()  # Java instance

    tagset_path = '%s/data/tagset/hannanum.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)

def analyze(self, phrase):
"""Phrase analyzer.
Expand Down Expand Up @@ -104,12 +113,3 @@ def morphs(self, phrase):
"""Parse phrase to morphemes."""

return [s for s, t in self.pos(phrase)]

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Hannanum Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.jhannanum.comm')
    self.jhi = package.HannanumInterface()  # Java instance

    tagset_path = '%s/data/tagset/hannanum.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)
18 changes: 9 additions & 9 deletions konlpy/tag/_kkma.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ class Kkma():
:param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
"""

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Kkma Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.kkma')
    self.jki = package.KkmaInterface()  # Java instance

    tagset_path = '%s/data/tagset/kkma.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)

def nouns(self, phrase):
"""Noun extractor."""

Expand Down Expand Up @@ -91,12 +100,3 @@ def sentences(self, phrase):
sentences = self.jki.morphAnalyzer(phrase)
if not sentences: return []
return [sentences.get(i).getSentence() for i in range(sentences.size())]

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Kkma Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.kkma')
    self.jki = package.KkmaInterface()  # Java instance

    tagset_path = '%s/data/tagset/kkma.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)
44 changes: 22 additions & 22 deletions konlpy/tag/_komoran.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,28 @@ class Komoran():
:param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
"""

def __init__(self, jvmpath=None, userdic=None, modelpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Komoran Java analyzer.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param userdic: Optional value handed to ``setUserDic`` on the Java instance.
    :param modelpath: Argument for the Java ``Komoran`` constructor; when falsy,
        ``java/data/models`` under ``utils.installpath`` is used instead.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    :raises IOError: If constructing the Java ``Komoran`` object raises ``TypeError``.
    """
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    # FIXME: Cannot execute without sudoing (applies to the bundled default path)
    # java.lang.NoClassDefFoundErrorPyRaisable: java.lang.NoClassDefFoundError: kr/co/shineware/nlp/komoran/core/analyzer/Komoran
    self.modelpath = modelpath or os.path.join(utils.installpath, 'java', 'data', 'models')

    tagset_path = '%s/data/tagset/komoran.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)

    core_package = jpype.JPackage('kr.co.shineware.nlp.komoran.core')
    try:
        self.jki = core_package.Komoran(self.modelpath)
    except TypeError:  # Package kr.lucypark.komoran.KomoranInterface is not Callable
        raise IOError("Cannot access komoran-dic. Please leave an issue at https://github.com/konlpy/konlpy/issues")

    # A user dictionary is optional; register it only when one was given.
    if userdic:
        self.jki.setUserDic(userdic)

def pos(self, phrase, flatten=True, join=False):
"""POS tagger.
Expand Down Expand Up @@ -89,25 +111,3 @@ def morphs(self, phrase):
"""Parse phrase to morphemes."""

return [s for s, t in self.pos(phrase)]

def __init__(self, jvmpath=None, userdic=None, modelpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Komoran Java analyzer.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param userdic: Optional value handed to ``setUserDic`` on the Java instance.
    :param modelpath: Argument for the Java ``Komoran`` constructor; when falsy,
        ``java/data/models`` under ``utils.installpath`` is used instead.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    :raises IOError: If constructing the Java ``Komoran`` object raises ``TypeError``.
    """
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    # FIXME: Cannot execute without sudoing (applies to the bundled default path)
    # java.lang.NoClassDefFoundErrorPyRaisable: java.lang.NoClassDefFoundError: kr/co/shineware/nlp/komoran/core/analyzer/Komoran
    self.modelpath = modelpath or os.path.join(utils.installpath, 'java', 'data', 'models')

    tagset_path = '%s/data/tagset/komoran.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)

    core_package = jpype.JPackage('kr.co.shineware.nlp.komoran.core')
    try:
        self.jki = core_package.Komoran(self.modelpath)
    except TypeError:  # Package kr.lucypark.komoran.KomoranInterface is not Callable
        raise IOError("Cannot access komoran-dic. Please leave an issue at https://github.com/konlpy/konlpy/issues")

    # A user dictionary is optional; register it only when one was given.
    if userdic:
        self.jki.setUserDic(userdic)
40 changes: 20 additions & 20 deletions konlpy/tag/_mecab.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,26 @@ class Mecab():
.. _Eunjeon Project: http://eunjeon.blogspot.kr/
"""

def __init__(self, dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic'):
    """Create the MeCab tagger for the dictionary at ``dicpath``.

    :param dicpath: Dictionary path passed to ``Tagger`` via its ``-d`` option.
    :raises Exception: If building the tagger or loading the tagset raises
        ``RuntimeError`` or ``NameError``.
    """
    self.dicpath = dicpath
    try:
        tagger_options = '-d %s' % dicpath
        self.tagger = Tagger(tagger_options)
        tagset_path = '%s/data/tagset/mecab.json' % utils.installpath
        self.tagset = utils.read_json(tagset_path)
    except NameError:
        # NOTE(review): presumably ``Tagger`` is undefined when the MeCab
        # binding could not be imported -- confirm against the module imports.
        raise Exception('Install MeCab in order to use it: http://konlpy.org/en/latest/install/')
    except RuntimeError:
        raise Exception('The MeCab dictionary does not exist at "%s". Is the dictionary correctly installed?\nYou can also try entering the dictionary path when initializing the Mecab class: "Mecab(\'/some/dic/path\')"' % dicpath)

def __setstate__(self, state):
    """Restore the instance by running ``__init__`` again.

    :param state: Mapping with a ``'dicpath'`` entry, as built by ``__getstate__``.
    """
    saved_dicpath = state['dicpath']
    self.__init__(dicpath=saved_dicpath)

def __getstate__(self):
    """Return the state to store: only the dictionary path.

    :return: ``dict`` with the single key ``'dicpath'``.
    """
    state = {'dicpath': self.dicpath}
    return state

# TODO: check whether flattened results equal non-flattened
def pos(self, phrase, flatten=True, join=False):
"""POS tagger.
Expand Down Expand Up @@ -103,23 +123,3 @@ def nouns(self, phrase):

tagged = self.pos(phrase)
return [s for s, t in tagged if t.startswith('N')]

def __init__(self, dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic'):
    """Create the MeCab tagger for the dictionary at ``dicpath``.

    :param dicpath: Dictionary path passed to ``Tagger`` via its ``-d`` option.
    :raises Exception: If building the tagger or loading the tagset raises
        ``RuntimeError`` or ``NameError``.
    """
    self.dicpath = dicpath
    try:
        tagger_options = '-d %s' % dicpath
        self.tagger = Tagger(tagger_options)
        tagset_path = '%s/data/tagset/mecab.json' % utils.installpath
        self.tagset = utils.read_json(tagset_path)
    except NameError:
        # NOTE(review): presumably ``Tagger`` is undefined when the MeCab
        # binding could not be imported -- confirm against the module imports.
        raise Exception('Install MeCab in order to use it: http://konlpy.org/en/latest/install/')
    except RuntimeError:
        raise Exception('The MeCab dictionary does not exist at "%s". Is the dictionary correctly installed?\nYou can also try entering the dictionary path when initializing the Mecab class: "Mecab(\'/some/dic/path\')"' % dicpath)

def __setstate__(self, state):
    """Restore the instance by running ``__init__`` again.

    :param state: Mapping with a ``'dicpath'`` entry, as built by ``__getstate__``.
    """
    saved_dicpath = state['dicpath']
    self.__init__(dicpath=saved_dicpath)

def __getstate__(self):
    """Return the state to store: only the dictionary path.

    :return: ``dict`` with the single key ``'dicpath'``.
    """
    state = {'dicpath': self.dicpath}
    return state
18 changes: 9 additions & 9 deletions konlpy/tag/_okt.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ class Okt():
:param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
"""

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Okt Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.okt')
    self.jki = package.OktInterface()

    # The tagset is read from the file named ``twitter.json``.
    tagset_path = '%s/data/tagset/twitter.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)

def pos(self, phrase, norm=False, stem=False, join=False):
"""POS tagger.
In contrast to other classes in this subpackage,
Expand Down Expand Up @@ -87,12 +96,3 @@ def phrases(self, phrase):
def normalize(self, phrase):
text = self.jki.normalize(phrase)
return text

def __init__(self, jvmpath=None, max_heap_size=1024):
    """Start the JVM if necessary and create the Okt Java interface.

    :param jvmpath: Forwarded to :py:func:`.init_jvm` when no JVM is running yet.
    :param max_heap_size: Maximum memory usage limitation (Megabyte) :py:func:`.init_jvm`.
    """
    # Only initialize a JVM when jpype reports that none has been started.
    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        jvm.init_jvm(jvmpath, max_heap_size)

    package = jpype.JPackage('kr.lucypark.okt')
    self.jki = package.OktInterface()

    # The tagset is read from the file named ``twitter.json``.
    tagset_path = '%s/data/tagset/twitter.json' % utils.installpath
    self.tagset = utils.read_json(tagset_path)
2 changes: 0 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
JPype1>=0.7.0
beautifulsoup4==4.6.0
colorama
lxml>=4.1.0
numpy>=1.6
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-

import os
import sys
import platform
from setuptools import find_packages, setup

Expand All @@ -23,7 +24,11 @@ def _openreq(reqfile):
with open(os.path.join(os.path.dirname(__file__), reqfile)) as f:
return f.read().splitlines()

return _openreq('requirements.txt')
req = _openreq('requirements.txt')
if sys.version_info[0] < 3:
req[0] = req[0].replace(">=", "==")

return


about = get_about()
Expand Down

0 comments on commit c2a644b

Please sign in to comment.