In [1]:
# Put the parent directory on the module search path before importing soynlp
# (presumably so the notebook uses the local checkout rather than an installed copy — confirm).
import sys
sys.path.append('../')
import soynlp

# Record which soynlp version produced the outputs shown in this notebook.
print(soynlp.__version__)

0.0.4


In [2]:
from soynlp.pos.tagset import tagset
from pprint import pprint
# Display the tag inventory; per the output it maps each tag name to its Korean label.
pprint(tagset)

{'Adjective': '형용사',
 'Adverb': '부사',
 'Determiner': '관형사',
 'Exclamation': '감탄사',
 'Josa': '조사',
 'Noun': '명사',
 'Numeral': '수사',
 'Pronoun': '대명사',
 'Symbol': '기호',
 'Verb': '동사'}


In [3]:
from soynlp.pos import Dictionary
from soynlp.pos import LRTemplateMatcher
from soynlp.pos import LREvaluator
from soynlp.pos import SimpleTagger
from soynlp.pos import UnknowLRPostprocessor

# Toy lexicon keyed by tag name; each value is the set of surface forms for that tag.
# A surface form may be listed under several tags ('이' is both a Noun and a Josa).
pos_dict = {
    'Adverb': {
        '너무', '매우',
    },
    'Noun': {
        '너무너무너무', '아이오아이', '아이', '노래', '오', '이', '고양',
    },
    'Josa': {
        '는', '의', '이다', '입니다', '이', '이는', '를', '라', '라는',
    },
    'Verb': {
        '하는', '하다', '하고',
    },
    'Adjective': {
        '예쁜', '예쁘다',
    },
    'Exclamation': {
        '우와',
    },
}

dictionary = Dictionary(pos_dict)

# Look up one word with a single tag and one word listed under two tags.
for lookup_word in ('아이오아이', '이'):
    print(dictionary.get_pos(lookup_word))

['Noun']
['Noun', 'Josa']


In [4]:
# Per-tag word scores that are later passed to the evaluator as `preference`;
# only these two nouns receive an explicit score.
preference = dict(
    Noun={
        '아이오아이': 10.0,
        '너무너무너무': 5,
    },
)

In [5]:
# Assemble the tagger from three parts: a generator built on the dictionary,
# an evaluator that receives the preference scores, and a postprocessor.
# NOTE(review): the exact role of each component is defined in soynlp.pos, not here — confirm there.
generator = LRTemplateMatcher(dictionary)
evaluator = LREvaluator(preference=preference)
postprocessor = UnknowLRPostprocessor()

tagger = SimpleTagger(generator, evaluator, postprocessor)

In [6]:
# Unspaced input sentence; the trailing '!!' is not in pos_dict and comes back tagged None.
sent = '너무너무너무는아이오아이의노래입니다!!'
tagger.tag(sent)

[('너무너무너무', 'Noun'),
 ('는', 'Josa'),
 ('아이오아이', 'Noun'),
 ('의', 'Josa'),
 ('노래', 'Noun'),
 ('입니다', 'Josa'),
 ('!!', None)]

In [7]:
# With debug=True the call is unpacked into two values: the tags and accompanying debug info.
tags, debugs = tagger.tag(sent, debug=True)

In [8]:
# The tags from the debug call match the result of the plain tagger.tag(sent) call.
pprint(tags)

[('너무너무너무', 'Noun'),
 ('는', 'Josa'),
 ('아이오아이', 'Noun'),
 ('의', 'Josa'),
 ('노래', 'Noun'),
 ('입니다', 'Josa'),
 ('!!', None)]


In [9]:
# Per the output, each entry pairs an LR candidate (l / r surface forms with their tags) with a numeric score.
# NOTE(review): b, m, e look like begin / l-r boundary / end character offsets into `sent` — confirm.
pprint(debugs)

[[(LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=0, m=2, e=2), 0.4),
  (LR(l='너무너무너무', l_tag='Noun', r='', r_tag=None, b=0, m=6, e=6), 5.9),
  (LR(l='너무너무너무', l_tag='Noun', r='는', r_tag='Josa', b=0, m=6, e=7), 6.15),
  (LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=2, m=4, e=4), 0.4),
  (LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=4, m=6, e=6), 0.4),
  (LR(l='아이오아이', l_tag='Noun', r='', r_tag=None, b=7, m=12, e=12), 10.8),
  (LR(l='아이오아이', l_tag='Noun', r='의', r_tag='Josa', b=7, m=12, e=13), 11.05),
  (LR(l='노래', l_tag='Noun', r='', r_tag=None, b=13, m=15, e=15), 0.5),
  (LR(l='노래', l_tag='Noun', r='입니다', r_tag='Josa', b=13, m=15, e=18), 0.95)]]
