In [1]:
import sys

# Make the parent directory importable so `soynlp` can be imported below
# (presumably the local checkout lives there -- confirm against the repo layout).
# The membership guard keeps the cell idempotent: re-running it does not keep
# appending duplicate entries to sys.path.
if '../' not in sys.path:
    sys.path.append('../')


from soynlp.pos._dictionary import Dictionary

In [2]:
# Toy part-of-speech lexicon: each tag maps to the set of surface forms that
# carry it. A form may appear under several tags ('이' is both Noun and Josa).
# Keyword order mirrors the original literal so the insertion order is unchanged.
pos_dict = dict(
    Adverb={'너무', '매우'},
    Noun={'너무너무너무', '아이오아이', '아이', '노래', '오', '이', '고양'},
    Josa={'는', '의', '이다', '입니다', '이', '이는', '를'},
    Verb={'하는', '하다', '하고'},
    Adjective={'예쁜', '예쁘다'},
    Exclamation={'우와'},
)

# Wrap the raw mapping in the lookup helper used by every cell below.
dictionary = Dictionary(pos_dict)

In [3]:
# Inspect the tag -> word-set mapping held by the dictionary.
dictionary.pos_dict

{'Adjective': {'예쁘다', '예쁜'},
 'Adverb': {'너무', '매우'},
 'Exclamation': {'우와'},
 'Josa': {'는', '를', '의', '이', '이는', '이다', '입니다'},
 'Noun': {'고양', '너무너무너무', '노래', '아이', '아이오아이', '오', '이'},
 'Verb': {'하고', '하는', '하다'}}

In [4]:
# Look up the tags of a word; '아이오아이' is listed only under Noun.
dictionary.get_pos('아이오아이')

['Noun']

In [5]:
# '이' is listed under both Noun and Josa, so two tags are returned.
dictionary.get_pos('이')

['Noun', 'Josa']

In [6]:
# Membership test: does the given word carry the given tag?
dictionary.word_is_tag('아이오아이', 'Noun')

True

In [7]:
# Evaluates to 6 here, matching the longest entry ('너무너무너무', 6 characters);
# presumably the length of the longest dictionary word -- confirm in Dictionary.
dictionary.max_length

6

In [8]:
from soynlp.pos._template import EojeolTemplateMatcher

# Candidate generator backed by the dictionary; the cells below call
# generate() on it with one whitespace-free token at a time.
generator = EojeolTemplateMatcher(dictionary)

In [9]:
from pprint import pprint
# generate() returns a list of candidate analyses, each itself a list of LR
# tuples. Here the token is matched both as one Noun and as 아이 + 오 + 아이.
pprint(generator.generate('아이오아이'))

[[LR(l='아이오아이', l_tag='Noun', r='', r_tag=None, b=0, m=5, e=5)],
 [LR(l='아이', l_tag='Noun', r='', r_tag=None, b=0, m=2, e=2),
  LR(l='오', l_tag='Noun', r='', r_tag=None, b=2, m=3, e=3),
  LR(l='아이', l_tag='Noun', r='', r_tag=None, b=3, m=5, e=5)]]


In [10]:
# With a trailing '는' the candidates are the whole token left untagged
# (l_tag=None) and the Noun + Josa split.
pprint(generator.generate('아이오아이는'))

[[LR(l='아이오아이는', l_tag=None, r='', r_tag=None, b=0, m=6, e=6)],
 [LR(l='아이오아이', l_tag='Noun', r='는', r_tag='Josa', b=0, m=5, e=6)]]


In [11]:
# '너무너무너무' is itself a Noun entry and also decomposes into three '너무' Adverbs.
pprint(generator.generate('너무너무너무'))

[[LR(l='너무너무너무', l_tag='Noun', r='', r_tag=None, b=0, m=6, e=6)],
 [LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=0, m=2, e=2),
  LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=2, m=4, e=4),
  LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=4, m=6, e=6)]]


In [12]:
# Import through the package path, like the other soynlp imports in this
# notebook. A bare `_evaluator` import only resolves when the working directory
# happens to contain that file, and it would load a second copy of the module.
from soynlp.pos._evaluator import SimpleEojeolEvaluator

In [13]:
# Evaluator used to choose one analysis among the generated candidates.
evaluator = SimpleEojeolEvaluator()

# Candidate analyses for a Noun + Josa token (a list of LR lists).
candidates = generator.generate('아이오아이는')
pprint(candidates)

[[LR(l='아이오아이는', l_tag=None, r='', r_tag=None, b=0, m=6, e=6)],
 [LR(l='아이오아이', l_tag='Noun', r='는', r_tag='Josa', b=0, m=5, e=6)]]


In [14]:
# Of the two candidates, the tagged Noun + Josa split is selected over the
# untagged whole-token reading.
evaluator.select_best(candidates)

[LR(l='아이오아이', l_tag='Noun', r='는', r_tag='Josa', b=0, m=5, e=6)]

In [15]:
from soynlp.pos._tagger import SimpleTagger

# Example sentence; also reused by later cells with its spaces stripped.
sent = '너무너무너무는 아이오아이의 노래입니다'

# Combine the candidate generator and the evaluator into a tagger.
# tag() returns a flat list of (word, tag) pairs by default.
tagger = SimpleTagger(generator, evaluator)
tagger.tag(sent)

[('너무너무너무', 'Noun'),
 ('는', 'Josa'),
 ('아이오아이', 'Noun'),
 ('의', 'Josa'),
 ('노래', 'Noun'),
 ('입니다', 'Josa')]

In [16]:
# flatten=False keeps the (word, tag) pairs grouped, one list per
# space-separated chunk of the input.
tagger.tag(sent, flatten=False)

[[('너무너무너무', 'Noun'), ('는', 'Josa')],
 [('아이오아이', 'Noun'), ('의', 'Josa')],
 [('노래', 'Noun'), ('입니다', 'Josa')]]

In [17]:
from soynlp.pos._template import LRTemplateMatcher

# Second candidate generator over the same dictionary; the cells below show
# that it returns a flat list of LR tuples rather than a list of lists.
lr_generator = LRTemplateMatcher(dictionary)

In [18]:
# Yields the Noun both with and without the trailing Josa attached.
pprint(lr_generator.generate('아이오아이는'))

[LR(l='아이오아이', l_tag='Noun', r='는', r_tag='Josa', b=0, m=5, e=6),
 LR(l='아이오아이', l_tag='Noun', r='', r_tag=None, b=0, m=5, e=5)]


In [19]:
# Run on the whole sentence with spaces removed; b/m/e in the output are
# character offsets into that concatenated string.
pprint(lr_generator.generate(sent.replace(' ', '')))

[LR(l='너무너무너무', l_tag='Noun', r='는', r_tag='Josa', b=0, m=6, e=7),
 LR(l='너무너무너무', l_tag='Noun', r='', r_tag=None, b=0, m=6, e=6),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=0, m=2, e=2),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=2, m=4, e=4),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=4, m=6, e=6),
 LR(l='아이오아이', l_tag='Noun', r='의', r_tag='Josa', b=7, m=12, e=13),
 LR(l='아이오아이', l_tag='Noun', r='', r_tag=None, b=7, m=12, e=12),
 LR(l='노래', l_tag='Noun', r='입니다', r_tag='Josa', b=13, m=15, e=18),
 LR(l='노래', l_tag='Noun', r='', r_tag=None, b=13, m=15, e=15)]


In [20]:
from soynlp.pos._evaluator import LREvaluator

# Evaluator paired with the flat LR candidate list produced by lr_generator.
lr_evaluator = LREvaluator()

In [21]:
# NOTE(review): this rebinds `candidates` (first assigned from
# generator.generate earlier in the notebook) to a differently shaped value:
# a flat list of LR tuples instead of a list of LR lists. Re-running the earlier
# evaluator.select_best(candidates) cell after this one would feed it this list.
candidates = lr_generator.generate(sent.replace(' ', ''))
pprint(candidates)

[LR(l='너무너무너무', l_tag='Noun', r='는', r_tag='Josa', b=0, m=6, e=7),
 LR(l='너무너무너무', l_tag='Noun', r='', r_tag=None, b=0, m=6, e=6),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=0, m=2, e=2),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=2, m=4, e=4),
 LR(l='너무', l_tag='Adverb', r='', r_tag=None, b=4, m=6, e=6),
 LR(l='아이오아이', l_tag='Noun', r='의', r_tag='Josa', b=7, m=12, e=13),
 LR(l='아이오아이', l_tag='Noun', r='', r_tag=None, b=7, m=12, e=12),
 LR(l='노래', l_tag='Noun', r='입니다', r_tag='Josa', b=13, m=15, e=18),
 LR(l='노래', l_tag='Noun', r='', r_tag=None, b=13, m=15, e=15)]


In [22]:
# The selected LRs here are the three Noun + Josa pairs; the bare-Noun and
# Adverb candidates covering the same offsets are dropped.
pprint(lr_evaluator.select_best(candidates))

[LR(l='너무너무너무', l_tag='Noun', r='는', r_tag='Josa', b=0, m=6, e=7),
 LR(l='아이오아이', l_tag='Noun', r='의', r_tag='Josa', b=7, m=12, e=13),
 LR(l='노래', l_tag='Noun', r='입니다', r_tag='Josa', b=13, m=15, e=18)]


In [23]:
# Import through the package path so the postprocessor comes from the same
# module object as SimpleTagger (imported from soynlp.pos._tagger); a bare
# `_tagger` import depends on the working directory and loads a separate copy.
from soynlp.pos._tagger import UnknowLRPostprocessor

# The postprocessor is passed as a third argument to SimpleTagger. In the
# output, substrings with no dictionary match ('우리', '!') are tagged None.
unknown_postprocessor = UnknowLRPostprocessor()
lr_tagger = SimpleTagger(lr_generator, lr_evaluator, unknown_postprocessor)
lr_tagger.tag('너무너무너무는우리아이오아이의노래입니다!')

[('너무너무너무', 'Noun'),
 ('는', 'Josa'),
 ('우리', None),
 ('아이오아이', 'Noun'),
 ('의', 'Josa'),
 ('노래', 'Noun'),
 ('입니다', 'Josa'),
 ('!', None)]