In [1]:
import math
import sys
# Put the parent directory first on the module search path so that the
# `import soynlp` below is resolved from there before any other location
# (presumably a local checkout of the package — confirm).
sys.path.insert(0, '../')
import soynlp
# Show which soynlp version was actually imported.
print(soynlp.__version__)

0.0.46


# Movie comment data

In [2]:
# movie comments
corpus_path = 'merged_comments.txt' # set your data path

# Each line of the corpus is tab-separated: <idx, comment, rate>
with open(corpus_path, encoding='utf-8') as f:
    rows = (doc.split('\t') for doc in f)
    # Keep the comment column only. Rows with fewer than two columns (e.g. a
    # blank line) are skipped instead of raising IndexError, and rows whose
    # comment column is empty are dropped.
    sentences = [cols[1] for cols in rows if len(cols) > 1 and cols[1]]
print('num sentences = %d' % len(sentences))

num sentences = 294493


## Noun Extraction

In [3]:
from soynlp.noun import LRNounExtractor_v2

# Build the noun extractor; one option per line.
noun_extractor = LRNounExtractor_v2(
    max_left_length=10,
    max_right_length=9,
    verbose=True,
    min_num_of_features=2,
    max_frequency_when_noun_is_eojeol=10,
    min_eojeol_frequency=2,
    extract_compound=True,
    extract_pos_feature=False
)

# Train on the movie comments and extract nouns in a single call.
# NOTE(review): min_eojeol_frequency is 2 in the constructor above but 1 in
# this call — confirm which value is intended.
noun_scores = noun_extractor.train_extract(
    sentences,
    min_noun_score=0.3,
    min_noun_frequency=2,    # minimum frequency of an extracted noun
    min_eojeol_frequency=1,
    reset_lrgraph=False      # for the predicator extraction
)

[Noun Extractor] use default predictors
[Noun Extractor] num features: pos=1260, neg=1173, common=12
[Noun Extractor] counting eojeols
[EojeolCounter] n eojeol = 89170 from 294493 sents. mem=0.166 Gb                     
[Noun Extractor] complete eojeol counter -> lr graph
[Noun Extractor] has been trained. mem=0.297 Gb
[Noun Extractor] batch prediction was completed for 28290 words
[Noun Extractor] checked compounds. discovered 15 compounds
[Noun Extractor] postprocessing detaching_features : 9458 -> 9335
[Noun Extractor] postprocessing ignore_features : 9335 -> 9257
[Noun Extractor] postprocessing ignore_NJ : 9257 -> 9131
[Noun Extractor] 9131 nouns (15 compounds) with min frequency=2
[Noun Extractor] flushing was done. mem=0.333 Gb                    
[Noun Extractor] 68.04 % eojeols are covered


In [4]:
# Look up the extracted score entry for a single word.
noun_scores['크리스토퍼']

NounScore(frequency=686, score=1.0)

In [5]:
def _noun_rank_key(noun):
    # Negated sqrt(frequency) * score, so that an ascending sort puts the
    # highest-ranked nouns first.
    entry = noun_scores[noun]
    return -math.sqrt(entry.frequency) * entry.score

# Print the 30 top-ranked nouns together with their score entries.
for noun in sorted(noun_scores, key=_noun_rank_key)[:30]:
    print('{}: {}'.format(noun, noun_scores[noun]))

영화: NounScore(frequency=61200, score=0.7619601007166377)
정말: NounScore(frequency=18842, score=1.0)
너무: NounScore(frequency=13708, score=1.0)
진짜: NounScore(frequency=14504, score=0.9568345323741008)
최고: NounScore(frequency=12792, score=0.8792405063291139)
감독: NounScore(frequency=6591, score=1.0)
재미: NounScore(frequency=6048, score=0.9528061224489796)
생각: NounScore(frequency=5616, score=0.9793271179570329)
배우들: NounScore(frequency=5065, score=0.9925449000338868)
ㅋㅋ: NounScore(frequency=4880, score=1.0)
감동: NounScore(frequency=4902, score=0.9840525328330206)
평점: NounScore(frequency=6226, score=0.8520084566596194)
마지막: NounScore(frequency=4508, score=1.0)
이런: NounScore(frequency=4861, score=0.904540379605508)
스토리: NounScore(frequency=4991, score=0.8579169175195666)
것: NounScore(frequency=3629, score=1.0)
그래: NounScore(frequency=3579, score=0.9900787861103005)
느낌: NounScore(frequency=3482, score=1.0)
내: NounScore(frequency=3778, score=0.9359138682389131)
대박: NounScore(frequency=3174, score=

## Predicator Extraction

In [6]:
from soynlp.predicator import PredicatorExtractor

# prepare materials from noun extractor
# NOTE(review): _pos_features is a private attribute of the noun extractor.
noun_pos_features = noun_extractor._pos_features
# Set of the extracted noun strings (the keys of noun_scores).
nouns = {noun for noun in noun_scores}

# initiate
predicator_extractor = PredicatorExtractor(
    nouns = nouns,
    noun_pos_features = noun_pos_features, # used to filter out "noun + josa/adjective" eojeols
    extract_eomi=True,
    extract_stem=True,
    verbose=True
)

# Train on the same movie-comment sentences used for noun extraction.
predicator_extractor.train(
    sentences,
    min_eojeol_frequency = 3
)

# Sizes of the extractor's stem / eomi collections before extract() is called.
print('num stems (before stem extraction) = %d' % len(predicator_extractor._stems))
print('num eomis (before eomi extraction) = %d' % len(predicator_extractor._eomis))

predicators = predicator_extractor.extract(
    min_predicator_frequency=3, reset_lrgraph=True,
    # Eomi extractor
    min_num_of_features=2, min_eomi_score=0.3, min_eomi_frequency=1,
    # Stem extractor
    min_num_of_unique_R_char=10, min_entropy_of_R_char=0.5,
    min_entropy_of_R=1.5, min_stem_score=0.7, min_stem_frequency=5
)

# Same sizes again after extract(), to compare with the counts printed above.
print('num stems (after stem extraction) = %d' % len(predicator_extractor._stems))
print('num eomis (after eomi extraction) = %d' % len(predicator_extractor._eomis))

[Predicator Extractor] counting eojeols was done. 60208 eojeols, mem=0.333 Gb                    
[Predicator Extractor] complete eojeol counter -> lr graph
[Predicator Extractor] has been trained. mem=0.333 Gb
num stems (before stem extraction) = 1248
num eomis (before eomi extraction) = 1120
[Eomi Extractor] batch prediction was completed for 5500 words
[Eomi Extractor] eomi lemmatization with 714 candidates
[Eomi Extractor] 1562 eomis extracted with min frequency = 1, min score = 0.3
[Predicator Extractor] 1287 eomis have been extracted
[Stem Extractor] Initializing was done with 1248 stems and 2407 eomisis
[Stem Extractor] batch prediction for 4695 candidates
[Stem Extractor] 37 stems, 18 surfacial stems, 13 removals
[Predicator Extractor] 37 stems have been extracted
[Predicator Extractor] lemma candidating was done. 64.558 % eojeols are covered
[Predicator Extractor] 6474 predicators are extracted
num stems (after stem extraction) = 1285
num eomis (after eomi extraction) = 2407


In [7]:
# Rank the extracted predicators from most to least frequent.
frequency_sorted_predicators = sorted(
    predicators,
    key=lambda x: -predicators[x].frequency
)

# Two windows of the ranking: the 50 most frequent words, and the 100 words
# sitting between the 500th and the 400th position from the end.
most_frequent_words = frequency_sorted_predicators[:50]
low_frequency_words = frequency_sorted_predicators[-500:-400]
word_sets = [most_frequent_words, low_frequency_words]

separator = '--' * 30
for word_set in word_sets:
    for word in word_set:
        lemmas = predicators[word]
        print('{}: {}'.format(word, lemmas))
    print(separator)

보고: Predicator(frequency=5899, lemma={('보', '고')})
내가: Predicator(frequency=3718, lemma={('내', '가'), ('낳', '아가')})
봤는데: Predicator(frequency=3560, lemma={('보', '았는데')})
하지만: Predicator(frequency=3507, lemma={('하', '지만')})
이건: Predicator(frequency=3414, lemma={('이', '건')})
있는: Predicator(frequency=2966, lemma={('있', '는'), ('이', 'ㅆ는')})
이렇게: Predicator(frequency=2891, lemma={('이렇', '게')})
없는: Predicator(frequency=2848, lemma={('없', '는'), ('이', '없는')})
보는: Predicator(frequency=2775, lemma={('보', '는')})
없다: Predicator(frequency=2686, lemma={('없', '다'), ('이', '없다')})
재밌게: Predicator(frequency=2573, lemma={('재밌', '게')})
하는: Predicator(frequency=2453, lemma={('하', '는')})
좋은: Predicator(frequency=2429, lemma={('좋', '은'), ('좋으', 'ㄴ')})
이게: Predicator(frequency=2391, lemma={('이', '게')})
내내: Predicator(frequency=2140, lemma={('낳', '아내'), ('내', '내')})
보세요: Predicator(frequency=2105, lemma={('보', '세요')})
보면: Predicator(frequency=2071, lemma={('보', '면')})
같다: Predicator(frequency=1847, lemma={('같', 

# Chat text

## Noun Extraction

In [8]:
import pickle
from soynlp.utils import LRGraph

# build noun extractor from {l:{r:count}} dict
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# this with a pickle file from a trusted source.
with open('../tmp/lrgraph_dict_for_nounextraction.pkl', 'rb') as f:
    lrgraph = LRGraph(pickle.load(f))

# LRNounExtractor_v2 is imported in the movie-comment noun extraction cell.
# Only extract_pos_feature is passed here; no other option is overridden.
chat_noun_extractor = LRNounExtractor_v2(
    extract_pos_feature=True
)

# Attach the prebuilt graph directly; train() is not called in this cell.
chat_noun_extractor.lrgraph = lrgraph
# Private attribute set by hand from the graph's eojeol counter
# (presumably the total eojeol count — confirm against soynlp).
chat_noun_extractor._num_of_eojeols = lrgraph.to_EojeolCounter()._count_sum

[Noun Extractor] use default predictors
[Noun Extractor] num features: pos=1260, neg=1173, common=12


In [9]:
# Run extraction only; the extractor's lrgraph was assigned directly in an
# earlier cell rather than built by training on sentences.
chat_noun_scores = chat_noun_extractor.extract(
    min_noun_score=0.3,
    min_noun_frequency=2,  # minimum frequency of an extracted noun
    reset_lrgraph=False    # for the predicator extraction
)

[Noun Extractor] extract and append pos features
[Noun Extractor] batch prediction for extracting pos feature
[Noun Extractor] batch prediction was completed for 167504 words
[Noun Extractor] features appended. pos=1260 -> 1260, neg=1173 -> 1173, common=12 -> 12
[Noun Extractor] 0 pos features were extracted
[Noun Extractor] batch prediction was completed for 167504 words
[Noun Extractor] checked compounds. discovered 13 compounds
[Noun Extractor] postprocessing detaching_features : 58046 -> 56981
[Noun Extractor] postprocessing ignore_features : 56981 -> 56867
[Noun Extractor] postprocessing ignore_NJ : 56867 -> 55706
[Noun Extractor] 55706 nouns (13 compounds) with min frequency=2
[Noun Extractor] flushing was done. mem=1.783 Gb                    
[Noun Extractor] 58.14 % eojeols are covered


In [10]:
# Spot-check a handful of words; a word without an entry prints as None.
spot_check_words = '여부 서가앤쿡 맘스터치 신라스테이 설입 '.split()
for word in spot_check_words:
    score = chat_noun_scores.get(word, None)
    print('{}: {}'.format(word, score))

여부: NounScore(frequency=4831, score=0.7579092159559835)
서가앤쿡: NounScore(frequency=121, score=1.0)
맘스터치: NounScore(frequency=336, score=0.9178082191780822)
신라스테이: NounScore(frequency=11, score=1.0)
설입: NounScore(frequency=71, score=1.0)


In [11]:
# Sort the nouns by descending frequency once and slice the ranking twice,
# instead of sorting the whole dictionary separately for each slice.
frequency_sorted_chat_nouns = sorted(chat_noun_scores, key=lambda x:-chat_noun_scores[x].frequency)

word_sets = [
    frequency_sorted_chat_nouns[:50],        # the 50 most frequent nouns
    frequency_sorted_chat_nouns[3000:3050]   # 50 nouns starting at index 3000 of the ranking
]
for word_set in word_sets:
    for noun in word_set:
        print('{}: {}'.format(noun, chat_noun_scores[noun]))
    print('--' * 30)

내: NounScore(frequency=253502, score=0.9581029653240607)
그래: NounScore(frequency=219531, score=0.8884650675055709)
오늘: NounScore(frequency=202989, score=0.9519450800915332)
진짜: NounScore(frequency=184587, score=0.8989955357142857)
근데: NounScore(frequency=173788, score=0.8535127055306427)
너무: NounScore(frequency=157721, score=0.9843899330997132)
그냥: NounScore(frequency=147720, score=0.7358490566037735)
오빠: NounScore(frequency=146246, score=0.9886392234405896)
지금: NounScore(frequency=140182, score=0.8701720220707563)
그럼: NounScore(frequency=132727, score=0.8043704474505723)
사랑: NounScore(frequency=121283, score=0.9992462852630823)
내일: NounScore(frequency=101316, score=0.7156619678401926)
많이: NounScore(frequency=100460, score=0.8667058132706987)
응응: NounScore(frequency=85628, score=1.0)
우리: NounScore(frequency=84382, score=0.9788417943257539)
너: NounScore(frequency=78604, score=0.9711176803572552)
미안: NounScore(frequency=73317, score=0.9991863447421951)
헤헤: NounScore(frequency=71407, scor

**TODO**
1. "N is N+J" postprocessing 에서 길이가 2 이하인 단어는 검사하지 않고 pass 하기 때문에, "욕 + 을" 과 같은 어절이 명사로 추출된다. 두 글자의 명사는 각 글자가 명사, 조사인 경우가 많아서, 이는 postprocessing 으로 처리하기 어렵다.

## Predicator Extraction

In [12]:
from soynlp.predicator import PredicatorExtractor

# prepare materials from noun extractor
# NOTE(review): _pos_features and lrgraph._lr are private attributes.
chat_noun_pos_features = chat_noun_extractor._pos_features
# Set of the extracted noun strings (the keys of chat_noun_scores).
chat_nouns = {noun for noun in chat_noun_scores}
# New LRGraph built from the noun extractor's graph data (LRGraph is imported
# in the chat noun extraction cell).
chat_lrgraph = LRGraph(chat_noun_extractor.lrgraph._lr)

# initiate
chat_predicator_extractor = PredicatorExtractor(
    nouns = chat_nouns,
    noun_pos_features = chat_noun_pos_features, # used to filter out "noun + josa/adjective" eojeols
    extract_eomi=True,
    extract_stem=True,
    verbose=True
)

chat_predicator_extractor.train(
    #sentences,
    chat_lrgraph, # an LRGraph may be passed instead of sentences
    min_eojeol_frequency = 3
)

# Sizes of the extractor's stem / eomi collections before extract() is called.
print('num stems (before stem extraction) = %d' % len(chat_predicator_extractor._stems))
print('num eomis (before eomi extraction) = %d' % len(chat_predicator_extractor._eomis))

chat_predicators = chat_predicator_extractor.extract(
    min_predicator_frequency=5, reset_lrgraph=True,
    # Eomi extractor
    min_num_of_features=4, min_eomi_score=0.3, min_eomi_frequency=10,
    # Stem extractor
    min_num_of_unique_R_char=5, min_entropy_of_R_char=0.5,
    min_entropy_of_R=1.5, min_stem_score=0.7, min_stem_frequency=5
)

# Same sizes again after extract(), to compare with the counts printed above.
print('num stems (after stem extraction) = %d' % len(chat_predicator_extractor._stems))
print('num eomis (after eomi extraction) = %d' % len(chat_predicator_extractor._eomis))

num stems (before stem extraction) = 1248
num eomis (before eomi extraction) = 1120
[Eomi Extractor] batch prediction was completed for 103217 words
[Eomi Extractor] eomi lemmatization with 5460 candidates
[Eomi Extractor] 11511 eomis extracted with min frequency = 10, min score = 0.3
[Predicator Extractor] 10994 eomis have been extracted
[Stem Extractor] Initializing was done with 1248 stems and 12114 eomisis
[Stem Extractor] batch prediction for 58689 candidates
[Stem Extractor] 2558 stems, 1584 surfacial stems, 2888 removals
[Predicator Extractor] 2528 stems have been extracted
[Predicator Extractor] lemma candidating was done. 70.088 % eojeols are covered
[Predicator Extractor] 75327 predicators are extracted
num stems (after stem extraction) = 3776
num eomis (after eomi extraction) = 12114


In [13]:
# Rank the extracted chat predicators from most to least frequent.
frequency_sorted_chat_predicators = sorted(
    chat_predicators,
    key=lambda x: -chat_predicators[x].frequency
)

# Two windows of the ranking: the 50 most frequent words, and the 100 words
# at indices 10400 to 10499.
most_frequent_words = frequency_sorted_chat_predicators[:50]
mid_frequency_words = frequency_sorted_chat_predicators[10400:10500]
word_sets = [most_frequent_words, mid_frequency_words]

separator = '--' * 30
for word_set in word_sets:
    for word in word_set:
        lemmas = chat_predicators[word]
        print('{}: {}'.format(word, lemmas))
    print(separator)

내가: Predicator(frequency=251537, lemma={('내', '가'), ('내그', '아'), ('낳', '아가')})
이제: Predicator(frequency=170348, lemma={('이', '제')})
나도: Predicator(frequency=150243, lemma={('나', '도'), ('낳', '도')})
나는: Predicator(frequency=96965, lemma={('나', '는'), ('낳', '는')})
자기: Predicator(frequency=88198, lemma={('자', '기')})
같이: Predicator(frequency=61417, lemma={('같', '이')})
다시: Predicator(frequency=43174, lemma={('닿', '시')})
그렇게: Predicator(frequency=41940, lemma={('그렇', '게')})
하고: Predicator(frequency=40197, lemma={('하', '고')})
나두: Predicator(frequency=40086, lemma={('낳', '두'), ('나', '두')})
너가: Predicator(frequency=38115, lemma={('너그', '아'), ('넣', '가')})
그리고: Predicator(frequency=34275, lemma={('그리', '고')})
빨리: Predicator(frequency=33545, lemma={('빨', '리')})
좋아: Predicator(frequency=30761, lemma={('좋', '아')})
어떻게: Predicator(frequency=29312, lemma={('어떻', '게')})
있어: Predicator(frequency=29049, lemma={('이', 'ㅆ어'), ('있', '어')})
이렇게: Predicator(frequency=26173, lemma={('이렇', '게')})
맞아: Predicator(fr

In [26]:
from soynlp.lemmatizer import _lemma_candidate


# Membership helpers, defined once before the loop. They were previously
# re-created as name-bound lambdas on every iteration.
def is_stem(x):
    # True if x is one of the predicator extractor's stems.
    return x in chat_predicator_extractor._stems


def is_eomi(x):
    # True if x is one of the predicator extractor's eomis.
    return x in chat_predicator_extractor._eomis


l = '먹'

# For every R part that follows `l` in the extractor's L-R graph, print:
# the R part and its count, the (stem, eomi) lemma candidates whose stem and
# eomi are both known to the extractor, and whether the surface form l + r
# is among the extracted predicators.
for r, count in chat_predicator_extractor.lrgraph.get_r(l, topk=-1):
    word = l + r
    word_is_extracted = word in chat_predicators

    lemmas = _lemma_candidate(l, r)
    lemmas = {(stem, eomi) for stem, eomi in lemmas
              if is_stem(stem) and is_eomi(eomi)}
    # Sort before joining so the printed order is stable across runs;
    # iterating the set directly gives no ordering guarantee.
    lemmas_strf = '' if not lemmas else '[%s]' % ', '.join('%s+%s'%lr for lr in sorted(lemmas))

    print('{} - {} ({}) / {} / {}extracted'.format(
        l, r, count, lemmas_strf, ' ' if word_is_extracted else 'not '))

먹 - 어 (13159) / [먹+어] /  extracted
먹 - 었어 (10510) / [먹+었어] /  extracted
먹 - 구 (9331) / [먹+구] /  extracted
먹 - 으면 (5906) / [먹+으면] /  extracted
먹 - 어요 (5638) / [먹+어요] /  extracted
먹 - 었어요 (4201) / [먹+었어요] /  extracted
먹 - 어야지 (3885) / [먹+어야지] /  extracted
먹 - 어서 (3845) / [먹+어서] /  extracted
먹 - 자 (3133) / [먹+자] /  extracted
먹 - 엉 (2541) / [먹+엉] /  extracted
먹 - 을 (2467) / [먹+을] /  extracted
먹 - 어용 (2463) / [먹+어용] /  extracted
먹 - 지 (2322) / [먹+지] /  extracted
먹 - 어도 (2227) / [먹+어도] /  extracted
먹 - 었는데 (1946) / [먹+었는데] /  extracted
먹 - 을까 (1935) / [먹+을까] /  extracted
먹 - 엇어 (1825) / [먹+엇어] /  extracted
먹 - 었엉 (1513) / [먹+었엉] /  extracted
먹 - 는데 (1502) / [먹+는데] /  extracted
먹 - 는 (1495) / [먹+는] /  extracted
먹 - 으면서 (1368) / [먹+으면서] /  extracted
먹 - 는거 (1356) / [먹+는거] /  extracted
먹 - 을게 (1342) / [먹+을게] /  extracted
먹 - 었지 (1215) / [먹+었지] /  extracted
먹 - 어야징 (1208) / [먹+어야징] /  extracted
먹 - 어야 (1050) / [먹+어야] /  extracted
먹 - 게 (1044) / [먹+게] /  extracted
먹 - 는게 (992) / [먹+는게] /  extract

먹 - 냐구 (12) / [먹+냐구] /  extracted
먹 - 으려니 (12) / [먹+으려니] /  extracted
먹 - 을려나 (12) / [먹+을려나] /  extracted
먹 - 이게 (12) / [먹+이게] /  extracted
먹 - 으몀 (12) / [먹+으몀] /  extracted
먹 - 은중 (12) /  / not extracted
먹 - 노 (12) / [먹+노] /  extracted
먹 - 구싶다아 (12) / [먹+구싶다아] /  extracted
먹 - 쨔 (12) /  / not extracted
먹 - 구올께용 (12) /  / not extracted
먹 - 구이땅 (12) / [먹+구이땅] /  extracted
먹 - 게따 (12) / [먹+게따] /  extracted
먹 - 으니깡 (12) / [먹+으니깡] /  extracted
먹 - 을고 (12) / [먹+을고] /  extracted
먹 - 어야하낭 (12) / [먹+어야하낭] /  extracted
먹 - 는거네 (11) / [먹+는거네] /  extracted
먹 - 기야 (11) /  / not extracted
먹 - 으랴고 (11) / [먹+으랴고] /  extracted
먹 - 어주라 (11) / [먹+어주라] /  extracted
먹 - 는거보면 (11) / [먹+는거보면] /  extracted
먹 - 었지여 (11) / [먹+었지여] /  extracted
먹 - 는담 (11) /  / not extracted
먹 - 어놔 (11) / [먹+어놔] /  extracted
먹 - 자앙 (11) / [먹+자앙] /  extracted
먹 - 아요 (11) / [먹+아요] /  extracted
먹 - 는다매 (11) /  / not extracted
먹 - 어야대나 (11) / [먹+어야대나] /  extracted
먹 - 어야되용 (11) /  / not extracted
먹 - 던징 (11) / [먹+던징] /  extracted
먹

먹 - 여주넹 (2) /  / not extracted
먹 - 읍 (2) /  / not extracted
먹 - 었습니다용 (2) /  / not extracted
먹 - 구옴다 (2) /  / not extracted
먹 - 어야되욤 (2) /  / not extracted
먹 - 을량 (2) /  / not extracted
먹 - 지말릭 (2) /  / not extracted
먹 - 여살려죠 (2) /  / not extracted
먹 - 넝 (2) /  / not extracted
먹 - 어야한다며 (2) /  / not extracted
먹 - 을긋 (2) /  / not extracted
먹 - 으려도 (2) /  / not extracted
먹 - 엇슴당 (2) /  / not extracted
먹 - 어간다용 (2) /  / not extracted
먹 - 구싶어햇던 (2) /  / not extracted
먹 - 즈아아 (2) /  / not extracted
먹 - 어주려궁 (2) /  / not extracted
먹 - 어준다고 (2) / [먹+어준다고] / not extracted
먹 - 어줄사람 (2) /  / not extracted
먹 - 을려겅 (2) /  / not extracted
먹 - 어보려나 (2) /  / not extracted
먹 - 잖옹 (2) / [먹+잖옹] / not extracted
먹 - 어와서 (2) / [먹+어와서] / not extracted
먹 - 었지러엉 (2) /  / not extracted
먹 - 어야될거아냐 (2) /  / not extracted
먹 - 어헤헤 (2) /  / not extracted
먹 - 는거넹 (2) / [먹+는거넹] / not extracted
먹 - 자냥 (2) / [먹+자냥] / not extracted
먹 - 어서어 (2) / [먹+어서어] / not extracted
먹 - 어놔서 (2) / [먹+어놔서] / not extracted
먹 - 을게없또 (2) 

In [32]:
# Imported here as well so this cell does not depend on another cell having
# been run first.
from soynlp.lemmatizer import _lemma_candidate


# Membership helpers, defined once before the loop. They were previously
# re-created as name-bound lambdas on every iteration.
def is_stem(x):
    # True if x is one of the predicator extractor's stems.
    return x in chat_predicator_extractor._stems


def is_eomi(x):
    # True if x is one of the predicator extractor's eomis.
    return x in chat_predicator_extractor._eomis


l = '갈'

# For every R part that follows `l` in the extractor's L-R graph, print:
# the R part and its count, the (stem, eomi) lemma candidates whose stem and
# eomi are both known to the extractor, and whether the surface form l + r
# is among the extracted predicators.
for r, count in chat_predicator_extractor.lrgraph.get_r(l, topk=-1):
    word = l + r
    word_is_extracted = word in chat_predicators

    lemmas = _lemma_candidate(l, r)
    lemmas = {(stem, eomi) for stem, eomi in lemmas
              if is_stem(stem) and is_eomi(eomi)}
    # Sort before joining so the printed order is stable across runs;
    # iterating the set directly gives no ordering guarantee.
    lemmas_strf = '' if not lemmas else '[%s]' % ', '.join('%s+%s'%lr for lr in sorted(lemmas))

    print('{} - {} ({}) / {} / {}extracted'.format(
        l, r, count, lemmas_strf, ' ' if word_is_extracted else 'not '))

갈 - 게 (6120) / [갈+게, 가+ㄹ게] /  extracted
갈 - 게요 (1852) / [갈+게요, 가+ㄹ게요] /  extracted
갈 - 겡 (1219) / [갈+겡, 가+ㄹ겡] /  extracted
갈 - 게용 (710) / [가+ㄹ게용, 갈+게용] /  extracted
갈 - 려고 (660) / [가+ㄹ려고, 갈+려고] /  extracted
갈 - 지 (525) / [가+ㄹ지, 갈+지] /  extracted
갈 - 라구 (515) / [가+ㄹ라구, 갈+라구, 가르+아구] /  extracted
갈 - 거 (431) /  / not extracted
갈 - 려구 (413) / [가+ㄹ려구, 갈+려구] /  extracted
갈 - 랭 (366) / [갈+랭, 가+ㄹ랭] /  extracted
갈 - 게여 (301) / [갈+게여, 가+ㄹ게여] /  extracted
갈 - 거같아 (298) / [갈+거같아, 가+ㄹ거같아] /  extracted
갈 - 깡 (263) / [가+ㄹ깡, 갈+깡] /  extracted
갈 - 껭 (257) / [갈+껭, 가+ㄹ껭] /  extracted
갈 - 라궁 (206) / [가+ㄹ라궁, 가르+아궁, 갈+라궁] /  extracted
갈 - 테니까 (201) / [가+ㄹ테니까, 갈+테니까] /  extracted
갈 - 거면 (184) / [갈+거면, 가+ㄹ거면] /  extracted
갈 - 거얌 (175) / [가+ㄹ거얌, 갈+거얌] /  extracted
갈 - 려면 (154) / [가+ㄹ려면, 갈+려면] /  extracted
갈 - 텐데 (153) / [가+ㄹ텐데, 갈+텐데] /  extracted
갈 - 지도 (152) / [가+ㄹ지도, 갈+지도] /  extracted
갈 - 땐 (147) / [가+ㄹ땐, 갈+땐] /  extracted
갈 - 아입구 (143) /  /  extracted
갈 - 거양 (140) / [갈+거양, 가+ㄹ거양] /  extracted
갈 - 거임 (124) 