Skip to content

Commit

Permalink
fix dag path bug
Browse files Browse the repository at this point in the history
  • Loading branch information
letiantian committed Feb 8, 2016
1 parent 13b6a76 commit 4101d85
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Pinyin2Hanzi/dag.py
Expand Up @@ -34,7 +34,7 @@ def dag(dag_params, pinyin_list, path_num=6, log=False):
kvs = dag_params.get_phrase(pinyin_list[from_idx:to_idx+1], num=path_num)
for prev_item in prev_paths:
for item in kvs:
word = prev_item.path + [ _ for _ in item[0] ]
word = prev_item.path + [ item[0] ]
if log:
score = prev_item.score + math.log(item[1])
else:
Expand Down
24 changes: 10 additions & 14 deletions README.md
Expand Up @@ -48,11 +48,6 @@ result = viterbi(hmm_params=hmmparams, observations=('ni', 'zhii', 'bu', 'zhi',
for item in result:
print(item.score, item.path)
# 发生KeyError,`zhii`不规范


result = viterbi(hmm_params=hmmparams, observations=[u'ti', u'chu', u'le', u'jie', u'jve', u'fang', u'an'], path_num = 2, log = True)
for item in result:
print(item.score, item.path)
```

#### 基于DAG的转换
Expand All @@ -69,28 +64,29 @@ result = dag(dagparams, ('ni', 'bu', 'zhi', 'dao', 'de', 'shi'), path_num=2)
for item in result:
print(item.score, item.path)
''' 输出
0.08117536840088911 ['你不知道', '的', '是']
0.04149191639287887 ['你不知道', '的', '诗']
0.08117536840088911 ['你不知道', '的是']
0.04149191639287887 ['你不知道', '的诗']
'''

## 2个候选,使用对数打分
result = dag(dagparams, ('ni', 'bu', 'zhi', 'dao', 'de', 'shi'), path_num=2, log=True)
for item in result:
print(item.score, item.path)
''' 输出
-2.5111434226494866 ['你不知道', '的', '是']
-3.1822566564324477 ['你不知道', '的', '诗']
-2.5111434226494866 ['你不知道', '的是']
-3.1822566564324477 ['你不知道', '的诗']
'''

## 1个候选
print( dag(dagparams, ['ti', 'chu', 'le', 'bu', 'cuo', 'de', 'jie', 'jve', 'fang', 'an'], path_num=1) )
'''输出
[< score=0.0017174549839096384, path=['提出了', '不错', '的', '解决方案'] >]
'''

## 2个候选,使用对数打分
result = dag(dagparams, ('ni', 'bu', 'zhi', 'dao', 'de', 'shii'), path_num=2, log=True)
print(result)
# 输出空列表,因为`shii`不存在


result = dag(dagparams, [u'ti', u'chu', u'le', u'jie', u'jve', u'fang', u'an'], path_num=2, log=True)
print(result)

```

#### 关于拼音
Expand Down
3 changes: 3 additions & 0 deletions example/dag_pinyin2hanzi.py
@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import DefaultDagParams
from Pinyin2Hanzi import dag

Expand Down
22 changes: 22 additions & 0 deletions example/dag_pinyin2hanzi_2.py
@@ -0,0 +1,22 @@
# coding: utf-8
from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import DefaultDagParams
from Pinyin2Hanzi import dag

# Instantiate DefaultDagParams (imported from Pinyin2Hanzi); the same object
# is passed as the first argument to every dag() call below.
dagparams = DefaultDagParams()


# Convert the full seven-syllable pinyin sequence, then only its first three
# syllables. path_num=1 requests a single candidate; the returned list is
# printed directly.
print( dag(dagparams, [u'ti', u'chu', u'le', u'jie', u'jve', u'fang', u'an'], path_num=1) )
print( dag(dagparams, [u'ti', u'chu', u'le'], path_num=1) )


# Convert the remaining four syllables together, then as two separate pairs.
# NOTE(review): presumably these calls let the output for a whole sequence be
# compared with the output for its sub-sequences -- confirm the intent.
print( dag(dagparams, ['jie', 'jve', 'fang', 'an'], path_num=1) )
print( dag(dagparams, ['jie', 'jve'], path_num=1) )
print( dag(dagparams, ['fang', 'an'], path_num=1) )



4 changes: 3 additions & 1 deletion example/viterbi_health_fever.py
@@ -1,7 +1,9 @@
# coding: utf-8

from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import AbstractHmmParams
from Pinyin2Hanzi import viterbi

Expand Down
3 changes: 3 additions & 0 deletions example/viterbi_pinyin2hanzi.py
@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import DefaultHmmParams
from Pinyin2Hanzi import viterbi

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -10,7 +10,7 @@

setup(
name='Pinyin2Hanzi',
version='0.1.0',
version='0.1.1',
description='拼音转汉字, Engine of Chinese Input Method',
long_description=LONGDOC,
author='Letian Sun',
Expand Down

0 comments on commit 4101d85

Please sign in to comment.