-
-
Notifications
You must be signed in to change notification settings - Fork 604
/
runner.py
141 lines (125 loc) · 4.36 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from argparse import ArgumentParser
import logging
import sys
import pypinyin
from pypinyin.compat import PY2
# Keys accepted by the -s/--style option, mapped to pypinyin.Style members.
# Every style is reachable by its member name; most also have a short alias
# that appears to show how that style spells the syllable "zhao" with the
# fourth tone (e.g. 'zhao4' for TONE3) -- confirm against the pypinyin docs.
style_map = {
    key: getattr(pypinyin.Style, name)
    for name, aliases in (
        ('NORMAL', ('zhao',)),
        ('TONE', ('zh4ao',)),
        ('TONE2', ('zha4o',)),
        ('TONE3', ('zhao4',)),
        ('INITIALS', ('zh',)),
        ('FIRST_LETTER', ('z',)),
        ('FINALS', ('ao',)),
        ('FINALS_TONE', ('4ao',)),
        ('FINALS_TONE2', ('a4o',)),
        ('FINALS_TONE3', ('ao4',)),
        ('BOPOMOFO', ()),
        ('BOPOMOFO_FIRST', ()),
        ('CYRILLIC', ()),
        ('CYRILLIC_FIRST', ()),
    )
    # The member name comes first, then its aliases, matching the key order
    # of the original literal.
    for key in (name,) + aliases
}
# The pypinyin callables selectable with the -f/--func option, keyed by name.
func_map = {name: getattr(pypinyin, name) for name in ('pinyin', 'slug')}

# Key into style_map used when -s/--style is not given (maps to Style.TONE).
default_style = 'zh4ao'
class NullWriter(object):
    """A data sink that discards everything written to it.

    Behaves like /dev/null on Linux/Unix.  It is installed in place of
    ``sys.stdout`` / ``sys.stderr`` to silence unwanted output.
    """

    def write(self, string):
        """Accept ``string`` and throw it away."""
        pass

    def flush(self):
        """Do nothing; there is never any buffered data.

        Present so that code which flushes the stream it writes to (for
        example ``print(..., flush=True)``) does not fail with
        ``AttributeError`` while this object stands in for a real stream.
        """
        pass
def get_parser():
    """Build the argument parser for the command-line pinyin converter.

    Returns:
        An ``ArgumentParser`` that understands ``-V/--version``,
        ``-f/--func``, ``-s/--style``, ``-p/--separator``, ``-e/--errors``,
        ``-m/--heteronym`` and the positional ``hans`` string.
    """
    version_text = '{0} {1}'.format(pypinyin.__title__, pypinyin.__version__)
    style_help = 'pinyin style (default: "{0}")'.format(default_style)

    cli = ArgumentParser(description='convert chinese to pinyin.')
    cli.add_argument('-V', '--version', action='version',
                     version=version_text)

    # Name of the pypinyin function to run.
    cli.add_argument(
        '-f', '--func',
        default='pinyin',
        choices=['pinyin', 'slug'],
        help='function name (default: "pinyin")',
    )

    # Pinyin output style; any key of style_map is accepted.
    cli.add_argument(
        '-s', '--style',
        default=default_style,
        choices=list(style_map),
        help=style_help,
    )

    cli.add_argument(
        '-p', '--separator',
        default='-',
        help='slug separator (default: "-")',
    )
    cli.add_argument(
        '-e', '--errors',
        default='default',
        choices=['default', 'ignore', 'replace'],
        help='how to handle none-pinyin string (default: "default")',
    )

    # Also output the alternative readings of heteronyms.
    cli.add_argument('-m', '--heteronym', action='store_true',
                     help='enable heteronym')

    # The Chinese text to look up.
    cli.add_argument('hans', help='chinese string')
    return cli
def main():
    """Command-line entry point: convert Chinese text to pinyin and print it.

    Arguments are taken from ``sys.argv[1:]``.  When standard input is not a
    terminal and yields non-blank data, that data is appended to the
    argument list, so it can supply the positional ``hans`` argument
    (``echo 中文 | runner.py``).

    The selected pypinyin function runs with ``sys.stdout`` and
    ``sys.stderr`` temporarily replaced by a ``NullWriter``; the streams
    that were active beforehand are always put back before the result is
    printed or an exception propagates.
    """
    # Silence every logging call of severity CRITICAL and below.
    logging.disable(logging.CRITICAL)

    # Read the text to convert from stdin when it is piped in.  sys.stdin
    # can be None (e.g. when the process has no console attached).
    stdin = sys.stdin
    if stdin is not None and not stdin.isatty():
        pipe_data = stdin.read().strip()
    else:
        pipe_data = ''
    args = sys.argv[1:]
    if pipe_data:
        args.append(pipe_data)

    # Parse command-line options and arguments.
    options = get_parser().parse_args(args)
    if PY2:
        # Python 2 delivers argv as bytes; decode to text first.
        encoding = getattr(stdin, 'encoding', None) or 'utf-8'
        hans = options.hans.decode(encoding)
    else:
        hans = options.hans

    func = getattr(pypinyin, options.func)
    style = style_map[options.style]
    # Both functions take heteronym/errors; only slug() takes a separator.
    # Keyed on the option value itself rather than the function's __name__.
    kwargs = {'heteronym': options.heteronym, 'errors': options.errors}
    if options.func == 'slug':
        kwargs['separator'] = options.separator

    # Swallow anything printed during the conversion so it cannot pollute
    # the command's output (mainly to get rid of the print statements
    # inside jieba).
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = NullWriter()
    try:
        result = func(hans, style=style, **kwargs)
    finally:
        # Restore in ``finally`` so that a failing conversion still reports
        # its traceback on the real stderr, and restore the streams that
        # were active (not sys.__stdout__) so a caller's redirection is
        # left intact.
        sys.stdout, sys.stderr = saved_stdout, saved_stderr

    if not result:
        print('')
    elif (isinstance(result, (list, tuple))
            and isinstance(result[0], (list, tuple))):
        # One group per character: readings joined by ',', groups by ' '.
        print(' '.join([','.join(s) for s in result]))
    else:
        print(result)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()