Skip to content

Commit

Permalink
Merge 65506c5 into c5a7d99
Browse files Browse the repository at this point in the history
  • Loading branch information
mocobeta committed Dec 9, 2018
2 parents c5a7d99 + 65506c5 commit ca5ef2d
Showing 1 changed file with 32 additions and 8 deletions.
40 changes: 32 additions & 8 deletions bin/janome
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,59 @@ from janome.tokenizer import Tokenizer
from janome.version import JANOME_VERSION
from argparse import ArgumentParser
import sys
import subprocess

# True when the interpreter's major version is 3; main() uses this to choose
# between input() and raw_input() when reading a line.
PY3 = sys.version_info[0] == 3

import sys, signal
import signal
def signal_handler(signal, frame):
    """Terminate the program with exit status 0.

    Written with the two-argument signature of a signal handler; neither
    argument is used.
    """
    raise SystemExit(0)
# Route SIGINT (Ctrl-C) to signal_handler so the program ends with exit status 0.
signal.signal(signal.SIGINT, signal_handler)


def main():
    """Command-line entry point: tokenize standard input line by line.

    Parses the command-line options, builds a ``Tokenizer`` from the user
    dictionary and mmap options, then reads lines until end of input and
    prints every token of each line. When ``--graphviz`` is given, the
    lattice file path is passed to the tokenizer and rendered with
    ``generate_graph`` after all input has been consumed.
    """
    parser = ArgumentParser()
    # Each option is registered exactly once; registering the same option
    # string twice makes argparse raise a conflicting-option error.
    parser.add_argument("-e", "--enc", dest="enc", default="utf8", help="Input encoding. Default is utf8")
    parser.add_argument("--udic", dest="udic", default="", help="Path to user dictionary file")
    parser.add_argument("--udic-enc", dest="udic_enc", default="utf8", help="User dictionary encoding. Default is utf8")
    parser.add_argument("--udic-type", dest="udic_type", default="ipadic", help="User dictionary type, 'ipadic' or 'simpledic.' Default is 'ipadic'")
    parser.add_argument("-m", "--mmap", dest="mmap", nargs='?', const=True, default=False, help="Use mmap mode")
    parser.add_argument("-g", "--graphviz", dest="graphviz", nargs='?', const=True, default=False, help="Output lattice graph by Graphviz")
    parser.add_argument("--gv-out", dest="gv_out", default="lattice.gv", help="Graphviz output file path. This option is used with --graphviz option")
    parser.add_argument("--gv-format", dest="gv_format", default="png", help="Graphviz output format. default is png. See https://graphviz.gitlab.io/_pages/doc/info/output.html for the supported formats.")
    parser.add_argument('--version', action="version", version="janome {}".format(JANOME_VERSION))
    args = parser.parse_args()

    t = Tokenizer(udic=args.udic,
                  udic_enc=args.udic_enc,
                  udic_type=args.udic_type,
                  mmap=args.mmap)
    # NOTE(review): presumably an empty dotfile tells the tokenizer not to
    # write a lattice file -- confirm against Tokenizer.tokenize.
    dotfile = args.gv_out if args.graphviz else ''
    while True:
        try:
            # Python 2's raw_input() returns bytes, so decode with --enc there.
            line = input() if PY3 else raw_input().decode(args.enc)
            for token in t.tokenize(line, dotfile=dotfile):
                print(token)
        except EOFError:
            # End of input: stop reading.
            break

    if args.graphviz:
        generate_graph(dotfile, args.gv_format)

def generate_graph(dotfile, gv_format):
    """Render a Graphviz dot file to an image using the ``dot`` command.

    The output is written next to the input as ``<dotfile>.<gv_format>``.

    Args:
        dotfile: path of the dot file to render.
        gv_format: Graphviz output format name, passed to ``dot`` as ``-T<format>``.

    Raises:
        SystemExit: with status 1 when ``dot`` cannot be found on PATH or
            when it exits with a non-zero status.
    """
    # `type` is a shell builtin and cannot be launched through
    # subprocess.call without a shell, so look the executable up on PATH
    # directly instead.
    try:
        from shutil import which  # Python 3.3+
    except ImportError:
        from distutils.spawn import find_executable as which  # Python 2

    if which('dot') is None:
        sys.stderr.write('Cannot find dot command. Maybe Graphviz is not installed?\n')
        sys.exit(1)
    format_opt = '-T%s' % gv_format
    output = '%s.%s' % (dotfile, gv_format)
    output_opt = '-o%s' % output
    gv_rc = subprocess.call(['dot', format_opt, output_opt, dotfile])
    if gv_rc != 0:
        sys.stderr.write('Something wrong with executing dot command.\n')
        sys.exit(1)
    print('Graph was successfully output to %s' % output)

# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()

0 comments on commit ca5ef2d

Please sign in to comment.