jcseg-core/jcseg.properties

# Jcseg properties file.
# @Note: 
# true | 1 | on for open the specified configuration or
# false | 0 | off to close it.
# bug report chenxin <chenxin619315@gmail.com>

# Jcseg function
#maximum match length. (5-7)
jcseg.maxlen = 7

#Whether to recognized the Chinese name.
jcseg.icnname = true

#maximum length for pair punctuation text.
jcseg.pptmaxlen = 7

#maximum length for Chinese last name andron.
jcseg.cnmaxlnadron = 1

#Whether to clear the stopwords.
jcseg.clearstopword = false

#Whether to convert the Chinese numeric to Arabic number. like '\u4E09\u4E07' to 30000.
jcseg.cnnumtoarabic = true

#Whether to convert the Chinese fraction to Arabic fraction.
#@Note: for lucene,solr,elasticsearch eg.. close it.
jcseg.cnfratoarabic = false

#Whether to keep the unrecognized word.
jcseg.keepunregword = true

#Whether to do the secondary segmentation for the complex English words
jcseg.ensecondseg = true

#minimum length of the secondary segmentation token.
jcseg.ensecminlen = 1

#Whether to do the English word segmentation
#the jcseg.ensecondseg must set to true before active this function
jcseg.enwordseg = true

#maximum match length for English extracted word
jcseg.enmaxlen = 16

#threshold for Chinese name recognize.
# better not change it before you know what you are doing.
jcseg.nsthreshold = 1000000

#The punctuation set that will be keep in an token.(Not the end of the token).
jcseg.keeppunctuations = @#%.&+

#Whether to append the pinyin of the entry.
jcseg.appendpinyin = false

#Whether to load and append the synonyms words of the entry.
jcseg.appendsyn = true


####for Tokenizer
#default delimiter for JcsegDelimiter tokenizer
#set to default or whitespace will use the default whitespace as delimiter
#or set to the char you want, like ',' or whatever
jcseg.delimiter = default

#default length for the N-gram tokenizer
jcseg.gram = 1


####about the lexicon
#absolute path of the lexicon file.
#Multiple path support from jcseg 1.9.2, use ';' to split different path.
#example: lexicon.path = /home/chenxin/lex1;/home/chenxin/lex2 (Linux)
#        : lexicon.path = D:/jcseg/lexicon/1;D:/jcseg/lexicon/2 (WinNT)
#lexicon.path=/Code/java/JavaSE/jcseg/lexicon
#lexicon.path = {jar.dir}/lexicon ({jar.dir} means the base directory of jcseg-core-{version}.jar)
#@since 1.9.9 Jcseg default to load the lexicons in the classpath
lexicon.path = null

#Whether to load the modified lexicon file auto.
lexicon.autoload = false

#Poll time for auto load. (seconds)
lexicon.polltime = 300


####lexicon load
#Whether to load the part of speech of the entry.
jcseg.loadpos = true

#Whether to load the pinyin of the entry.
jcseg.loadpinyin = true

#Whether to load the synonyms words of the entry.
jcseg.loadsyn = true

#Whether to load the entity of the entry
jcseg.loadentity = true