Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
126 lines (109 sloc) 3.95 KB
##
#### default configuration file for Spanish analyzer
##
#### General options
Lang=es
Locale=default
#### Trace options. Only effective if we have compiled with -DVERBOSE
#
## Possible values for TraceModule (may be OR'ed)
#define SPLIT_TRACE 0x00000001
#define TOKEN_TRACE 0x00000002
#define MACO_TRACE 0x00000004
#define OPTIONS_TRACE 0x00000008
#define NUMBERS_TRACE 0x00000010
#define DATES_TRACE 0x00000020
#define PUNCT_TRACE 0x00000040
#define DICT_TRACE 0x00000080
#define SUFF_TRACE 0x00000100
#define LOCUT_TRACE 0x00000200
#define NP_TRACE 0x00000400
#define PROB_TRACE 0x00000800
#define QUANT_TRACE 0x00001000
#define NEC_TRACE 0x00002000
#define AUTOMAT_TRACE 0x00004000
#define TAGGER_TRACE 0x00008000
#define HMM_TRACE 0x00010000
#define RELAX_TRACE 0x00020000
#define RELAX_TAGGER_TRACE 0x00040000
#define CONST_GRAMMAR_TRACE 0x00080000
#define SENSES_TRACE 0x00100000
#define CHART_TRACE 0x00200000
#define GRAMMAR_TRACE 0x00400000
#define DEP_TRACE 0x00800000
#define UTIL_TRACE 0x01000000
TraceLevel=3
TraceModule=0x0000
## Options to control the applied modules. The input may be partially
## processed, or a full analysis may not be wanted. The specific
## formats are a choice of the main program using the library, as is
## the responsibility of calling only the required modules.
## Valid input/output formats are: plain, token, splitted, morfo, tagged, parsed
InputFormat=plain
OutputFormat=tagged
# consider each newline as a sentence end
AlwaysFlush=yes
#### Tokenizer options
TokenizerFile=$FREELINGSHARE/es/tokenizer.dat
#### Splitter options
SplitterFile=$FREELINGSHARE/es/splitter.dat
#### Morfo options
AffixAnalysis=yes
MultiwordsDetection=yes
NumbersDetection=yes
PunctuationDetection=yes
DatesDetection=yes
QuantitiesDetection=yes
DictionarySearch=yes
ProbabilityAssignment=yes
OrthographicCorrection=no
DecimalPoint=,
ThousandPoint=.
LocutionsFile=$FREELINGSHARE/es/locucions.dat
QuantitiesFile=$FREELINGSHARE/es/quantities.dat
AffixFile=$FREELINGSHARE/es/afixos.dat
ProbabilityFile=$FREELINGSHARE/es/probabilitats.dat
DictionaryFile=$FREELINGSHARE/es/dicc.src
PunctuationFile=$FREELINGSHARE/common/punct.dat
ProbabilityThreshold=0.001
# NER options
NERecognition=yes
NPDataFile=$FREELINGSHARE/es/np.dat
## comment line above and uncomment one of those below, if you want
## a better NE recognizer (higher accuracy, lower speed)
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-poor1.dat
#NPDataFile=$FREELINGSHARE/es/nerc/ner/ner-ab-rich.dat
# "rich" model is trained with rich gazetteer. Offers higher accuracy but
# requires adapting gazetteer files to have high coverage on target corpus.
# "poor1" model is trained with poor gazetteer. Accuracy is slightly lower
# but suffers only a small accuracy loss when the gazetteer has low coverage in target corpus.
# If in doubt, use "poor1" model.
#Spelling Corrector config file
CorrectorFile=$FREELINGSHARE/es/corrector/corrector.dat
## Phonetic encoding of words.
Phonetics=no
PhoneticsFile=$FREELINGSHARE/es/phonetics.dat
## NEC options. See README in common/nec
NEClassification=no
NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-poor1.dat
#NECFile=$FREELINGSHARE/es/nerc/nec/nec-ab-rich.dat
## Sense annotation options (none,all,mfs,ukb)
SenseAnnotation=none
SenseConfigFile=$FREELINGSHARE/es/senses.dat
UKBConfigFile=$FREELINGSHARE/es/ukb.dat
#### Tagger options
Tagger=hmm
TaggerHMMFile=$FREELINGSHARE/es/tagger.dat
TaggerRelaxFile=$FREELINGSHARE/es/constr_gram-B.dat
TaggerRelaxMaxIter=500
TaggerRelaxScaleFactor=670.0
TaggerRelaxEpsilon=0.001
TaggerRetokenize=yes
TaggerForceSelect=tagger
#### Parser options
GrammarFile=$FREELINGSHARE/es/chunker/grammar-chunk.dat
#### Dependency Parser options
DepTxalaFile=$FREELINGSHARE/es/dep/dependences.dat
#### Coreference Solver options
CoreferenceResolution=no
CorefFile=$FREELINGSHARE/es/coref/coref.dat