Skip to content
Olanto Foundation edited this page Jun 20, 2018 · 6 revisions

public class ConfigurationForCat ...

permanent constants for this index

words and docs implementation

DOC_MAXBIT = 18;

WORD_MAXBIT = 20;

DOC_MAX = (int) Math.pow(2, DOC_MAXBIT); // recompute

WORD_MAX = (int) Math.pow(2, WORD_MAXBIT); // recompute

WORD_IMPLEMENTATION = implementationMode.FAST;

DOC_IMPLEMENTATION = implementationMode.FAST;

OBJ_IMPLEMENTATION = implementationMode.FAST;

number of object stores = 2^OBJ_PW2

OBJ_PW2 = 0; ///0=>1,1=>2,2=>4,3=>8,4=>16

OBJ_NB = (int) Math.pow(2, OBJ_PW2); ///0=>1,1=>2,2=>4,

OBJ_STORE_ASYNC = false;

IDX_DONTINDEXTHIS = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/config/dontindexthiswords_EMPTY.txt"; // list of stop words

IDX_WITHDOCBAG = true;

IDX_MORE_INFO = false;

IDX_SAVE_POSITION = false;

DOC_SIZE_NAME = 12; // max size for document name

WORD_MINOCCKEEP = 4; // keep words occurring more than this number

WORD_MAXOCCDOCKEEP = 100; // keep words occurring less than this number in o/oo (per 1000)

WORD_NFIRSTOFDOC = 600; // keep only the first n words

IDX_MFLF_ENCODING = "UTF-8"; // .mflf encoding; alternative: IDX_MFLF_ENCODING = "ISO-8859-1";

WORD_MINLENGTH = 1; // minimum size for a word

WORD_MAXLENGTH = 40; // maximum size for a word

WORD_DEFINITION = new TokenCatNative(); // method to use for tokenisation

WORD_USE_STEMMER = false; // Stemming usage

STEM_DOC = false; // Stemming document

WORD_STEMMING_LANG = "english"; // only for initialisation

ACTUAL_LANGUAGE = "_EN"; // Stemming definition

deactivate this parameter

IdxConstant.MODE_RANKING = RankingMode.NO;

ORTOGRAFIC = false;

IDX_MARKER = false;

configuration for the processing

folder and files

COMLOG_FILE = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/common.log";

DETLOG_FILE = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/detail.log";

String root = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect";

String root0 = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/sto0";

IdxConstant.COMMON_ROOT = root;

IdxConstant.DOC_ROOT = root;

IdxConstant.WORD_ROOT = root;

SetObjectStoreRoot(root0, 0);

cache parameter for indexing

CACHE_IMPLEMENTATION_INDEXING = implementationMode.FAST;

KEEP_IN_CACHE = 90;

INDEXING_CACHE_SIZE = 256 * MEGA;

IDX_CACHE_COUNT = 2 * (int) MEGA;

IDX_RESERVE = WORD_NFIRSTOFDOC + 4 * KILO;