-
Notifications
You must be signed in to change notification settings - Fork 1
Indexing_annexe
// ---- Bit widths and the capacities derived from them ----
DOC_MAXBIT = 18;
WORD_MAXBIT = 20;
// Powers of two are computed with an integer shift rather than (int) Math.pow(2, n):
// same value for these widths, without a round trip through double.
DOC_MAX = 1 << DOC_MAXBIT; // recompute: 2^DOC_MAXBIT
WORD_MAX = 1 << WORD_MAXBIT; // recompute: 2^WORD_MAXBIT

// ---- Implementation modes for the word, document and object structures ----
WORD_IMPLEMENTATION = implementationMode.FAST;
DOC_IMPLEMENTATION = implementationMode.FAST;
OBJ_IMPLEMENTATION = implementationMode.FAST;
OBJ_PW2 = 0; // exponent for OBJ_NB: 0=>1, 1=>2, 2=>4, 3=>8, 4=>16
OBJ_NB = 1 << OBJ_PW2; // 2^OBJ_PW2 (1 with the exponent above)
OBJ_STORE_ASYNC = false;

// ---- Indexing options ----
IDX_DONTINDEXTHIS = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/config/dontindexthiswords_EMPTY.txt"; // list of stop words
IDX_WITHDOCBAG = true;
IDX_MORE_INFO = false;
IDX_SAVE_POSITION = false;
DOC_SIZE_NAME = 12; // max size for a document name
WORD_MINOCCKEEP = 4; // keep words occurring more than this number of times
WORD_MAXOCCDOCKEEP = 100; // keep words occurring in fewer documents than this, in per mille (per 1000)
WORD_NFIRSTOFDOC = 600; // keep only the first n words of a document
IDX_MFLF_ENCODING = "UTF-8"; // .mflf encoding (previous alternative: "ISO-8859-1")

// ---- Tokenisation and stemming ----
WORD_MINLENGTH = 1; // minimum length of a word
WORD_MAXLENGTH = 40; // maximum length of a word
WORD_DEFINITION = new TokenCatNative(); // method to use for tokenisation
WORD_USE_STEMMER = false; // stemming usage
STEM_DOC = false; // stemming of documents
WORD_STEMMING_LANG = "english"; // only for initialisation
ACTUAL_LANGUAGE = "_EN"; // stemming definition
IdxConstant.MODE_RANKING = RankingMode.NO;
ORTOGRAFIC = false;
IDX_MARKER = false;

// ---- File locations: logs and all index roots live under the langdetect data directory ----
COMLOG_FILE = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/common.log";
DETLOG_FILE = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/detail.log";
String root = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect";
String root0 = SenseOS.getMYCLASS_ROOT() + "MYCLASS_MODEL/data/langdetect/sto0";
// The common, document and word roots all share the same directory.
IdxConstant.COMMON_ROOT = root;
IdxConstant.DOC_ROOT = root;
IdxConstant.WORD_ROOT = root;
// NOTE(review): presumably registers root0 as the directory of object store #0
// (OBJ_NB is 1 here, so a single store) - confirm against SetObjectStoreRoot.
SetObjectStoreRoot(root0, 0);

// ---- Indexing cache ----
CACHE_IMPLEMENTATION_INDEXING = implementationMode.FAST;
KEEP_IN_CACHE = 90;
INDEXING_CACHE_SIZE = 256 * MEGA;
IDX_CACHE_COUNT = 2 * (int) MEGA;
// Reserve is sized from the per-document word cap plus a fixed 4 * KILO margin.
IDX_RESERVE = WORD_NFIRSTOFDOC + 4 * KILO;