-
Notifications
You must be signed in to change notification settings - Fork 0
/
CONFIG.cfg
119 lines (76 loc) · 3.41 KB
/
CONFIG.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// ******** INPUT FILE ********************************************
// Data separator used in the input file.
// Change this whenever working with a different OIE input.
OIE_DATA_SEPERARTOR=;
//which OIE data set
IS_NELL=false
//OIE data location
OIE_DATA_PATH=/home/adutta/git/DATA/noDigitHighAll.csv
///home/adutta/git/DATA/Nell_new.csv
///home/adutta/git/DATA/noDigitHighAll.csv
// Consider only OIE properties with at least this many instances.
// Lowering it makes everything slower, because it
// includes a larger number of properties. These are the input set of
// OIE relations on which IM runs and on which
// the clustering is performed. 0 means all.
INSTANCE_THRESHOLD=0
// ******************************************************************
// ******** WORKFLOW TYPE *****************************************
// One of the most important parameters
// 1 : simple each property flow
// 2: cluster input properties flow
// 3: cluster with KB relation input
WORKFLOW=1
// ***************************************************************
// ******* GENERATE TOPK CANDIDATES FOR ENTITIES ********************
// One of the most important parameters. Defines the top-k entities for
// a given OIE term. It has been shown in the papers that beyond
// top-5 there is no further improvement in recall.
TOPK_ANCHORS=5
// ******************************************************************
// ********** set these only if WORKFLOW is 2 or 3 *****************
OPTI_INFLATION=12
OPTI_BETA=0
// ********************************************************
//true: map individual KB properties
//false: map clusters to KB properties
WORKFLOW_NORMAL=true
// ************* allow including YAGO types for missing DBpedia types
INCLUDE_YAGO_TYPES=true
WORDNET_API=http://velsen.informatik.uni-mannheim.de:8080/StsService/GetStsSim?operation=api
// ****** SPARQL ENDPOINT *************************************************
DBPEDIA_SPARQL_ENDPOINT=http://wifo5-32.informatik.uni-mannheim.de:8893/sparql
#DBPEDIA_SPARQL_ENDPOINT=http://wifo5-32.informatik.uni-mannheim.de:8893/sparql
DBPEDIA_SPARQL_ENDPOINT_LOCAL=http://dbpedia.org/sparql
DBPEDIA_SPARQL_ENDPOINT_LIVE_DBP=http://live.dbpedia.org/sparql
//http://dbpedia.org/sparql
//http://wifo5-32.informatik.uni-mannheim.de:8891/sparql
//http://live.dbpedia.org/sparql
// ****** SCALING FACTOR FOR THE DOMAIN/RANGE CLASS HIERARCHY *************
TREE_PROPAGATION_FACTOR=0.5
SIMILARITY_FACTOR=0.0
// ******** WEIGHT or PROBABILITY *****************************************
// **** setting it true converts all probabilities into weights ***********
USE_LOGIT=true
// ********* RELOAD DBPEDIA TYPES ***************************************
// Setting it to true makes it run slower, since a SPARQL endpoint query is executed and the results are loaded into the DB
RELOAD_TYPE=false
// One-time effort to load the top-k type info of all possible instances.
// Should only need to run once.
LOAD_TYPES=true
SCALE_WEIGHT=0
ENGAGE_INTER_STEP=false
//********** Database Settings ****************************************
//********** these take effect only when RELOAD_TYPE is set to true *************
// Batch operation size
BATCH_SIZE=10000
//********** Extended Plugin Settings ****************************************
TOP_K_NUMERIC_PROPERTIES=500
DBPEDIA_TBOX=src/main/resources/input/dbpediaTBox.owl
WORDNET_DICTIONARY=
OPTIMAL_INFLATION=0
THREAD_MAX_POOL_SIZE=10
HTTP_CONN_MAX_TOTAL=500
HTTP_CONN_MAX_TOTAL_PER_ROUTE=50
TIMEOUT_MINS=3
TOPK_SURFACE_FORMS=10