/
settings.ini
78 lines (75 loc) · 2.08 KB
/
settings.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
[Section]
# OGER settings.ini: configuration file from which OGER loads its arguments.
# NOTE(review): [Section] defines no keys in this file; presumably a placeholder - confirm before removing.
[Shared]
# NOTE(review): [Shared] is empty as well; every setting visible in this file is under [Main].
[Main]
# NOTE(review): presumably controls whether a header row is written/expected - confirm against the OGER docs.
include_header = True
# Input and output locations, given as relative paths.
input-directory = data/input
output-directory = data/output
# Input selection: pointer-type = glob with pointers = *.tsv.
# NOTE(review): presumably matches every .tsv file under input-directory - confirm.
pointer-type = glob
pointers = *.tsv
# iter-mode options: collection OR document
iter-mode = collection
# article-format options (choose one):
# 'txt_directory'
# 'txt_id'
# 'txt_collection'
# 'txt_json'
# 'txt_tar'
# 'txt_tsv'
# 'conll'
# 'pubtator'
# 'pubtator_fbk'
# 'pxmlgz'
# 'pxml_directory'
# 'pxml_id'
# 'bioc_xml'
# 'bioc_json'
# 'download_pubmed'
# 'download_pmc'
# 'download_bad_pmc'
# 'download_fictious_pmc'
# 'download_random_pmc'
article-format = txt_tsv
# export_format options (choose one):
# 'tsv'
# 'txt'
# 'xml'
# 'text_xml'
# 'bioc'
# 'bioc_xml'
# 'bioc_json'
# 'odin'
# 'bionlp'
# 'bionlp.ann'
# 'brat'
# 'brat.ann'
# 'conll'
# 'becalm_tsv'
# 'becalm_json'
# 'pubanno_json'
# 'pubanno_json.tgz'
# 'pubtator'
# 'pubtator_fbk'
# 'europepmc'
# 'europepmc.zip'
export_format = tsv
# Multiple termlists can be added by numbering the keys (termlistN_path),
# as shown below. Each termlist could have its own stopword list;
# for now a single list (termlist_stopwords) is shared by all of them.
# The next line is a disabled alternative for termlist1_path, kept for reference.
; termlist1_path = data/terms/bero_termlist.tsv
termlist1_path = data/terms/mop_termlist.tsv
termlist2_path = data/terms/envo_syn_termlist.tsv
termlist3_path = data/terms/po_termlist.tsv
termlist4_path = data/terms/ecocore_termlist.tsv
termlist5_path = data/terms/go_syn_termlist.tsv
termlist6_path = data/terms/obi_termlist.tsv
termlist7_path = data/terms/ncit_subset_no_syn_termlist.tsv
termlist8_path = data/terms/to_termlist.tsv
termlist9_path = data/terms/edam_subset_termlist.tsv
termlist10_path = data/terms/mesh_subset_termlist.tsv
termlist11_path = data/terms/chebi_termlist.tsv
termlist12_path = data/terms/pato_subset_termlist.tsv
termlist13_path = data/terms/ncbitaxon_termlist.tsv
# Stopword list shared by all termlists above.
termlist_stopwords = data/stopwords/stopWords.txt
# NOTE(review): stem-Porter presumably selects Porter stemming for term normalization - confirm against the OGER docs.
termlist_normalize = stem-Porter
# Space-separated list of built-in post-filters ("builtin:" prefix).
# NOTE(review): presumably applied in the order listed - confirm.
postfilter = builtin:remove_overlaps builtin:remove_sametype_overlaps builtin:remove_submatches builtin:remove_sametype_submatches