-
Notifications
You must be signed in to change notification settings - Fork 3
/
descriptor.json
executable file
·120 lines (119 loc) · 3.93 KB
/
descriptor.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
{
"componentName" : "TikaTextDetection",
"componentVersion" : "7.0",
"middlewareVersion" : "7.0",
"sourceLanguage" : "java",
"batchLibrary" : "mpf-tika-text-detection-component-7.0.jar",
"environmentVariables" : [],
"algorithm" : {
"name": "TIKATEXT",
"description": "The Apache Tika text detection component.",
"actionType": "DETECTION",
"requiresCollection": {
"states": []
},
"providesCollection": {
"states": [
"DETECTION",
"DETECTION_TEXT",
"DETECTION_TEXT_TIKA"
],
"properties": [
{
"name": "CONFIDENCE_THRESHOLD",
"description": "The minimum confidence score which must be met or exceeded. Detections below this threshold are silently discarded.",
"type": "DOUBLE",
"defaultValue": "-2"
},
{
"name": "STORE_METADATA",
"description": "Specifies whether or not to store metadata as a separate track detection.",
"type": "BOOLEAN",
"defaultValue": "false"
},
{
"name": "LANGUAGE_DETECTOR",
"description": "Specifies which Tika language detector to use. Current options are `opennlp`, `optimaize` and `tika` (Note: `tika` is deprecated and does not work for short text).",
"type": "STRING",
"defaultValue": "opennlp"
},
{
"name": "MAX_REASONABLE_LANGUAGES",
"description": "Specifies the maximum number of top detected languages. When set to 0 or below, allows any number of language results that are marked as reasonably certain by Tika.",
"type": "INT",
"defaultValue": "-1"
},
{
"name": "MIN_LANGUAGES",
"description": "When set to a positive integer, attempts to always return the specified number of top languages, even if some are not marked as reasonably certain. Non-positive values disable this property so that only reasonably certain predictions are accepted.",
"type": "INT",
"defaultValue": "2"
},
{
"name": "LIST_ALL_PAGES",
"description": "Specifies whether or not to store each page as a track, even if no text is extracted.",
"type": "BOOLEAN",
"defaultValue": "false"
},
{
"name": "MIN_CHARS_FOR_LANGUAGE_DETECTION",
"description": "Specifies the minimum length, in characters, of detected text before language filtering is applied.",
"type": "INT",
"defaultValue": "20"
}
]
}
},
"actions": [
{
"name": "TIKA TEXT DETECTION ACTION",
"description": "Executes the Tika text detection algorithm using the default parameters.",
"algorithm": "TIKATEXT",
"properties": []
},
{
"name": "TIKA TEXT DETECTION SOURCE MEDIA ONLY ACTION",
"description": "Executes the Tika text detection algorithm using the default parameters on source media only, no derivatives.",
"algorithm": "TIKATEXT",
"properties": [
{
"name": "SOURCE_MEDIA_ONLY",
"value": "TRUE"
}
]
}
],
"tasks": [
{
"name": "TIKA TEXT DETECTION TASK",
"description": "Performs Tika text detection.",
"actions": [
"TIKA TEXT DETECTION ACTION"
]
},
{
"name": "TIKA TEXT DETECTION SOURCE MEDIA ONLY TASK",
"description": "Performs Tika text detection on source media only, no derivatives.",
"actions": [
"TIKA TEXT DETECTION SOURCE MEDIA ONLY ACTION"
]
}
],
"pipelines": [
{
"name": "TIKA TEXT DETECTION PIPELINE",
"description": "Performs Tika text detection.",
"tasks": [
"TIKA TEXT DETECTION TASK"
]
},
{
"name": "TIKA TEXT DETECTION WITH KEYWORD TAGGING PIPELINE",
"description": "Performs Tika text detection and keyword tagging.",
"tasks": [
"TIKA TEXT DETECTION TASK",
"KEYWORD TAGGING (WITH FF REGION) TASK"
]
}
]
}