-
Notifications
You must be signed in to change notification settings - Fork 65
/
extractor.js
63 lines (49 loc) · 1.53 KB
/
extractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import _ from 'lodash';
/**
 * Pulls tagged word ranges out of a word list by aligning it against a
 * baseline word list.
 *
 * Each tag descriptor is read through three properties:
 *   - `label`: key under which the extracted words are reported,
 *   - `start`: position in the baseline; the word at `start - 1` is used as
 *     the anchor that is searched for in the input,
 *   - `end`:   `-1` means "take everything up to the end of the input".
 */
export default class Extractor {
  /**
   * @param {Array} baseline - Reference word list that tag positions refer to.
   * @param {Array<Object>} [tags] - Initial tag descriptors. The array is
   *   stored by reference, so later `tag()` calls append to this same array.
   */
  constructor(baseline, tags) {
    this.baseline = baseline;
    this.tags = tags || [];
  }

  /**
   * Registers one tag descriptor, or several when given an array.
   *
   * @param {Object|Array<Object>} tag - Descriptor(s) to append to `this.tags`.
   */
  tag(tag) {
    const tags = Array.isArray(tag) ? tag : [tag];
    this.tags.push(...tags);
  }

  /**
   * Extracts the words for every registered tag.
   *
   * For each tag every occurrence of its anchor word is tried; candidates
   * with fewer than two matching context words are discarded, and among the
   * rest the one with the most matching context words wins (the earliest
   * occurrence wins a tie).
   *
   * @param {Array} words - Input word list to extract from.
   * @returns {Object} Map of `tag.label` to the extracted slice of `words`,
   *   or `null` when no acceptable candidate was found for that tag.
   */
  extract(words) {
    const result = {};

    this.tags.forEach((tag) => {
      let best = null;
      let searchFrom = 0;
      let candidate;

      do {
        candidate = this.match(tag, words, searchFrom);

        // Track the best candidate inline instead of collecting them and
        // calling `_.max(list, iteratee)`: that signature only exists in
        // lodash 3 (lodash 4 ignores the iteratee). The strict `>` keeps the
        // first of several equally good candidates.
        const isAcceptable = candidate.tag !== null && candidate.matches > 1;
        if (isAcceptable && (best === null || candidate.matches > best.matches)) {
          best = candidate;
        }

        // `start` is one past the anchor just found, so the next search
        // always moves forward; a failed search reports -1 and ends the loop.
        searchFrom = candidate.start;
      } while (candidate.start > 0);

      result[tag.label] = best === null ? null : best.tag;
    });

    return result;
  }

  /**
   * Tries to locate a single occurrence of a tag in `words`.
   *
   * The anchor word `baseline[tag.start - 1]` is searched in `words` from
   * `startIndex`; from there both lists are walked backwards in lockstep to
   * count how many consecutive words agree.
   *
   * @param {Object} tag - Tag descriptor (`start` and `end` are read).
   * @param {Array} words - Input word list.
   * @param {number} [startIndex] - Index in `words` to start searching at
   *   (falsy values are treated as 0).
   * @returns {{tag: (Array|null), start: number, matches: number}}
   *   `tag` is the extracted slice of `words` (or `null`), `start` the index
   *   right after the anchor (or `-1`), `matches` the number of agreeing
   *   context words.
   */
  match(tag, words, startIndex) {
    const anchorWord = this.baseline[tag.start - 1];
    const anchorIndex = words.indexOf(anchorWord, startIndex || 0);

    if (anchorIndex === -1) {
      return { tag: null, start: -1, matches: 0 };
    }

    let matches = 0;
    let wordIndex = anchorIndex;
    let baselineIndex = tag.start - 1;

    // Count agreeing words walking backwards from the anchor. Running off the
    // front of either list yields `undefined`, which stops the walk: an
    // undefined word is rejected explicitly, and an undefined baseline entry
    // can never equal a defined word.
    while (wordIndex >= 0 &&
        words[wordIndex] !== undefined &&
        words[wordIndex] === this.baseline[baselineIndex]) {
      matches++;
      wordIndex--;
      baselineIndex--;
    }

    if (matches === 0) {
      return { tag: null, start: -1, matches };
    }

    const start = anchorIndex + 1;

    // NOTE(review): for `tag.end > tag.start` this yields an end index before
    // `start`, i.e. an empty slice. Presumably `tag.end - tag.start` was
    // intended; kept as-is until the meaning of `tag.end` is confirmed.
    const endIndex = tag.end === -1 ?
      words.length :
      start + (tag.start - tag.end);

    return { tag: words.slice(start, endIndex + 1), start, matches };
  }
}