/
faq.js
101 lines (92 loc) · 3.51 KB
/
faq.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import keywordExtractor from 'keyword-extractor'
import stemmer from 'stemmer'
import content from './faq.json'
// Inverted index: maps each stemmed word to a list of [documentIndex, points]
// pairs, ordered by ascending document index. It is created without a prototype
// so that words such as "constructor" or "__proto__" become ordinary keys
// instead of resolving to members inherited from Object.prototype.
const invertedIndexes = Object.create(null)
// Options handed to keywordExtractor.extract() when indexing answers.
const extractorOptions = { remove_digits: true, remove_duplicates: false, return_changed_case: true }
// Points credited per occurrence of a word, depending on where it was found.
const TAG_POINTS = 5
const QUESTION_POINTS = 3
const ANSWER_POINTS = 1

/**
 * Credits `points` to document `contentIndex` once for every entry in `words`.
 *
 * Documents are indexed in ascending order, so the entry for the document
 * currently being indexed (if there is one) is always the last element of a
 * word's list. That keeps every list dense and sorted by document index.
 *
 * @param {string[]} words - Stemmed words; repeated words accumulate points.
 * @param {number} contentIndex - Position of the document in `content`.
 * @param {number} points - Weight added per occurrence.
 */
function addPoints (words, contentIndex, points) {
  for (const word of words) {
    const postings = invertedIndexes[word] || (invertedIndexes[word] = [])
    const last = postings[postings.length - 1]
    if (last && last[0] === contentIndex) {
      last[1] += points
    } else {
      postings.push([contentIndex, points])
    }
  }
}

content.forEach((item, contentIndex) => {
  // Question: every space-separated word, stemmed.
  // NOTE(review): the split is on single spaces only, so punctuation such as a
  // trailing "?" presumably stays attached to the last word - confirm intended.
  addPoints(item.q.split(' ').map(stemmer), contentIndex, QUESTION_POINTS)

  // Answer: keywords picked by the extractor, stemmed.
  addPoints(keywordExtractor.extract(item.a, extractorOptions).map(stemmer), contentIndex, ANSWER_POINTS)

  // Tags: the whole tag earns full points; when a tag consists of several
  // words, each individual word additionally earns half points.
  const wholeTags = []
  const tagWords = []
  for (const tag of item.t) {
    wholeTags.push(stemmer(tag))
    const parts = tag.split(' ')
    if (parts.length > 1) {
      for (const part of parts) tagWords.push(stemmer(part))
    }
  }
  addPoints(tagWords, contentIndex, TAG_POINTS / 2)
  addPoints(wholeTags, contentIndex, TAG_POINTS)

  // Store a slug of the question on the item: whitespace becomes "-" and
  // question marks are dropped.
  item.qe = item.q.replace(/\s/g, '-').replace(/\?/g, '')
})
/**
 * Finds the FAQ entries that match every word of a query, best match first.
 *
 * The query is split on whitespace and each word is stemmed. A document
 * qualifies only if it is listed in the inverted index for every distinct
 * stemmed term. Qualifying documents are ordered by their summed points
 * (highest first); ties keep ascending document order.
 *
 * @param {string} searchTerm - Raw query text. `null`/`undefined` are treated
 *   as an empty query.
 * @returns {Object[]} The whole `content` array when the query has no terms,
 *   otherwise the matching items of `content` (possibly an empty array).
 */
function search (searchTerm) {
  const query = searchTerm == null ? '' : String(searchTerm)
  const terms = new Set(query.split(/\s+/).map(stemmer).filter(term => term))
  if (terms.size === 0) return content

  // documentIndex -> { documentIndex, matched, points }
  const tallies = new Map()
  for (const term of terms) {
    // Only accept own, array-valued entries: on a plain object a term such as
    // "constructor" would otherwise resolve to an inherited, non-iterable
    // member and make the loop below throw.
    const postings = Object.prototype.hasOwnProperty.call(invertedIndexes, term)
      ? invertedIndexes[term]
      : undefined
    // Every term has to match, so one unknown term means no results at all.
    if (!Array.isArray(postings)) return []

    for (const [documentIndex, points] of postings) {
      const tally = tallies.get(documentIndex)
      if (tally) {
        ++tally.matched
        tally.points += points
      } else {
        tallies.set(documentIndex, { documentIndex, matched: 1, points })
      }
    }
  }

  return Array.from(tallies.values())
    .filter(tally => tally.matched === terms.size)
    .sort((a, b) => b.points - a.points || a.documentIndex - b.documentIndex)
    .map(tally => content[tally.documentIndex])
}
export { content as faq, invertedIndexes, search as searchFAQ }