-
Notifications
You must be signed in to change notification settings - Fork 716
/
get-core-content-text.mjs
101 lines (87 loc) · 2.47 KB
/
get-core-content-text.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import { getPossibleElementByQuerySelector } from './get-possible-element-by-query-selector.mjs'
import { Readability, isProbablyReaderable } from '@mozilla/readability'
const adapters = {
'scholar.google': ['#gs_res_ccl_mid'],
google: ['#search'],
csdn: ['#content_views'],
bing: ['#b_results'],
wikipedia: ['#mw-content-text'],
faz: ['.atc-Text'],
golem: ['article'],
eetimes: ['article'],
'new.qq.com': ['.content-article'],
}
function getArea(e) {
const rect = e.getBoundingClientRect()
return rect.width * rect.height
}
function findLargestElement(e) {
if (!e) {
return null
}
let maxArea = 0
let largestElement = null
const limitedArea = 0.8 * getArea(e)
function traverseDOM(node) {
if (node.nodeType === Node.ELEMENT_NODE) {
const area = getArea(node)
if (area > maxArea && area < limitedArea) {
maxArea = area
largestElement = node
}
Array.from(node.children).forEach(traverseDOM)
}
}
traverseDOM(e)
return largestElement
}
function getTextFrom(e) {
return e.innerText || e.textContent
}
function postProcessText(text) {
return text
.trim()
.replaceAll(' ', '')
.replaceAll('\t', '')
.replaceAll('\n\n', '')
.replaceAll(',,', '')
}
export function getCoreContentText() {
for (const [siteName, selectors] of Object.entries(adapters)) {
if (location.hostname.includes(siteName)) {
const element = getPossibleElementByQuerySelector(selectors)
if (element) return postProcessText(getTextFrom(element))
break
}
}
const element = document.querySelector('article')
if (element) {
return postProcessText(getTextFrom(element))
}
if (isProbablyReaderable(document)) {
let article = new Readability(document.cloneNode(true), {
keepClasses: true,
}).parse()
console.log('readerable')
return postProcessText(article.textContent)
}
const largestElement = findLargestElement(document.body)
const secondLargestElement = findLargestElement(largestElement)
console.log(largestElement)
console.log(secondLargestElement)
let ret
if (!largestElement) {
ret = getTextFrom(document.body)
console.log('use document.body')
} else if (
secondLargestElement &&
getArea(secondLargestElement) > 0.5 * getArea(largestElement)
) {
ret = getTextFrom(secondLargestElement)
console.log('use second')
} else {
ret = getTextFrom(largestElement)
console.log('use first')
}
return postProcessText(ret)
}