-
Notifications
You must be signed in to change notification settings - Fork 436
/
extractor.js
38 lines (32 loc) · 1.16 KB
/
extractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import { cleanDatePublished } from 'cleaners';
import { extractFromMeta, extractFromSelectors } from 'utils/dom';
import { extractFromUrl } from 'utils/text';
import {
DATE_PUBLISHED_META_TAGS,
DATE_PUBLISHED_SELECTORS,
DATE_PUBLISHED_URL_RES,
} from './constants';
const GenericDatePublishedExtractor = {
extract({ $, url, metaCache }) {
let datePublished;
// First, check to see if we have a matching meta tag
// that we can make use of.
// Don't try cleaning tags from this string
datePublished = extractFromMeta(
$,
DATE_PUBLISHED_META_TAGS,
metaCache,
false
);
if (datePublished) return cleanDatePublished(datePublished);
// Second, look through our selectors looking for potential
// date_published's.
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished);
// Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
if (datePublished) return cleanDatePublished(datePublished);
return null;
},
};
export default GenericDatePublishedExtractor;