File tree Expand file tree Collapse file tree 7 files changed +142
-1
lines changed
packages/metascraper-lang-detector Expand file tree Collapse file tree 7 files changed +142
-1
lines changed Original file line number Diff line number Diff line change @@ -95,6 +95,9 @@ The output will be something like:
95
95
96
96
Here is an example of the metadata that ** metascraper** can collect:
97
97
98
+ - ` audio ` — eg. * https://cf-media.sndcdn.com/U78RIfDPV6ok.128.mp3 * <br />
99
+ An audio URL that best represents the article.
100
+
98
101
- ` author ` — eg. * Noah Kulwin* <br />
99
102
A human-readable representation of the author's name.
100
103
@@ -103,7 +106,7 @@ Here is an example of the metadata that **metascraper** can collect:
103
106
104
107
- ` description ` — eg. * Venture capitalists are raising money at the fastest rate...* <br />
105
108
The publisher's chosen description of the article.
106
-
109
+
107
110
- ` audio ` — eg. * https://cf-media.sndcdn.com/U78RIfDPV6ok.128.mp3 * <br />
108
111
An audio URL that best represents the article.
109
112
Original file line number Diff line number Diff line change
1
+ unsafe-perm = true
2
+ save-prefix = ~
3
+ shrinkwrap = false
4
+ save = false
Original file line number Diff line number Diff line change
1
+ # metascraper-lang-detector
2
+
3
+ [ ![ npm] ( https://img.shields.io/npm/v/metascraper-lang-detector.svg?style=flat-square )] ( https://www.npmjs.com/package/metascraper-lang-detector )
4
+ [ ![ Dependency Status] ( https://david-dm.org/microlinkhq/metascraper.svg?path=packages/metascraper-lang-detector&style=flat-square )] ( https://david-dm.org/microlinkhq/metascraper?path=packages/metascraper-lang-detector )
5
+
6
+ > Get the lang property from HTML markup using natural language processing.
7
+
8
+ ## Install
9
+
10
+ ``` bash
11
+ $ npm install metascraper-lang-detector --save
12
+ ```
13
+
14
+ ## License
15
+
16
+ ** metascraper-lang-detector** © [ microlink.io] ( https://microlink.io ) , Released under the [ MIT] ( https://github.com/microlinkhq/metascraper-lang-detector/blob/master/LICENSE.md ) License.<br >
17
+ Authored and maintained by microlink.io with help from [ contributors] ( https://github.com/microlinkhq/metascraper-lang-detector/contributors ) .
18
+
19
+ > [microlink.io](https://microlink.io) · GitHub [@microlink.io](https://github.com/microlinkhq) · Twitter [@microlinkhq](https://twitter.com/microlinkhq)
Original file line number Diff line number Diff line change
1
+ 'use strict'
2
+
3
+ const { lang } = require ( '@metascraper/helpers' )
4
+ const { reduce, get } = require ( 'lodash' )
5
+ const langs = require ( 'iso-639-3' )
6
+ const franc = require ( 'franc' )
7
+
8
/**
 * Lookup table built once at module load from the `iso-639-3` entries.
 *
 * Each key is an entry's `iso6393` value and each value is that same entry's
 * `iso6391` value. Entries whose `iso6391` is falsy are left out of the table.
 */
const toIso6391 = reduce(
  langs,
  (table, entry) => {
    const { iso6393: longCode, iso6391: shortCode } = entry
    // Skip entries that carry no `iso6391` value.
    if (!shortCode) return table
    table[longCode] = shortCode
    return table
  },
  {}
)
16
+
17
/**
 * Detect the language of one field of a metadata collection.
 *
 * The field value is read with lodash `get`, passed to `franc`, and the code
 * `franc` returns is used as a key into `toIso6391`. The looked-up value
 * (`undefined` when the key is absent) is handed to the `lang` helper from
 * `@metascraper/helpers`, whose return value is returned unchanged.
 *
 * The package's own test expects `'en'` / `'es'` for English / Spanish text
 * and `false` when the field value is `null`.
 *
 * @param {Object} collection - Object holding the metadata values.
 * @param {string} field - Path of the value to inspect inside `collection`.
 * @returns {*} Whatever the `lang` helper returns for the looked-up code.
 */
const detectLang = (collection, field) =>
  lang(toIso6391[franc(get(collection, field))])
22
+
23
+ module . exports = ( { fields = [ 'description' ] } ) =>
24
+ reduce (
25
+ fields ,
26
+ ( acc , prop ) => {
27
+ const fn = ( { meta } ) => detectLang ( meta , prop )
28
+ return acc . concat ( fn )
29
+ } ,
30
+ { lang : [ ] }
31
+ )
32
+
33
+ module . exports . detectLang = detectLang
Original file line number Diff line number Diff line change
1
+ {
2
+ "name" : " metascraper-lang-detector" ,
3
+ "description" : " Get lang property from HTML markup based on natural language processor" ,
4
+ "homepage" : " https://metascraper.js.org" ,
5
+ "version" : " 4.2.0" ,
6
+ "main" : " index.js" ,
7
+ "author" : {
8
+ "email" : " ian@ianstormtaylor.com" ,
9
+ "name" : " Ian Storm Taylor"
10
+ },
11
+ "repository" : {
12
+ "type" : " git" ,
13
+ "url" : " https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-lang-detector"
14
+ },
15
+ "bugs" : {
16
+ "url" : " https://github.com/microlinkhq/metascraper/issues"
17
+ },
18
+ "dependencies" : {
19
+ "@metascraper/helpers" : " ^4.2.0" ,
20
+ "franc" : " ~4.0.0" ,
21
+ "iso-639-3" : " ~1.1.0"
22
+ },
23
+ "devDependencies" : {
24
+ "mocha" : " latest" ,
25
+ "nyc" : " latest" ,
26
+ "should" : " latest" ,
27
+ "standard" : " 11"
28
+ },
29
+ "engines" : {
30
+ "node" : " >= 8"
31
+ },
32
+ "files" : [
33
+ " index.js"
34
+ ],
35
+ "scripts" : {
36
+ "test" : " NODE_PATH=.. TZ=UTC NODE_ENV=test nyc mocha test"
37
+ },
38
+ "license" : " MIT" ,
39
+ "peerDependencies" : {
40
+ "metascraper" : " ^4"
41
+ },
42
+ "standard" : {
43
+ "env" : [
44
+ " mocha"
45
+ ]
46
+ }
47
+ }
Original file line number Diff line number Diff line change
1
+ 'use strict'
2
+
3
+ const should = require ( 'should' )
4
+
5
+ const { detectLang } = require ( '..' )
6
+
7
// Checks `detectLang` against an English text, a Spanish text and a `null`
// value, all read from the `description` field.
describe('metascraper-lang', () => {
  it('.detectLang', () => {
    // Each row: [description value, expected detectLang result].
    const cases = [
      [
        'A library to easily scrape metadata from an article on the web using Open Graph metadata, regular HTML metadata, and series of fallbacks.',
        'en'
      ],
      [
        'Una libreria para obtener fácilmente metadatos de cualquier artículo de internet usando Open Graph, HTML y una serie de fallbacks.',
        'es'
      ],
      [null, false]
    ]

    cases.forEach(([description, expected]) => {
      should(detectLang({ description }, 'description')).be.equal(expected)
    })
  })
})
Original file line number Diff line number Diff line change
1
+ --require should
2
+ --reporter spec
3
+ --timeout 120000
4
+ --slow 300
5
+ --bail
6
+ --recursive
You can’t perform that action at this time.
0 commit comments