Skip to content

Commit 54af4ad

Browse files
committed
executable: add support for passing options to --simplify either as JSON or key=value
1 parent 64ab6ce commit 54af4ad

File tree

4 files changed

+43
-6
lines changed

4 files changed

+43
-6
lines changed

docs/how_to.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,16 @@ cat entities.json | wikidata-filter --languages en,fr,de,zh,eo > subset.ndjson
131131
```
132132

133133
### Simplify entity data
134-
Uses [wikidata-sdk `simplify.entity` function](https://github.com/maxlath/wikidata-sdk#simplify-entity) to parse the labels, descriptions, aliases, claims, and sitelinks.
134+
Uses [wikidata-sdk `simplify.entity` function](https://github.com/maxlath/wikidata-sdk/blob/master/docs/simplify_entities_data.md) to parse the labels, descriptions, aliases, claims, and sitelinks.
135135
```sh
136-
cat entities.json | wikidata-filter --simplified > simplified_dump.ndjson
136+
# Default simplify options
137+
cat entities.json | wikidata-filter --simplify > simplified_dump.ndjson
138+
# Custom options: see the wdk.simplify.entity documentation at https://github.com/maxlath/wikidata-sdk/blob/master/docs/simplify_entities_data.md
139+
# and specifically for claims options, see https://github.com/maxlath/wikidata-sdk/blob/master/docs/simplify_claims.md#options
140+
cat entities.json | wikidata-filter --simplify '{"keepRichValues":true,"keepQualifiers":true,"keepReferences":true}' > simplified_dump.ndjson
141+
# The options can also be passed in a lighter, urlencoded-like, key=value format
142+
# that's simpler than typing all those JSON double quotes
143+
cat entities.json | wikidata-filter --simplify 'keepRichValues=true&keepQualifiers=true&keepReferences=true' > simplified_dump.ndjson
137144
```
138145

139146
## Other options
@@ -194,5 +201,5 @@ The [equivalent SPARQL query](https://query.wikidata.org/#SELECT%20%3Fs%20%3FsLa
194201

195202
```sh
196203
DUMP='https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.json.gz'
197-
curl $DUMP | gzip -d | wikidata-filter --sitelink 'zhwiki&frwiki' --keep id,labels,sitelinks --languages zh,fr --simplified > subset.ndjson
204+
curl $DUMP | gzip -d | wikidata-filter --sitelink 'zhwiki&frwiki' --keep id,labels,sitelinks --languages zh,fr --simplify > subset.ndjson
198205
```

lib/get_format_data.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ const difference = require('lodash.difference')
33
const isPlainObject = require('lodash.isplainobject')
44
const lists = require('./lists')
55
const validate = require('./validate_arguments')
6+
const querystring = require('querystring')
67

78
module.exports = (options) => {
89
var { omit, languages, simplify } = options
@@ -16,6 +17,10 @@ module.exports = (options) => {
1617

1718
var simplifyOptions
1819
if (simplify) {
20+
if (typeof simplify === 'string') {
21+
if (simplify.match(/=/)) simplify = querystring.parse(simplify)
22+
else if (simplify[0] === '{') simplify = JSON.parse(simplify)
23+
}
1924
if (isPlainObject(simplify)) {
2025
simplifyOptions = simplify
2126
simplify = true

lib/program.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module.exports = function () {
1212
.option('-o, --omit <attributes>', help.omit, list)
1313
.option('-k, --keep <attributes>', help.keep, list)
1414
.option('-l, --languages <languages>', help.languages, list)
15-
.option('-s, --simplified', help.simplified)
15+
.option('-s, --simplify <boolean|options>', help.simplify)
1616
// misc
1717
.option('-p, --progress <boolean>', help.progress)
1818
.parse(process.argv)
@@ -47,8 +47,9 @@ const help = {
4747
Specify for which languages labels, descriptions and aliases should be kept.
4848
`,
4949

50-
simplified: `
50+
simplify: `
5151
Flag to simplify claims values. Defaults to false.
52+
Can also accept wikidata-sdk simplify.entity option object as JSON or key=values: wikidata-filter --simplify 'keepQualifiers=true&keepRichValues=true'
5253
`,
5354

5455
progress: `

test/integration.js

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
require('should')
1+
const should = require('should')
22
const { exec } = require('child_process')
33

44
describe('integration', () => {
@@ -12,4 +12,28 @@ describe('integration', () => {
1212
done()
1313
})
1414
})
15+
16+
it('should parse json simplify options', done => {
17+
exec(`cat ./test/fixtures/Q22.json | ./bin/wikidata-filter -s '{"keepQualifiers":true}'`, (err, stdout, stderr) => {
18+
if (err) return done(err)
19+
const entity = JSON.parse(stdout)
20+
entity.id.should.equal('Q22')
21+
should(entity.claims.P1549[2].mainsnak).not.be.ok()
22+
entity.claims.P1549[2].qualifiers.should.be.an.Object()
23+
entity.claims.P1549[2].qualifiers.P518.should.be.an.Object()
24+
done()
25+
})
26+
})
27+
28+
it('should parse url-encoded simplify options', done => {
29+
exec('cat ./test/fixtures/Q22.json | ./bin/wikidata-filter -s "keepQualifiers=true"', (err, stdout, stderr) => {
30+
if (err) return done(err)
31+
const entity = JSON.parse(stdout)
32+
entity.id.should.equal('Q22')
33+
should(entity.claims.P1549[2].mainsnak).not.be.ok()
34+
entity.claims.P1549[2].qualifiers.should.be.an.Object()
35+
entity.claims.P1549[2].qualifiers.P518.should.be.an.Object()
36+
done()
37+
})
38+
})
1539
})

0 commit comments

Comments
 (0)