Skip to content

Commit 642eaa3

Browse files
committed
claim filters: add support for logical operators
fix #17
1 parent 2fa8821 commit 642eaa3

File tree

7 files changed

+131
-29
lines changed

7 files changed

+131
-29
lines changed

docs/how_to.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
- [Filter entities](#filter-entities)
1010
- [By claims](#by-claims)
11+
- [claims logical operators](#claims-logical-operators)
1112
- [Long claim option](#long-claim-option)
1213
- [By sitelinks](#by-sitelinks)
1314
- [By type](#by-type)
@@ -45,6 +46,13 @@ this can be quite convenient when you don't have enough space to keep the whole
4546

4647
Of course, **this probably only makes sense if the kind of entities you are looking for is somewhere above 100 000 units(?)**, given that under this level, it would probably be faster/more efficient to get the list of ids from [Wikidata Query](http://query.wikidata.org/), then [get the entities data from the API](https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities) ([wikidata-sdk](https://github.com/maxlath/wikidata-sdk#get-entities-by-id) can be helpful there).
4748

49+
#### claims logical operators
50+
You can use logical operators:
51+
```sh
52+
# the claim filter is equivalent to (P31:Q571 && (P50 || P110))
53+
cat entities.json | wikidata-filter --claim 'P31:Q571&P50|P110' > books_with_an_author_or_an_illustrator.ndjson
54+
```
55+
4856
#### Long claim option
4957
If [your claim is too long and triggers an `Argument list too long` error](https://github.com/maxlath/wikidata-filter/issues/13), you can pass a file instead:
5058
```sh

lib/get_filter_by_claims_data.js

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,29 @@ const path = require('path')
33
const validate = require('./validate_arguments')
44
const hash = require('./hash')
55

6-
module.exports = claimOption => {
7-
if (claimOption == null) return
6+
module.exports = conjonctiveClaimsFilter => {
7+
if (conjonctiveClaimsFilter == null) return
88

99
// Accept a path as claim option to work around long arguments limitations
1010
try {
11-
claimOption = fs.readFileSync(path.resolve(claimOption)).toString().trim()
11+
conjonctiveClaimsFilter = fs.readFileSync(path.resolve(conjonctiveClaimsFilter)).toString().trim()
1212
} catch (err) {
1313
if (err.code !== 'ENOENT') throw err
1414
}
1515

16-
validate.claim(claimOption)
16+
validate.claims(conjonctiveClaimsFilter)
1717

18-
var [ P, Q ] = claimOption.split(':')
18+
return conjonctiveClaimsFilter
19+
.split(/&/g)
20+
.map(disjonctiveClaimsFilter => {
21+
return disjonctiveClaimsFilter
22+
.split(/\|/g)
23+
.map(claimsData)
24+
})
25+
}
26+
27+
const claimsData = claim => {
28+
var [ P, Q ] = claim.split(':')
1929
const negatedProp = P[0] === '~'
2030
const filterByClaimValue = Q != null
2131
if (negatedProp) P = P.slice(1)

lib/valid_claims.js

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11
const wdk = require('wikidata-sdk')
22
const haveAMatch = require('./have_a_match')
3+
const every = require('lodash.every')
4+
const some = require('lodash.some')
35

4-
module.exports = (claims, filterByClaimData) => {
5-
if (!filterByClaimData) return true
6+
module.exports = (claims, conjonctiveClaimsFilter) => {
7+
if (!conjonctiveClaimsFilter) return true
8+
// console.log({ conjonctiveClaimsFilter })
9+
// ex: P31:Q571&P50|P110 => P31:Q571 && (P50 || P110)
10+
return every(conjonctiveClaimsFilter, anyDisjonctiveFilter(claims))
11+
}
12+
13+
const anyDisjonctiveFilter = claims => disjonctiveClaimsFilter => {
14+
return some(disjonctiveClaimsFilter, validClaim(claims))
15+
}
616

7-
const { P, QHash, negatedProp, filterByClaimValue } = filterByClaimData
17+
const validClaim = claims => claimsFilterData => {
18+
const { P, QHash, negatedProp, filterByClaimValue } = claimsFilterData
819
// filter-out this entity unless it has claims of the desired property
920
var propClaims = claims[P]
1021

lib/validate_arguments.js

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@ const languageCode = /^[a-z]{2}(-[a-z]{2})?$/
33
// ~ stands for negated properties
44
const isPropertyId = (str) => /^~?P\d+$/.test(str)
55

6+
const validateClaim = claim => {
7+
const [ P, Q ] = claim.split(':')
8+
if (!(isPropertyId(P))) throw new Error(`invalid claim property: ${P}`)
9+
if (Q) {
10+
const Qs = Q.split(',')
11+
for (let q of Qs) {
12+
if (!(isItemId(q))) throw new Error(`invalid claim value: ${q}`)
13+
}
14+
}
15+
}
16+
617
module.exports = {
718
value: function (label, values, list) {
819
for (let attr of values) {
@@ -14,14 +25,5 @@ module.exports = {
1425
if (!(languageCode.test(lang))) throw new Error(`invalid language: ${lang}`)
1526
}
1627
},
17-
claim: claim => {
18-
const [ P, Q ] = claim.split(':')
19-
if (!(isPropertyId(P))) throw new Error(`invalid claim property: ${P}`)
20-
if (Q) {
21-
const Qs = Q.split(',')
22-
for (let q of Qs) {
23-
if (!(isItemId(q))) throw new Error(`invalid claim value: ${q}`)
24-
}
25-
}
26-
}
28+
claims: claims => claims.split(/\||&/g).forEach(validateClaim)
2729
}

package-lock.json

Lines changed: 40 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
"check-node-version": "^2.1.0",
2626
"commander": "^2.9.0",
2727
"lodash.difference": "^4.2.0",
28+
"lodash.every": "^4.6.0",
2829
"lodash.pick": "^4.2.0",
30+
"lodash.some": "^4.6.0",
2931
"split": "^1.0.0",
3032
"through": "^2.3.8",
3133
"wikidata-sdk": "^5.1.0"

test/claims.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,44 @@ describe('claims', function () {
4747
done()
4848
})
4949
})
50+
51+
describe('disjonctive claims', function () {
52+
it('should return the entity if it has one of the specified claim', function (done) {
53+
const result = wdFilter({ claim: 'P31:Q571|P300' })(parsedEntity)
54+
should(result).equal(parsedEntity)
55+
done()
56+
})
57+
58+
it('should support multiple claim values', function (done) {
59+
const result = wdFilter({ claim: 'P6214214|P31:Q571,Q3336843' })(parsedEntity)
60+
should(result).equal(parsedEntity)
61+
done()
62+
})
63+
64+
it('should support negative claims', function (done) {
65+
const result = wdFilter({ claim: '~P31|~P2002' })(parsedEntity)
66+
should(result).equal(parsedEntity)
67+
done()
68+
})
69+
})
70+
71+
describe('conjonctive claims', function () {
72+
it("should not return the entity if it doesn't have all the specified claim", function (done) {
73+
const result = wdFilter({ claim: 'P31:Q571&P300' })(parsedEntity)
74+
should(result).be.null()
75+
done()
76+
})
77+
78+
it('should return the entity if it has all the specified claim', function (done) {
79+
const result = wdFilter({ claim: 'P31:Q3336843&P300' })(parsedEntity)
80+
should(result).equal(parsedEntity)
81+
done()
82+
})
83+
84+
it('should support negative claims', function (done) {
85+
const result = wdFilter({ claim: 'P31:Q3336843&P300&~P2002' })(parsedEntity)
86+
should(result).equal(parsedEntity)
87+
done()
88+
})
89+
})
5090
})

0 commit comments

Comments
 (0)