# Scraped from a GitHub file view (branch: master; 80 lines, 79 sloc, 2.94 KB).
# The GitHub page navigation text that stood here is not part of the YAML
# document and has been reduced to this comment.
# Recipe definitions.
#
# `deaths_matching` reads the `deaths` input, joins every row against the
# `deaths` Elasticsearch dataset, then filters, scores and ranks the hits.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Confirm it against the upstream file,
# in particular the position of `minimum_should_match: 2`.
recipes:
  deaths_matching:
    input: deaths
    steps:
      # Fuzzy join: each input row is searched in the `deaths` dataset and the
      # columns of every hit are added with the `hit_` prefix.
      # Query values such as `matchid_name_last_match` are presumably replaced
      # by the current row's column of that name - confirm against the join
      # implementation.
      - join:
          type: elasticsearch
          dataset: deaths
          prefix: hit_
          # keep_unmatched: True
          query:
            # At most 40 hits are requested per input row.
            size: 40
            query:
              bool:
                # Mandatory part: at least one of the two name + birth date
                # combinations below has to match.
                must:
                  - bool:
                      should:
                        # Fuzzy name, non-fuzzy birth date.
                        - bool:
                            must:
                              - match:
                                  matchid_name_match:
                                    query: matchid_name_last_match
                                    fuzziness: auto
                              - match:
                                  matchid_date_birth_str: matchid_date_birth_str
                        # Non-fuzzy name, birth date with an edit distance
                        # of 1.
                        - bool:
                            must:
                              - match:
                                  matchid_name_match: matchid_name_last_match
                              - match:
                                  matchid_date_birth_str:
                                    query: matchid_date_birth_str
                                    fuzziness: 1
                      minimum_should_match: 1
                # Additional clauses on name position, first name, city and
                # country.
                should:
                  - bool:
                      should:
                        # Last name found at the first position of the
                        # `matchid_name_match` field.
                        - span_first:
                            match:
                              span_term:
                                matchid_name_match: matchid_name_last_match
                            end: 1
                  - match:
                      matchid_name_match: matchid_name_first_match
                  - match:
                      matchid_location_city: matchid_location_city
                  - match:
                      matchid_location_country: matchid_location_country
                # NOTE(review): placed on the outer `bool`, so two of the four
                # `should` clauses above are required. It may instead belong
                # to the inner `bool` next to `span_first` - confirm.
                minimum_should_match: 2
      # Column renaming for the hit count and the Elasticsearch scores.
      # NOTE(review): the later steps read the names on the left-hand side, so
      # the mapping looks like `new_name: old_name` - confirm.
      - rename:
          hit_matchid_hit_number: hit_total
          hit_matchid_score_es_max: hit_max_score
          hit_matchid_score_es: hit_score
      # Keep hits whose Elasticsearch score exceeds a threshold equal to the
      # hit count clamped to the range [15, 30].
      - keep:
          select: '.*'
          where: 'hit_matchid_score_es > min(max(hit_matchid_hit_number,15),30)'
      # Step without arguments; presumably a `scoring` recipe defined
      # elsewhere that produces the `confiance` column used below - confirm.
      - scoring:
      - keep:
          select: '.*'
          where: 'confiance > 20'
      # Group aggregations for multi-matching.
      - groupby:
          select: matchid_id
          transform:
            - hit_matchid_id: count
            - matchid_hit_score: max
      - groupby:
          select: matchid_id
          rank:
            - matchid_hit_score
      # Filtering of multi-matches (filters highly confusing names).
      # NOTE(review): if `matchid_hit_score` lies in [0, 1], as the `eval`
      # step below suggests, `min(0.5, matchid_hit_score_max-1)` is never
      # positive and the second condition always holds - confirm the intended
      # threshold.
      - keep:
          select: '.*'
          where: '(matchid_hit_score_rank == 1) | (matchid_hit_score > min(0.5,matchid_hit_score_max-1))'
      # Express the score as a rounded percentage.
      - eval:
          - confiance: round(100*matchid_hit_score)
      # Optional diff step which displays the differences with the hits:
      # - diff:
      # - keep:
      #     select: .*(diff.*|confiance|score_name|score_location|score_sex|score_date|hit_distance)$
      #     where: confiance>10
# (GitHub page footer text removed; it is not part of the YAML document.)