-
-
Notifications
You must be signed in to change notification settings - Fork 160
/
address_search_using_ids.js
200 lines (173 loc) · 7.01 KB
/
address_search_using_ids.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
const _ = require('lodash');
const peliasQuery = require('pelias-query');
const defaults = require('./search_defaults');
//------------------------------
// general-purpose search query
//------------------------------
const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery();
// scoring boost
addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( ) );
// --------------------------------
// non-scoring hard filters
addressUsingIdsQuery.filter( peliasQuery.view.boundary_country );
addressUsingIdsQuery.filter( peliasQuery.view.boundary_circle );
addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect );
addressUsingIdsQuery.filter( peliasQuery.view.sources );
addressUsingIdsQuery.filter( peliasQuery.view.boundary_gid );
// --------------------------------
// This query is a departure from traditional Pelias queries where textual
// names of admin areas were looked up. This query uses the ids returned by
// placeholder for lookups which dramatically reduces the amount of information
// that ES has to store and allows us to have placeholder handle altnames on
// behalf of Pelias.
//
// For the happy path, an input like '30 West 26th Street, Manhattan' would result
// in:
// neighbourhood_id in []
// borough_id in [421205771]
// locality_id in [85945171, 85940551, 85972655]
// localadmin_id in [404502889, 404499147, 404502891, 85972655]
//
// Where the ids are for all the various Manhattans. Each of those could
// conceivably be the Manhattan that the user was referring to so so all must be
// queried for at the same time.
//
// A counter example for this is '1 West Market Street, York, PA' where York, PA
// can be interpreted as a locality OR county. From experience, when there's
// ambiguity between locality and county for an input, the user is, with complete
// metaphysical certitude, referring to the city. If they were referring to the
// county, they would have entered 'York County, PA'. The point is that it's
// insufficient to just query for all ids because, in this case, '1 West Market Street'
// in other cities in York County, PA would be returned and would be both jarring
// to the user and almost certainly leads to incorrect results. For example,
// the following could be returned (all are towns in York County, PA):
// - 1 West Market Street, Dallastown, PA
// - 1 West Market Street, Fawn Grove, PA
// - 1 West Market Street, Shrewsbury, PA
// etc.
//
// To avoid this calamitous response, this query takes the approach of
// "granularity bands". That is, if there are any ids in the first set of any
// of these granularities:
// - neighbourhood
// - borough
// - locality
// - localadmin
// - region
// - macroregion
// - dependency
// - country
//
// then query for all ids in only those layers. Falling back, if there are
// no ids in those layers, query for the county/macrocounty layers.
//
// This methodology ensures that no happened-to-match-on-county results are returned.
//
// The decision was made to include all other layers in one to solve the issue
// where a country and city share a name, such as Mexico, which could be
// interpreted as a country AND city (in Missouri). The data itself will sort
// out which is correct. That is, it's unlikely that "11 Rock Springs Dr" exists
// in Mexico the country due to naming conventions and would be filtered out
// (though it could, but that's good because it's legitimate)
const granularity_bands = [
['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'],
['county', 'macrocounty']
];
// returns IFF there are *any* results in the granularity band
function anyResultsAtGranularityBand(results, band) {
return results.some(result => _.includes(band, result.layer));
}
// returns the ids of results at the requested layer
function getIdsAtLayer(results, layer) {
return results.filter(result => result.layer === layer).map(_.property('source_id'));
}
/**
map request variables to query variables for all inputs
provided by this HTTP request. This function operates on res.data which is the
Document-ified placeholder repsonse.
**/
function generateQuery( clean, res ){
const vs = new peliasQuery.Vars( defaults );
const results = _.defaultTo(res.data, []);
// sources
if( !_.isEmpty(clean.sources) ) {
vs.var( 'sources', clean.sources);
}
// size
if( clean.querySize ) {
vs.var( 'size', clean.querySize );
}
if( ! _.isEmpty(clean.parsed_text.number) ){
vs.var( 'input:housenumber', clean.parsed_text.number );
}
if( ! _.isEmpty(clean.parsed_text.unit) ){
vs.var( 'input:unit', clean.parsed_text.unit );
}
if( ! _.isEmpty(clean.parsed_text.postalcode) ){
vs.var( 'input:postcode', clean.parsed_text.postalcode );
}
vs.var( 'input:street', clean.parsed_text.street );
// find the first granularity band for which there are results
const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band));
// if there's a granularity band, accumulate the ids from each layer in the band
// into an object mapping layer->ids of those layers
if (granularity_band) {
const layers_to_ids = granularity_band.reduce((acc, layer) => {
acc[layer] = getIdsAtLayer(res.data, layer);
return acc;
}, {});
// use an object here instead of calling `set` since that flattens out an
// object into key/value pairs and makes identifying layers harder in query module
vs.var('input:layers', layers_to_ids);
}
// focus point
if( _.isFinite(clean['focus.point.lat']) &&
_.isFinite(clean['focus.point.lon']) ){
vs.set({
'focus:point:lat': clean['focus.point.lat'],
'focus:point:lon': clean['focus.point.lon']
});
}
// boundary rect
if( _.isFinite(clean['boundary.rect.min_lat']) &&
_.isFinite(clean['boundary.rect.max_lat']) &&
_.isFinite(clean['boundary.rect.min_lon']) &&
_.isFinite(clean['boundary.rect.max_lon']) ){
vs.set({
'boundary:rect:top': clean['boundary.rect.max_lat'],
'boundary:rect:right': clean['boundary.rect.max_lon'],
'boundary:rect:bottom': clean['boundary.rect.min_lat'],
'boundary:rect:left': clean['boundary.rect.min_lon']
});
}
// boundary circle
if( _.isFinite(clean['boundary.circle.lat']) &&
_.isFinite(clean['boundary.circle.lon']) ){
vs.set({
'boundary:circle:lat': clean['boundary.circle.lat'],
'boundary:circle:lon': clean['boundary.circle.lon']
});
if( _.isFinite(clean['boundary.circle.radius']) ){
vs.set({
'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km'
});
}
}
// boundary country
if( _.isArray(clean['boundary.country']) && !_.isEmpty(clean['boundary.country']) ){
vs.set({
'boundary:country': clean['boundary.country'].join(' ')
});
}
// boundary gid
if ( _.isString(clean['boundary.gid']) ){
vs.set({
'boundary:gid': clean['boundary.gid']
});
}
return {
type: 'address_search_using_ids',
body: addressUsingIdsQuery.render(vs)
};
}
module.exports = generateQuery;