// analyzer_peliasStreet.js
// validate analyzer is behaving as expected
var tape = require('tape'),
elastictest = require('elastictest'),
schema = require('../schema'),
punctuation = require('../punctuation');
module.exports.tests = {};
// confirm the elementary transformations applied by the 'peliasStreet' analyzer
module.exports.tests.analyze = function(test, common){
  test( 'analyze', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );
    var assertAnalysis = analyze.bind( null, suite, t, 'peliasStreet' );

    // wait for es to bring some shards up
    suite.action( function( done ){ setTimeout( done, 500 ); });

    // [ comment, input text, expected tokens ]
    var cases = [
      [ 'lowercase',             'F',                   ['f'] ],
      [ 'asciifolding',          'Max-Beer-Straße',     ['max-beer-strasse'] ],
      [ 'trim',                  ' f ',                 ['f'] ],
      [ 'keyword_street_suffix', 'foo Street',          ['foo st'] ],
      [ 'keyword_street_suffix', 'foo Road',            ['foo rd'] ],
      [ 'keyword_street_suffix', 'foo Crescent',        ['foo cres'] ],
      [ 'keyword_compass',       'north foo',           ['n foo'] ],
      [ 'keyword_compass',       'SouthWest foo',       ['sw foo'] ],
      [ 'remove_ordinals',       '1st 2nd 3rd 4th 5th', ['1 2 3 4 5'] ],
      [ 'remove_ordinals',       'Ast th 101st',        ['ast th 101'] ]
    ];

    cases.forEach( function( testcase ){
      assertAnalysis.apply( null, testcase );
    });

    suite.run( t.end );
  });
};
// a realistic USA street name should normalize the same way
// regardless of letter casing or pre-abbreviated terms
module.exports.tests.functional = function(test, common){
  test( 'functional', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );
    var assertAnalysis = analyze.bind( null, suite, t, 'peliasStreet' );

    // wait for es to bring some shards up
    suite.action( function( done ){ setTimeout( done, 500 ); });

    var variants = [
      'west 26th street',
      'West 26th Street',
      'w 26th st',
      'WEST 26th STREET',
      'WEST 26th ST'
    ];

    variants.forEach( function( input ){
      assertAnalysis( 'USA address', input, [ 'w 26 st' ]);
    });

    suite.run( t.end );
  });
};
// confirm that runs of consecutive spaces are collapsed to a single space.
// BUG FIX: the 'double'/'triple'/'quad space' cases previously all passed the
// identical single-space input, so only the first assertion actually tested
// anything; the inputs now contain 2, 3 and 4 spaces as their names claim.
module.exports.tests.normalize_punctuation = function(test, common){
  test( 'normalize punctuation', function(t){
    var suite = new elastictest.Suite( null, { schema: schema } );
    var assertAnalysis = analyze.bind( null, suite, t, 'peliasStreet' );
    suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

    assertAnalysis( 'single space', 'Chapala Street',    [ 'chapala st' ]);
    assertAnalysis( 'double space', 'Chapala  Street',   [ 'chapala st' ]);
    assertAnalysis( 'triple space', 'Chapala   Street',  [ 'chapala st' ]);
    assertAnalysis( 'quad space',   'Chapala    Street', [ 'chapala st' ]);

    suite.run( t.end );
  });
};
// exhaustively exercise the ordinal-stripping filter: matching suffixes are
// removed, mismatched suffixes are left alone, and partially-typed suffixes
// (autocomplete) are stripped too.
module.exports.tests.remove_ordinals = function(test, common){
  test( 'remove ordinals', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );
    var assertAnalysis = analyze.bind( null, suite, t, 'peliasStreet' );

    // wait for es to bring some shards up
    suite.action( function( done ){ setTimeout( done, 500 ); });

    var i;

    // correctly-suffixed ordinals lose their suffix
    [
      [ '1st', '1' ], [ '22nd', '22' ], [ '333rd', '333' ],
      [ '4444th', '4444' ], [ '2500th', '2500' ]
    ].forEach( function( pair ){
      assertAnalysis( 'ordindals', pair[0], [ pair[1] ] );
    });

    // teens: 11th-20th all take 'th'
    for( i = 11; i <= 20; i++ ){
      assertAnalysis( 'teens', i + 'th', [ String(i) ] );
    }

    // teens (hundreds): 111th-120th behave the same
    for( i = 111; i <= 120; i++ ){
      assertAnalysis( 'teens - hundreds', i + 'th', [ String(i) ] );
    }

    // teens (wrong suffix): left untouched
    [ '11st', '12nd', '13rd', '111st', '112nd', '113rd' ].forEach( function( term ){
      assertAnalysis( 'teens - wrong suffix', term, [ term ] );
    });

    // uppercase suffixes are stripped too
    [
      [ '1ST', '1' ], [ '22ND', '22' ], [ '333RD', '333' ], [ '4444TH', '4444' ]
    ].forEach( function( pair ){
      assertAnalysis( 'uppercase', pair[0], [ pair[1] ] );
    });

    // autocomplete: partial suffixes are also stripped
    [
      [ '26', '26' ], [ '26t', '26' ], [ '26th', '26' ],
      [ '3', '3' ], [ '3r', '3' ], [ '3rd', '3' ]
    ].forEach( function( pair ){
      assertAnalysis( 'autocomplete', pair[0], [ pair[1] ] );
    });

    // wrong suffix: left untouched
    [ '0th', '26s', '26st', '31t', '31th', '21r', '21rd', '29n', '29nd' ].forEach( function( term ){
      assertAnalysis( 'wrong suffix (do nothing)', term, [ term ] );
    });

    suite.run( t.end );
  });
};
// the tokenizer splits on street delimiters (slash, backslash, comma),
// tolerating optional whitespace on either side of the delimiter
module.exports.tests.tokenizer = function(test, common){
  test( 'tokenizer', function(t){

    var suite = new elastictest.Suite( null, { schema: schema } );
    var assertAnalysis = analyze.bind( null, suite, t, 'peliasStreet' );

    // wait for es to bring some shards up
    suite.action( function( done ){ setTimeout( done, 500 ); });

    // specify 2 streets with a delimeter
    [
      [ 'forward slash', '/' ],
      [ 'back slash',    '\\' ],
      [ 'comma',         ',' ]
    ].forEach( function( row ){
      var comment = row[0];
      var delim = row[1];
      // no space / space before / space after the delimiter
      [ delim, ' ' + delim, delim + ' ' ].forEach( function( sep ){
        assertAnalysis( comment, 'Bedell Street' + sep + '133rd Avenue', [ 'bedell st', '133 ave' ]);
      });
    });

    suite.run( t.end );
  });
};
// run every registered test, prefixing each name with the analyzer under test
module.exports.all = function (tape, common) {
  var test = function( name, testFunction ){
    return tape( 'peliasStreet: ' + name, testFunction );
  };
  Object.keys( module.exports.tests ).forEach( function( testCase ){
    module.exports.tests[ testCase ]( test, common );
  });
};
// register an assertion on the suite: run `text` through `analyzer` via the
// elasticsearch _analyze API and deep-compare the emitted tokens to `expected`
function analyze( suite, t, analyzer, comment, text, expected ){
  suite.assert( function( done ){
    var params = {
      index: suite.props.index,
      analyzer: analyzer,
      text: text
    };
    suite.client.indices.analyze( params, function( err, res ){
      if( err ){ console.error( err ); }
      t.deepEqual( simpleTokens( res.tokens ), expected, comment );
      done();
    });
  });
}
// reduce the verbose _analyze API response to a bare array of token strings
function simpleTokens( tokens ){
  var out = [];
  for( var i = 0; i < tokens.length; i++ ){
    out.push( tokens[i].token );
  }
  return out;
}