Skip to content

Commit

Permalink
feat(analysis): append ordinal suffix to numeric street names (#508)
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Jun 14, 2022
1 parent b96f7dd commit 0a650e4
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 2 deletions.
4 changes: 4 additions & 0 deletions lib/analysis/Token.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ class Token {
return _.isString(this.body) && !_.isEmpty(this.body);
}

isNumeric() {
return /^\d+$/.test(this.body);
}

findCase() {
if (this.body === _.toLower(this.body)) { return Token.LOWERCASED; }
if (this.body === _.toUpper(this.body)) { return Token.UPPERCASED; }
Expand Down
45 changes: 45 additions & 0 deletions lib/analysis/ordinals.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
const _ = require('lodash');

// The ordinals function appends an ordinal suffix to a numeric street name
// (e.g. '30 street' becomes '30th street'); at most one token is changed per input.
// note: this is currently only configured for the English language

// Builds an analyzer which appends an English ordinal suffix to the first
// qualifying numeric token of a street name.
//
// opts.dict.streetTypes and opts.dict.directionalExpansions are looked up by
// lowercased token body; only the presence of a key matters here.
//
// The returned function mutates the matching token in place and returns the
// same tokens array it was given.
function ordinals(opts) {
  // true when the lowercased token body is a key of the supplied dictionary
  const inDictionary = (dictionary, token) => _.has(dictionary, _.toLower(token.body));

  return (tokens) => {
    const finalIndex = tokens.length - 1;

    // locate the first token which satisfies every rule below
    const matchIndex = tokens.findIndex((token, idx) => {
      // the final token is never considered: no street type can follow it
      if (idx >= finalIndex) { return false; }

      // the token itself must be entirely numeric
      if (!token.isNumeric()) { return false; }

      // the following token must be a street type
      if (!inDictionary(opts.dict.streetTypes, tokens[idx + 1])) { return false; }

      // the token must be leftmost, or else directly follow a directional token
      return idx === 0 || inDictionary(opts.dict.directionalExpansions, tokens[idx - 1]);
    });

    // only the first match is modified, so there is at most one replacement
    if (matchIndex !== -1) {
      const match = tokens[matchIndex];
      match.body += englishOrdinalSuffix(match.body);
    }

    return tokens;
  };
}

// Returns the English ordinal suffix ('st', 'nd', 'rd' or 'th') for an integer.
//
// i may be a number or a string. Strings made up solely of digits are handled
// at any length; every other input goes through the same numeric remainder
// arithmetic as before (with the usual string-to-number coercion).
function englishOrdinalSuffix(i) {
  // Only the final two digits decide the suffix. For all-digit strings read them
  // directly from the text: converting a very long digit string to a Number loses
  // precision beyond Number.MAX_SAFE_INTEGER, which would corrupt the remainders.
  const isDigitString = typeof i === 'string' && /^\d+$/.test(i);
  const value = isDigitString ? Number(i.slice(-2)) : i;

  const j = value % 10;
  const k = value % 100;

  // 11, 12 and 13 are irregular: they take 'th' despite ending in 1, 2 and 3
  if (j === 1 && k !== 11) { return 'st'; }
  if (j === 2 && k !== 12) { return 'nd'; }
  if (j === 3 && k !== 13) { return 'rd'; }
  return 'th';
}

// export the ordinals analyzer factory; call it with ({ dict }) to obtain
// the function which is applied to an array of tokens
module.exports = ordinals;
4 changes: 4 additions & 0 deletions lib/cleanup_v2.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const _ = require('lodash');
const dictionary = require('./analysis/dictionary');
const synonyms = require('./analysis/synonyms');
const ordinals = require('./analysis/ordinals');
const Token = require('./analysis/Token');

/**
Expand Down Expand Up @@ -116,6 +117,9 @@ function cleanupStreetName(input) {
// capitalize lowercased tokens (leaving mixed case tokens unchanged)
tokens.forEach(token => token.selectivelyCapitalize());

// add ordinals to english numeric street names
tokens = ordinals({ dict })(tokens);

// convert objects to strings and join by whitespace
return tokens.map(token => token.body).join(' ');
}
Expand Down
25 changes: 23 additions & 2 deletions test/cleanup_v2.js
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,27 @@ tape('contract english diagonals - last token position', (t) => {
t.end();
});

// add missing English street name ordinals
tape('add missing English street name ordinals', (t) => {
  // [input, expected] pairs, asserted in the order listed
  const cases = [
    // a directional followed by a number, without and with an existing suffix
    ['W 26 St', 'West 26th Street'],
    ['W 26th St', 'West 26th Street'],

    // single digits
    ['1 St', '1st Street'],
    ['2 Rd', '2nd Road'],
    ['3 Ave', '3rd Avenue'],
    ['4 Ln', '4th Lane'],

    // 11, 12 and 13 take 'th'
    ['11 St', '11th Street'],
    ['12 Rd', '12th Road'],
    ['13 Ave', '13th Avenue'],
    ['14 Ln', '14th Lane'],

    // three digit numbers
    ['101 St', '101st Street'],
    ['102 Rd', '102nd Road'],
    ['103 Ave', '103rd Avenue'],
    ['104 Ln', '104th Lane'],

    // inputs which are expected to be left without an ordinal suffix
    ['no 1 st', 'No 1 Street'],
    ['no #1 st', 'No #1 Street'],
  ];

  for (const [input, expected] of cases) {
    t.equal(analyzer(input), expected);
  }

  t.end();
});

// --- NOOP inputs which should never change ---

// no-ops, these inputs should not change regardless of the algorithm used
Expand Down Expand Up @@ -223,7 +244,7 @@ tape('misc', (t) => {
t.equal(analyzer('YELLOWSTONE BLVD'), 'Yellowstone Boulevard');
t.equal(analyzer('YESHIVA LN'), 'Yeshiva Lane');
t.equal(analyzer('WYGANT PL'), 'Wygant Place');
t.equal(analyzer('W 262 ST'), 'West 262 Street');
t.equal(analyzer('W 262 ST'), 'West 262nd Street');
t.equal(analyzer('W 26TH ST'), 'West 26th Street');
t.equal(analyzer('WILLIE MC DONALD WAY'), 'Willie Mc Donald Way');
t.equal(analyzer('West 93rd Street'), 'West 93rd Street');
Expand All @@ -232,7 +253,7 @@ tape('misc', (t) => {
t.equal(analyzer('E HAMPTON BLVD'), 'East Hampton Boulevard');
t.equal(analyzer('MARATHON PKWY'), 'Marathon Parkway');
t.equal(analyzer('ANDREWS AVE S'), 'Andrews Avenue South');
t.equal(analyzer('W 13 ST'), 'West 13 Street');
t.equal(analyzer('W 13 ST'), 'West 13th Street');
t.end();
});

Expand Down

0 comments on commit 0a650e4

Please sign in to comment.