Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trim leading zeroes from house number #110

Merged
merged 5 commits into from May 19, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion import.js
Expand Up @@ -10,6 +10,9 @@ var logger = require( 'pelias-logger' ).get( 'openaddresses' );
var parameters = require( './lib/parameters' );
var importPipeline = require( './lib/importPipeline' );

var adminLookupStream = require('./lib/streams/adminLookupStream');
var deduplicatorStream = require('./lib/streams/deduplicatorStream');

// Pretty-print the total time the import took.
function startTiming() {
var startTime = new Date().getTime();
Expand All @@ -32,5 +35,9 @@ else {

var files = parameters.getFileList(peliasConfig, args);

importPipeline.create( files, args );
var deduplicator = deduplicatorStream.create(args.deduplicate);
var adminLookup = adminLookupStream.create(args.adminValues, peliasConfig);

importPipeline.create( files, args.dirPath, deduplicator, adminLookup );

}
10 changes: 4 additions & 6 deletions lib/importPipeline.js
@@ -1,7 +1,5 @@
var logger = require( 'pelias-logger' ).get( 'openaddresses' );
var peliasConfig = require( 'pelias-config' ).generate();
var adminLookupStream = require('./streams/adminLookupStream');
var deduplicatorStream = require('./streams/deduplicatorStream');
var recordStream = require('./streams/recordStream');
var model = require( 'pelias-model' );
var peliasDbclient = require( 'pelias-dbclient' );
Expand All @@ -22,14 +20,14 @@ var isUSorCAHouseNumberZero = require( './streams/isUSorCAHouseNumberZero' );
* OpenAddresses doesn't contain any) using `admin-lookup`. See the
* documentation: https://github.com/pelias/admin-lookup
*/
function createFullImportPipeline( files, opts, finalStream ){
function createFullImportPipeline( files, dirPath, deduplicatorStream, adminLookupStream, finalStream ){
logger.info( 'Importing %s files.', files.length );

finalStream = finalStream || peliasDbclient();

recordStream.create(files, opts.dirPath)
.pipe(deduplicatorStream.create(opts.deduplicate))
.pipe(adminLookupStream.create(opts.adminValues, peliasConfig))
recordStream.create(files, dirPath)
.pipe(deduplicatorStream)
.pipe(adminLookupStream)
.pipe(isUSorCAHouseNumberZero.create())
.pipe(model.createDocumentMapperStream())
.pipe(finalStream);
Expand Down
3 changes: 3 additions & 0 deletions lib/streams/cleanupStream.js
@@ -1,4 +1,5 @@
var through2 = require( 'through2' );
var _ = require('lodash');

var cleanup = require( '../cleanup' );

Expand All @@ -18,6 +19,8 @@ function createCleanupStream() {
}
});

record.NUMBER = _.trimStart(record.NUMBER, '0');

next(null, record);
});
}
Expand Down
8 changes: 7 additions & 1 deletion lib/streams/isUSorCAHouseNumberZero.js
@@ -1,14 +1,20 @@
var filter = require('through2-filter');
var _ = require('lodash');

// Matches a value made up exclusively of one or more '0' characters.
var allZeros = /^0+$/;

/**
 * Tells whether a record's house number is nothing but zeros
 * (e.g. '0', '00', '0000').
 *
 * @param {object} record - must expose `address_parts.number`
 * @returns {boolean} true when the house number matches `allZeros`
 */
function isZeroHouseNumber(record) {
  var houseNumber = record.address_parts.number;
  return allZeros.test(houseNumber);
}

/**
 * Tells whether a record belongs to the USA or Canada, judged by
 * comparing `record.parent.country_a` against the single-element
 * arrays ['USA'] and ['CAN'] with lodash's `_.isEqual`.
 *
 * @param {object} record - must expose `parent.country_a`
 * @returns {boolean} true when the country list equals ['USA'] or ['CAN']
 */
function isUSorCA(record) {
  var countryCodes = record.parent.country_a;

  if (_.isEqual(countryCodes, ['USA'])) {
    return true;
  }

  return _.isEqual(countryCodes, ['CAN']);
}

module.exports.create = function create() {
return filter.obj(function(record) {
if (record.address_parts.number === '0' && isUSorCA(record)) {
if (isZeroHouseNumber(record) && isUSorCA(record)) {
return false;
}
return true;
Expand Down
3 changes: 2 additions & 1 deletion package.json
Expand Up @@ -16,7 +16,8 @@
"pelias-model": "4.0.0",
"pelias-wof-admin-lookup": "^2.0.0",
"through2": "2.0.1",
"through2-filter": "^2.0.0"
"through2-filter": "^2.0.0",
"through2-map": "^2.0.0"
},
"devDependencies": {
"deep-diff": "^0.3.3",
Expand Down