diff --git a/lib/parse-native.js b/lib/parse-native.js index d0aaf60..041199a 100644 --- a/lib/parse-native.js +++ b/lib/parse-native.js @@ -2,7 +2,7 @@ var es = require('event-stream'); var _ = require('lodash'); var Reservoir = require('reservoir'); -var debug = require('debug')('mongodb-schema:parse-native'); +// var debug = require('debug')('mongodb-schema:parse-native'); /** * Returns the type of value as a string. BSON type aware. Replaces `Object` @@ -27,6 +27,16 @@ var getTypeName = function(value) { var addToField; +var addToValue = function(type, value) { + if (type.name === 'String') { + // crop strings at 10k characters + if (value.length > 10000) { + value = value.slice(0, 10000); + } + } + type.values.pushSome(value); +}; + var addToType = function(path, value, schema) { var typeName = getTypeName(value); var type = schema[typeName] = _.get(schema, typeName, { @@ -51,10 +61,7 @@ var addToType = function(path, value, schema) { } else { type.values = _.get(type, 'values', type.name === 'String' ? new Reservoir(100) : new Reservoir(10000)); - // if (type.values.length < 100) { - // type.values.push(value); - // } - type.values.pushSome(value); + addToValue(type, value); } }; @@ -155,6 +162,20 @@ module.exports = function parse(model) { }; var revived = false; + var finalized = false; + + function cleanup() { + if (!finalized) { + finalizeSchema(schema); + finalized = true; + } + if (model && !revived) { + model.set(schema, { + parse: true + }); + } + revived = true; + } var parser = es.through(function write(obj) { _.each(_.keys(obj), function(key) { @@ -163,35 +184,15 @@ module.exports = function parse(model) { schema.count += 1; this.emit('progress', obj); }, function end() { + cleanup(); this.emit('data', schema); this.emit('end'); }); - var finalizer = es.mapSync(finalizeSchema); - - var pipeline = es.pipeline(parser, finalizer, es.map(function(data, cb) { - // debug('result', data); - if (model) { - model.set(data, { - parse: true - }); - } - revived = true; - cb(null, data); - })); - // pass progress events on to the pipeline wrapper - parser.on('progress', function(data) { - pipeline.emit('progress', data); + parser.on('close', function() { + cleanup(); + this.destroy(); }); - pipeline.on('close', function() { - if (!revived && model) { - debug('closed stream early, reviving model with partial schema.'); - model.set(finalizeSchema(schema), { - parse: true - }); - revived = true; - model.trigger('sync'); - } - }); - return pipeline; + + return parser; }; diff --git a/package.json b/package.json index 4defeec..1c9dc12 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "4.1.0", + "version": "4.1.1", "author": "Thomas Rueckstiess ", "license": "Apache-2.0", "homepage": "http://github.com/mongodb-js/mongodb-schema", @@ -67,8 +67,8 @@ "stats-lite": "^2.0.0", "cli-table": "^0.3.1", "js-yaml": "^3.5.2", - "mongodb": "^2.1.8", - "mongodb-collection-sample": "^1.0.0", + "mongodb": "^2.1.9", + "mongodb-collection-sample": "^1.1.2", "mongodb-extended-json": "^1.6.2", "mongodb-ns": "^1.0.3", "numeral": "^1.5.3",