diff --git a/README.md b/README.md index ec87c1f..c36d0d1 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,19 @@ -# mongodb-schema - -[![build status](https://secure.travis-ci.org/mongodb-js/mongodb-schema.png)](http://travis-ci.org/mongodb-js/mongodb-schema) -[![Coverage Status](https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg)](https://coveralls.io/r/mongodb-js/mongodb-schema) -[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +# mongodb-schema [![][npm_img]][npm_url] [![][travis_img]][travis_url] [![][coverage_img]][coverage_url] [![][gitter_img]][gitter_url] Infer a probabilistic schema for a MongoDB collection. +A high-level view of the class interactions is as follows: + +![](./docs/mongodb-schema_diagram.png) + ## Example -`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples we'll install the node.js driver. As well, we'll need some data -in a collection to derive the schema of: +`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples we'll install the node.js driver. +As well, we'll need some data in a collection to derive the schema of. + +Make sure you have a `mongod` running on localhost on port 27017 (or change the example accordingly). Then, do: -1. `npm install mongodb mongodb-schema`. +1. `npm install mongodb mongodb-schema` 2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test` 3. Create a new file `parse-schema.js` and paste in the following code: ```javascript @@ -30,63 +32,92 @@ in a collection to derive the schema of: }); ``` 4. 
When we run the above with `node parse-schema.js`, we'll see something - like the following: + like the following (some fields not present here for clarity): ```javascript { - ns: 'test.test', - count: 4, // The number of documents sampled - fields: [ // A collection of Field objects @see lib/field.js + "count": 4, // parsed 4 documents + "ns": "test.test", // namespace + "fields": [ // an array of Field objects, @see `./lib/field.js` { - name: "_id", - probability: 1, // Just as we expected, all 4 documents had `_id` - unique: 4, // All 4 values for `_id` were unique - types: [ + "name": "_id", + "count": 4, // 4 documents counted with _id + "type": "Number", // the type of _id is `Number` + "probability": 1, // all documents had an _id field + "unique": 4, // 4 unique values found + "has_duplicates": false, // therefore no duplicates + "types": [ // an array of Type objects, @see `./lib/types/` { - name: "Number", // The only type seen was a Number - probability: 1, - unique: 4 + "name": "Number", // name of the type + "count": 4, // 4 numbers counted + "probability": 1, + "unique": 4, + "values": [ // array of encountered values + 1, + 2, + 3, + 4 + ] } ] }, { - name: "a", // Unlike `_id`, `a` was present in only 3 of 4 documents - probability: 0.75, - unique: 3, // Of the 3 values seen, all 3 were unique - // As expected, Boolean, String, and Number values were seen. 
- // A handy instance of `Undefined` is also provided to represent missing data", + "name": "a", + "count": 3, // only 3 documents with field `a` counted + "probability": 0.75, // hence probability 0.75 + "type": [ // found these types + "Boolean", + "String", + "Number", + "Undefined" // for convenience, we treat Undefined as its own type + ], + "unique": 3, + "has_duplicates": false, // there were no duplicate values "types": [ { - name: "Boolean", - probability: 0.25, - unique: 1 + "name": "Boolean", + "count": 1, + "probability": 0.25, // probabilities for types are calculated factoring in Undefined + "unique": 1, + "values": [ + true + ] }, { - name: "String", - probability: 0.25, - unique: 1 + "name": "String", + "count": 1, + "probability": 0.25, + "unique": 1, + "values": [ + "true" + ] }, { - name: "Number", - probability: 0.25, - unique: 1 + "name": "Number", + "count": 1, + "probability": 0.25, + "unique": 1, + "values": [ + 1 + ] }, { - name: "Undefined", - probability: 0.25 + "name": "Undefined", + "count": 1, + "probability": 0.25, + "unique": 0 } ] } ] } - ``` +``` ### More Examples `mongodb-schema` supports all [BSON types][bson-types]. Checkout [the tests][tests] for more usage examples. - ## Installation ``` @@ -108,9 +139,6 @@ Apache 2.0 Under the hood, `mongodb-schema` uses [ampersand-state][ampersand-state] and [ampersand-collection][ampersand-collection] for modeling [Schema][schema], [Field][field]'s, and [Type][type]'s. 
-A high-level view of the class interactions is as follows: - -![](./docs/mongodb-schema_diagram.png) @@ -121,3 +149,12 @@ A high-level view of the class interactions is as follows: [schema]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/schema.js [field]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/field.js [type]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/type.js + +[travis_img]: https://secure.travis-ci.org/mongodb-js/mongodb-schema.svg?branch=master +[travis_url]: https://travis-ci.org/mongodb-js/mongodb-schema +[npm_img]: https://img.shields.io/npm/v/mongodb-schema.svg +[npm_url]: https://www.npmjs.org/package/mongodb-schema +[coverage_img]: https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg +[coverage_url]: https://coveralls.io/r/mongodb-js/mongodb-schema +[gitter_img]: https://badges.gitter.im/Join%20Chat.svg +[gitter_url]: https://gitter.im/mongodb-js/mongodb-js diff --git a/docs/mongodb-schema_diagram.png b/docs/mongodb-schema_diagram.png index c7003b4..134b829 100644 Binary files a/docs/mongodb-schema_diagram.png and b/docs/mongodb-schema_diagram.png differ diff --git a/examples/parse-schema.js b/examples/parse-schema.js index 42e1e8a..2cfac2e 100644 --- a/examples/parse-schema.js +++ b/examples/parse-schema.js @@ -4,7 +4,7 @@ var connect = require('mongodb'); connect('mongodb://localhost:27017/test', function(err, db){ if(err) return console.error(err); - parseSchema('test', db.collection('test').find(), function(err, schema){ + parseSchema('test.test', db.collection('test').find(), function(err, schema){ if(err) return console.error(err); console.log(JSON.stringify(schema, null, 2)); diff --git a/lib/collection-parent-mixin.js b/lib/collection-parent-mixin.js new file mode 100644 index 0000000..3f27465 --- /dev/null +++ b/lib/collection-parent-mixin.js @@ -0,0 +1,14 @@ +var _ = require('lodash'); +var AmpersandCollection = require('ampersand-collection'); + +/** + * 
Collection with this mixin passes its parent down to its values + */ +module.exports = { + set: function (models, options) { + options = _.defaults({ + parent: this.parent + }, options || {}); + return AmpersandCollection.prototype.set.call(this, models, options); + } +}; diff --git a/lib/field-collection.js b/lib/field-collection.js index 8ff84fa..2c6ea4e 100644 --- a/lib/field-collection.js +++ b/lib/field-collection.js @@ -1,11 +1,15 @@ var Collection = require('ampersand-collection'); var lodashMixin = require('ampersand-collection-lodash-mixin'); +var parentMixin = require('./collection-parent-mixin'); +var Field = require('./field'); /** * Container for a list of Fields. */ -var FieldCollection = Collection.extend(lodashMixin, { +var FieldCollection = Collection.extend(lodashMixin, parentMixin, { + modelType: 'FieldCollection', mainIndex: 'name', + model: Field, comparator: function (a, b) { // make sure _id is always at top, even in presence of uppercase fields var aId = a.getId(); @@ -15,8 +19,41 @@ var FieldCollection = Collection.extend(lodashMixin, { // otherwise sort case-insensitively return (aId.toLowerCase() < bId.toLowerCase()) ? -1 : 1; }, - model: function(attrs, options) { - return new attrs.klass(attrs, options); + /** + * adds a new name/value pair to the correct field, and creates the + * field first if it doesn't exist yet. Leave it to field.types to + * add the value. + * + * @param {String} name name of the field + * @param {Any} value value to be added + */ + addToField: function (name, value) { + // get or create field + var field = this.get(name); + if (!field) { + field = this.add({ + name: name, + parent: this.parent + }); + if (this.parent) this.parent.trigger('change:fields.length'); + /** + * first time we see this field. We need to compensate for + * the Undefined values we missed so far for this field, + * by setting the count to the parent count and adjusting + * total_count as well. 
+ */ + if (this.parent && this.parent.count > 0) { + var undef = field.types.add({name: 'Undefined'}); + undef.count += this.parent.count; + field.total_count += undef.count; + } + } + // undefined are not counted towards the field's count + if(value !== undefined) field.count += 1; + // but they are counted towards the field's total_count + field.total_count += 1; + field.types.addToType(value); } }); + module.exports = FieldCollection; diff --git a/lib/field.js b/lib/field.js index 6b9cfee..0a15b34 100644 --- a/lib/field.js +++ b/lib/field.js @@ -1,14 +1,12 @@ -var _ = require('lodash'); var State = require('ampersand-state'); var TypeCollection = require('./type-collection'); -var ValueCollection = require('./value-collection'); -var FieldCollection = require('./field-collection'); -var debug = require('debug')('mongodb-schema:field'); +var _ = require('lodash'); /** * Describes a single field in the schema based on sampled values. */ -var Field = State.extend({ +module.exports = State.extend({ + modelType: 'Field', idAttribute: 'name', props: { /** @@ -25,24 +23,10 @@ var Field = State.extend({ type: 'number', default: 0 }, - /** - * Probability of this field being set. - */ - probability: { - type: 'number', - default: 0 - }, - /** - * Number of unique values seen. - */ - unique: { - type: 'number', - default: 0 - }, /** * If using shortened keys to save space, it is expected this be the "real" * name of the field that could be input by the user. For example, - * if `u` is the field's `name`, `username` is the field's title + * if `u` is the field's `name`, `username` is the field's `title` * and is much friendlier for humans. */ title: { @@ -64,7 +48,13 @@ var Field = State.extend({ description: 'string', }, session: { - parent: 'state' + /** + * Number of counts of all children types, including Undefined. 
+ */ + total_count: { + type: 'number', + default: 0 + } }, derived: { /** @@ -84,205 +74,75 @@ var Field = State.extend({ return this.types.pluck('name'); } }, + probability: { + cache: false, + fn: function () { + if (!this.parent) return null; + return this.count / this.parent.count; + } + }, /** - * The total number of documents we would see if always set. - * This differs from `#count` as it is the value used to calculate - * the probability of this field's children. @see #commit() + * How many unique values. Not using cached property here + * because it depends on Type#unique which is also not + * cached to avoid excessive recalculations during parsing. + * @see Type#unique + * @returns {Number} */ - total: { - deps: ['count', 'probability'], - fn: function() { - if (this.probability === 1) return this.count; - var parentIsArray = this.collection.parent.type === 'Array'; - if (parentIsArray) { - return _.sum(this.types.pluck('count')); - } - return (this.count / this.probability); + unique: { + cache: false, + fn: function () { + return _.sum(this.types.pluck('unique')); } }, /** * Does this field contain any duplicate values? + * Not using cached property here because it depends on + * Type#unique which is also not cached to avoid excessive + * recalculations during parsing. + * @see Type#unique * @returns {Boolean} */ has_duplicates: { - deps: ['unique', 'count'], + cache: false, fn: function() { return this.unique < this.count; } + }, + /** + * Convenience alias to access sub-fields. Returns + * undefined if this Field does not have a 'Document' type. + * @returns {FieldCollection} + */ + fields: { + deps: ['types.length'], + fn: function() { + var objType = this.types.get('Document'); + return objType ? objType.fields : undefined; + } + }, + arrayFields: { + deps: ['types.length'], + fn: function() { + var arrType = this.types.get('Array'); + return arrType ? arrType.fields : undefined; + } } }, collections: { /** * Types seen for this field. 
*/ - types: TypeCollection, - /** - * A sample of values seen for this field. - */ - values: ValueCollection, - /** - * Child fields. - */ - fields: FieldCollection - }, - /** - * @constructs Field - */ - initialize: function() { - this.listenTo(this.types, 'add', this.onTypeAdded); - this.listenTo(this.types, 'remove', this.onTypeRemoved); - this.listenTo(this.types, 'reset refresh', this.onTypeReset); - }, - /** - * When new types are added, trigger a change event to recalculate `this.type` - * and add listeners so any operations on `type.values` are reflected on - * `this.values`. - * - * @param {Type} type that's being added. - * @param {TypeCollection} collection the type was added to. - * @param {Object} options - */ - onTypeAdded: function(type) { - /** - * Currently have to manually trigger events on collections so - * derived properties are recalculated at the right time. - * In this case, triggering `change:types.length` will cause - * the `type` property to be recalculated correctly. - */ - this.trigger('change:types.length'); - this.listenTo(type.values, 'add', this.onValueAdded); - this.listenTo(type.values, 'remove', this.onValueRemoved); - this.listenTo(type.values, 'reset', this.onValueReset); - }, - /** - * @see Schema#onTypeAdded - * - * @param {Type} type being removed. - * @param {TypeCollection} collection it was removed from. - * @param {Object} options - */ - onTypeRemoved: function(type) { - this.trigger('change:types.length'); - this.stopListening(type.values, 'add', this.onValueAdded); - this.stopListening(type.values, 'remove', this.onValueRemoved); - this.stopListening(type.values, 'reset', this.onValueReset); - }, - onTypeReset: function() { - this.trigger('change:types.length'); - }, - /** - * @param {ValueCollection} collection the value was added to. - * @param {Value} value being added. 
- * @param {Object} options - */ - onValueAdded: function(value) { - this.values.add(value); - }, - /** - * @param {ValueCollection} collection the value was removed from. - * @param {Value} value being removed. - * @param {Object} options - */ - onValueRemoved: function(value) { - this.values.remove(value); - }, - onValueReset: function() { - this.values.reset(); - }, - /** - * We've finished parsing a new document! Finalize all of the probabilities - * and make sure all of our child collections are nicely sorted. - * If we have any subfields, call `commit()` on each of those as well. - */ - commit: function() { - var newprob; - newprob = this.count / this.parent.count; - if (newprob !== this.probability) { - this.probability = newprob; - } - var undef = this.types.get('Undefined'); - if ((this.total - this.count) <= 0) { - if(undef){ - debug('removing extraneous Undefined for `%s`', this.getId()); - this.types.remove({ - name: 'Undefined' - }); - } - // No undefined types to manage - } else { - if (!undef) { - debug('adding Undefined for `%s`', this.getId()); - undef = this.types.add({ - name: 'Undefined', - unique: 1 - }); - } - undef.count = (this.total - this.count); - undef.probability = (undef.count - this.count); - } - this.types.map(function(type) { - type.probability = type.count / this.total; - type.unique = _.unique(type.values.pluck('value')).length; - }.bind(this)); - this.unique = _.sum(this.types.pluck('unique')); - this.types.sort(); - - if (this.fields.length > 0) { - this.fields.map(function(field) { - field.commit(); - }); - } + types: TypeCollection }, serialize: function() { var res = this.getAttributes({ props: true, derived: true }, true); - if (this.fields.length > 0) { + res.types = this.types.serialize(); + if (this.fields) { res.fields = this.fields.serialize(); - } else { - res.values = this.values.serialize(); - res.types = this.types.serialize(); } return res; - }, -}); -module.exports = Field; - -/** - * A basic field has no 
descendant fields, such as `String`, `ObjectID`, - * `Boolean`, or `Date`. - */ -module.exports.Basic = Field.extend({}); - -module.exports.EmbeddedArray = Field.extend({ - props: { - type: { - type: 'string', - default: 'Array' - }, - lengths: { - type: 'array', - default: function() { - return []; - } - } - }, - derived: { - average_length: { - deps: ['lengths'], - fn: function() { - return _.sum(this.lengths) / this.lengths.length; - } - } - } -}); - -module.exports.EmbeddedDocument = Field.extend({ - props: { - type: { - type: 'string', - default: 'Object' - } } }); diff --git a/lib/index.js b/lib/index.js index b181b68..545244e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -14,10 +14,14 @@ module.exports = function(ns, docs, fn) { }); var src; - if(docs.stream){ + // MongoDB Cursors + if(docs.stream && (typeof docs.stream === 'function')) { src = docs.stream(); - } - else{ + // Streams + } else if (docs.pipe && (typeof docs.pipe === 'function')) { + src = docs; + // Arrays + } else { src = es.readArray(docs); } diff --git a/lib/parser.js b/lib/parser.js deleted file mode 100644 index 656c10a..0000000 --- a/lib/parser.js +++ /dev/null @@ -1,82 +0,0 @@ -var _ = require('lodash'); -var getTypeName = require('./type').getNameFromValue; -var Field = require('./field'); - -function onFieldSampled(schema, name, value) { - var typeName = getTypeName(value); - if (typeName === 'Array') { - onEmbeddedArray(schema, name, typeName, value); - } else if (typeName === 'Object') { - onEmbeddedDocument(schema, name, typeName, value); - } else { - onBasicField(schema, name, typeName, value); - } -} - -function onBasicField(schema, name, typeName, value) { - var field = schema.fields.get(name); - if (!field) { - field = schema.fields.add({ - name: name, - klass: Field.Basic, - parent: schema - }); - } - field.count += 1; - - var type = field.types.get(typeName); - if (!type) { - type = field.types.add({ - name: typeName, - }); - } - type.count += 1; - - type.values.add({ - 
value: value - }); -} - -function onEmbeddedArray(schema, name, typeName, value) { - var field = schema.fields.get(name); - - if (!field) { - field = schema.fields.add({ - name: name, - klass: Field.EmbeddedArray, - parent: schema - }); - } - - field.count += 1; - field.lengths.push(value.length); - field.trigger('change:lengths'); - _.each(value, function(d) { - var typeName = getTypeName(d); - if (typeName === 'Object') { - _.each(d, function(val, key) { - onBasicField(field, key, getTypeName(val), val); - }); - } else { - onBasicField(field, '__basic__', typeName, d); - } - }); -} - -function onEmbeddedDocument(schema, name, typeName, value) { - var field = schema.fields.get(name); - - if (!field) { - field = schema.fields.add({ - name: name, - klass: Field.EmbeddedDocument, - parent: schema - }); - } - field.count += 1; - _.each(value, function(val, key) { - onFieldSampled(field, key, val); - }); -} - -module.exports.parse = onFieldSampled; diff --git a/lib/schema.js b/lib/schema.js index 730691a..c1bd15c 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,47 +1,40 @@ var es = require('event-stream'); var _ = require('lodash'); -var State = require('ampersand-state'); -var parser = require('./parser'); -var FieldCollection = require('./field-collection'); +var DocumentType = require('./types').Document; /** - * The top level schema state. - * @class + * The top level schema document, like a Document type + * but with extra stream interface. 
*/ -var Schema = State.extend({ +var Schema = DocumentType.extend({ idAttribute: 'ns', props: { ns: { type: 'string' }, - count: { - type: 'number', - default: 0 - } - }, - collections: { - fields: FieldCollection - }, - parse: function(doc, done) { - var schema = this; - schema.count += 1; - _.each(doc, function(val, key) { - parser.parse(schema, key, val); - }); - schema.fields.map(function(field) { - field.commit(); - }); - if (_.isFunction(done)) { - done(); + name: { + default: 'Schema' } }, stream: function() { - var schema = this; - return es.map(function(doc, done) { - schema.parse(doc, function(err) { - done(err, doc); - }); + var that = this; + var stream = es.map(function(doc, done) { + that.parse(doc); + that.trigger('data', doc, that); + done(null, doc); + }).on('end', function () { + that.trigger('end', that); }); + return stream; + }, + serialize: function() { + var res = this.getAttributes({ + props: true, + derived: true + }, true); + res = _.omit(res, ['total_count', 'modelType', 'name']); + res.fields = this.fields.serialize(); + return res; } }); diff --git a/lib/type-collection.js b/lib/type-collection.js index 606d3a4..b3529b8 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -1,20 +1,79 @@ var Collection = require('ampersand-collection'); var lodashMixin = require('ampersand-collection-lodash-mixin'); -var type = require('./type'); +var parentMixin = require('./collection-parent-mixin'); var assert = require('assert'); +var _ = require('lodash'); +var Type = require('./types/type'); +var types = require('./types'); -module.exports = Collection.extend(lodashMixin, { +/** + * Helper function to get the type name given a value + * @param {Any} value value to get type for + * @return {String} type name, e.g. 
Boolean or ObjectID + */ +var getTypeName = function (value) { + var T; + if (_.has(value, '_bsontype')) { + T = value._bsontype; + } else { + T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); + } + // don't want to create naming conflict with javascript Object + if (T === 'Object') T = 'Document'; + return T; +}; + + +module.exports = Collection.extend(lodashMixin, parentMixin, { + modelType: 'TypeCollection', mainIndex: 'name', - model: function(attrs, options) { - var Klass = type[attrs.name]; - assert(Klass, 'No value type for ' + attrs.name); - return new Klass(attrs, options); - }, /** * Sort by probability descending, with Undefined always last. + * Note that the application has to call collection.sort() explicitly; + * it will not be sorted automatically for performance reasons. */ comparator: function(model) { if (model.getId() === 'Undefined') return 0; return model.probability * -1; + }, + model: function(attrs, options) { + // `types` is required at the top of this module (not at call time); beware the circular import ./types -> ./types/array -> ./type-collection + var Klass = types[attrs.name]; + assert(Klass, 'No value type for ' + attrs.name); + return new Klass(attrs, options); + }, + isModel: function(model) { + // accept any instance of the Type base class as a model of this collection + return (model instanceof Type); + }, + /** + * adds a new value to the correct type, and creates the type first + * if it doesn't exist yet. 
+ * + * @param {Any} value value to be added + */ + addToType: function(value) { + var newTypeAdded = false; + var typeName = getTypeName(value); + // get or create type + var type = this.get(typeName); + if (!type) { + type = this.add({ + name: typeName + }); + newTypeAdded = true; + } + // leave it to type to add the value + type.parse(value); + if (newTypeAdded && this.parent) this.parent.trigger('change:types.length'); + + // trigger probability change event in all direct type children + this.each(function (type) { + type.trigger('change:probability'); + }); + }, + serialize: function() { + this.sort(); + return Collection.prototype.serialize.call(this); } }); diff --git a/lib/type.js b/lib/type.js deleted file mode 100644 index 2e72514..0000000 --- a/lib/type.js +++ /dev/null @@ -1,151 +0,0 @@ -var State = require('ampersand-state'); -var _ = require('lodash'); -var ValueCollection = require('./value-collection'); - -var Type = State.extend({ - idAttribute: 'name', - props: { - name: { - type: 'string' - }, - count: { - type: 'number', - default: 0 - }, - probability: { - type: 'number', - default: 0 - }, - unique: { - type: 'number', - default: 0 - } - }, - collections: { - values: ValueCollection - } -}); - -exports.getNameFromValue = function(value) { - var T; - if (_.has(value, '_bsontype')) { - T = value._bsontype; - } else { - T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); - } - return T; -}; - -exports.String = Type.extend({ - props: { - name: { - default: 'String' - } - } -}); - -exports.Number = Type.extend({ - props: { - name: { - default: 'Number' - } - } -}); - -exports.Long = Type.extend({ - props: { - name: { - default: 'Long' - } - } -}); - -exports.Null = Type.extend({ - props: { - name: { - default: 'Null' - } - } -}); - -exports.Timestamp = Type.extend({ - props: { - name: { - default: 'Timestamp' - } - } -}); - -exports.Boolean = Type.extend({ - props: { - name: { - default: 'Boolean' - } - } -}); - 
-exports.Date = Type.extend({ - props: { - name: { - default: 'Date' - } - } -}); - -exports.ObjectID = Type.extend({ - props: { - name: { - default: 'ObjectID' - } - } -}); - -exports.Undefined = Type.extend({ - props: { - name: { - default: 'Undefined' - } - } -}); - -exports.Binary = Type.extend({ - props: { - name: { - default: 'Binary' - } - } -}); - -exports.MaxKey = Type.extend({ - props: { - name: { - default: 'MaxKey' - } - } -}); - -exports.MinKey = Type.extend({ - props: { - name: { - default: 'MinKey' - } - } -}); - -exports.Object = Type.extend({ - props: { - name: { - type: 'string', - default: 'Object' - } - } -}); - -exports.Array = Type.extend({ - props: { - name: { - type: 'string', - default: 'Array' - } - } -}); diff --git a/lib/types/array.js b/lib/types/array.js new file mode 100644 index 0000000..9a9a8f9 --- /dev/null +++ b/lib/types/array.js @@ -0,0 +1,74 @@ +var Type = require('./type'); +var _ = require('lodash'); +var format = require('util').format; +var TypeCollection = require('../type-collection'); +var assert = require('assert'); + +/** + * Arrays have additional .lengths and .average_length properties + * and group their values in a nested .types collection + */ +module.exports.Array = Type.extend({ + props: { + name: { + default: 'Array' + }, + lengths: { + type: 'array', + default: function() { + return []; + } + } + }, + collections: { + types: TypeCollection + }, + derived: { + total_count: { + cache: false, + fn: function() { + return _.sum(this.lengths); + } + }, + average_length: { + deps: ['count'], + fn: function() { + return this.total_count / this.count; + } + }, + /** + * Convenience alias to access sub-fields. Returns + * undefined if this Array does not have a 'Document' type. + * @returns {FieldCollection} + */ + fields: { + deps: ['types.length'], + fn: function() { + var objType = this.types.get('Document'); + return objType ? 
objType.fields : undefined; + } + } + }, + parse: function(arr) { + assert(_.isArray(arr), format('value must be array, got `%s`', arr)); + + _.each(arr, function (val) { + this.types.addToType(val); + }.bind(this)); + + this.lengths.push(arr.length); + this.count += 1; + }, + serialize: function() { + var res = this.getAttributes({ + props: true, + derived: true + }, true); + res = _.omit(res, ['total_count', 'modelType']); + res.types = this.types.serialize(); + if (this.fields) { + res.fields = this.fields.serialize(); + } + return res; + } +}); diff --git a/lib/types/constant.js b/lib/types/constant.js new file mode 100644 index 0000000..9ed44e1 --- /dev/null +++ b/lib/types/constant.js @@ -0,0 +1,62 @@ +var Type = require('./type'); + +/** + * Types that do not need to store any values + */ +var ConstantType = Type.extend({ + derived: { + unique: { + deps: ['count'], + fn: function () { + // more than 1 constant value means no longer unique + return Math.min(this.count, 1); + } + } + } +}); + +module.exports.Null = ConstantType.extend({ + props: { + name: { + default: 'Null' + } + } +}); + +module.exports.Undefined = ConstantType.extend({ + props: { + name: { + default: 'Undefined' + } + }, + derived: { + unique: { + fn: function () { + // undefined does not count as a value + return 0; + } + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/MaxKey.html + */ +module.exports.MaxKey = ConstantType.extend({ + props: { + name: { + default: 'MaxKey' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/MinKey.html + */ +module.exports.MinKey = ConstantType.extend({ + props: { + name: { + default: 'MinKey' + } + } +}); diff --git a/lib/types/document.js b/lib/types/document.js new file mode 100644 index 0000000..53ddc73 --- /dev/null +++ b/lib/types/document.js @@ -0,0 +1,43 @@ +var Type = require('./type'); +var FieldCollection = require('../field-collection'); +var assert = require('assert'); +var _ = 
require('lodash'); +var format = require('util').format; + +/** + * Documents have sub-fields stored in .fields + */ +exports.Document = Type.extend({ + props: { + name: { + default: 'Document' + } + }, + collections: { + fields: FieldCollection + }, + /** + * parse sub-document and add each key/value to this.fields + * @param {Object} obj The sub-document to be parsed + */ + parse: function(obj) { + // parse sub-document and add to this.fields + assert(_.isPlainObject(obj), format('value must be object, got `%s`', obj)); + + /** + * this is a nice way to handle the Undefined values. It iterates over the union + * of object keys and existing field names, automatically inserting undefined + * for the fields that are not in the current object. + * + * Only caveat is that when a field is created for the first time, we need to + * retrospectively bump up the undefined count. That's taken care of in + * @see FieldCollection#addToField + */ + var union = _.union(_.keys(obj), this.fields.pluck('name')); + _.each(union, function(key) { + this.fields.addToField(key, obj[key]); + }.bind(this)); + + this.count += 1; + } +}); diff --git a/lib/types/index.js b/lib/types/index.js new file mode 100644 index 0000000..b5ac699 --- /dev/null +++ b/lib/types/index.js @@ -0,0 +1,8 @@ +var _ = require('lodash'); + +_.extend(module.exports, + require('./constant'), + require('./primitive'), + require('./array'), + require('./document') +); diff --git a/lib/types/primitive.js b/lib/types/primitive.js new file mode 100644 index 0000000..5475b17 --- /dev/null +++ b/lib/types/primitive.js @@ -0,0 +1,166 @@ +var Type = require('./type'); +var _ = require('lodash'); +var ValueCollection = require('../value-collection'); + +/** + * Primitive types store their values and have a .unique counter + */ +var PrimitiveType = exports.PrimitiveType = Type.extend({ + derived: { + unique: { + /** + * we're not using a cache here for performance reasons: listening + * to all add/remove/reset/sync events of 
PrimitiveType#values and + * recalculating uniqueness after each document is not necessary. + */ + cache: false, + fn: function () { + return _.unique(this.values.pluck('value')).length; + } + } + }, + collections: { + values: ValueCollection + }, + /** + * adds the value to the value collection. Just passing it through. + * @param {Any} value value to be added + */ + parse: function (value) { + this.values.add({value: value}); + this.count += 1; + } +}); + + +// --- Native Javascript Types --- + +exports.String = PrimitiveType.extend({ + props: { + name: { + default: 'String' + } + } +}); + +exports.Number = PrimitiveType.extend({ + props: { + name: { + default: 'Number' + } + } +}); + +exports.Boolean = PrimitiveType.extend({ + props: { + name: { + default: 'Boolean' + } + } +}); + +exports.Date = PrimitiveType.extend({ + props: { + name: { + default: 'Date' + } + } +}); + +exports.RegExp = PrimitiveType.extend({ + props: { + name: { + default: 'RegExp' + } + } +}); + +// --- BSON Types --- + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/Double.html + */ +exports.Double = PrimitiveType.extend({ + props: { + name: { + default: 'Double' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/Long.html + */ +exports.Long = PrimitiveType.extend({ + props: { + name: { + default: 'Long' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/Timestamp.html + */ +exports.Timestamp = PrimitiveType.extend({ + props: { + name: { + default: 'Timestamp' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/ObjectID.html + */ +exports.ObjectID = PrimitiveType.extend({ + props: { + name: { + default: 'ObjectID' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/Binary.html + */ +exports.Binary = PrimitiveType.extend({ + props: { + name: { + default: 'Binary' + } + } +}); + +/** + * @see 
http://mongodb.github.io/node-mongodb-native/2.0/api/Symbol.html + */ +exports.Symbol = PrimitiveType.extend({ + props: { + name: { + default: 'Symbol' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/Code.html + */ +exports.Code = PrimitiveType.extend({ + props: { + name: { + default: 'Code' + } + } +}); + +/** + * @see http://mongodb.github.io/node-mongodb-native/2.0/api/DBRef.html + */ +exports.DBRef = PrimitiveType.extend({ + props: { + name: { + default: 'DBRef' + } + } +}); diff --git a/lib/types/type.js b/lib/types/type.js new file mode 100644 index 0000000..536e6e1 --- /dev/null +++ b/lib/types/type.js @@ -0,0 +1,45 @@ +var _ = require('lodash'); +var AmpersandState = require('ampersand-state'); +/** + * Generic Type superclass has name, count, probability properties + */ +module.exports = AmpersandState.extend({ + idAttribute: 'name', + props: { + name: { + type: 'string' + }, + count: { + type: 'number', + default: 0 + } + }, + derived: { + modelType: { + fn: function () { + return this.name; + } + }, + probability: { + cache: false, + fn: function () { + if (!this.parent) return undefined; + return this.count / (this.parent.total_count || this.parent.count); + } + }, + }, + parse: function() { + this.count += 1; + }, + serialize: function() { + var res = this.getAttributes({ + props: true, + derived: true + }, true); + if (this.values) { + res.values = this.values.serialize(); + } + res = _.omit(res, 'modelType'); + return res; + } +}); diff --git a/lib/value-collection.js b/lib/value-collection.js index 076d81a..3262961 100644 --- a/lib/value-collection.js +++ b/lib/value-collection.js @@ -1,8 +1,15 @@ var Collection = require('ampersand-collection'); var lodashMixin = require('ampersand-collection-lodash-mixin'); +var parentMixin = require('./collection-parent-mixin'); var Value = require('./value'); -module.exports = Collection.extend(lodashMixin, { +/** + * Value Collection, stores any values wrapped in `Value` state 
+ * and serializes back to its pure array form. + * @see Value + */ +module.exports = Collection.extend(lodashMixin, parentMixin, { + modelType: 'ValueCollection', mainIndex: 'id', model: Value, serialize: function() { diff --git a/lib/value.js b/lib/value.js index 943ac5e..f3b77e4 100644 --- a/lib/value.js +++ b/lib/value.js @@ -1,6 +1,10 @@ var State = require('ampersand-state'); +/** + * Value, wrapper for any value found in documents. + */ module.exports = State.extend({ + modelType: 'Value', idAttribute: 'id', props: { id: { diff --git a/test/array-object-types.test.js b/test/array-object-types.test.js new file mode 100644 index 0000000..3c98dec --- /dev/null +++ b/test/array-object-types.test.js @@ -0,0 +1,87 @@ +var getSchema = require('../'); +var assert = require('assert'); +var _ = require('lodash'); + + +describe('arrays and objects as type (INT-203 restructuring)', function () { + var docs = [ + { x: [1, 2, 3] }, + { x: "foo" }, + { x: { b: 1 } }, + { x: [ "bar", null, false ] }, + { x: [ {c: 1, d: 1}, {c: 2 } ] }, + { e: 1 } + ]; + + var schema; + + before(function (done) { + schema = getSchema('mixed.mess', docs, done); + }); + + describe('Field', function () { + var x; + + before(function () { + x = schema.fields.get('x'); + }); + + it('have the right type distribution of x', function() { + var dist = _.zipObject( + x.types.pluck('name'), + x.types.pluck('probability') + ); + assert.deepEqual(dist, { + 'Array': 3/6, + 'String': 1/6, + 'Document': 1/6, + 'Undefined': 1/6 + }); + }); + + it('should have an `.fields` alias for convenience', function() { + assert.deepEqual(x.fields, x.types.get('Document').fields); + }); + }); + + + describe('Nested Array', function () { + var arr; + + before(function () { + arr = schema.fields.get('x').types.get('Array'); + }); + + it('should return the lengths of all encountered arrays', function() { + assert.deepEqual(arr.lengths, [3, 3, 2]); + }); + + it('should return the probability of x being an array', 
function(){ + assert.equal(arr.probability, 3/6); + }); + + it('should return the total count of all containing values', function() { + assert.equal(arr.total_count, 8); + }); + + it('should return the type distribution inside an array', function () { + var arrDist = _.zipObject( + arr.types.pluck('name'), + arr.types.pluck('probability') + ); + assert.deepEqual(arrDist, { + 'Number': 3/8, + 'String': 1/8, + 'Null': 1/8, + 'Boolean': 1/8, + 'Document': 2/8 + }); + }); + + it('should have a `.fields` alias for convenience', function () { + assert.deepEqual(arr.fields, arr.types.get('Document').fields); + }); + + }); + +}); diff --git a/test/array.test.js b/test/array.test.js new file mode 100644 index 0000000..e4d35f2 --- /dev/null +++ b/test/array.test.js @@ -0,0 +1,29 @@ +var ArrayType = require('../lib/types').Array; +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:array'); + + +describe('Array', function () { + var arr; + beforeEach(function () { + arr = new ArrayType(); + }); + + it('should be constructable', function () { + assert.ok(arr); + }); + + it('should trigger types.length events when adding a new type', function (done) { + arr.on('change:types.length', function () { + assert.equal(arr.types.length, 1); + done(); + }); + arr.types.addToType(15); + }); + + it('should update .fields alias correctly', function () { + assert.equal(arr.fields, null); + arr.types.addToType({foo: 1}); + assert.equal(arr.fields, arr.types.get('Document').fields); + }); +}); diff --git a/test/basic-embedded-array.test.js b/test/basic-embedded-array.test.js index cd16420..c79f3c5 100644 --- a/test/basic-embedded-array.test.js +++ b/test/basic-embedded-array.test.js @@ -23,16 +23,16 @@ describe('basic embedded array', function() { before(function(done) { following = getSchema('following', docs, function() { - following_ids = following.fields.get('following_ids').fields.get('__basic__'); + following_ids = 
following.fields.get('following_ids').types.get('Array'); done(); }); }); it('should have 2 lengths for following_ids', function() { - assert.deepEqual(following.fields.get('following_ids').lengths, [1, 2]); + assert.deepEqual(following_ids.lengths, [1, 2]); }); it('should have an average length of 1.5 for following_ids', function() { - assert.equal(following.fields.get('following_ids').average_length, 1.5); + assert.equal(following_ids.average_length, 1.5); }); it('should have a sum of probability for following_ids of 1', function() { @@ -52,5 +52,4 @@ describe('basic embedded array', function() { following.toJSON(); }); }); - // @todo: write more tests when not so tired... }); diff --git a/test/basic-unique.test.js b/test/basic-unique.test.js index f3d71cf..41aa470 100644 --- a/test/basic-unique.test.js +++ b/test/basic-unique.test.js @@ -5,11 +5,18 @@ describe('unique', function() { var docs = [ { _id: 1, - registered: true + registered: true, + b: false }, { _id: 2, - registered: true + registered: true, + code: null, + b: 'false' + }, + { + _id: 3, + code: null } ]; @@ -24,13 +31,13 @@ describe('unique', function() { }); }); - it('should have count of 2 for `_id`', function() { - assert.equal(schema.fields.get('_id').count, 2); + it('should have count of 3 for `_id`', function() { + assert.equal(schema.fields.get('_id').count, 3); }); - it('should have unique of 2 for `_id`', function() { - assert.equal(schema.fields.get('_id').unique, 2); - assert.equal(schema.fields.get('_id').types.get('Number').unique, 2); + it('should have unique of 3 for `_id`', function() { + assert.equal(schema.fields.get('_id').unique, 3); + assert.equal(schema.fields.get('_id').types.get('Number').unique, 3); }); it('should not have duplicates for `_id`', function() { @@ -41,10 +48,26 @@ describe('unique', function() { assert.equal(schema.fields.get('registered').count, 2); }); - it('should have unique of 1 for `registered`', function() { + it('should have unique of 1 for 
`registered` type Boolean', function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').unique, 1); + }); + + it('should have unique of 1 for `registered` overall', function() { assert.equal(schema.fields.get('registered').unique, 1); }); + it('should return unique of 0 for Undefined type', function () { + assert.equal(schema.fields.get('registered').types.get('Undefined').unique, 0); + }); + + it('should have unique of 1 for `code`', function() { + assert.equal(schema.fields.get('code').types.get('Null').unique, 1); + }); + + it('should not have duplicate values for b', function() { + assert.equal(schema.fields.get('b').has_duplicates, false); + }); + it('should have duplicates for `registered`', function() { assert.equal(schema.fields.get('registered').has_duplicates, true); }); diff --git a/test/events.test.js b/test/events.test.js deleted file mode 100644 index 8f31ccc..0000000 --- a/test/events.test.js +++ /dev/null @@ -1,19 +0,0 @@ -var Schema = require('../').Schema; -var assert = require('assert'); - -describe('events', function() { - it('should fire a change:type event', function(done) { - var schema = new Schema(); - schema.fields.on('add', function(field) { - - assert.equal(field.getId(), '_id'); - field.on('change:type', function(field, newType) { - assert.equal(newType, 'Number'); - done(); - }); - }); - schema.parse({ - _id: 1 - }); - }); -}); diff --git a/test/field-collection.test.js b/test/field-collection.test.js new file mode 100644 index 0000000..cafd1cf --- /dev/null +++ b/test/field-collection.test.js @@ -0,0 +1,39 @@ +var DocumentType = require('../lib/types').Document; +var FieldCollection = require('../lib/field-collection'); +var assert = require('assert'); + +describe('FieldCollection', function () { + var collection; + + before(function () { + collection = new FieldCollection(); + }); + + it('should create new field if the field name is not present', function () { + collection.addToField('foo', 1); + 
assert.ok(collection.get('foo')); + assert.equal(collection.length, 1); + }); + + it('should pass down collection\'s parent to its values', function () { + var doc = new DocumentType(); + doc.parse({foo: 1, bar: 1}); + assert.equal(doc.fields.get('foo').parent, doc); + }); + + it('should trigger change:probability events in unaffected children', function (done) { + collection.addToField('field', 16); + collection.addToField('field', 5); + collection.addToField('field', 'foo'); + collection.addToField('field', 'bar'); + var field = collection.get('field'); + assert.deepEqual(field.types.pluck('probability'), [0.5, 0.5]); + + field.types.get('Number').on('change:probability', function () { + assert.deepEqual(field.types.pluck('probability'), [0.4, 0.6]); + done(); + }); + collection.addToField('field', 'baz'); + }); + +}); diff --git a/test/field.test.js b/test/field.test.js new file mode 100644 index 0000000..8be7341 --- /dev/null +++ b/test/field.test.js @@ -0,0 +1,61 @@ +var Field = require('../lib/field'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:type'); + +describe('Field', function () { + var field; + beforeEach(function () { + field = new Field(); + }); + + it('should be constructable', function () { + assert.ok(field); + }); + + it('should return single type string for Field#type for one type', function () { + field.types.addToType(16); + field.types.addToType(5); + field.types.addToType(-1); + assert.equal(field.type, 'Number'); + }); + + it('should return array of type strings for Field#type for multiple types', function () { + field.types.addToType(16); + field.types.addToType(5); + field.types.addToType("foo"); + field.types.addToType("bar"); + assert.deepEqual(field.type, ['Number', 'String']); + }); + + it('should return undefined for Field#type if no types present', function () { + assert.equal(field.type, undefined); + }); + + it('should trigger change:types.length events when adding a new type', function 
(done) { + field.on('change:types.length', function () { + assert.equal(field.types.length, 1); + done(); + }); + field.types.addToType(15); + }); + + it('should update Field#type when adding more values', function () { + field.types.addToType(15); + assert.equal(field.type, 'Number'); + field.types.addToType("sfo"); + assert.deepEqual(field.type, ['Number', 'String']); + }); + + it('should update .fields alias correctly', function () { + assert.equal(field.fields, null); + field.types.addToType({foo: 1}); + assert.equal(field.fields, field.types.get('Document').fields); + }); + + it('should update .arrayFields alias correctly', function () { + assert.equal(field.arrayFields, null); + field.types.addToType([{foo: 1}]); + assert.equal(field.arrayFields, field.types.get('Array').fields); + assert.ok(field.arrayFields.get('foo')); + }); +}); diff --git a/test/mixed-type-order.test.js b/test/mixed-type-order.test.js index 72a82b2..bc75720 100644 --- a/test/mixed-type-order.test.js +++ b/test/mixed-type-order.test.js @@ -37,8 +37,12 @@ describe('mixed type order', function() { it('should have 3 types for `registered`', function() { assert.equal(schema.fields.get('registered').types.length, 3); }); - it('should return the order of types as ["String", "Number", "Undefined"]', function(done) { - assert.deepEqual(schema.fields.get('registered').types.pluck('name'), ['String', 'Number', 'Undefined']); + it('should return the order of types as ["String", "Number", "Undefined"] when sorted', function(done) { + assert.deepEqual(schema.fields.get('registered').types.sort().pluck('name'), ['String', 'Number', 'Undefined']); done(); }); + it('should sort automatically when serialized', function() { + var types = schema.fields.get('registered').types; + assert.deepEqual(_.pluck(types.serialize(), 'name'), ['String', 'Number', 'Undefined']); + }); }); diff --git a/test/mixed-type-probability.test.js b/test/mixed-type-probability.test.js index 6a5d0a4..e564d5b 100644 --- 
a/test/mixed-type-probability.test.js +++ b/test/mixed-type-probability.test.js @@ -1,5 +1,6 @@ var getSchema = require('../'); var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test'); describe('mixed type probability', function() { var docs = [ @@ -16,7 +17,8 @@ describe('mixed type probability', function() { registered: true }, { - _id: 4 + _id: 4, + assigned: true } ]; @@ -48,4 +50,8 @@ describe('mixed type probability', function() { it('should have a probability of 25% for `registered` to be undefined', function() { assert.equal(schema.fields.get('registered').types.get('Undefined').probability, (1 / 4)); }); + it('should compensate for missed Undefined values', function() { + assert.equal(schema.fields.get('assigned').probability, 0.25); + assert.equal(schema.fields.get('assigned').types.get('Boolean').probability, 0.25); + }); }); diff --git a/test/schema.test.js b/test/schema.test.js new file mode 100644 index 0000000..69c8668 --- /dev/null +++ b/test/schema.test.js @@ -0,0 +1,139 @@ +var Schema = require('../lib/schema'); +var schemaHelper = require('../lib'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:schema'); +var es = require('event-stream'); + + +describe('Schema', function () { + var schema; + beforeEach(function () { + schema = new Schema(); + }); + + it('should be constructable', function() { + assert.ok(schema); + }); + + it('should parse a simple document', function () { + schema.parse({foo: 1}); + assert.ok(schema.fields.get('foo')); + assert.equal(schema.count, 1); + }); + + it('should parse a nested document', function () { + schema.parse({foo: {bar: 1}}); + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('foo').types.get('Document').fields.get('bar')); + assert.equal(schema.count, 1); + assert.equal(schema.fields.get('foo').types.get('Document').count, 1); + }); + + it('should set up the parent tree all the way down', function () { + schema.parse({foo: 
{bar: [1, 2, 3]}}); + var foo = schema.fields.get('foo'); + assert.equal(foo.parent, schema); + var subdoc = foo.types.get('Document'); + assert.equal(subdoc.parent, foo); + var bar = subdoc.fields.get('bar'); + assert.equal(bar.parent, subdoc); + var arr = bar.types.get('Array'); + assert.equal(arr.parent, bar); + var num = arr.types.get('Number'); + assert.equal(num.parent, arr); + var val = num.values.at(0); + assert.equal(val.parent, num); + }); + + it('should trigger an `end` event at the end of parsing a stream', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 1}]; + var src = es.readArray(docs); + var stream = src.pipe(schema.stream()); + stream.on('end', function () { + assert.equal(schema.count, 2); + done(); + }); + }); + + it('should trigger `data` events for each doc', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var src = es.readArray(docs); + var count = 0; + src.pipe(schema.stream()) + .on('data', function (doc) { + count ++; + }) + .on('end', function () { + assert.equal(count, 2); + done(); + }); + }); + +}); + +describe('Schema Helper', function() { + it('should be able to handle an array as input', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var src = es.readArray(docs); + var schema; + schema = schemaHelper('with.stream', src, function () { + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('bar')); + done(); + }); + }); + + it('should be able to handle a stream as input', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var schema; + schema = schemaHelper('with.stream', docs, function () { + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('bar')); + done(); + }); + }); + + it('should be able to handle an object as input that exposes a .stream() method', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var src = es.readArray(docs); + var obj = {name: 'Container Object', stream: function () { return src; }}; + var schema; + schema = 
schemaHelper('with.stream', obj, function () { + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('bar')); + done(); + }); + }); + + it('schema object should also trigger `end` event when done parsing', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var src = es.readArray(docs); + var schema; + schema = schemaHelper('with.stream', src, function () { + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('bar')); + }).on('end', function(schema) { + assert.equal(schema.count, 2); + done(); + }); + }); + + it('schema object should also trigger `data` events for each doc', function (done) { + var docs = [{foo: 1}, {bar: 1, foo: 2}]; + var src = es.readArray(docs); + var count = 0; + var schema; + schema = schemaHelper('with.stream', src, function () { + assert.ok(schema.fields.get('foo')); + assert.ok(schema.fields.get('bar')); + }).on('data', function(doc, innerSchema) { + count ++; + assert.ok(doc); + assert.equal(schema, innerSchema); + }).on('end', function () { + assert.equal(count, 2); + done(); + }); + }); + +}); diff --git a/test/type-collection.test.js b/test/type-collection.test.js new file mode 100644 index 0000000..58a18af --- /dev/null +++ b/test/type-collection.test.js @@ -0,0 +1,80 @@ +var TypeCollection = require('../lib/type-collection'); +var Field = require('../lib/field'); +var _ = require('lodash'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:type-collection'); +var bson = require('bson'); + + +describe('TypeCollection', function () { + var types; + beforeEach(function () { + types = new TypeCollection(); + }); + + it('should create types automatically with .addToType', function() { + types.addToType("i'm a string"); + assert.ok(types.get('String')); + assert.equal(types.get('String').count, 1); + }); + + it('should use existing types with .addToType', function() { + types.addToType(2); + types.addToType(3); + + assert.ok(types.get('Number')); + 
assert.equal(types.get('Number').count, 2); + assert.deepEqual(types.get('Number').values.serialize(), [2, 3]); + }); + + it('should pass collection\'s parent down to the values', function () { + var field = new Field({name: 'myfield'}); + field.types.addToType('some string'); + assert.equal(field.types.get('String').parent, field); + }); + + it('should work with any type of primitive value', function() { + types.addToType(1); + types.addToType("str"); + types.addToType(true); + types.addToType(null); + types.addToType(undefined); + types.addToType(new Date(2015, 1, 1)); + types.addToType(/^foo*/i); + types.addToType(new bson.ObjectID()); + types.addToType(new bson.Long()); + types.addToType(new bson.Double()); + types.addToType(new bson.Timestamp()); + types.addToType(new bson.Symbol()); + types.addToType(new bson.Code()); + types.addToType(new bson.MinKey()); + types.addToType(new bson.MaxKey()); + types.addToType(new bson.DBRef()); + types.addToType(new bson.Binary()); + + assert.equal(types.length, 17); + assert.equal(_.unique(types.pluck('name')).length, 17); + }); + + it('should add array values correctly', function () { + types.addToType([1, 2, 3]); + assert.ok(types.get('Array')); + assert.equal(types.get('Array').count, 1); + assert.equal(types.get('Array').types.get('Number').count, 3); + assert.deepEqual(types.get('Array').types.get('Number').values.serialize(), [1, 2, 3]); + }); + + it('should count array values correctly', function () { + types.addToType([1, 2, 3]); + types.addToType("foo"); + types.addToType([4]); + types.addToType(5); + + assert.ok(types.get('Array')); + assert.equal(types.get('Array').count, 2); + assert.equal(types.get('Array').types.get('Number').count, 4); + assert.deepEqual(types.get('Array').types.get('Number').values.serialize(), [1, 2, 3, 4]); + assert.deepEqual(types.get('Number').count, 1); + assert.deepEqual(types.get('String').count, 1); + }); +}); diff --git a/test/type.test.js b/test/type.test.js new file mode 100644 
index 0000000..66ad474 --- /dev/null +++ b/test/type.test.js @@ -0,0 +1,74 @@ +var types = require('../lib/types'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:type'); + +describe('Array', function () { + var arr; + beforeEach(function () { + arr = new types.Array(); + }); + it('should be constructable', function () { + assert.ok(arr); + }); + + it('should return Array as its modelType', function () { + assert.equal(arr.getType(), 'Array'); + }); + + it('should add values of a single type to the correct type', function () { + arr.parse([5, 2, 5, 5, 0]); + assert.deepEqual(arr.types.get('Number').values.serialize(), [5, 2, 5, 5, 0]); + }); + + it('should throw if the value is not an array', function () { + assert.throws(function() { + arr.parse(5); + }); + }); + + it('should return null for Type#fields if it does not have a Document type', function() { + arr.parse([1, 2, 3, "string", false]); + assert.equal(arr.fields, null); + }); + + it('should add values of a mixed types to the correct types', function () { + arr.parse([false, "foo", true, "bar"]); + assert.deepEqual(arr.types.get('Boolean').values.serialize(), [false, true]); + assert.deepEqual(arr.types.get('String').values.serialize(), ["foo", "bar"]); + }); +}); + + +describe('Document', function () { + var doc; + beforeEach(function () { + doc = new types.Document(); + }); + it('should be constructable', function () { + assert.ok(doc); + }); + + it('should throw if the value is not an object', function () { + assert.throws(function () { + doc.parse([1, 2, 3]); + }); + }); + + it('should return null for Type#fields if it does not have a Document type', function() { + doc.parse({foo: 1}); + doc.parse({foo: 2}); + doc.parse({foo: 3}); + assert.equal(doc.fields.get('foo').fields, null); + }); + + it('should add fields recursively', function () { + doc.parse({foo: 1}); + doc.parse({foo: 2}); + doc.parse({foo: 3}); + doc.parse({foo: 'hello', bar: 'good bye'}); + 
assert.ok(doc.fields.get('foo')); + assert.deepEqual(doc.fields.get('foo').types.get('Number').values.serialize(), [1, 2, 3]); + assert.deepEqual(doc.fields.get('foo').types.get('String').values.serialize(), ['hello']); + assert.deepEqual(doc.fields.get('bar').types.get('String').values.serialize(), ['good bye']); + }); +}); diff --git a/test/value-collection.test.js b/test/value-collection.test.js new file mode 100644 index 0000000..c3c3a1a --- /dev/null +++ b/test/value-collection.test.js @@ -0,0 +1,33 @@ +var ValueCollection = require('../lib/value-collection'); +var NumberType = require('../lib/types').Number; +var _ = require('lodash'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:value-collection'); + +describe('ValueCollection', function () { + var collection; + + before(function () { + collection = new ValueCollection([ + {value: 0}, + {value: ""}, + {value: null}, + {value: false}, + {value: []}, + {value: {}} + ]); + }); + + it('should accept any type of value', function () { + assert.deepEqual(collection.serialize(), [0, '', null, false, [], {}]); + assert.equal(collection.length, 6); + }); + + it('should pass collection\'s parent down to the values', function () { + var num = new NumberType(null); + num.values.reset([ + {value: 250} + ]); + assert.equal(num.values.at(0).parent, num); + }); +}); diff --git a/test/values.test.js b/test/values.test.js index 0a26c7a..29c35cb 100644 --- a/test/values.test.js +++ b/test/values.test.js @@ -6,9 +6,12 @@ describe('values', function() { var schema = new Schema(); schema.fields.on('add', function(field) { assert.equal(field.getId(), '_id'); - field.values.on('add', function(value) { - assert.equal(value.value, 1); - done(); + field.types.on('add', function(type) { + assert.equal(type.name, 'Number'); + type.values.on('add', function(value) { + assert.equal(value.value, 1); + done(); + }); }); }); schema.parse({