Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
43c0d24
added tests that need to pass after restructuring.
rueckstiess Jun 25, 2015
709988c
renamed to use slugcase instead of underscores.
rueckstiess Jun 25, 2015
bfeda65
defined x once for readability.
rueckstiess Jun 25, 2015
4bdd7fe
defined x once for readability.
rueckstiess Jun 25, 2015
8af98a2
intermediate checkin, WIP
rueckstiess Jun 25, 2015
ff91c05
added parentMixin to collections
rueckstiess Jun 25, 2015
e3b4010
verify parent is passed down to values.
rueckstiess Jun 25, 2015
fde4403
added test for field-collection
rueckstiess Jun 25, 2015
d9ad384
renamed addValue to parse, schema = document
rueckstiess Jun 25, 2015
12823e3
schema name, parents working, array prob
rueckstiess Jun 25, 2015
c51a3b6
added "Undefined" handling, total_count
rueckstiess Jun 25, 2015
7d00798
undefined, end trigger, probabilities
rueckstiess Jun 25, 2015
93393df
README cleanup
imlucas Jun 28, 2015
5121097
added tests that need to pass after restructuring.
rueckstiess Jun 25, 2015
8102be4
renamed to use slugcase instead of underscores.
rueckstiess Jun 25, 2015
79c9705
defined x once for readability.
rueckstiess Jun 25, 2015
69bcb11
defined x once for readability.
rueckstiess Jun 25, 2015
9502ec3
intermediate checkin, WIP
rueckstiess Jun 25, 2015
5efe24e
added parentMixin to collections
rueckstiess Jun 25, 2015
8ac3ace
verify parent is passed down to values.
rueckstiess Jun 25, 2015
6791d27
added test for field-collection
rueckstiess Jun 25, 2015
eab57d4
renamed addValue to parse, schema = document
rueckstiess Jun 25, 2015
af21952
schema name, parents working, array prob
rueckstiess Jun 25, 2015
7438723
added "Undefined" handling, total_count
rueckstiess Jun 25, 2015
c99e551
undefined, end trigger, probabilities
rueckstiess Jun 25, 2015
a864bb9
className --> namespace
rueckstiess Jun 29, 2015
7a176df
missed dependency on total_count for Type#probability
rueckstiess Jun 29, 2015
d0b88f8
unique and has_duplicates
rueckstiess Jun 29, 2015
6b90632
schema helper takes stream&array. more tests.
rueckstiess Jun 29, 2015
de46593
100% coverage, all tests passing.
rueckstiess Jun 29, 2015
0121f08
trigger types.length change event in field.
rueckstiess Jun 29, 2015
d9005e9
split type into individual files under ./types
rueckstiess Jun 29, 2015
327a84e
Merge branch 'INT-203-arrays' of github.com:mongodb-js/mongodb-schema…
rueckstiess Jun 29, 2015
3f2edab
serialization, some unique issues, README
rueckstiess Jun 30, 2015
4096b35
updated diagram, some properties missing / renamed
rueckstiess Jun 30, 2015
55150d3
removed session parent again, causes circular event triggers.
rueckstiess Jun 30, 2015
264d0ef
added Field#arrayFields alias. more tests.
rueckstiess Jul 1, 2015
8129ac8
trigger change:probability events in all cases.
rueckstiess Jul 1, 2015
52f905a
(FAILING) test to listen to data events.
rueckstiess Jul 2, 2015
dd091e1
use schema.stream()'s end event instead
rueckstiess Jul 2, 2015
a49f9fd
passing stream events through to schema object.
rueckstiess Jul 2, 2015
469e7aa
fixed bug that under counted total_count
rueckstiess Jul 2, 2015
2b8766b
fixed bug where undefined values did not count towards total_count
rueckstiess Jul 3, 2015
bf6cb9a
Merge branch 'INT-203-arrays' of github.com:mongodb-js/mongodb-schema…
rueckstiess Jul 3, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 77 additions & 40 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# mongodb-schema

[![build status](https://secure.travis-ci.org/mongodb-js/mongodb-schema.png)](http://travis-ci.org/mongodb-js/mongodb-schema)
[![Coverage Status](https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg)](https://coveralls.io/r/mongodb-js/mongodb-schema)
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
# mongodb-schema [![][npm_img]][npm_url] [![][travis_img]][travis_url] [![][coverage_img]][coverage_url] [![][gitter_img]][gitter_url]

Infer a probabilistic schema for a MongoDB collection.

A high-level view of the class interactions is as follows:

![](./docs/mongodb-schema_diagram.png)

## Example

`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples we'll install the node.js driver. As well, we'll need some data
in a collection to derive the schema of:
`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples we'll install the node.js driver.
We'll also need some data in a collection to derive a schema from.

Make sure you have a `mongod` running on localhost on port 27017 (or change the example accordingly). Then, do:

1. `npm install mongodb mongodb-schema`.
1. `npm install mongodb mongodb-schema`
2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test`
3. Create a new file `parse-schema.js` and paste in the following code:
```javascript
Expand All @@ -30,63 +32,92 @@ in a collection to derive the schema of:
});
```
4. When we run the above with `node parse-schema.js`, we'll see something
like the following:
like the following (some fields not present here for clarity):

```javascript
{
ns: 'test.test',
count: 4, // The number of documents sampled
fields: [ // A collection of Field objects @see lib/field.js
"count": 4, // parsed 4 documents
"ns": "test.test", // namespace
"fields": [ // an array of Field objects, @see `./lib/field.js`
{
name: "_id",
probability: 1, // Just as we expected, all 4 documents had `_id`
unique: 4, // All 4 values for `_id` were unique
types: [
"name": "_id",
"count": 4, // 4 documents counted with _id
"type": "Number", // the type of _id is `Number`
"probability": 1, // all documents had an _id field
"unique": 4, // 4 unique values found
"has_duplicates": false, // therefore no duplicates
"types": [ // an array of Type objects, @see `./lib/types/`
{
name: "Number", // The only type seen was a Number
probability: 1,
unique: 4
"name": "Number", // name of the type
"count": 4, // 4 numbers counted
"probability": 1,
"unique": 4,
"values": [ // array of encountered values
1,
2,
3,
4
]
}
]
},
{
name: "a", // Unlike `_id`, `a` was present in only 3 of 4 documents
probability: 0.75,
unique: 3, // Of the 3 values seen, all 3 were unique
// As expected, Boolean, String, and Number values were seen.
// A handy instance of `Undefined` is also provided to represent missing data",
"name": "a",
"count": 3, // only 3 documents with field `a` counted
"probability": 0.75, // hence probability 0.75
"type": [ // found these types
"Boolean",
"String",
"Number",
"Undefined" // for convenience, we treat Undefined as its own type
],
"unique": 3,
"has_duplicates": false, // there were no duplicate values
"types": [
{
name: "Boolean",
probability: 0.25,
unique: 1
"name": "Boolean",
"count": 1,
"probability": 0.25, // probabilities for types are calculated factoring in Undefined
"unique": 1,
"values": [
true
]
},
{
name: "String",
probability: 0.25,
unique: 1
"name": "String",
"count": 1,
"probability": 0.25,
"unique": 1,
"values": [
"true"
]
},
{
name: "Number",
probability: 0.25,
unique: 1
"name": "Number",
"count": 1,
"probability": 0.25,
"unique": 1,
"values": [
1
]
},
{
name: "Undefined",
probability: 0.25
"name": "Undefined",
"count": 1,
"probability": 0.25,
"unique": 0
}
]
}
]
}
```
```

### More Examples

`mongodb-schema` supports all [BSON types][bson-types].
Check out [the tests][tests] for more usage examples.


## Installation

```
Expand All @@ -108,9 +139,6 @@ Apache 2.0
Under the hood, `mongodb-schema` uses [ampersand-state][ampersand-state] and
[ampersand-collection][ampersand-collection] for modeling [Schema][schema], [Field][field]s, and [Type][type]s.

A high-level view of the class interactions is as follows:

![](./docs/mongodb-schema_diagram.png)



Expand All @@ -121,3 +149,12 @@ A high-level view of the class interactions is as follows:
[schema]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/schema.js
[field]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/field.js
[type]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/type.js

[travis_img]: https://secure.travis-ci.org/mongodb-js/mongodb-schema.svg?branch=master
[travis_url]: https://travis-ci.org/mongodb-js/mongodb-schema
[npm_img]: https://img.shields.io/npm/v/mongodb-schema.svg
[npm_url]: https://www.npmjs.org/package/mongodb-schema
[coverage_img]: https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg
[coverage_url]: https://coveralls.io/r/mongodb-js/mongodb-schema
[gitter_img]: https://badges.gitter.im/Join%20Chat.svg
[gitter_url]: https://gitter.im/mongodb-js/mongodb-js
Binary file modified docs/mongodb-schema_diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion examples/parse-schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ var connect = require('mongodb');
connect('mongodb://localhost:27017/test', function(err, db){
if(err) return console.error(err);

parseSchema('test', db.collection('test').find(), function(err, schema){
parseSchema('test.test', db.collection('test').find(), function(err, schema){
if(err) return console.error(err);

console.log(JSON.stringify(schema, null, 2));
Expand Down
14 changes: 14 additions & 0 deletions lib/collection-parent-mixin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
var _ = require('lodash');
var AmpersandCollection = require('ampersand-collection');

/**
* Collection with this mixin passes its parent down to its values
*/
module.exports = {
set: function (models, options) {
options = _.defaults({
parent: this.parent
}, options || {});
return AmpersandCollection.prototype.set.call(this, models, options);
}
};
43 changes: 40 additions & 3 deletions lib/field-collection.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
var Collection = require('ampersand-collection');
var lodashMixin = require('ampersand-collection-lodash-mixin');
var parentMixin = require('./collection-parent-mixin');
var Field = require('./field');

/**
* Container for a list of Fields.
*/
var FieldCollection = Collection.extend(lodashMixin, {
var FieldCollection = Collection.extend(lodashMixin, parentMixin, {
modelType: 'FieldCollection',
mainIndex: 'name',
model: Field,
comparator: function (a, b) {
// make sure _id is always at top, even in presence of uppercase fields
var aId = a.getId();
Expand All @@ -15,8 +19,41 @@ var FieldCollection = Collection.extend(lodashMixin, {
// otherwise sort case-insensitively
return (aId.toLowerCase() < bId.toLowerCase()) ? -1 : 1;
},
model: function(attrs, options) {
return new attrs.klass(attrs, options);
/**
* adds a new name/value pair to the correct field, and creates the
* field first if it doesn't exist yet. Leave it to field.types to
* add the value.
*
* @param {String} name name of the field
* @param {Any} value value to be added
*/
addToField: function (name, value) {
// get or create field
var field = this.get(name);
if (!field) {
field = this.add({
name: name,
parent: this.parent
});
if (this.parent) this.parent.trigger('change:fields.length');
/**
* first time we see this field. We need to compensate for
* the Undefined values we missed so far for this field,
* by setting the count to the parent count and adjusting
* total_count as well.
*/
if (this.parent && this.parent.count > 0) {
var undef = field.types.add({name: 'Undefined'});
undef.count += this.parent.count;
field.total_count += undef.count;
}
}
// undefined are not counted towards the field's count
if(value !== undefined) field.count += 1;
// but they are counted towards the field's total_count
field.total_count += 1;
field.types.addToType(value);
}
});

module.exports = FieldCollection;
Loading