From 7a5a32d46bd59c966c71364b4544d9ba64f79725 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Mon, 27 Apr 2015 12:11:01 +1000 Subject: [PATCH 01/79] whitespace --- index.js | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/index.js b/index.js index d092856..c820fe5 100644 --- a/index.js +++ b/index.js @@ -1,23 +1,23 @@ var schema_sync = require('./lib/mongodb-schema.js'); -// async wrapper for mongodb-schema +// async wrapper for mongodb-schema var schema = function(documents, options, callback) { - - if (typeof options === 'function') { - callback = options; - options = {}; - } - // only execute on next event loop iteration - process.nextTick(function () { - try { - var res = schema_sync(documents, options); - callback(null, res); - } catch (e) { - callback(e); - } - }); -} + if (typeof options === 'function') { + callback = options; + options = {}; + } + + // only execute on next event loop iteration + process.nextTick(function() { + try { + var res = schema_sync(documents, options); + callback(null, res); + } catch ( e ) { + callback(e); + } + }); +}; module.exports.schema = schema; -module.exports.schema_sync = schema_sync; \ No newline at end of file +module.exports.schema_sync = schema_sync; From 7cc03b2b7745e865c5f7dd184a033d21a8e90789 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Wed, 29 Apr 2015 19:41:49 +1000 Subject: [PATCH 02/79] added first draft of new schema spec. this spec is a merge of Matt's document and the existing mongodb-schema. --- docs/schema_spec.md | 164 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 docs/schema_spec.md diff --git a/docs/schema_spec.md b/docs/schema_spec.md new file mode 100644 index 0000000..d7b0209 --- /dev/null +++ b/docs/schema_spec.md @@ -0,0 +1,164 @@ +### Scout Specification +# Schema Representation + +Author: Matt Kangas, Thomas Rueckstiess
+Last Revised: 2015-04-28
+Status: Draft
+ +## Specification + + +### 0. Definitions + +Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON notation ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. + +> #### Example + +> An object with 2 members. The name of the first member is `foo` and the name of the second member is `bar`. Both member values are 1. + +> {"foo": 1, "bar": 1} + + + +### 1. Escape Character + +We shall define `#` (ASCII 0x23) as an _escape character_ to distinguish meta data members from members originating from sampled data. + +Rationale: + +- expressible in one byte of UTF-8 (ASCII) +- Non-numeric (not in `0`..`9`, ASCII range 0x30-0x39), because this conflicts with javascript objects/arrays) +- Not `$` (ASCII character 0x24), because it is not a valid prefix for member names in MongoDB + +We shall then encode member names as follows: + +- Member name begins with no escape character: +literal member name +- Member name begins with single escape character: +encoded metadata member +- Member name begins with double escape character: +literal member name which begins with single escape character + + +### 2. General Structure + +We define a _sample set_ as a number of MongoDB documents from a single collection. The documents may have been selected in random fashion, but this definition does not impose any restrictions on the method of acquiring the documents. The documents comprising the sample set are called _sample documents_. + +We define the _shape_ of a sample set as aggregated characteristics of all members of the documents in the sample set. These characteristics are further described below. + +We define a _schema_ as a JSON representation of the _shape_ of a sample set. + +The schema must be strict, valid [JSON](http://www.json.org/). 
MongoDB-specific types must be converted into strict JSON as per [MongoDB's extended JSON](http://docs.mongodb.org/manual/reference/mongodb-extended-json/) definition, "strict" variant. + +The schema follows the combined structure of all documents in the sample set. This means, that for every member in any sample document, a member with the same name exists in the schema at the same nesting depth. This rule applies to members at all nesting depths. The schema can thus be seen as a superposition of all sample documents. + +Within the schema, the value of any such member is an object. This is explicitly also true for leaf members in a sample document, i.e. values that are neither arrays (BSON type 4) nor nested documents (BSON type 3). Every such object contains an encoded meta-data member with the name `#schema` (note the escape character), in addition to potential nested children. This meta-data member with the name `#schema` is called a _tag_, and its value is an array that contains one element for each [BSON type](http://bsonspec.org/spec.html) encountered in the sample set for this particular member. + + +> #### Example + +> Sample set: + +> {a: "foo"} +> {a: {b: 10, c: true}} +> {c: null} + +> Schema (with `...` placeholders for the tag arrays) + +> { +> "a": { +> "#schema": [...], // tag for a +> "b": { +> "#schema": [...], // tag for a.b +> }, +> "c": { +> "#schema": [...], // tag for a.c +> } +> }, +> "c": { +> "#schema": [...], // tag c +> } +> } + +### 3. Tags + +While the schema object itself describes the overall structure of the sample set, the aggregated characteristics of each member are contained within its tag. + +The tag array contains one element for each distinct type encountered in the sample set for the given field. The order of this array is not defined and considered an implementation detail. If a field is missing in a sample document, it is treated as type _undefined_, and we use the (deprecated) BSON type 6 to represent it. 
+ +Each element in the array is an object with the following members: + +- `t`: (_type_) integer representing the (decimal) BSON type, unique within each schema tag +- `n`: (_number_) integer representing the number of documents encountered in the sample set that contain this field +- `p`: (_probability_) float representing the (relative) probability of this field being present given its parent field is present +- `u`: (_unique_) boolean representing whether or not the values of this field are unique under the given type +- `d`: (_data_) object containing type-specific additional data + + +> #### Example + +> Field with its tag (`...` is placeholder for type-specific data field) + +> "a": { +> "#schema": [ // tag for a +> { +> "t": 2, // "string" type +> "n": 160, // 160 encounters +> "p": 0.8, // relative probability 0.8 means 200 parent objects +> "u": false, // the values contain duplicates +> "d": {...} // placeholder, defined further below +> }, +> { +> "t": 3, // "nested document" type +> ... +> } +> ] +> } + + +### 4. Type-Specific Data + +Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `d` member, which carries additional information specific for the type. 
+ + +##### Type 1: Float + +The `d` object contains the following members: + +- `min`: The smallest value encountered in any sample document +- `max`: The largest value encountered in any sample document +- `avg`: The mean of all sample document values +- `med`: The median of all sample document values +- `v`: An array of all values encountered, in order of traversal + + +> #### Example + +> "d" : { +> "min": 0.0 +> "max": 32.8, +> "avg": 9.3499999, +> "med": 5.25, +> "v": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] +> } + + +##### Type 2: string + + +The `d` object contains the following members: + +- `min`: The smallest value encountered in any sample document +- `max`: The largest value encountered in any sample document +- `v`: Unique set of all values encountered, ordered by counts descending +- `c`: count for each value, same order as above + + +> #### Example + +> "d" : { +> "min": "atlas", +> "max": "zoo", +> "v": [ "atlas", "song", "bird", "zoo", "breakfast" ], +> "c": [ 15, 9, 7, 5, 2 ] +> } From 4e8f378ad8d31721c2c5b4a13c0394cddf0b026e Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Wed, 29 Apr 2015 19:43:46 +1000 Subject: [PATCH 03/79] updated date in schema spec. --- docs/schema_spec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/schema_spec.md b/docs/schema_spec.md index d7b0209..ce16094 100644 --- a/docs/schema_spec.md +++ b/docs/schema_spec.md @@ -2,7 +2,7 @@ # Schema Representation Author: Matt Kangas, Thomas Rueckstiess
-Last Revised: 2015-04-28
+Last Revised: 2015-04-29
Status: Draft
## Specification From 33b18c4218b2631f998b918222797998dfa2e311 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Fri, 1 May 2015 09:48:25 +1000 Subject: [PATCH 04/79] added more types to spec. --- docs/schema_spec.md | 233 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 9 deletions(-) diff --git a/docs/schema_spec.md b/docs/schema_spec.md index ce16094..76bc95b 100644 --- a/docs/schema_spec.md +++ b/docs/schema_spec.md @@ -12,7 +12,7 @@ Status: Draft
Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON notation ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. -> #### Example +> ##### Example > An object with 2 members. The name of the first member is `foo` and the name of the second member is `bar`. Both member values are 1. @@ -55,7 +55,7 @@ The schema follows the combined structure of all documents in the sample set. Th Within the schema, the value of any such member is an object. This is explicitly also true for leaf members in a sample document, i.e. values that are neither arrays (BSON type 4) nor nested documents (BSON type 3). Every such object contains an encoded meta-data member with the name `#schema` (note the escape character), in addition to potential nested children. This meta-data member with the name `#schema` is called a _tag_, and its value is an array that contains one element for each [BSON type](http://bsonspec.org/spec.html) encountered in the sample set for this particular member. -> #### Example +> ##### Example > Sample set: @@ -95,7 +95,7 @@ Each element in the array is an object with the following members: - `d`: (_data_) object containing type-specific additional data -> #### Example +> ##### Example > Field with its tag (`...` is placeholder for type-specific data field) @@ -121,7 +121,7 @@ Each element in the array is an object with the following members: Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `d` member, which carries additional information specific for the type. 
-##### Type 1: Float +#### Type 1: float The `d` object contains the following members: @@ -132,9 +132,9 @@ The `d` object contains the following members: - `v`: An array of all values encountered, in order of traversal -> #### Example +> ##### Example -> "d" : { +> "d": { > "min": 0.0 > "max": 32.8, > "avg": 9.3499999, @@ -143,7 +143,7 @@ The `d` object contains the following members: > } -##### Type 2: string +#### Type 2: string The `d` object contains the following members: @@ -154,11 +154,226 @@ The `d` object contains the following members: - `c`: count for each value, same order as above -> #### Example +> ##### Example -> "d" : { +> "d": { > "min": "atlas", > "max": "zoo", > "v": [ "atlas", "song", "bird", "zoo", "breakfast" ], > "c": [ 15, 9, 7, 5, 2 ] > } + + +#### Type 3: nested document + +The `d` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. + + +#### Type 4: array + +The `d` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. + +> ##### Example + +> This array contains only strings (there is only a single element with type `2` in the `#schema` array). This element follows the normal rules for string types, as described above. + +> "d": { +> "#array": [ +> { +> "t": 2, +> "n": 490, +> "p": 1.0, +> "u": false, +> "d": { +> "min": "AUH", +> "max": "ZRH", +> "v": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], +> "c": [ 171, 110, 82, 40, 29, 23, 21, 14 ] +> } +> } +> ] +> } + + +#### Type 5: binary + +The `d` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `c` member is an array of counts of the encountered sub-types. 
+ +> ##### Example + +> "d": { +> "sub": [ 4, 3 ] +> "c": [ 3004, 2554 ] +> } + + +#### Type 6: undefined (deprecated) + +The `d` object is empty. + + +#### Type 7: ObjectId + +The `d` object contains the following fields: + +- `min`: The smallest ObjectId value found, encoded as strict extended JSON. +- `max`: The largest ObjectId value found, encoded as strict extended JSON. + +Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `d` field further contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. + +> ##### Example + +> "d": { +> "min": {"$oid": "553f06eb1fc10e8d93515abb"}, +> "max": {"$oid": "553f06fbbeefcf581c232257"}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 86400, +> "values": [14, 4, 6, 23, ...], +> "labels": [] +> } +> } + + +#### Type 8: boolean + +The `d` field contains the distribution of `true` and `false` values. + +> ##### Example + +> "d": { +> "true": 48, +> "false": 13, +> } + + +#### Type 9: datetime + +the `d` field contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. 
+ +> ##### Example + +> "d": { +> "min": {"$date": 1434933322}, +> "max": {"$date": 1434939935}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 30758400, +> "values": [14, 4, 6, 23] +> } +> } + + +#### Type 10: null + +The `d` object is empty. + +#### Type 11: regular expression + +The `d` object is empty. + +#### Type 12: DBPointer (deprecated) + +The `d` object is empty. + +#### Type 13: javascript code + +The `d` object is empty. + +#### Type 15: javascript code with scope + +The `d` object is empty. + +#### Type 16: 32-bit integer + +The `d` object contains the following members: + +- `min`: The minimum value encountered +- `max`: The maximum value encountered +- `med`: The median of all encoutered values +- `avg`: The mean of all encountered values +- `v`: Unique set of all values encountered, ordered by values +- `c`: count for each value, same order as above + +> ##### Example + +> "d" : { +> "min": 3, +> "max": 72, +> "med": 20, +> "avg": 30.5, +> "v": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "c": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> } + +#### Type 17: timestamp + +the `d` field contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. 
+ +> ##### Example + +> "d": { +> "min": {"$date": 1434933322}, +> "max": {"$date": 1434939935}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 30758400, +> "values": [14, 4, 6, 23] +> } +> } + + +#### Type 18: 64-bit integer + +The `d` object contains the following members: + +- `min`: The minimum value encountered +- `max`: The maximum value encountered +- `med`: The median of all encoutered values +- `avg`: The mean of all encountered values +- `v`: Unique set of all values encountered, ordered by values +- `c`: count for each value, same order as above + +> ##### Example + +> "d" : { +> "min": 3, +> "max": 72, +> "med": 20, +> "avg": 30.5, +> "v": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "c": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> } + +#### Type 127: minkey + +The `d` object is empty. + +#### Type 255: maxkey + +The `d` object is empty. + + + +### X. Adaptive Binning + +> "bins": { // adaptive binning +> "size": 86400, // number of seconds per bucket +> "values": [14, 4, 6, 23, ...] // values per bin +> "labels": ["Apr 30", "May 1", "May 2", "May 3", ...] 
+> } From 0e277987bc4e2e1dc40454f3df6a79ba4b2ff52b Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Fri, 1 May 2015 17:58:09 +1000 Subject: [PATCH 05/79] first stub for redesign, see docs/schema_spec.md main file in lib/schema.js - added definitions.js file - added new test - added some utility scripts - added a fixture --- docs/new_schema_repr.js | 252 +++++++++++++++++++ docs/schema_spec.md | 144 +++++------ fixtures/all_types.js | 212 ++++++++++++++++ index.js | 24 +- lib/definitions.js | 12 + lib/mongodb-schema.js | 518 ---------------------------------------- lib/schema.js | 91 +++++++ package.json | 8 +- scripts/read_types.js | 22 ++ scripts/write_types.js | 166 +++++++++++++ test/test.js | 377 +++-------------------------- 11 files changed, 870 insertions(+), 956 deletions(-) create mode 100644 docs/new_schema_repr.js create mode 100644 fixtures/all_types.js create mode 100644 lib/definitions.js delete mode 100644 lib/mongodb-schema.js create mode 100644 lib/schema.js create mode 100644 scripts/read_types.js create mode 100644 scripts/write_types.js diff --git a/docs/new_schema_repr.js b/docs/new_schema_repr.js new file mode 100644 index 0000000..8aaa50e --- /dev/null +++ b/docs/new_schema_repr.js @@ -0,0 +1,252 @@ +"#schema" [ + { + "t": 3 // type + "n": 880 // number + "p": 0.2 // probability relative to parent + "u": true // unique flag + "d": { ... } // type-specific data + }, + ... +] + +// ### d for each type + +// 1 float +"d" : { + "min": 0, // some stats + "max": 1434933322, // .. + "avg": 1002484, // .. + "med": 998433, // .. + "v": [ ... ] // list of values in order of traversal +} + +// 2 string +"d" : { + "min": "a", + "max": "z", + "v": [ ... ], // unique set of values, order by counts + "c": [ ... ] // counts of values +} + +// 3 subdocument +"d" : { + // sub fields are handled in the main structure of the schema doc +} + +// 4 array +"d" : { + "#schema": [ + ... 
// array introspection, distribution of elements in array like regular #schema + ] +} + +// 5 binary +"d" : { + "sub": 4, // subtype +} + +// 6 undefined +"d" : { +} + +// 7 ObjectId +"d" : { + "min": {"$oid": "553f06eb1fc10e8d93515abb"}, + "max": {"$oid": "553f06fbbeefcf581c232257"}, + "weekdays": [1, 19, 23, 4, 6, 43, 1], + "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], + "bins": { // adaptive binning + "size": 86400, // number of seconds per bucket + "values": [14, 4, 6, 23, ...] // values per bin + } +} + +// 8 boolean +"d" : { + "true": 48, // counts + "false": 13, // .. +} + +// 9 datetime +"d" : { + "min": {"$date": 1434933322}, + "max": {"$date": 1434939935}, + "weekdays": [1, 19, 23, 4, 6, 43, 1], + "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], + "bins": { // adaptive binning + "size": 30758400, // number of seconds per bucket + "values": [14, 4, 6, 23] // values per bin + } +} + +// 10 null +"d" : { +} + +// 11 regex +"d" : { +} + +// 12 dbpointer +"d" : { +} + +// 13 javascript code +"d" : { +} + +// 15 javascript code with scope +"d" : { +} + +// 16 int-32 +"d" : { + "min": 3, + "max": 883, + "med": 145, + "avg": 168, + "v": [ ... ], // unique set of values, order by values + "c": [ ... ] // counts of values +} + +// 17 timestamp +"d" : { + +} + +// 18 int-64 +"d" : { + "min": 3, + "max": 883, + "med": 145, + "avg": 168, + "v": [ ... ], // unique set of values, order by values + "c": [ ... ] // counts of values +} + +// 127 minkey +"d" : { +} + +// 255 maxkey +"d" : { +} + + +// --------------------------------------------- + + +// Example: parsing these 3 documents ... +{ bla : 4 } +{ foo : "hello world" } +{ foo : { bar: 1, baz: [1, 2, 3] } } + + +// ... 
produces this schema +{ + "#root": { + "n": 3, // total count + "v": "0.7.0", // schema representation version + }, + "bla": { + "#schema": [ + { + "t": 16, + "n": 1, + "p": 0.33333333, + "u": true, + "d": { + "min": 4, + "max": 4, + "med": 4, + "avg": 4, + "v": [4], + "c": [1] + } + }, + { + "t": 6, + "n": 2, + "p": 0.6666666667, + "u": false, + "d": {} + } + ] + }, + "foo": { + "#schema": [ + { + "t": 3, // type "sub-document" + "n": 1, + "p": 0.33333333, + "u": true, + "d": {} + }, + { + "t": 6, // type "undefined" + "n": 1, + "p": 0.33333333, + "u": true, + "d": {} + }, + { + "t": 2, // type string + "n": 1, + "p": 0.33333333, + "u": true, + "d": + "min": "hello world", + "max": "hello world", + "v": ["hello world"], // unique set of values, order by counts + "c": [1] // counts of values + } + } + ], + "bar": { // note, this is inside the "foo" document + "#schema": [ + { + "t": 16, // type "int-32" + "n": 1, + "p": 1.0, // this is relative to its parent "foo" being a subdocument + "u": true, + "d": { + "min": 1, + "max": 1, + "med": 1, + "avg": 1, + "v": [1], + "c": [1] + } + } + ] + }, + "baz": { + "#schema": [ + { + "t": 4, + "n": 1, + "p": 1.0, + "u": true, + "d": { + "#schema": [ + { + "t": 16, // type "int-32" + "n": 3, + "p": 3.0, // here p is equivalent to the average number of array elements + "u": true, // this indicates that it could be a set, rather than an array + "d": { + "min": 1, + "max": 3, + "med": 2, + "avg": 2, + "v": [1, 2, 3], + "c": [1, 1, 1] + } + } + ] + } + } + ] + } + } +} diff --git a/docs/schema_spec.md b/docs/schema_spec.md index 76bc95b..2df4e9c 100644 --- a/docs/schema_spec.md +++ b/docs/schema_spec.md @@ -10,7 +10,7 @@ Status: Draft
### 0. Definitions -Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON notation ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. +Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON taxonomy ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. > ##### Example @@ -71,11 +71,11 @@ Within the schema, the value of any such member is an object. This is explicitly > "b": { > "#schema": [...], // tag for a.b > }, -> "c": { +> "counts": { > "#schema": [...], // tag for a.c > } > }, -> "c": { +> "counts": { > "#schema": [...], // tag c > } > } @@ -88,11 +88,11 @@ The tag array contains one element for each distinct type encountered in the sam Each element in the array is an object with the following members: -- `t`: (_type_) integer representing the (decimal) BSON type, unique within each schema tag -- `n`: (_number_) integer representing the number of documents encountered in the sample set that contain this field -- `p`: (_probability_) float representing the (relative) probability of this field being present given its parent field is present -- `u`: (_unique_) boolean representing whether or not the values of this field are unique under the given type -- `d`: (_data_) object containing type-specific additional data +- `type`: integer representing the (decimal) BSON type, unique within each schema tag +- `number`: integer representing the number of documents encountered in the sample set that contain this field +- `prob`: float representing the (relative) probability of this field being present 
given its parent field is present +- `unique`: boolean representing whether or not the values of this field are unique under the given type +- `data`: object containing type-specific additional data > ##### Example @@ -102,14 +102,14 @@ Each element in the array is an object with the following members: > "a": { > "#schema": [ // tag for a > { -> "t": 2, // "string" type -> "n": 160, // 160 encounters -> "p": 0.8, // relative probability 0.8 means 200 parent objects -> "u": false, // the values contain duplicates -> "d": {...} // placeholder, defined further below +> "type": 2, // "string" type +> "number": 160, // 160 encounters +> "prob": 0.8, // relative probability 0.8 means 200 parent objects +> "unique": false, // the values contain duplicates +> "data": {...} // placeholder, defined further below > }, > { -> "t": 3, // "nested document" type +> "type": 3, // "nested document" type > ... > } > ] @@ -118,77 +118,77 @@ Each element in the array is an object with the following members: ### 4. Type-Specific Data -Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `d` member, which carries additional information specific for the type. +Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `data` member, which carries additional information specific for the type. 
#### Type 1: float -The `d` object contains the following members: +The `data` object contains the following members: - `min`: The smallest value encountered in any sample document - `max`: The largest value encountered in any sample document - `avg`: The mean of all sample document values - `med`: The median of all sample document values -- `v`: An array of all values encountered, in order of traversal +- `values`: An array of all values encountered, in order of traversal > ##### Example -> "d": { +> "data": { > "min": 0.0 > "max": 32.8, > "avg": 9.3499999, > "med": 5.25, -> "v": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] +> "values": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] > } #### Type 2: string -The `d` object contains the following members: +The `data` object contains the following members: - `min`: The smallest value encountered in any sample document - `max`: The largest value encountered in any sample document -- `v`: Unique set of all values encountered, ordered by counts descending -- `c`: count for each value, same order as above +- `values`: Unique set of all values encountered, ordered by counts descending +- `counts`: count for each value, same order as above > ##### Example -> "d": { +> "data": { > "min": "atlas", > "max": "zoo", -> "v": [ "atlas", "song", "bird", "zoo", "breakfast" ], -> "c": [ 15, 9, 7, 5, 2 ] +> "values": [ "atlas", "song", "bird", "zoo", "breakfast" ], +> "counts": [ 15, 9, 7, 5, 2 ] > } #### Type 3: nested document -The `d` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. +The `data` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. #### Type 4: array -The `d` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. 
+The `data` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. > ##### Example > This array contains only strings (there is only a single element with type `2` in the `#schema` array). This element follows the normal rules for string types, as described above. -> "d": { +> "data": { > "#array": [ > { -> "t": 2, -> "n": 490, -> "p": 1.0, -> "u": false, -> "d": { +> "type": 2, +> "number": 490, +> "prob": 1.0, +> "unique": false, +> "data": { > "min": "AUH", > "max": "ZRH", -> "v": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], -> "c": [ 171, 110, 82, 40, 29, 23, 21, 14 ] +> "values": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], +> "counts": [ 171, 110, 82, 40, 29, 23, 21, 14 ] > } > } > ] @@ -197,29 +197,29 @@ The `d` object for arrays contains an `#array` member. It follows the structure #### Type 5: binary -The `d` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `c` member is an array of counts of the encountered sub-types. +The `data` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `counts` member is an array of counts of the encountered sub-types. > ##### Example -> "d": { +> "data": { > "sub": [ 4, 3 ] -> "c": [ 3004, 2554 ] +> "counts": [ 3004, 2554 ] > } #### Type 6: undefined (deprecated) -The `d` object is empty. +The `data` object is empty. #### Type 7: ObjectId -The `d` object contains the following fields: +The `data` object contains the following fields: - `min`: The smallest ObjectId value found, encoded as strict extended JSON. - `max`: The largest ObjectId value found, encoded as strict extended JSON. 
-Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `d` field further contains aggregated date and time information: +Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `data` field further contains aggregated date and time information: - `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. - `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). @@ -227,7 +227,7 @@ Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, t > ##### Example -> "d": { +> "data": { > "min": {"$oid": "553f06eb1fc10e8d93515abb"}, > "max": {"$oid": "553f06fbbeefcf581c232257"}, > "weekdays": [1, 19, 23, 4, 6, 43, 1], @@ -242,11 +242,11 @@ Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, t #### Type 8: boolean -The `d` field contains the distribution of `true` and `false` values. +The `data` field contains the distribution of `true` and `false` values. > ##### Example -> "d": { +> "data": { > "true": 48, > "false": 13, > } @@ -254,7 +254,7 @@ The `d` field contains the distribution of `true` and `false` values. #### Type 9: datetime -the `d` field contains aggregated date and time information: +the `data` field contains aggregated date and time information: - `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. - `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). @@ -262,7 +262,7 @@ the `d` field contains aggregated date and time information: > ##### Example -> "d": { +> "data": { > "min": {"$date": 1434933322}, > "max": {"$date": 1434939935}, > "weekdays": [1, 19, 23, 4, 6, 43, 1], @@ -276,49 +276,49 @@ the `d` field contains aggregated date and time information: #### Type 10: null -The `d` object is empty. 
+The `data` object is empty. #### Type 11: regular expression -The `d` object is empty. +The `data` object is empty. #### Type 12: DBPointer (deprecated) -The `d` object is empty. +The `data` object is empty. #### Type 13: javascript code -The `d` object is empty. +The `data` object is empty. #### Type 15: javascript code with scope -The `d` object is empty. +The `data` object is empty. #### Type 16: 32-bit integer -The `d` object contains the following members: +The `data` object contains the following members: - `min`: The minimum value encountered - `max`: The maximum value encountered - `med`: The median of all encoutered values - `avg`: The mean of all encountered values -- `v`: Unique set of all values encountered, ordered by values -- `c`: count for each value, same order as above +- `values`: Unique set of all values encountered, ordered by values +- `counts`: count for each value, same order as above > ##### Example -> "d" : { +> "data" : { > "min": 3, > "max": 72, > "med": 20, > "avg": 30.5, -> "v": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "c": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] > } #### Type 17: timestamp -the `d` field contains aggregated date and time information: +the `data` field contains aggregated date and time information: - `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. - `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). 
@@ -326,7 +326,7 @@ the `d` field contains aggregated date and time information: > ##### Example -> "d": { +> "data": { > "min": {"$date": 1434933322}, > "max": {"$date": 1434939935}, > "weekdays": [1, 19, 23, 4, 6, 43, 1], @@ -340,37 +340,45 @@ the `d` field contains aggregated date and time information: #### Type 18: 64-bit integer -The `d` object contains the following members: +The `data` object contains the following members: - `min`: The minimum value encountered - `max`: The maximum value encountered - `med`: The median of all encoutered values - `avg`: The mean of all encountered values -- `v`: Unique set of all values encountered, ordered by values -- `c`: count for each value, same order as above +- `values`: Unique set of all values encountered, ordered by values +- `counts`: count for each value, same order as above > ##### Example -> "d" : { +> "data" : { > "min": 3, > "max": 72, > "med": 20, > "avg": 30.5, -> "v": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "c": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] > } #### Type 127: minkey -The `d` object is empty. +The `data` object is empty. #### Type 255: maxkey -The `d` object is empty. +The `data` object is empty. +### 5. Adaptive Binning + +Some data types contain a field `bins`, where the data is discretized into bins with a variable bin size, depending on the data distribution. + +A _bin_ is defined + +The `bins` object consists of the following members: + +- `size`: this is the size of an individual bin. For numbers (types 1, 16, 18), this is a unitless number that describes the size of a bin. -### X. 
Adaptive Binning > "bins": { // adaptive binning > "size": 86400, // number of seconds per bucket diff --git a/fixtures/all_types.js b/fixtures/all_types.js new file mode 100644 index 0000000..b80c57d --- /dev/null +++ b/fixtures/all_types.js @@ -0,0 +1,212 @@ +module.exports = +[ + { + "_id": "5543129258b9383aab07d0fb", + "x": 123.123, + "comment": "new MongoDB.Double(123.123)", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d0fc", + "x": 456.456, + "comment": "456.456", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d0fd", + "x": "abc", + "comment": "abc", + "btype": 2 + }, + { + "_id": "5543129258b9383aab07d0fe", + "x": { + "z": 5 + }, + "comment": "{\"z\": 5}", + "btype": 3 + }, + { + "_id": "5543129258b9383aab07d0ff", + "x": [ + 9, + 8, + 7 + ], + "comment": "[9, 8, 7]", + "btype": 16 + }, + { + "_id": "5543129258b9383aab07d100", + "x": [ + { + "y": 4 + }, + { + "z": 5 + } + ], + "comment": "[{\"y\": 4}, {\"z\": 5}]", + "btype": 3 + }, + { + "_id": "5543129258b9383aab07d101", + "x": "YmluYXJ5", + "comment": "new MongoDB.Binary(\"binary\")", + "btype": 5 + }, + { + "_id": "5543129258b9383aab07d102", + "x": "5040dc5d40b67c681d000001", + "comment": "new MongoDB.ObjectID(\"5040dc5d40b67c681d000001\")", + "btype": 7 + }, + { + "_id": "5543129258b9383aab07d103", + "x": false, + "comment": "false", + "btype": 8 + }, + { + "_id": "5543129258b9383aab07d104", + "x": true, + "comment": "true", + "btype": 8 + }, + { + "_id": "5543129258b9383aab07d105", + "x": "2012-08-31T12:13:14.156Z", + "comment": "new Date(\"2012-08-31 12:13:14:156 UTC\")", + "btype": 9 + }, + { + "_id": "5543129258b9383aab07d106", + "x": null, + "comment": "null", + "btype": 10 + }, + { + "_id": "5543129258b9383aab07d107", + "x": {}, + "comment": "new RegExp(\"abc\")", + "btype": 11 + }, + { + "_id": "5543129258b9383aab07d108", + "x": {}, + "comment": "new RegExp(\"abc\", \"i\")", + "btype": 11 + }, + { + "_id": "5543129258b9383aab07d109", + "x": { + "$ref": "types", + "$id": 
"040dc5d40b67c681d000001", + "$db": "types" + }, + "comment": "new MongoDB.DBRef(\"types\", \"5040dc5d40b67c681d000001\", \"types\")", + "btype": 3 + }, + { + "_id": "5543129258b9383aab07d10a", + "x": { + "scope": {}, + "code": "function () { return 'test'; }" + }, + "comment": "new MongoDB.Code(\"function () { return ' test'; }\")", + "btype": 13 + }, + { + "_id": "5543129258b9383aab07d10b", + "x": "def15", + "comment": "new MongoDB.Symbol(\"def15\")", + "btype": 14 + }, + { + "_id": "5543129258b9383aab07d10c", + "x": { + "scope": { + "a": 4 + }, + "code": "function () { return a; }" + }, + "comment": " new MongoDB.Code(\"function () { return a; }\", {\"a\": 4})", + "btype": 15 + }, + { + "_id": "5543129258b9383aab07d10d", + "x": 123456, + "comment": "123456", + "btype": 16 + }, + { + "_id": "5543129258b9383aab07d10e", + "x": "8589934593", + "comment": "new MongoDB.Timestamp(1, 2)", + "btype": 17 + }, + { + "_id": "5543129258b9383aab07d10f", + "x": 1286608618, + "comment": "new MongoDB.Long(\"9876543210\")", + "btype": 18 + }, + { + "_id": "5543129258b9383aab07d110", + "x": { + "_bsontype": "MinKey" + }, + "comment": "new MongoDB.MinKey()", + "btype": 255 + }, + { + "_id": "5543129258b9383aab07d111", + "x": { + "_bsontype": "MaxKey" + }, + "comment": "new MongoDB.MaxKey()", + "btype": 127 + }, + { + "_id": "5543129258b9383aab07d112", + "x": null, + "comment": "undefined", + "btype": 10 + }, + { + "_id": "5543129258b9383aab07d113", + "x": null, + "comment": "Number.NaN", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d114", + "x": null, + "comment": "Infinity", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d115", + "x": null, + "comment": "Number.POSITIVE_INFINITY", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d116", + "x": null, + "comment": "Number.NEGATIVE_INFINITY", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d117", + "x": 5e-324, + "comment": "MIN_VALUE", + "btype": 1 + }, + { + "_id": "5543129258b9383aab07d118", + "x": 
1.7976931348623157e+308, + "comment": "MAX_VALUE", + "btype": 1 + } +] diff --git a/index.js b/index.js index c820fe5..8af3825 100644 --- a/index.js +++ b/index.js @@ -1,23 +1 @@ -var schema_sync = require('./lib/mongodb-schema.js'); - -// async wrapper for mongodb-schema -var schema = function(documents, options, callback) { - - if (typeof options === 'function') { - callback = options; - options = {}; - } - - // only execute on next event loop iteration - process.nextTick(function() { - try { - var res = schema_sync(documents, options); - callback(null, res); - } catch ( e ) { - callback(e); - } - }); -}; - -module.exports.schema = schema; -module.exports.schema_sync = schema_sync; +var schema = module.exports = require('./lib/schema.js'); diff --git a/lib/definitions.js b/lib/definitions.js new file mode 100644 index 0000000..f5bb0f8 --- /dev/null +++ b/lib/definitions.js @@ -0,0 +1,12 @@ +module.exports = { + ESCAPE : '#', + SCHEMA : 'schema', + ARRAY : 'array', + ROOT : 'root', + VERSION : 'version', + TYPE : 'type', + COUNT : 'count', + PROB : 'prob', + UNIQUE : 'unique', + DATA : 'data' +} diff --git a/lib/mongodb-schema.js b/lib/mongodb-schema.js deleted file mode 100644 index 26a591c..0000000 --- a/lib/mongodb-schema.js +++ /dev/null @@ -1,518 +0,0 @@ -/** - * calculates schema of a collection by sampling some of the documents - * - * @param {Array} documents - * @param {Object} options currently supports these options: - * flat: true/false flatten the schema to dot-notation top-level names - * data: true/false run data sampling and return information about data - * filter: {...} only return fields/subfields that match the filter - * - * @returns {Object} the schema document with counts ($count), types ($type), - * an array flag ($array) and probability of occurrence - * given the parent field ($prob). 
- */ -function schema(documents, options) { - var SCHEMA_VERSION = "0.6.0"; - - /** - * right-aligned string split - * - * @param {String} str string to split - * @param {String} sep character to use for split, or null for any whitespace - * @param {Number} maxsplit maximum number of splits (from the end of the string) - * - * @returns {Array} an array with (if provided, at most maxsplit) elements - * - * @example - * // returns ["foo.bar", "baz"] - * _rsplit( "foo.bar.baz", ".", 1 ) - */ - function _rsplit(str, sep, maxsplit) { - var split = str.split(sep || /\s+/); - return maxsplit ? [ split.slice(0, -maxsplit).join(sep) ].concat(split.slice(-maxsplit)) : split; - } - - /** - * flattens an object and results in an object with only top-level properties with dot-notation - * - * @param {Object} obj object to flatten - * - * @return {Number} maxsplit maximum number of splits (from the end of the string) - * - * @example - * // returns {"a.b" 1, "a.c": false} - * _flatten( {a: {b: 1, c: false}} ) - */ - var _flatten = function(obj) { - function recursive(obj) { - var result = {}; - - for (var o in obj) { - if (!obj.hasOwnProperty(o)) continue; - if (((typeof obj[o]) === 'object') && ([$t, $d].indexOf(o) === -1)) { - var flatObject = recursive(obj[o]); - for (var x in flatObject) { - if (!flatObject.hasOwnProperty(x)) continue; - - result[o + '.' 
+ x] = flatObject[x]; - } - } else { - result[o] = obj[o]; - } - } - return result; - } - - // first flatten completely - var flatobj = recursive(obj); - - // now fold back in $-prefixed leaves - var finalobj = {}; - - for (var f in flatobj) { - // only own properties - if (!flatobj.hasOwnProperty(f)) continue; - - if (f.indexOf('.') !== -1) { - var split = _rsplit(f, '.', 1); - if (!(split[0] in finalobj)) { - finalobj[split[0]] = {}; - } - finalobj[split[0]][split[1]] = flatobj[f]; - } else { - finalobj[f] = flatobj[f]; - } - } - - return finalobj; - }; - - - /** - * recursively infers a schema of an object, keeping track of counts and types of nested objects - * - * @mixin {Object} schema resulting schema, initially {} - * @param {Object} obj object to infer schema - * - */ - function _infer(schema, obj) { - schema[$c] = ($c in schema) ? schema[$c] + 1 : 1; - - if (!($t in schema)) { - schema[$t] = {}; - } - - // special case: ObjectId, it's an object but we don't want to reach into it - if (typeof ObjectId !== 'undefined' && obj instanceof ObjectId) { - type = 'objectid'; - schema[$t][type] = (type in schema[$t]) ? schema[$t][type] + 1 : 1; - return schema; - } - - // special case: Date (ISODate is also a Date) - if (obj instanceof Date) { - type = 'date'; - schema[$t][type] = (type in schema[$t]) ? schema[$t][type] + 1 : 1; - return schema; - } - - // special case: nulls get their own type - if (obj === null) { - type = 'null'; - schema[$t][type] = (type in schema[$t]) ? schema[$t][type] + 1 : 1; - return schema; - } - - var type = typeof obj; - schema[$t][type] = (type in schema[$t]) ? 
schema[$t][type] + 1 : 1; - - if (obj && typeof obj == 'object') { - - Object.keys(obj).forEach(function(key) { - var val = obj[key]; - if (!(key in schema)) { - schema[key] = {}; - } - - if (val instanceof Array) { - // special case: lists collapse here - val.forEach(function (el) { - // create n fake documents with single value - var doc = {}; - doc[key] = el; - _infer(schema, doc); - }); - // subtract n from total count - schema[$c] -= val.length; - schema[key][$a] = true; - // no need to infer data, has happened during collapsing already - return; - } else { - // objects need to be handled recursively - _infer(schema[key], val) - } - - // handle data inference - if (options.data && (Object.keys(schema[key][$t]).length === 1)) { - if (!($d in schema[key])) { - schema[key][$d] = {}; - } - var d = schema[key][$d]; - switch (typeof val) { - // numbers, calculate min and max - case 'number': - if (!('min' in d)) d['min'] = Infinity; - if (!('max' in d)) d['max'] = -Infinity; - d['min'] = (val < d['min']) ? val : d['min']; - d['max'] = (val > d['max']) ? val : d['max']; - break; - // strings, collect histogram - case 'string': - if (val in d) { - d[val]++; - } else { - if (Object.keys(d).length < options.data.maxCardinality) { - d[val] = 1; - } else { - d[$o] = $o in d ? d[$o] + 1 : 1; - } - } - break; - case 'object': - // dates, calculate min and max date - if (val instanceof Date) { - if (!('min' in d)) d['min'] = new Date(100000000*86400000); - if (!('max' in d)) d['max'] = new Date(-100000000*86400000); - d['min'] = (val.getTime() < d['min'].getTime()) ? val : d['min']; - d['max'] = (val.getTime() > d['max'].getTime()) ? 
val : d['max']; - } - break; - } - } - }); - - } - return schema; - } - - /** - * clean up the output of _infer, collapsing single types and calculating - * probabilities (stored in "$p" field) - * - * @param {Object} schema - * @param {Number} count keep track of count in recursive calls - * - * @returns {Object} cleaned up schema - */ - function _cleanup(schema, count) { - if (typeof schema !== 'object') { - return schema; - } - - if (schema[$t] !== undefined) { - var type_keys = Object.keys(schema[$t]); - if (type_keys.length === 1) { - schema[$t] = type_keys[0]; - } - } - - if (schema[$c] !== undefined) { - if (count) { - schema[$p] = schema[$c] / count; - } - count = schema[$c]; - } - - if (schema[$d] !== undefined) { - // remove data for inner nodes - if (!($t in schema)) { - delete schema[$d]; - } - // remove mixed data - if (typeof schema[$t] === 'object') { - delete schema[$d]; - } - - // remove boolean data - if (schema[$t] === 'boolean') { - delete schema[$d]; - } - - // remove null data - if (schema[$t] === 'null') { - delete schema[$d]; - } - - // remove unique strings - if (schema[$t] === 'string') { - // check for uniqueness - var values = Object.keys( schema[$d] ).map(function ( key ) { return schema[$d][key]; }); - var maxCount = Math.max.apply( null, values ); - if (maxCount === 1 && values.length > 1) { - schema[$t] = 'text'; - delete schema[$d]; - } else { - schema[$t] = 'category'; - } - } - } - - // recursive call for each property - Object.keys(schema).forEach(function (key) { - if (key === '__schema') return; - _cleanup(schema[key], count); - }); - - return schema; - } - - - function _uncleanup(schema) { - if (typeof schema !== 'object') { - return schema; - } - - // nest single type under {$type: ...} - if (schema[$t] !== undefined) { - if (typeof schema[$t] !== 'object') { - var obj = {}; - obj[schema[$t]] = schema[$c]; - schema[$t] = obj; - } - } - - // combine text/category to string - if (schema[$t] !== undefined) { - var string_sum = 
(schema[$t].text || 0) + (schema[$t].category || 0); - if (string_sum > 0) { - if ('text' in schema[$t]) delete schema[$t].text; - if ('category' in schema[$t]) delete schema[$t].category; - schema[$t].string = string_sum; - } - } - - // remove $prop - if (schema[$p] !== undefined) { - delete schema[$p]; - } - - // recursive call for each property - Object.keys(schema).forEach(function (key) { - if (key === '__schema') return; - _uncleanup(schema[key]); - }); - - return schema; - } - - function _getObjectValues(obj) { - var values = Object.keys(obj).map(function (key) { - return obj[key]; - }); - return values; - } - - /** - * merges the attributes and values from obj into the defaults object - * and returns the result. - * - * @param {Obeject} defaults - * @param {Object} obj - * - * @returns {Object} merged object - */ - function _mergeDefaults(defaults, obj) { - for (var key in obj) { - if (!obj.hasOwnProperty(key)) { - continue; - } - defaults[key] = obj[key]; - } - return defaults; - } - - /** - * filter leaf nodes of the schema based on a schema filter document, - * only return the matching ones. 
- * - * @param {Object} schema - * @param {Number} filter_obj - * - * @returns {Object} filtered schema - */ - function _filter(schema, filter_obj) { - - if (typeof schema !== 'object') { - return false; - } - - // only filter leaves, skip internal nodes - var isLeaf = Object.keys(schema).every(function (key) { - // ignore special keys - if (metavar_names.indexOf(key) !== -1) { - return true; - } - return (typeof schema[key] !== 'object'); - }); - - if (isLeaf) { - for (fk in filter_obj) { - if (!(fk in schema) || (schema[fk] != filter_obj[fk])) { - return false; - } - } - return true; - } - - // recursive call for each property - var matchChildren = Object.keys(schema) - - .filter(function(key) { - return (metavar_names.indexOf(key) === -1); - }) - - .map(function (key) { - var res = _filter(schema[key], filter_obj); - if (!res) { - delete schema[key]; - } - return res; - }); - - if (!matchChildren.some( function (d) {return d;} )) { - return false; - } else { - return true; - } - } - - // define defaults - var options = options || {}; - options.raw = options.raw || false; - options.flat = options.flat === false ? 
false : true; - options.data = options.data || false; - options.filter = options.filter || null; - options.merge = options.merge || false; - options.metavars = _mergeDefaults({ - prefix: '#', - count: 'count', - type: 'type', - data: 'data', - array: 'array', - prob: 'prob', - other: 'other' - }, options.metavars); - - var metavar_names = _getObjectValues(options.metavars); - - // remap options.metavars - var $c = options.metavars.prefix + options.metavars.count, - $t = options.metavars.prefix + options.metavars.type, - $d = options.metavars.prefix + options.metavars.data, - $a = options.metavars.prefix + options.metavars.array, - $p = options.metavars.prefix + options.metavars.prob, - $o = options.metavars.prefix + options.metavars.other; - - // nested options.data - if (options.data) { - if (typeof options.data !== 'object') { - options.data = {}; - } - options.data.maxCardinality = options.data.maxCardinality || 100; - } - - // infer schema of each document - if (options.raw) { - var schema = options.merge.raw_schema || {}; - } else { - var schema = options.merge ? 
_uncleanup(options.merge) : {}; - } - - // add schema information - if (schema['__schema'] !== undefined) { - // stop if incompatible versions - var sver = schema['__schema'].version.split('.'); - var myver = SCHEMA_VERSION.split('.'); - if ((sver[0] != myver[0]) || (sver[0] === 0 && (sver[1] != myver[1]))) { - throw Error('cannot merge schema, version incompatible'); - } - } - - documents.forEach(function (doc) { - schema = _infer(schema, doc); - }); - - // clean up schema if not in raw mode - if (!options.raw) { - schema = _cleanup(schema); - // always delete outermost #type - delete schema[$t]; - } - - // return deep or flat version - if (options.flat) { - schema = _flatten(schema); - } - - // filter schema - if (options.filter !== null) { - _filter(schema, options.filter); - } - - // if merge option set, replace with `true` to avoid circular reference - if (options.merge) { - options.merge = true; - } - - // add schema version and options - schema['__schema'] = { - version: SCHEMA_VERSION, - options: options - } - - - if (options.raw) { - // piggyback cleanup function on raw output - return { - raw_schema: schema, - cleanup: function() { - return _cleanup(schema); - } - } - } - - return schema; -} - -/** - * extend the DBCollection object to provide the .schema() method - * - * @param {Object} options supports two options: {samples: 123, flat: true} - * - * @returns {Object} the schema document with counts ($c), types ($t), - * an array flag ($a) and probability of occurrence - * given the parent field ($p). 
- */ -if (typeof DBCollection !== 'undefined') { - DBCollection.prototype.schema = function(options) { - - // default options - var options = options || {}; - options.samples = options.samples || 100; - - // limit of 0 means all documents - if (options.samples === 'all') { - options.samples = 0; - } - - // get documents - var cursor = this.find({}, null, options.samples /* limit */, 0 /* skip*/, 0 /* batchSize */); - - return schema(cursor, options); - } -} - -// export for node.js if module is defined -if (typeof module !== 'undefined') { - module.exports = schema; -} \ No newline at end of file diff --git a/lib/schema.js b/lib/schema.js new file mode 100644 index 0000000..314b2bb --- /dev/null +++ b/lib/schema.js @@ -0,0 +1,91 @@ +var pkg = require('../package.json'), + find = require('lodash.find'), + defs = require('./definitions'); + + + +/** + * return the bson type of `value` + * @param {any} value value to get the type for + * @return {number} bson type as decimal number + */ +function _getType(value) { + +} + + + +/** + * analyse property and integrate it into the schema + * @param {array} documents array of sample documents to integrate into schema + * @return {object} resulting schema + */ +function _infer(schema, name, value) { + // create schema member if not present yet + + if (!(name in schema)) { + schema[name] = {}; + schema[name][defs.ESCAPE + defs.SCHEMA] = []; + } + + // get type of `value` + var type = _getType(value); + + // find schema array element for correct type or create one + // @review should this be an object rather than array? at least while building it? 
+ var type_obj = find(schema[name], function (el) { + return el[defs.TYPE] === + }); + + // increase counts, add data + + +} + + + +/** + * main schema function + * @param {array} documents array of sample documents to integrate into schema + * @return {object} resulting schema + */ +module.exports = function(documents) { + + var schema = {}; + + // add root tag and version + var root = defs.ESCAPE + defs.ROOT; + schema[root] = {}; + schema[root][defs.VERSION] = pkg.version; + schema[root][defs.COUNT] = 0; + + // ensure `documents` is array or undefined + if (documents === undefined) { + documents = []; + } + if (!(documents instanceof Array)) { + throw new TypeError('`documents` must be an array.'); + } + + // walk all documents + documents.forEach(function (doc) { + + // increase global counter + schema[root][defs.COUNT] += 1; + + for (var name in doc) { + if (!doc.hasOwnProperty(name)) continue; + + // process this property + _infer(schema, name, doc[name]); + + + + } + + }); + + console.log(JSON.stringify(schema, null, 2)); + return schema; +} + diff --git a/package.json b/package.json index 75385d9..16b37d9 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer probabilistic schema of javascript objects or a MongoDB collection.", - "version": "0.6.0", + "version": "0.7.0", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", @@ -21,8 +21,14 @@ "schema" ], "dependencies": { + "lodash.find": "^3.2.0", + "mongodb": "^2.0.28" }, "devDependencies": { "mocha": "^2.0.1" + }, + "directories": { + "doc": "docs", + "test": "test" } } diff --git a/scripts/read_types.js b/scripts/read_types.js new file mode 100644 index 0000000..73dca0b --- /dev/null +++ b/scripts/read_types.js @@ -0,0 +1,22 @@ +// script to read documents from the test.types collection created by `write_types.js` + +var MongoClient = require('mongodb').MongoClient, + assert = 
require('assert'); + +var url = 'mongodb://localhost:27017/test'; + +MongoClient.connect(url, function(err, db) { + assert.equal(null, err); + + // Get the documents collection + var collection = db.collection('types'); + + // find all documents and print them out + collection.find({}).toArray(function(err, docs) { + assert.equal(err, null); + + console.log(JSON.stringify(docs, null, 2)); + db.close(); + + }); +}); diff --git a/scripts/write_types.js b/scripts/write_types.js new file mode 100644 index 0000000..783c5e0 --- /dev/null +++ b/scripts/write_types.js @@ -0,0 +1,166 @@ +// adapted from: https://realprogrammer.wordpress.com/2013/02/10/mongodb-and-node-js-part-1-list-of-documents-for-all-types/ + +// writes documents of each bson type to a MongoDB instance running at localhost:27027 in the test.types collection + +var MongoDB = require('mongodb'); + +/* + Type codes + ========== + 1 "\x01" e_name double Floating point + 2 "\x02" e_name string UTF-8 string + 3 "\x03" e_name document Embedded document + 4 "\x04" e_name document Array + 5 "\x05" e_name binary Binary data + + 7 "\x07" e_name (byte*12) ObjectId + 8 "\x08" e_name "\x00" Boolean "false" + 8 "\x08" e_name "\x01" Boolean "true" + 9 "\x09" e_name int64 UTC datetime + 10 "\x0A" e_name Null value + 11 "\x0B" e_name cstring cstring Regular expression + + 13 "\x0D" e_name string JavaScript code + + 15 "\x0F" e_name code_w_s JavaScript code w/ scope + 16 "\x10" e_name int32 32-bit Integer + 17 "\x11" e_name int64 Timestamp + 18 "\x12" e_name int64 64-bit integer + 255 "\xFF" e_name Min key + 127 "\x7F" e_name Max key + + Deprecated type codes + ===================== + 6 "\x06" e_name Undefined — Deprecated + 12 "\x0C" e_name string (byte*12) DBPointer — Deprecated + 14 "\x0E" e_name string Symbol — Deprecated + + */ + +var typeDocuments; + +typeDocuments = [ + {"x": new MongoDB.Double(123.123), + "comment": "new MongoDB.Double(123.123)", + "btype": 1}, + {"x": 456.456, + "comment": "456.456", + "btype": 
1}, + {"x": "abc", + "comment": "abc", + "btype": 2}, + {"x": {"z": 5}, + "comment": "{\"z\": 5}", + "btype": 3}, + // this is not type:4 + {"x": [9, 8, 7], + "comment": "[9, 8, 7]", + "btype": 16}, + {"x": [ + {"y": 4}, + {"z": 5} + ], "comment": "[{\"y\": 4}, {\"z\": 5}]", + "btype": 3}, + {"x": new MongoDB.Binary("binary"), + "comment": "new MongoDB.Binary(\"binary\")", + "btype": 5}, + // t:6 deprecated (was 'undefined') - not implemented + {"x": new MongoDB.ObjectID("5040dc5d40b67c681d000001"), + "comment": "new MongoDB.ObjectID(\"5040dc5d40b67c681d000001\")", + "btype": 7}, + {"x": false, + "comment": "false", + "btype": 8}, + {"x": true, + "comment": "true", + "btype": 8}, + {"x": new Date("2012-08-31 12:13:14:156 UTC"), + "comment": "new Date(\"2012-08-31 12:13:14:156 UTC\")", + "btype": 9}, + {"x": null, + "comment": "null", + "btype": 10}, + {"x": new RegExp("abc"), + "comment": "new RegExp(\"abc\")", + "btype": 11}, + {"x": new RegExp("abc", "i"), + "comment": "new RegExp(\"abc\", \"i\")", + "btype": 11}, + // t:12 DBRef deprecated - still implemented + // this is not type:12 + {"x": new MongoDB.DBRef("types", "040dc5d40b67c681d000001", "types"), + "comment": "new MongoDB.DBRef(\"types\", \"5040dc5d40b67c681d000001\", \"types\")", + "btype": 3}, + {"x": new MongoDB.Code("function () { return 'test'; }"), + "comment": "new MongoDB.Code(\"function () { return ' test'; }\")", + "btype": 13}, + // t:14 Symbol deprecated - still implemented + {"x": new MongoDB.Symbol("def15"), + "comment": "new MongoDB.Symbol(\"def15\")", + "btype": 14}, + {"x": new MongoDB.Code("function () { return a; }", {"a": 4}), + "comment": " new MongoDB.Code(\"function () { return a; }\", {\"a\": 4})", + "btype": 15}, + {"x": 123456, + "comment": "123456", + "btype": 16}, + {"x": new MongoDB.Timestamp(1, 2), + "comment": "new MongoDB.Timestamp(1, 2)", + "btype": 17}, + {"x": new MongoDB.Long("9876543210"), + "comment": "new MongoDB.Long(\"9876543210\")", + "btype": 18}, + {"x": new 
MongoDB.MinKey(), + "comment": "new MongoDB.MinKey()", + "btype": 255}, + {"x": new MongoDB.MaxKey(), + "comment": "new MongoDB.MaxKey()", + "btype": 127}, + // ADDITIONAL POSSIBLE VALUES + // 'undefined' will be converted to 'null'; type will be 'null' (aka 10) also + {"x": undefined, + "comment": "undefined", + "btype": 10}, + {"x": Number.NaN, + "comment": "Number.NaN", + "btype": 1}, + {"x": Infinity, + "comment": "Infinity", + "btype": 1}, + {"x": Number.POSITIVE_INFINITY, + "comment": "Number.POSITIVE_INFINITY", + "btype": 1}, + {"x": Number.NEGATIVE_INFINITY, + "comment": "Number.NEGATIVE_INFINITY", + "btype": 1}, + {"x": Number.MIN_VALUE, + "comment": "MIN_VALUE", + "btype": 1}, + {"x": Number.MAX_VALUE, + "comment": "MAX_VALUE", + "btype": 1} +]; + +var Db = MongoDB.Db, + Server = MongoDB.Server; +var db = new Db('test', new Server("127.0.0.1", 27017, + {auto_reconnect: false, poolSize: 4}), {native_parser: false, safe: false}); + +db.open(function (err, db) { + "use strict"; + db.dropCollection("types", function (err, result) { + if (err) { + console.log(err.toString()); + } + console.log("dropped collection"); + db.collection("types", function (err, collection) { + collection.insert(typeDocuments, {safe: true}, function (err, res) { + if (err) { + console.log(err.toString()); + } + console.log("inserted all types into test.types"); + db.close(); + }); + }); + }); +}); diff --git a/test/test.js b/test/test.js index 4b323ab..80752dd 100644 --- a/test/test.js +++ b/test/test.js @@ -1,349 +1,34 @@ -var schema = require('../').schema, - schema_sync = require('../').schema_sync; - -var assert = require('assert'); - +var schema = require('../lib/schema'), + defs = require('../lib/definitions'), + assert = require('assert'), + allTypes = require('../fixtures/all_types'), + pkg = require('../package.json'); describe('mongodb-schema', function() { - it('should import correctly', function () { - assert.ok(schema); - assert.ok(schema_sync); - }); - describe 
('schema_sync', function() { - - it('should work with a simple document', function () { - var result = schema_sync([{a: 1, b: 1}]); - var expected = { - "#count":1, - "a": { - "#count": 1, - "#type": "number", - "#prob": 1 - }, - "b": { - "#count": 1, - "#type": "number", - "#prob": 1 - } - }; - assert.deepEqual(result['#count'], 1); - assert.deepEqual(result.a, expected.a); - assert.deepEqual(result.b, expected.b); - }); - - it('should work with an empty list of documents', function () { - var result = schema_sync([]); - assert.equal(Object.keys(result).length, 1); - assert.equal(Object.keys(result)[0], '__schema'); - }); - - it('should correctly parse nested documents', function () { - var result = schema_sync([ - {a: {b: 1}}, - {a: {b: {c: 2}}}, - ], {flat: false}); - - var expected = { - "#count": 2, - "a": { - "#count": 2, - "#type": "object", - "b": { - "#count": 2, - "#type": { - "number": 1, - "object": 1 - }, - "c": { - "#count": 1, - "#type": "number", - "#prob": 0.5 - }, - "#prob": 1 - }, - "#prob": 1 - } - }; - delete result['__schema']; - - assert.deepEqual(result, expected); - }); - - it('should collapse arrays, set the #array flag and increase #count numbers', function () { - var result = schema_sync([ - {a: [1, 2, 3, 4]}, - {a: [5, 6]} - ]); - - var expected = { - "#count": 2, - "a": { - "#count": 6, - "#type": "number", - "#array": true, - "#prob": 3 - } - }; - - assert.deepEqual(result.a, expected.a); - }); - - it('should not break with empty arrays for data inference', function () { - var result = schema_sync([ - {a: []} - ], {data: true}); - }); - - it('should infer data for collapsed arrays', function () { - var result = schema_sync([ - {a: [1, 2, 3, 4]}, - {a: [5, 6]} - ], {data: true}); - - var expected = { - min: 1, - max: 6 - }; - - assert.deepEqual(result.a['#data'], expected); - }); - - it('should accept an existing schema and merge with new data', function () { - var result = schema_sync([ - {a: 1} - ], {data: true}); - - result = 
schema_sync([ - {a: 2} - ], {data: true, merge: result}); - - assert.equal(result['#count'], 2); - assert.equal(result.a['#count'], 2); - assert.deepEqual(result.a['#data'], {"min": 1, "max": 2}); - - }); - - it('should merge existing text/category data correctly with new strings', function() { - var result = schema_sync([ - {a: "foo"}, {a: "foo"}, {a: "bar"}, {a: "bar"} - ], {data: true}); - - result = schema_sync([ - {a: "foo"}, {a: "bar"} - ], {data: true, merge: result}); - - assert.deepEqual(result.a['#type'], "category"); - - var result = schema_sync([ - {a: "1"}, {a: "2"}, {a: "3"}, {a: "4"} - ], {data: true}); - - result = schema_sync([ - {a: "5"}, {a: "6"} - ], {data: true, merge: result}); - - assert.deepEqual(result.a['#type'], "text"); - - }); - - it('should flatten the schema with the {flat: true} option', function () { - var result = schema_sync([ - {a: {b: 1}}, - {a: 2} - ], {flat: true}); - - assert(result['a'] !== undefined); - assert(result['a.b'] !== undefined); - assert(result['a']['b'] === undefined); - }); - - var docs = [ - { - a: "foo", - b: [1, 2, 3], - c: true, - d: new Date(2014, 1, 1), - e: null, - f: "foo" - }, - { - a: "bar", - b: 2, - c: false, - d: new Date(2013, 1, 1), - e: null, - f: "foo" - }, - { - a: "baz", - b: 8, - c: false, - d: new Date(2012, 1, 1), - e: null, - f: "bar" - } - ]; - - it('should infer types correctly (not testing ObjectId currently)', function () { - var result = schema_sync(docs); - - assert.equal(result.a['#type'], 'string'); - assert.equal(result.b['#type'], 'number'); - assert.equal(result.c['#type'], 'boolean'); - assert.equal(result.d['#type'], 'date'); - assert.equal(result.e['#type'], 'null'); - assert.equal(result.f['#type'], 'string'); - }); - - it('should distinguish `text` and `category` types when using {data: true}', function () { - var result = schema_sync(docs, {data: true}); - - assert.equal(result.a['#type'], 'text'); - assert.equal(result.b['#type'], 'number'); - 
assert.equal(result.c['#type'], 'boolean'); - assert.equal(result.d['#type'], 'date'); - assert.equal(result.e['#type'], 'null'); - assert.equal(result.f['#type'], 'category'); - }); - - it('should calculate bounds for date/number and histograms for category', function () { - var result = schema_sync(docs, {data: true}); - - assert.ok( !('#data' in result.a) ); - assert.ok( !('#data' in result.c) ); - assert.ok( !('#data' in result.e) ); - - assert.deepEqual(result.b['#data'], { - min: 1, - max: 8 - }); - assert.deepEqual(result.d['#data'], { - min: new Date(2012, 1, 1), - max: new Date(2014, 1, 1) - }); - assert.deepEqual(result.f['#data'], { - foo: 2, - bar: 1 - }); - }); - - it('should track #data for arrays', function () { - var result = schema_sync([ - {a: ["foo", "foo", "bar"]}, - {a: ["bar", "baz", "foo"]} - ], {data: true}); - - var expected = { - "foo": 3, - "bar": 2, - "baz": 1 - } - - assert.deepEqual(result.a['#data'], expected); - }); - - it('should not infer data for mixed numbers and dates', function () { - var result = schema_sync([ - {a: 1}, - {a: 5}, - {a: new Date(2014, 08, 20)} - ], {data: true}); - - assert.ok( !('#data' in result.a) ); - }); - - it('should handle mixed types for arrays', function () { - var result = schema_sync([ - {a: ["foo", 1, "bar"]}, - {a: ["bar", null, new Date(2014, 1, 1)]} - ], {data: true}); - - var expected = { - "string": 3, - "number": 1, - "date": 1, - "null": 1 - }; - - assert.deepEqual(result.a['#type'], expected); - }); - - it('should count sub-fields correctly', function () { - var result = schema_sync([ - {a: 1, b: 1}, - {a: 1, c: 1}, - {a: 1, b: 1} - ]); - - assert.equal(result['#count'], 3); - assert.equal(result.a['#count'], 3); - assert.equal(result.b['#count'], 2); - assert.equal(result.c['#count'], 1); - }); - - it('should return a type histogram for mixed types', function () { - var result = schema_sync([ - {a: 1}, {a: "1"}, {a: null}, {a: "1"}, {a: {b: 1}}, {a: 1}, {a: "1"} - ]); - - var expected = { 
- "number": 2, - "string": 3, - "null": 1, - "object": 1 - }; - assert.deepEqual(result.a['#type'], expected); - }); - - it('should let you change the meta-variable names', function () { - var result = schema_sync([ - {a: "a"}, {a: "a"}, {a: "b"}, {a: ["c"]}, {a: "d"}, {a: "e"}, {a: "f"} - ], { - data: { maxCardinality: 3 }, - metavars: { prefix: '#', other: 'rest', count: 'num', prob: 'p' } - }); - - assert(result.a['#num'] !== undefined); - assert(result.a['#type'] !== undefined); - assert(result.a['#p'] !== undefined); - assert(result.a['#array'] !== undefined); - assert(result.a['#data']['#rest'] !== undefined); - }); - - - it('should work with raw mode and output the same final result', function () { - var result = schema_sync([{a:1}], {data: true, raw: true}); - var result = schema_sync([{a:2}], {data: true, raw: true, merge: result}).cleanup(); - var expected = schema_sync([{a:1}, {a:2}], {data: true}); - - assert.deepEqual(result.a, expected.a); - }); - - it('should collect categories in #other when maxCardinality is reached', function () { - var result = schema_sync([ - {a: "a"}, {a: "a"}, {a: "b"}, {a: "c"}, {a: "d"}, {a: "e"}, {a: "f"} - ], {data: {maxCardinality: 3}}); - - assert.ok('#other' in result.a['#data']); - }); - - }); - - describe ('schema', function() { - it('should work asynchronously and not return an error', function (done) { - schema([{a:1, b:1}, {a:2, b:2}], {}, function (err, result) { - if (err) throw err; - assert.equal(err, null); - assert.equal(result['#count'], 2); - done(); - }); - }); - - it('should return an error if `documents` is not an array', function (done) { - schema(1, {}, function (err, result) { - assert.ok(err); - done(); - }); - }); - }); + var root = defs.ESCAPE + defs.ROOT; + + it('should import correctly', function () { + assert.ok(schema); + }); + + it('should have a root object with the correct version', function () { + var result = schema(); + assert.ok(result[root] !== undefined); + 
assert.equal(result[root][defs.VERSION], pkg.version); + }); + + it('should have 0 count without any documents', function () { + var result = schema([]); + assert.equal(result[root][defs.COUNT], 0); + }); + + it('should throw an error if documents is not an array or undefined', function () { + assert.throws(function () { schema("i'm not an array") }, TypeError); + assert.doesNotThrow(function () { schema() }); + }); + + it('should parse all types without error', function () { + assert.ok(schema(allTypes)); + }); }); + From e01af8394fc44731e4229ca7798f1ddfae9515c3 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Fri, 1 May 2015 18:38:44 +1000 Subject: [PATCH 06/79] can't use JSON.stringify and therefore fixtures --- lib/schema.js | 6 +++--- scripts/read_types.js | 9 +++++---- test/test.js | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index 314b2bb..b1beccc 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -10,7 +10,8 @@ var pkg = require('../package.json'), * @return {number} bson type as decimal number */ function _getType(value) { - + // @todo return correct type + return 0; } @@ -34,7 +35,7 @@ function _infer(schema, name, value) { // find schema array element for correct type or create one // @review should this be an object rather than array? at least while building it? 
var type_obj = find(schema[name], function (el) { - return el[defs.TYPE] === + return el[defs.TYPE] === type; }); // increase counts, add data @@ -85,7 +86,6 @@ module.exports = function(documents) { }); - console.log(JSON.stringify(schema, null, 2)); return schema; } diff --git a/scripts/read_types.js b/scripts/read_types.js index 73dca0b..8ed1377 100644 --- a/scripts/read_types.js +++ b/scripts/read_types.js @@ -6,17 +6,18 @@ var MongoClient = require('mongodb').MongoClient, var url = 'mongodb://localhost:27017/test'; MongoClient.connect(url, function(err, db) { - assert.equal(null, err); + assert.ifError(err); // Get the documents collection var collection = db.collection('types'); // find all documents and print them out collection.find({}).toArray(function(err, docs) { - assert.equal(err, null); + assert.ifError(err); - console.log(JSON.stringify(docs, null, 2)); + // don't use JSON.stringify here, it loses a lot of information like _bsontype + console.log(docs); db.close(); - }); }); + diff --git a/test/test.js b/test/test.js index 80752dd..cc22f12 100644 --- a/test/test.js +++ b/test/test.js @@ -27,8 +27,8 @@ describe('mongodb-schema', function() { assert.doesNotThrow(function () { schema() }); }); - it('should parse all types without error', function () { - assert.ok(schema(allTypes)); + it('should parse documents of all types without error', function () { + assert.ok( schema(allTypes) ); }); }); From b1896ee2f198a8a789b765a5c71873daf57ebae7 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Sun, 3 May 2015 14:11:40 +1000 Subject: [PATCH 07/79] added type detection and rudimentary type_obj also some _bsontype shenanigans. 
--- lib/schema.js | 90 +++++++++++++++++++++++++++++++++++++++++++++------ package.json | 3 ++ test/test.js | 6 ++++ 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index b1beccc..94b3d39 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,7 +1,22 @@ var pkg = require('../package.json'), find = require('lodash.find'), - defs = require('./definitions'); - + defs = require('./definitions'), + BSON = require('bson'), + isInteger = require('is-integer'), + debug = require('debug')('schema:main'); + + +// these types have a _bsontype property +var bsontypeMap = { + 'ObjectID': 7, + 'Long': 18, + 'MinKey': 255, + 'MaxKey': 127, + 'Code': 15, // no differentiation to 13 + 'Binary': 5, + 'DBRef': 12, + 'Timestamp': 17 +} /** @@ -10,8 +25,41 @@ var pkg = require('../package.json'), * @return {number} bson type as decimal number */ function _getType(value) { - // @todo return correct type - return 0; + if (typeof value === 'number') { + // could be int (16) or float (1) + return isInteger(value) ? 16 : 1; + } + + if (typeof value === 'string') { + // could be symbol (14, deprecated) or string (2), assume string + return 2; + } + + if (typeof value === 'boolean') { + return 8; + } + + if (value === null) { + return 10; + } + + if (typeof value === 'object') { + // could be embedded document (3), array (4), binary (5), objectid (7), + // datetime (9), regular expression (11), dbref (12), code (13), + // code with scope (15), timestamp (17), minkey (255), maxkey (127). 
+ + if (value.hasOwnProperty('_bsontype')) { + // dbref, binary, code, code with scope, timestamp, maxkey, minkey + return bsontypeMap[value._bsontype]; + } + + if (value instanceof BSON.ObjectId) { + return 7; + } + + return 3; + } + } @@ -22,25 +70,47 @@ function _getType(value) { * @return {object} resulting schema */ function _infer(schema, name, value) { - // create schema member if not present yet + // create schema member if not present yet if (!(name in schema)) { schema[name] = {}; schema[name][defs.ESCAPE + defs.SCHEMA] = []; } + var tag = schema[name][defs.ESCAPE + defs.SCHEMA]; // get type of `value` - var type = _getType(value); + var bsontype = _getType(value); // find schema array element for correct type or create one - // @review should this be an object rather than array? at least while building it? - var type_obj = find(schema[name], function (el) { - return el[defs.TYPE] === type; + // @review should this be an object rather than array? at least while building the schema? 
+ var type_obj = find(tag, function (el) { + return el[defs.TYPE] === bsontype; }); - // increase counts, add data + if (!type_obj) { + // not found, create one + type_obj = {}; + type_obj[defs.TYPE] = bsontype; + type_obj[defs.COUNT] = 0; + type_obj[defs.UNIQUE] = true; + type_obj[defs.DATA] = {}; + + tag.push(type_obj); + } + + // increase counts, add data, check uniqueness + type_obj[defs.COUNT] += 1; + // @todo add data, verify still unique + // special handling for arrays (type 4) + // recursive call for nested documents (type 3) + if (bsontype === 3) { + for (var subname in value) { + if (!value.hasOwnProperty(subname)) continue; + _infer(schema[name], subname, value[subname]); + } + } } diff --git a/package.json b/package.json index 16b37d9..40c9525 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,9 @@ "schema" ], "dependencies": { + "bson": "^0.3.1", + "debug": "^2.1.3", + "is-integer": "^1.0.4", "lodash.find": "^3.2.0", "mongodb": "^2.0.28" }, diff --git a/test/test.js b/test/test.js index cc22f12..7a62159 100644 --- a/test/test.js +++ b/test/test.js @@ -2,6 +2,7 @@ var schema = require('../lib/schema'), defs = require('../lib/definitions'), assert = require('assert'), allTypes = require('../fixtures/all_types'), + BSON = require('bson'), pkg = require('../package.json'); describe('mongodb-schema', function() { @@ -30,5 +31,10 @@ describe('mongodb-schema', function() { it('should parse documents of all types without error', function () { assert.ok( schema(allTypes) ); }); + + it('should create the correct type objects inside #schema tag', function () { + var result = schema([ {a: "foo"}, {a: 1, b: {c: BSON.ObjectId() }} ]); + console.log(JSON.stringify(result, null, 2)); + }) }); From 2d602360a5803bc2fda050e615ae30106061886f Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Sun, 3 May 2015 14:12:04 +1000 Subject: [PATCH 08/79] disable raw mode. print x, btype, comment only. 
--- scripts/read_types.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/read_types.js b/scripts/read_types.js index 8ed1377..f10a830 100644 --- a/scripts/read_types.js +++ b/scripts/read_types.js @@ -1,22 +1,28 @@ // script to read documents from the test.types collection created by `write_types.js` +var BSON = require('bson').BSONPure.BSON; var MongoClient = require('mongodb').MongoClient, assert = require('assert'); var url = 'mongodb://localhost:27017/test'; + MongoClient.connect(url, function(err, db) { assert.ifError(err); // Get the documents collection - var collection = db.collection('types'); + var collection = db.collection('types', {raw: false}); // find all documents and print them out - collection.find({}).toArray(function(err, docs) { + collection.find({}).sort({btype:1}).toArray(function(err, docs) { assert.ifError(err); // don't use JSON.stringify here, it loses a lot of information like _bsontype - console.log(docs); + docs.forEach(function (doc) { + console.log(doc.x, doc.btype, doc.comment); + console.log() + // console.log(BSON.deserialize(doc)); + }); db.close(); }); }); From faf8cd5ecff898ccef862f9974ef95f087dcbeb2 Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Sun, 3 May 2015 14:18:13 +1000 Subject: [PATCH 09/79] copied spec to README doc for this branch. --- README.md | 821 ++++++++++++++++++++++++------------------------------ 1 file changed, 370 insertions(+), 451 deletions(-) diff --git a/README.md b/README.md index 7e3122e..51c5d69 100644 --- a/README.md +++ b/README.md @@ -3,469 +3,388 @@ mongodb-schema Infer probabilistic schema of javascript objects or a MongoDB collection. -This package is dual-purpose. It serves as a [node.js module](#usage-with-nodejs) and can also be used in the [MongoDB](#usage-with-mongodb) shell directly, where it extends the `DBCollection` shell object. - -_mongodb-schema_ is an early prototype. Use at your own risk. 
- -##### Upgrade Warning -Version 0.5.x has significant changes in the schema format compared to 0.4.x and may break backwards-compatibility. Upgrade with caution. +**Warning**: This is a complete rewrite of the current master branch (0.6.0) and will introduce significant differences. -
+## Specification -## Usage with Node.js +Author: Matt Kangas, Thomas Rueckstiess
+Last Revised: 2015-04-29
+Status: Draft
-### Installation -Install the script with: +### 0. Terminology -``` -npm install mongodb-schema -``` +Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON taxonomy ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. -### Usage +> ##### Example -Load the module and use call `schema( documents, options, callback )`, which will call `callback(err, res)` with an error or the result once it's done analysing the documents. +> An object with 2 members. The name of the first member is `foo` and the name of the second member is `bar`. Both member values are 1. -```js -var schema = require('mongodb-schema'); +> {"foo": 1, "bar": 1} -// define some documents -var documents = [ - {a: 1}, - {a: {b: "hello"}} -]; -// call with options and callback function -schema( documents, {flat: false}, function (err, res) { - if (err) return console.error( err ); - console.log( JSON.stringify( res, null, "\t" ) ); -}) -``` -This would output: -```json -{ - "#count": 2, - "a": { - "#count": 2, - "#type": { - "number": 1, - "object": 1 - }, - "b": { - "#count": 1, - "#type": "string", - "#prob": 0.5 - }, - "#prob": 1 - }, - "__schema": { - "version": "0.5.0", - "options": { - "flat": false, - "raw": false, - "data": false, - "filter": null, - "merge": false, - "metavars": { - "prefix": "#", - "count": "count", - "type": "type", - "data": "data", - "array": "array", - "prob": "prob", - "other": "other" - } - } - } -} -``` - -
- -## Usage with MongoDB - -### Installation - -There are two ways to load the script, one-time (if you just want to test how it works) and permanent (for frequent use). - -#### 1. Load the script directly (one-time usage) - -Call the `mongo` shell this command: - -``` -mongo /lib/mongodb-schema.js --shell -``` - -It will first load `mongodb-schema.js` and the open the shell as usual. You will have to add the script every time you open the shell. Replace the `` part with the actual path where the `mongodb-schema` folder is located. - -#### 2. Load the script via the `.mongorc.js` file (permanent usage) - -You can also add the following line to your `~/.mongorc.js` file to always load the file on shell startup (unless started with `--norc`): - -```js -load('/lib/mongodb-schema.js') -``` - -Replace the `` part with the actual path where the `mongodb-schema` folder is located. - - -### Usage - -##### Basic Usage - -The script extends the `DBCollection` object to have another new method: `.schema()`. On a collection called `foo`, run it with: - -```js -db.foo.schema() -``` - -This will use the first 100 (by default) documents from the collection and calculate a probabilistic schema based on these documents. - -##### Usage with options - -You can pass in an options object into the `.schema()` method. See [Options]() below. Example: - -```js -db.foo.schema( {samples: 20, flat: true} ) -``` - -This will use the first 20 documents to calculate the schema and return the schema as flat object (all fields are collapsed to the top with dot-notation). See the [Examples](#examples) section below for nested vs. flat schemata. - -
- -## Schema Format - -The schema format is in JSON, and the shape resembles the shape of a superposition of all inferred documents. Each level of the schema (from the root level down into each nested sub-document) has annotations with special meta-variables, which by default start with a `#`. Examples of such annotations are `#count`, `#type`, etc. By default, -the schema is then flattened at the end, to bring all nested keys to the root level (this option can be disabled with -`{flat: false}`). The schema also contains a special key `__schema`, under which the schema version and the options used to generate the schema are stored. - -### Example - -Here's a first example, created from a set of 3 documents: -```js - -schema([ - {"a": 1, "b": "foo", "c": {"d": null, "e": 4.3}}, - {"a": 2, "b": "bar"}, - {"a": 3, "b": "baz", "c": {"d": "boo", "e": 2.9}} -]) - -// output -{ - "#count": 3, - "a": { - "#count": 3, - "#type": "number", - "#prob": 1 - }, - "b": { - "#count": 3, - "#type": "string", - "#prob": 1 - }, - "c": { - "#count": 2, - "#prob": 0.6666666666666666 - }, - "c.d": { - "#count": 2, - "#type": { - "null": 1, - "string": 1 - }, - "#prob": 1 - }, - "c.e": { - "#count": 2, - "#type": "number", - "#prob": 1 - }, - "__schema": { - "version": "0.5.0", - "options": { - "raw": false, - "flat": true, - "data": false, - "filter": null, - "merge": false, - "metavars": { - "prefix": "#", - "count": "count", - "type": "type", - "data": "data", - "array": "array", - "prob": "prob", - "other": "other" - } - } - } -} -``` - -A lot going on here already. There is a top-level `#count`, that just counts all the parsed documents. Each of the sub-documents on any nested level get their own section in the schema, just as if all documents were superimposed on top of each other (think "transparent slides"). Each sub-document has a `#count` of its own, together with `#type` information and a probability `#prob`. These fields are explained below. 
- -### Sampling Size - -The MongoDB shell version of the script has an additional parameter `samples`, which by default is set to 100 and limits the number of samples to 100. You can change it to another value, or use the option `{samples: 'all'}` to look at all the documents (careful: this can be computationally expensive, depending on the number of documents). - -This option is not available when used as a stand-alone javascript or node module. - - -### Flat Format - -If you pass in the option `{flat: false}` as second parameter to `schema`, The flattening is skipped and the document is returned in its nested form. Here is the same schema as above, but with the `{flat: false}` option: - -```js -{ - "#count": 3, - "a": { - "#count": 3, - "#type": "number", - "#prob": 1 - }, - "b": { - "#count": 3, - "#type": "string", - "#prob": 1 - }, - "c": { - "#count": 2, - "d": { - "#count": 2, - "#type": { - "null": 1, - "string": 1 - }, - "#prob": 1 - }, - "e": { - "#count": 2, - "#type": "number", - "#prob": 1 - }, - "#prob": 0.6666666666666666 - }, - "__schema": { - "version": "0.5.0", - "options": { - "flat": false, - "raw": false, - "data": false, - "filter": null, - "merge": false, - "metavars": { - "prefix": "#", - "count": "count", - "type": "type", - "data": "data", - "array": "array", - "prob": "prob", - "other": "other" - } - } - } -} -``` - -### Data Inference - -You can enable data inference mode with the `{data: true}` option. The schema analyser will then gather statistics of your data for each field. The kind of information that is collected depends on the data type. - - -##### Numbers and Dates - -For numbers and dates, you will get some statistics under the `#data` field, with `min` and `max` value of all the documents seen. 
Example: -```js -schema([ - {"a": 2}, {"a": 8}, {"a": 1}, {"a": 7} -], {data: true}) - -// output -{ - "#count": 4, - "a": { - "#count": 4, - "#type": "number", - "#data": { - "min": 1, - "max": 8 - }, - "#prob": 1 - } -} -``` - -#### Strings - -When you enable data inference, the type of strings changes to either `text` or `category`. `text` is free-form string like a description. It is assumed that the descriptions are unique. `category` is chosen when duplicate values are encountered. In the case of `category`, the `#data` key of the field contains a histogram of values and their counts. - -The maximum cardinality is set to 100 by default. If there are more categories, an additional key `#other` is included. This is to limit the amount of memory needed to keep the histogram stats. The maximum cardinality can be configured with the `data.maxCardinality` value. Instead of assigning `true` to the data option, you can pass in a sub-document to set the maximum cardinality: - -Example: - -```js -schema([ - {a: "a"}, {a: "a"}, {a: "b"}, {a: "c"}, {a: "d"}, {a: "e"}, {a: "f"} -], { data: { maxCardinality: 3 }}); - -// output -{ - "#count": 7, - "a": { - "#count": 7, - "#type": "category", - "#data": { - "a": 2, - "b": 1, - "c": 1, - "#other": 3 - }, - "#prob": 1 - }, - "__schema": { - // ... - } -} -``` - - -#### Counts and Probabilities - -The schema keeps count of the number of documents and sub-documents on each level. This information is stored in the `#count` field. If we pass in a single empty document `{}`, the output is this: - -```json -{ - "#count": 1, -} -``` - -Passing in a document with a field `{a: 1}` returns this schema: - -```json -{ - "#count": 1, - "a": { - "#count": 1, - "#type": "number", - "#prob": 1 - } -} -``` - -The `"a"` sub-document receives its own `#count` field and only counts the number of occurences where the the `"a"` sub-document was present. 
Another example, for this list of documents: `[ {a: 1}, {b: 1}, {a: 0}, {a: 2}, {b: 5} ]`: - -```json -{ - "#count": 5, - "a": { - "#count": 3, - "#type": "number", - "#prob": 0.6 - }, - "b": { - "#count": 2, - "#type": "number", - "#prob": 0.4 - } -} -``` - -We can see a total of 5 documents (top-level `"#count"`) and `"a"` was present 3 times, `"b"` twice. -
- -Additionally, the schema contains a `#prob` value, indicating the relative probability for a sub-document given its parent document. - -#### Array Collapsing - -Arrays are not handled as a distinct data type. Instead, they are collapsed and interpreted as individual values for the given field. This is similar to how MongoDB treats arrays in context of querying: `db.coll.find({a: 1})` will match documents like `{a: [1, 4, 9]}`. - -Because each value of the array is treated as a separate instance of the sub-document, this affects the statistics like `#count` and `#prob`, and you can end up with a probability larger than 1, as it represents the average length of the array. - -To indentify a schema with a collapsed array field, the `#array` flag is set to `true` if at least one array was collapsed for a given field. - -Example: - -```js -schema([ - {a: [1, 2, 3, 4]}, - {a: [5, 6]} -]) - -// output -{ - "#count": 2, - "a": { - "#count": 6, - "#type": "number", - "#array": true, - "#prob": 3 - } -} -``` - - -#### Meta Variables - -By default, the meta variables used to present schema data are prefixed with a `#` symbol. The individual meta variables are: - -- `#count` -- `#prob` -- `#type` -- `#data` -- `#array` - -The reason for the `#`-prefix is to distinguish any meta fields from actual data fields. Should this cause a conflict with your actual data, choose a different prefix that does not collide with data keys. This can be achieved with the `metavars` option. Here is an example: - -```js - -schema([ - {a: 1}, - {a: [-2, -3]} -], { - data: true, - metavars: { - prefix: "__", - count: "num", - data: "statistics" - } -}) - -// output -{ - "__num": 2, - "a": { - "__num": 3, - "__type": "number", - "__statistics": { - "min": -3, - "max": 1 - }, - "__array": true, - "__prob": 1.5 - } -} -``` - - -#### Merge Existing Schema - -Sometimes you want to merge an existing schema with some new data. 
In that case, you can pass in the existing schema, and it will be amended with the new values. Use the `merge` option to pass in an existing schema, like so: +### 1. Escape Character -```js -schema( documents, {merge: myExistingSchema} ) -``` - -This works fine when the `data` option is not set, but if you infer data as well, this is not going to work, because the cleanup step throws away histograms of non-categorical data. If you want to merge a schema and also infer data, the best way is to use the `raw` mode. This mode returns the schema before the cleanup step. You can pass the raw schema back into another call to merge. - -To clean the raw data up and convert it to a "final" version, just call the `.cleanup()` function on the raw schema object. +We shall define `#` (ASCII 0x23) as an _escape character_ to distinguish meta data members from members originating from sampled data. -Example: +Rationale: -```js -var raw_schema = schema( documents, {raw: true, data: true}); -raw_schema = schema( more_documents, {raw; true, merge: raw_schema}); -raw_schema = schema( even_more_documents, {raw; true, merge: raw_schema}); -var schema = raw_schema.cleanup(); -``` +- expressible in one byte of UTF-8 (ASCII) +- Non-numeric (not in `0`..`9`, ASCII range 0x30-0x39), because this conflicts with javascript objects/arrays) +- Not `$` (ASCII character 0x24), because it is not a valid prefix for member names in MongoDB +We shall then encode member names as follows: +- Member name begins with no escape character: +literal member name +- Member name begins with single escape character: +encoded metadata member +- Member name begins with double escape character: +literal member name which begins with single escape character + + +### 2. General Structure + +We define a _sample set_ as a number of MongoDB documents from a single collection. 
The documents may have been selected in random fashion, but this definition does not impose any restrictions on the method of acquiring the documents. The documents comprising the sample set are called _sample documents_. + +We define the _shape_ of a sample set as aggregated characteristics of all members of the documents in the sample set. These characteristics are further described below. + +We define a _schema_ as a JSON representation of the _shape_ of a sample set. + +The schema must be strict, valid [JSON](http://www.json.org/). MongoDB-specific types must be converted into strict JSON as per [MongoDB's extended JSON](http://docs.mongodb.org/manual/reference/mongodb-extended-json/) definition, "strict" variant. + +The schema follows the combined structure of all documents in the sample set. This means, that for every member in any sample document, a member with the same name exists in the schema at the same nesting depth. This rule applies to members at all nesting depths. The schema can thus be seen as a superposition of all sample documents. + +Within the schema, the value of any such member is an object. This is explicitly also true for leaf members in a sample document, i.e. values that are neither arrays (BSON type 4) nor nested documents (BSON type 3). Every such object contains an encoded meta-data member with the name `#schema` (note the escape character), in addition to potential nested children. This meta-data member with the name `#schema` is called a _tag_, and its value is an array that contains one element for each [BSON type](http://bsonspec.org/spec.html) encountered in the sample set for this particular member. 
+ + +> ##### Example + +> Sample set: + +> {a: "foo"} +> {a: {b: 10, c: true}} +> {c: null} + +> Schema (with `...` placeholders for the tag arrays) + +> { +> "a": { +> "#schema": [...], // tag for a +> "b": { +> "#schema": [...], // tag for a.b +> }, +> "counts": { +> "#schema": [...], // tag for a.c +> } +> }, +> "counts": { +> "#schema": [...], // tag c +> } +> } + +### 3. Tags + +While the schema object itself describes the overall structure of the sample set, the aggregated characteristics of each member are contained within its tag. + +The tag array contains one element for each distinct type encountered in the sample set for the given field. The order of this array is not defined and considered an implementation detail. If a field is missing in a sample document, it is treated as type _undefined_, and we use the (deprecated) BSON type 6 to represent it. + +Each element in the array is an object with the following members: + +- `type`: integer representing the (decimal) BSON type, unique within each schema tag +- `number`: integer representing the number of documents encountered in the sample set that contain this field +- `prob`: float representing the (relative) probability of this field being present given its parent field is present +- `unique`: boolean representing whether or not the values of this field are unique under the given type +- `data`: object containing type-specific additional data + + +> ##### Example + +> Field with its tag (`...` is placeholder for type-specific data field) + +> "a": { +> "#schema": [ // tag for a +> { +> "type": 2, // "string" type +> "number": 160, // 160 encounters +> "prob": 0.8, // relative probability 0.8 means 200 parent objects +> "unique": false, // the values contain duplicates +> "data": {...} // placeholder, defined further below +> }, +> { +> "type": 3, // "nested document" type +> ... +> } +> ] +> } + + +### 4. 
Type-Specific Data + +Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `data` member, which carries additional information specific for the type. + + +#### Type 1: float + +The `data` object contains the following members: + +- `min`: The smallest value encountered in any sample document +- `max`: The largest value encountered in any sample document +- `avg`: The mean of all sample document values +- `med`: The median of all sample document values +- `values`: An array of all values encountered, in order of traversal + + +> ##### Example + +> "data": { +> "min": 0.0 +> "max": 32.8, +> "avg": 9.3499999, +> "med": 5.25, +> "values": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] +> } + + +#### Type 2: string + + +The `data` object contains the following members: + +- `min`: The smallest value encountered in any sample document +- `max`: The largest value encountered in any sample document +- `values`: Unique set of all values encountered, ordered by counts descending +- `counts`: count for each value, same order as above + + +> ##### Example + +> "data": { +> "min": "atlas", +> "max": "zoo", +> "values": [ "atlas", "song", "bird", "zoo", "breakfast" ], +> "counts": [ 15, 9, 7, 5, 2 ] +> } + + +#### Type 3: nested document + +The `data` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. + + +#### Type 4: array + +The `data` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. + +> ##### Example + +> This array contains only strings (there is only a single element with type `2` in the `#schema` array). This element follows the normal rules for string types, as described above. 
+ +> "data": { +> "#array": [ +> { +> "type": 2, +> "number": 490, +> "prob": 1.0, +> "unique": false, +> "data": { +> "min": "AUH", +> "max": "ZRH", +> "values": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], +> "counts": [ 171, 110, 82, 40, 29, 23, 21, 14 ] +> } +> } +> ] +> } + + +#### Type 5: binary + +The `data` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `counts` member is an array of counts of the encountered sub-types. + +> ##### Example + +> "data": { +> "sub": [ 4, 3 ] +> "counts": [ 3004, 2554 ] +> } + + +#### Type 6: undefined (deprecated) + +The `data` object is empty. + + +#### Type 7: ObjectId + +The `data` object contains the following fields: + +- `min`: The smallest ObjectId value found, encoded as strict extended JSON. +- `max`: The largest ObjectId value found, encoded as strict extended JSON. + +Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `data` field further contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. + +> ##### Example + +> "data": { +> "min": {"$oid": "553f06eb1fc10e8d93515abb"}, +> "max": {"$oid": "553f06fbbeefcf581c232257"}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 86400, +> "values": [14, 4, 6, 23, ...], +> "labels": [] +> } +> } + + +#### Type 8: boolean + +The `data` field contains the distribution of `true` and `false` values. 
+ +> ##### Example + +> "data": { +> "true": 48, +> "false": 13, +> } + + +#### Type 9: datetime + +the `data` field contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. + +> ##### Example + +> "data": { +> "min": {"$date": 1434933322}, +> "max": {"$date": 1434939935}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 30758400, +> "values": [14, 4, 6, 23] +> } +> } + + +#### Type 10: null + +The `data` object is empty. + +#### Type 11: regular expression + +The `data` object is empty. + +#### Type 12: DBPointer (deprecated) + +The `data` object is empty. + +#### Type 13: javascript code + +The `data` object is empty. + +#### Type 15: javascript code with scope + +The `data` object is empty. 
+ +#### Type 16: 32-bit integer + +The `data` object contains the following members: + +- `min`: The minimum value encountered +- `max`: The maximum value encountered +- `med`: The median of all encoutered values +- `avg`: The mean of all encountered values +- `values`: Unique set of all values encountered, ordered by values +- `counts`: count for each value, same order as above + +> ##### Example + +> "data" : { +> "min": 3, +> "max": 72, +> "med": 20, +> "avg": 30.5, +> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> } + +#### Type 17: timestamp + +the `data` field contains aggregated date and time information: + +- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. +- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). +- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. 
+ +> ##### Example + +> "data": { +> "min": {"$date": 1434933322}, +> "max": {"$date": 1434939935}, +> "weekdays": [1, 19, 23, 4, 6, 43, 1], +> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], +> "bins": { +> "size": 30758400, +> "values": [14, 4, 6, 23] +> } +> } + + +#### Type 18: 64-bit integer + +The `data` object contains the following members: + +- `min`: The minimum value encountered +- `max`: The maximum value encountered +- `med`: The median of all encoutered values +- `avg`: The mean of all encountered values +- `values`: Unique set of all values encountered, ordered by values +- `counts`: count for each value, same order as above + +> ##### Example + +> "data" : { +> "min": 3, +> "max": 72, +> "med": 20, +> "avg": 30.5, +> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], +> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] +> } + +#### Type 127: minkey + +The `data` object is empty. + +#### Type 255: maxkey + +The `data` object is empty. + + +### 5. Adaptive Binning + +Some data types contain a field `bins`, where the data is discretized into bins with a variablebin size, depending on the data distribution. + +A _bin_ is defined as ... @TODO + +The `bins` object consists of the following members: + +- `size`: this is the size of an individual bin. For numbers (types 1, 16, 18), this is a unitless number that describes the size of a bin. + + +> "bins": { // adaptive binning +> "size": 86400, // number of seconds per bucket +> "values": [14, 4, 6, 23, ...] // values per bin +> "labels": ["Apr 30", "May 1", "May 2", "May 3", ...] 
+> } From 69cbc7b1894cc0ec32350ce7112d6659bc2b786c Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Sun, 3 May 2015 09:18:35 -0400 Subject: [PATCH 10/79] fix(project): add missing files --- .jsfmtrc | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++ .jshintrc | 19 ++++++ .npmignore | 6 ++ 3 files changed, 196 insertions(+) create mode 100644 .jsfmtrc create mode 100644 .jshintrc create mode 100644 .npmignore diff --git a/.jsfmtrc b/.jsfmtrc new file mode 100644 index 0000000..be9bbb1 --- /dev/null +++ b/.jsfmtrc @@ -0,0 +1,171 @@ +{ + + + "// docs, basically" : "https://github.com/paulirish/sublime-jsfmt#formatting-rules", + "preset": "default", + "plugins": [ + "esformatter-quotes", + "esformatter-semicolons", + "esformatter-braces" + ], + "quotes": { + "type": "single", + "avoidEscape": false + }, + "indent": { + "value": " " + }, + "whiteSpace" : { + "value" : " ", + "removeTrailing" : 1, + + "before" : { + "ArrayExpressionOpening" : 0, + "ArrayExpressionClosing" : 0, + "ArrayExpressionComma" : 0, + "ArgumentComma" : 0, + "ArgumentList" : 0, + "ArgumentListArrayExpression" : 0, + "ArgumentListFunctionExpression" : 0, + "ArgumentListObjectExpression" : 0, + "AssignmentOperator" : 1, + "BinaryExpression": 0, + "BinaryExpressionOperator" : 1, + "BlockComment" : 1, + "CallExpression" : -1, + "CatchParameterList" : 0, + "CatchOpeningBrace" : 1, + "CatchClosingBrace" : 1, + "CatchKeyword" : 1, + "CommaOperator" : 0, + "ConditionalExpressionConsequent" : 1, + "ConditionalExpressionAlternate" : 1, + "DoWhileStatementOpeningBrace" : 1, + "DoWhileStatementClosingBrace" : 1, + "DoWhileStatementConditional" : 1, + "EmptyStatement" : 0, + "ExpressionClosingParentheses" : 0, + "FinallyKeyword" : -1, + "FinallyOpeningBrace" : 1, + "FinallyClosingBrace" : 1, + "ForInStatement" : 1, + "ForInStatementExpressionOpening" : 1, + "ForInStatementExpressionClosing" : 0, + "ForInStatementOpeningBrace" : 1, + "ForInStatementClosingBrace" : 1, + "ForStatement" : 1, + 
"ForStatementExpressionOpening" : 1, + "ForStatementExpressionClosing" : 0, + "ForStatementOpeningBrace" : 1, + "ForStatementClosingBrace" : 1, + "ForStatementSemicolon" : 0, + "FunctionDeclarationOpeningBrace" : 1, + "FunctionDeclarationClosingBrace" : 1, + "FunctionExpressionOpeningBrace" : 1, + "FunctionExpressionClosingBrace" : 1, + "IfStatementConditionalOpening" : 1, + "IfStatementConditionalClosing" : 0, + "IfStatementOpeningBrace" : 1, + "IfStatementClosingBrace" : 1, + "ElseStatementOpeningBrace" : 1, + "ElseStatementClosingBrace" : 1, + "ElseIfStatementOpeningBrace" : 1, + "ElseIfStatementClosingBrace" : 1, + "MemberExpressionClosing" : 0, + "LineComment" : 1, + "LogicalExpressionOperator" : 1, + "Property" : 1, + "PropertyValue" : 1, + "ParameterComma" : 0, + "ParameterList" : 0, + "SwitchDiscriminantOpening" : 1, + "SwitchDiscriminantClosing" : 0, + "ThrowKeyword": 1, + "TryKeyword": -1, + "TryOpeningBrace" : 1, + "TryClosingBrace" : 1, + "UnaryExpressionOperator": 0, + "VariableName" : 1, + "VariableValue" : 1, + "WhileStatementConditionalOpening" : 1, + "WhileStatementConditionalClosing" : 0, + "WhileStatementOpeningBrace" : 1, + "WhileStatementClosingBrace" : 1 + }, + + "after" : { + "ArrayExpressionOpening" : 0, + "ArrayExpressionClosing" : 0, + "ArrayExpressionComma" : 1, + "ArgumentComma" : 1, + "ArgumentList" : 0, + "ArgumentListArrayExpression" : 0, + "ArgumentListFunctionExpression" : 0, + "ArgumentListObjectExpression" : 0, + "AssignmentOperator" : 1, + "BinaryExpression": 0, + "BinaryExpressionOperator" : 1, + "BlockComment" : 1, + "CallExpression" : 0, + "CatchParameterList" : 0, + "CatchOpeningBrace" : 1, + "CatchClosingBrace" : 1, + "CatchKeyword" : 1, + "CommaOperator" : 1, + "ConditionalExpressionConsequent" : 1, + "ConditionalExpressionTest" : 1, + "DoWhileStatementOpeningBrace" : 1, + "DoWhileStatementClosingBrace" : 1, + "DoWhileStatementBody" : 1, + "EmptyStatement" : 0, + "ExpressionOpeningParentheses" : 0, + "FinallyKeyword" : -1, 
+ "FinallyOpeningBrace" : 1, + "FinallyClosingBrace" : 1, + "ForInStatement" : 1, + "ForInStatementExpressionOpening" : 0, + "ForInStatementExpressionClosing" : 1, + "ForInStatementOpeningBrace" : 1, + "ForInStatementClosingBrace" : 1, + "ForStatement" : 1, + "ForStatementExpressionOpening" : 0, + "ForStatementExpressionClosing" : 1, + "ForStatementClosingBrace" : 1, + "ForStatementOpeningBrace" : 1, + "ForStatementSemicolon" : 1, + "FunctionReservedWord": 0, + "FunctionName" : 0, + "FunctionExpressionOpeningBrace" : 1, + "FunctionExpressionClosingBrace" : 0, + "FunctionDeclarationOpeningBrace" : 0, + "FunctionDeclarationClosingBrace" : 0, + "IfStatementConditionalOpening" : 0, + "IfStatementConditionalClosing" : 1, + "IfStatementOpeningBrace" : 1, + "IfStatementClosingBrace" : 1, + "ElseStatementOpeningBrace" : 1, + "ElseStatementClosingBrace" : 1, + "ElseIfStatementOpeningBrace" : 1, + "ElseIfStatementClosingBrace" : 1, + "MemberExpressionOpening" : 0, + "LogicalExpressionOperator" : 1, + "ObjectExpressionClosingBrace": 0, + "PropertyName" : 0, + "PropertyValue" : 0, + "ParameterComma" : 1, + "ParameterList" : 0, + "SwitchDiscriminantOpening" : 0, + "SwitchDiscriminantClosing" : 1, + "ThrowKeyword": 1, + "TryKeyword": -1, + "TryOpeningBrace" : 1, + "TryClosingBrace" : 1, + "UnaryExpressionOperator": 0, + "VariableName" : 1, + "WhileStatementConditionalOpening" : 0, + "WhileStatementConditionalClosing" : 1, + "WhileStatementOpeningBrace" : 1, + "WhileStatementClosingBrace" : 1 + } + } +} diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 0000000..8420af3 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,19 @@ +{ + "eqeqeq": true, + "node": true, + "browser": true, + "newcap": true, + "predef": [ + "describe", + "it", + "before", + "beforeEach", + "after", + "afterEach" + ], + "unused": true, + "trailing": true, + "boss": true, + "loopfunc": true, + "undef": true +} diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..2f3c35e --- /dev/null +++ 
b/.npmignore @@ -0,0 +1,6 @@ +docs/ +fixtures/ +test/ +.travis.yml +.jshintrc +.jsfmtrc From 0b626c34c5a54f6d0cd1f5527c6f58ec9c8a8d66 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Sun, 3 May 2015 09:20:13 -0400 Subject: [PATCH 11/79] wip(cleanup): roughing in updates --- bin/mongodb-schema.docopt | 10 +++++++ bin/mongodb-schema.js | 7 +++++ lib/schema.js | 63 +++++++++++++++++++++++++-------------- package.json | 4 ++- 4 files changed, 61 insertions(+), 23 deletions(-) create mode 100644 bin/mongodb-schema.docopt create mode 100644 bin/mongodb-schema.js diff --git a/bin/mongodb-schema.docopt b/bin/mongodb-schema.docopt new file mode 100644 index 0000000..0c9052f --- /dev/null +++ b/bin/mongodb-schema.docopt @@ -0,0 +1,10 @@ +mongodb-schema + +Infer the probabilistic schema for a MongoDB collection. + +Usage: + mongodb-schema [] [] + +Options: + -h --help Show this screen. + --version Show version. diff --git a/bin/mongodb-schema.js b/bin/mongodb-schema.js new file mode 100644 index 0000000..9ce1386 --- /dev/null +++ b/bin/mongodb-schema.js @@ -0,0 +1,7 @@ +#!/usr/bin/env node + +var schema = require('../'); +var fs = require('fs'); +var docopt = require('docopt').docopt; +var pkg = require('../package.json'); +var argv = docopt(fs.readFileSync(__dirname + '/m.docopt', 'utf-8'), version: pkg.version}); diff --git a/lib/schema.js b/lib/schema.js index 94b3d39..c59a27f 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,10 +1,11 @@ -var pkg = require('../package.json'), - find = require('lodash.find'), - defs = require('./definitions'), - BSON = require('bson'), - isInteger = require('is-integer'), - debug = require('debug')('schema:main'); - +var pkg = require('../package.json'); +var find = require('lodash.find'); +var defs = require('./definitions'); +var BSON = require('bson'); +var isInteger = require('is-integer'); +var each = require('lodash.foreach'); +var debug = require('debug')('schema:main'); +var stream = require('stream'); // these types have a 
_bsontype property var bsontypeMap = { @@ -12,7 +13,7 @@ var bsontypeMap = { 'Long': 18, 'MinKey': 255, 'MaxKey': 127, - 'Code': 15, // no differentiation to 13 + 'Code': 15, // no differentiation to 13 'Binary': 5, 'DBRef': 12, 'Timestamp': 17 @@ -83,7 +84,7 @@ function _infer(schema, name, value) { // find schema array element for correct type or create one // @review should this be an object rather than array? at least while building the schema? - var type_obj = find(tag, function (el) { + var type_obj = find(tag, function(el) { return el[defs.TYPE] === bsontype; }); @@ -113,6 +114,12 @@ function _infer(schema, name, value) { } } +// @todo: switch the counting below to a prototypal model. +function Schema() { + +} +Schema.prototype.version = pkg.version; +Schema.prototype.count = 0; /** @@ -121,9 +128,10 @@ function _infer(schema, name, value) { * @return {object} resulting schema */ module.exports = function(documents) { - var schema = {}; + // @todo: see above on moving this to a class. 
+ // add root tag and version var root = defs.ESCAPE + defs.ROOT; schema[root] = {}; @@ -134,28 +142,39 @@ module.exports = function(documents) { if (documents === undefined) { documents = []; } + if (!(documents instanceof Array)) { throw new TypeError('`documents` must be an array.'); } - + // @todo: finish cleanup // walk all documents - documents.forEach(function (doc) { - + each(documents, function inspect_document(doc) { // increase global counter schema[root][defs.COUNT] += 1; - for (var name in doc) { - if (!doc.hasOwnProperty(name)) continue; - - // process this property - _infer(schema, name, doc[name]); + each(doc, function(property) { + _infer(schema, property, doc[property]); + }); + }); + return schema; +}; +var inherits = require('util').inherits; +function SchemaTransformStream(opts) { + SchemaTransformStream._super.call(this, { + objectMode: true + }); +} - } +inherits(SchemaTransformStream, stream.Transform); +SchemaTransformStream.prototype._transform = function(document, encoding, done) { + debug('_transform: %j', { + encoding: encoding, + document: document }); + done(); +}; - return schema; -} - +module.exports.stream = SchemaTransformStream; diff --git a/package.json b/package.json index 40c9525..7632a75 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "mongodb-schema", - "description": "Infer probabilistic schema of javascript objects or a MongoDB collection.", + "description": "Infer the probabilistic schema for a MongoDB collection.", "version": "0.7.0", "author": "Thomas Rueckstiess ", "license": "MIT", @@ -23,8 +23,10 @@ "dependencies": { "bson": "^0.3.1", "debug": "^2.1.3", + "docopt": "^0.6.2", "is-integer": "^1.0.4", "lodash.find": "^3.2.0", + "lodash.foreach": "^3.0.3", "mongodb": "^2.0.28" }, "devDependencies": { From a95add67f35a40c23ca102bc522029632703147b Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Mon, 4 May 2015 12:11:58 +1000 Subject: [PATCH 12/79] more type detection, and started aggregating 
--- lib/schema.js | 128 ++++++++++++++++++++++++++++++++++---------------- test/test.js | 7 ++- 2 files changed, 94 insertions(+), 41 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index c59a27f..cff3994 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -50,66 +50,116 @@ function _getType(value) { // code with scope (15), timestamp (17), minkey (255), maxkey (127). if (value.hasOwnProperty('_bsontype')) { - // dbref, binary, code, code with scope, timestamp, maxkey, minkey + // objectid, dbref, binary, code, code with scope, timestamp, maxkey, minkey return bsontypeMap[value._bsontype]; } - if (value instanceof BSON.ObjectId) { - return 7; + if (value instanceof Array) { + return 4; } + if (value instanceof Date) { + return 9; + } + + if (value instanceof RegExp) { + return 11; + } + + // if nothing matches, it's a nested document return 3; } + // should not get here + throw Error('invalid type'); +} + + + +function _pushValue(value, data_obj) { + if (!data_obj.hasOwnProperty('values')) { + data_obj.values = []; + } + data_obj.values.push(value); +} + +function _addToSet(value, data_obj) { + } +function _aggregate(name, value, type, data_obj) { + + switch (type) { + case 1: _pushValue(value, data_obj); break; + case 2: _pushValue(value, data_obj); break; + case 3: break; + // ... 
+ + } + + if (type === 1) { // float + _pushValue(value, data_obj); + } + + if (type === 2) { + // @todo + } +} + +function _finalize(schema) { + +} /** * analyse property and integrate it into the schema * @param {array} documents array of sample documents to integrate into schema * @return {object} resulting schema */ -function _infer(schema, name, value) { +function _infer(obj, schema) { - // create schema member if not present yet - if (!(name in schema)) { - schema[name] = {}; - schema[name][defs.ESCAPE + defs.SCHEMA] = []; - } - var tag = schema[name][defs.ESCAPE + defs.SCHEMA]; + for (var name in obj) { + if (!obj.hasOwnProperty(name)) continue; - // get type of `value` - var bsontype = _getType(value); + var value = obj[name]; - // find schema array element for correct type or create one - // @review should this be an object rather than array? at least while building the schema? - var type_obj = find(tag, function(el) { - return el[defs.TYPE] === bsontype; - }); + // create schema member if not present yet + if (!(name in schema)) { + schema[name] = {}; + schema[name][defs.ESCAPE + defs.SCHEMA] = []; + } + var tag = schema[name][defs.ESCAPE + defs.SCHEMA]; - if (!type_obj) { - // not found, create one - type_obj = {}; - type_obj[defs.TYPE] = bsontype; - type_obj[defs.COUNT] = 0; - type_obj[defs.UNIQUE] = true; - type_obj[defs.DATA] = {}; + // get type of `value` + var bsontype = _getType(value); - tag.push(type_obj); - } + // find schema array element for correct type or create one + // @review should this be an object rather than array? at least while building the schema? 
+ var type_obj = find(tag, function (el) { + return el[defs.TYPE] === bsontype; + }); + + if (!type_obj) { + // not found, create one + type_obj = {}; + type_obj[defs.TYPE] = bsontype; + type_obj[defs.COUNT] = 0; + type_obj[defs.PROB] = 0.0; + type_obj[defs.UNIQUE] = null; // should be determined at the end + type_obj[defs.DATA] = {}; + + tag.push(type_obj); + } - // increase counts, add data, check uniqueness - type_obj[defs.COUNT] += 1; - // @todo add data, verify still unique + // increase counts, add data, check uniqueness + type_obj[defs.COUNT] += 1; + _aggregate(name, value, bsontype, type_obj[defs.DATA]); - // special handling for arrays (type 4) + // special handling for arrays (type 4) - // recursive call for nested documents (type 3) - if (bsontype === 3) { - for (var subname in value) { - if (!value.hasOwnProperty(subname)) continue; - _infer(schema[name], subname, value[subname]); + // recursive call for nested documents (type 3) + if (bsontype === 3) { + _infer(value, schema[name]); } } } @@ -131,7 +181,7 @@ module.exports = function(documents) { var schema = {}; // @todo: see above on moving this to a class. 
- + // add root tag and version var root = defs.ESCAPE + defs.ROOT; schema[root] = {}; @@ -151,10 +201,7 @@ module.exports = function(documents) { each(documents, function inspect_document(doc) { // increase global counter schema[root][defs.COUNT] += 1; - - each(doc, function(property) { - _infer(schema, property, doc[property]); - }); + _infer(doc, schema); }); return schema; }; @@ -178,3 +225,4 @@ SchemaTransformStream.prototype._transform = function(document, encoding, done) }; module.exports.stream = SchemaTransformStream; + diff --git a/test/test.js b/test/test.js index 7a62159..d9e1d0d 100644 --- a/test/test.js +++ b/test/test.js @@ -32,9 +32,14 @@ describe('mongodb-schema', function() { assert.ok( schema(allTypes) ); }); + it('should detect the correct type for every type', function () { + var result = schema(allTypes); + console.log(JSON.stringify(result, null, 2)); + }); + it('should create the correct type objects inside #schema tag', function () { var result = schema([ {a: "foo"}, {a: 1, b: {c: BSON.ObjectId() }} ]); - console.log(JSON.stringify(result, null, 2)); + // @todo }) }); From ce34b2f898ee6fe66580435139fea7c92021cb6e Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Sun, 3 May 2015 23:12:37 -0400 Subject: [PATCH 13/79] roughing in from scout modelling --- examples/simple.js | 18 +++++++++++++++++ lib/definitions.js | 12 ------------ lib/index.js | 23 ++++++++++++++++++++++ lib/models/field-collection.js | 3 +++ lib/models/field.js | 23 ++++++++++++++++++++++ lib/models/index.js | 0 lib/models/long-value.js | 5 +++++ lib/models/number-value.js | 17 +++++++++++++++++ lib/models/schema.js | 16 ++++++++++++++++ lib/models/string-value.js | 17 +++++++++++++++++ lib/models/value-collection.js | 21 ++++++++++++++++++++ lib/models/value.js | 15 +++++++++++++++ lib/schema.js | 35 +++------------------------------- package.json | 4 ++++ 14 files changed, 165 insertions(+), 44 deletions(-) create mode 100755 examples/simple.js delete mode 100644 
lib/definitions.js create mode 100644 lib/index.js create mode 100644 lib/models/field-collection.js create mode 100644 lib/models/field.js create mode 100644 lib/models/index.js create mode 100644 lib/models/long-value.js create mode 100644 lib/models/number-value.js create mode 100644 lib/models/schema.js create mode 100644 lib/models/string-value.js create mode 100644 lib/models/value-collection.js create mode 100644 lib/models/value.js diff --git a/examples/simple.js b/examples/simple.js new file mode 100755 index 0000000..026b5ab --- /dev/null +++ b/examples/simple.js @@ -0,0 +1,18 @@ +#!/usr/bin/env node + +var schema = require('../'); +var es = require('event-stream'); + +var docs = [ + { + _id: 1, + username: 'Adam' + }, + { + _id: 2, + username: 'Brian' + } +]; + + +es.readArray(docs).pipe(schema.stream()).pipe(es.stringify()); diff --git a/lib/definitions.js b/lib/definitions.js deleted file mode 100644 index f5bb0f8..0000000 --- a/lib/definitions.js +++ /dev/null @@ -1,12 +0,0 @@ -module.exports = { - ESCAPE : '#', - SCHEMA : 'schema', - ARRAY : 'array', - ROOT : 'root', - VERSION : 'version', - TYPE : 'type', - COUNT : 'count', - PROB : 'prob', - UNIQUE : 'unique', - DATA : 'data' -} diff --git a/lib/index.js b/lib/index.js new file mode 100644 index 0000000..43b41d5 --- /dev/null +++ b/lib/index.js @@ -0,0 +1,23 @@ +var inherits = require('util').inherits; +var debug = require('debug')('mongodb-schema'); +var stream = require('stream'); + + +function SchemaTransformStream(opts) { + opts = opts || {}; + SchemaTransformStream._super.call(this, { + objectMode: true + }); +} + +inherits(SchemaTransformStream, stream.Transform); + +SchemaTransformStream.prototype._transform = function(document, encoding, done) { + debug('_transform: %j', { + encoding: encoding, + document: document + }); + done(); +}; + +module.exports.stream = SchemaTransformStream; diff --git a/lib/models/field-collection.js b/lib/models/field-collection.js new file mode 100644 index 
0000000..ec9055d --- /dev/null +++ b/lib/models/field-collection.js @@ -0,0 +1,3 @@ +module.exports = require('ampersand-collection').extend({ + model: require('./field') +}); diff --git a/lib/models/field.js b/lib/models/field.js new file mode 100644 index 0000000..21a9c0d --- /dev/null +++ b/lib/models/field.js @@ -0,0 +1,23 @@ +var AmpersandState = require('ampersand-state'); +var ValueCollection = require('./value-collection'); + +module.exports = AmpersandState.extend({ + props: { + _id: { + type: 'string', + required: true + }, + displayName: { + type: 'string', + default: function() { + return this._id; + } + }, + description: { + type: 'string' + } + }, + children: { + types: ValueCollection + } +}); diff --git a/lib/models/index.js b/lib/models/index.js new file mode 100644 index 0000000..e69de29 diff --git a/lib/models/long-value.js b/lib/models/long-value.js new file mode 100644 index 0000000..9b30b3c --- /dev/null +++ b/lib/models/long-value.js @@ -0,0 +1,5 @@ +module.exports = require('./number-value').extend({ + _id: { + default: 'Long' + } +}); diff --git a/lib/models/number-value.js b/lib/models/number-value.js new file mode 100644 index 0000000..fdbce94 --- /dev/null +++ b/lib/models/number-value.js @@ -0,0 +1,17 @@ +module.exports = require('./value').extend({ + props: { + _id: { + default: 'Number' + }, + unique: { + type: 'number', + default: 0 + }, + values: { + type: 'array', + default: function() { + return []; + } + } + } +}); diff --git a/lib/models/schema.js b/lib/models/schema.js new file mode 100644 index 0000000..f744686 --- /dev/null +++ b/lib/models/schema.js @@ -0,0 +1,16 @@ +var AmpersandModel = require('ampersand-model'); + +var FieldCollection = require('./field-collection'); +var debug = require('debug')('mongodb-schema'); + +module.exports = AmpersandModel.extend({ + children: { + fields: FieldCollection + }, + analyze: function(sampledField, done) { + if (this.fields.get(sampledField._id)) { + debug('already have the field %j', 
sampledField); + return done(); + } + } +}); diff --git a/lib/models/string-value.js b/lib/models/string-value.js new file mode 100644 index 0000000..c0eb969 --- /dev/null +++ b/lib/models/string-value.js @@ -0,0 +1,17 @@ +module.exports = require('./value').extend({ + props: { + _id: { + default: 'String' + }, + unique: { + type: 'number', + default: 0 + }, + values: { + type: 'array', + default: function() { + return []; + } + } + } +}); diff --git a/lib/models/value-collection.js b/lib/models/value-collection.js new file mode 100644 index 0000000..dbb77e6 --- /dev/null +++ b/lib/models/value-collection.js @@ -0,0 +1,21 @@ +var AmpersandCollection = require('ampersand-collection'); + +// @todo: dedupe by finally breaking types.js out of mongodb-extended-json. +var TYPED_STATES = { + 'Number': require('./number-value'), + 'String': require('./string-value') +}; + +module.exports = AmpersandCollection.extend({ + model: function(val, options) { + var T = (val && val._bsonType) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, ''); + var Klass = TYPED_STATES[T]; + + if (!Klass) { + throw new TypeError('No value type for ' + T); + } + return new Klass({ + value: val + }, options); + } +}); diff --git a/lib/models/value.js b/lib/models/value.js new file mode 100644 index 0000000..a9eff81 --- /dev/null +++ b/lib/models/value.js @@ -0,0 +1,15 @@ +var AmpersandState = require('ampersand-state'); + +module.exports = AmpersandState.extend({ + props: { + _id: { + type: 'string', + required: true, + default: 'Unknown' + }, + count: { + type: 'number', + default: 0 + } + } +}); diff --git a/lib/schema.js b/lib/schema.js index cff3994..e3cbce2 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,11 +1,11 @@ +var defs = require('./definitions'); var pkg = require('../package.json'); var find = require('lodash.find'); -var defs = require('./definitions'); var BSON = require('bson'); var isInteger = require('is-integer'); var each = require('lodash.foreach'); -var 
debug = require('debug')('schema:main'); var stream = require('stream'); +var debug = require('debug')('mongodb-schema'); // these types have a _bsontype property var bsontypeMap = { @@ -17,7 +17,7 @@ var bsontypeMap = { 'Binary': 5, 'DBRef': 12, 'Timestamp': 17 -} +}; /** @@ -164,14 +164,6 @@ function _infer(obj, schema) { } } -// @todo: switch the counting below to a prototypal model. -function Schema() { - -} -Schema.prototype.version = pkg.version; -Schema.prototype.count = 0; - - /** * main schema function * @param {array} documents array of sample documents to integrate into schema @@ -205,24 +197,3 @@ module.exports = function(documents) { }); return schema; }; - -var inherits = require('util').inherits; - -function SchemaTransformStream(opts) { - SchemaTransformStream._super.call(this, { - objectMode: true - }); -} - -inherits(SchemaTransformStream, stream.Transform); - -SchemaTransformStream.prototype._transform = function(document, encoding, done) { - debug('_transform: %j', { - encoding: encoding, - document: document - }); - done(); -}; - -module.exports.stream = SchemaTransformStream; - diff --git a/package.json b/package.json index 7632a75..10b93e6 100644 --- a/package.json +++ b/package.json @@ -21,9 +21,13 @@ "schema" ], "dependencies": { + "ampersand-collection": "^1.4.5", + "ampersand-state": "^4.5.4", "bson": "^0.3.1", "debug": "^2.1.3", "docopt": "^0.6.2", + "event-stream": "^3.3.0", + "flatnest": "^0.2.2", "is-integer": "^1.0.4", "lodash.find": "^3.2.0", "lodash.foreach": "^3.0.3", From cb30eb897cf1ae33d9fa2589788b3f7b6c7ecacf Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 01:34:39 -0400 Subject: [PATCH 14/79] ampersand all hooked up --- examples/simple.js | 10 +- index.js | 2 +- lib/index.js | 92 +++++++++++++++---- lib/models/field-collection.js | 3 +- lib/models/field.js | 7 +- lib/models/index.js | 0 lib/models/long-value.js | 5 - lib/models/number-value.js | 17 ---- lib/models/schema.js | 16 ---- 
lib/models/string-value.js | 17 ---- ...value-collection.js => type-collection.js} | 12 ++- lib/models/type-long.js | 5 + lib/models/type-number.js | 27 ++++++ lib/models/type-string.js | 27 ++++++ lib/models/type.js | 40 ++++++++ lib/models/value.js | 15 --- package.json | 2 + 17 files changed, 199 insertions(+), 98 deletions(-) delete mode 100644 lib/models/index.js delete mode 100644 lib/models/long-value.js delete mode 100644 lib/models/number-value.js delete mode 100644 lib/models/schema.js delete mode 100644 lib/models/string-value.js rename lib/models/{value-collection.js => type-collection.js} (63%) create mode 100644 lib/models/type-long.js create mode 100644 lib/models/type-number.js create mode 100644 lib/models/type-string.js create mode 100644 lib/models/type.js delete mode 100644 lib/models/value.js diff --git a/examples/simple.js b/examples/simple.js index 026b5ab..cad4420 100755 --- a/examples/simple.js +++ b/examples/simple.js @@ -1,6 +1,6 @@ #!/usr/bin/env node -var schema = require('../'); +var schema = require('../')(); var es = require('event-stream'); var docs = [ @@ -11,8 +11,14 @@ var docs = [ { _id: 2, username: 'Brian' + }, + { + _id: 3, + username: 'Cat' } ]; -es.readArray(docs).pipe(schema.stream()).pipe(es.stringify()); +es.readArray(docs).pipe(schema.stream().on('end', function(){ + console.log('Finalized schema has fields: ', JSON.stringify(schema.fields, null, 2)); +})).pipe(es.stringify()).pipe(process.stdout); diff --git a/index.js b/index.js index 8af3825..fb88a3c 100644 --- a/index.js +++ b/index.js @@ -1 +1 @@ -var schema = module.exports = require('./lib/schema.js'); +var schema = module.exports = require('./lib'); diff --git a/lib/index.js b/lib/index.js index 43b41d5..ca17824 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,23 +1,81 @@ -var inherits = require('util').inherits; -var debug = require('debug')('mongodb-schema'); -var stream = require('stream'); +var AmpersandState = require('ampersand-state'); +var 
FieldCollection = require('./models/field-collection'); +var debug = require('debug')('mongodb-schema:models:schema'); +var es = require('event-stream'); +var flatten = require('flatnest').flatten; +var _ = require('lodash'); - -function SchemaTransformStream(opts) { - opts = opts || {}; - SchemaTransformStream._super.call(this, { - objectMode: true - }); +function getValueType(value) { + var T; + if (_.has(value, '_bsonType')) { + T = value._bsonType; + } else { + T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); + } + return T; } -inherits(SchemaTransformStream, stream.Transform); +var Schema = AmpersandState.extend({ + collections: { + fields: FieldCollection + }, + props: { + count: { + type: 'number', + default: 0 + } + }, + stream: function() { + var schema = this; + return es.through(function(doc) { + debug('updating based on %j', doc); + _.each(_.pairs(flatten(doc)), function(d) { + var _id = d[0]; + var value = d[1]; + var T = getValueType(d[1]); + var field = schema.fields.get(_id); + + if (!field) { + debug('`%s` is a new field', _id); + field = schema.fields.add({ + _id: _id + }); -SchemaTransformStream.prototype._transform = function(document, encoding, done) { - debug('_transform: %j', { - encoding: encoding, - document: document - }); - done(); + field.types.add({ + _id: T, + count: 1, + value: value + }); + } else { + debug('`%s` is already a known field with %d type(s)', _id, field.types.length); + var existingType = field.types.get(T); + if (!existingType) { + debug('new type `%s` for field `%s`', T, _id); + field.types.add({ + _id: T, + count: 1, + value: value + }); + } else { + debug('updating existing type %j', existingType); + existingType.values.push(value); + existingType.count += 1; + } + } + this.emit('data', field); + }, this); + schema.count += 1; + }, function() { + debug('finalized schema is', JSON.stringify(schema, null, 2)); + this.emit('end'); + }); + } +}); + +module.exports = function() { + return new 
Schema(); }; -module.exports.stream = SchemaTransformStream; +module.exports.stream = function() { + return new Schema().stream(); +}; diff --git a/lib/models/field-collection.js b/lib/models/field-collection.js index ec9055d..55bc73c 100644 --- a/lib/models/field-collection.js +++ b/lib/models/field-collection.js @@ -1,3 +1,4 @@ module.exports = require('ampersand-collection').extend({ - model: require('./field') + model: require('./field'), + mainIndex: '_id' }); diff --git a/lib/models/field.js b/lib/models/field.js index 21a9c0d..4013042 100644 --- a/lib/models/field.js +++ b/lib/models/field.js @@ -1,7 +1,8 @@ var AmpersandState = require('ampersand-state'); -var ValueCollection = require('./value-collection'); +var TypeCollection = require('./type-collection'); module.exports = AmpersandState.extend({ + idAttribute: '_id', props: { _id: { type: 'string', @@ -17,7 +18,7 @@ module.exports = AmpersandState.extend({ type: 'string' } }, - children: { - types: ValueCollection + collections: { + types: TypeCollection } }); diff --git a/lib/models/index.js b/lib/models/index.js deleted file mode 100644 index e69de29..0000000 diff --git a/lib/models/long-value.js b/lib/models/long-value.js deleted file mode 100644 index 9b30b3c..0000000 --- a/lib/models/long-value.js +++ /dev/null @@ -1,5 +0,0 @@ -module.exports = require('./number-value').extend({ - _id: { - default: 'Long' - } -}); diff --git a/lib/models/number-value.js b/lib/models/number-value.js deleted file mode 100644 index fdbce94..0000000 --- a/lib/models/number-value.js +++ /dev/null @@ -1,17 +0,0 @@ -module.exports = require('./value').extend({ - props: { - _id: { - default: 'Number' - }, - unique: { - type: 'number', - default: 0 - }, - values: { - type: 'array', - default: function() { - return []; - } - } - } -}); diff --git a/lib/models/schema.js b/lib/models/schema.js deleted file mode 100644 index f744686..0000000 --- a/lib/models/schema.js +++ /dev/null @@ -1,16 +0,0 @@ -var AmpersandModel = 
require('ampersand-model'); - -var FieldCollection = require('./field-collection'); -var debug = require('debug')('mongodb-schema'); - -module.exports = AmpersandModel.extend({ - children: { - fields: FieldCollection - }, - analyze: function(sampledField, done) { - if (this.fields.get(sampledField._id)) { - debug('already have the field %j', sampledField); - return done(); - } - } -}); diff --git a/lib/models/string-value.js b/lib/models/string-value.js deleted file mode 100644 index c0eb969..0000000 --- a/lib/models/string-value.js +++ /dev/null @@ -1,17 +0,0 @@ -module.exports = require('./value').extend({ - props: { - _id: { - default: 'String' - }, - unique: { - type: 'number', - default: 0 - }, - values: { - type: 'array', - default: function() { - return []; - } - } - } -}); diff --git a/lib/models/value-collection.js b/lib/models/type-collection.js similarity index 63% rename from lib/models/value-collection.js rename to lib/models/type-collection.js index dbb77e6..9c7f25d 100644 --- a/lib/models/value-collection.js +++ b/lib/models/type-collection.js @@ -2,13 +2,17 @@ var AmpersandCollection = require('ampersand-collection'); // @todo: dedupe by finally breaking types.js out of mongodb-extended-json. 
var TYPED_STATES = { - 'Number': require('./number-value'), - 'String': require('./string-value') + 'Number': require('./type-number'), + 'String': require('./type-string') }; module.exports = AmpersandCollection.extend({ - model: function(val, options) { - var T = (val && val._bsonType) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, ''); + mainIndex: '_id', + model: function(attrs, options) { + if (attrs && Object.keys(attrs).length === 0) return; + + var val = attrs.value; + var T = (val && val._bsonType) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, '$1'); var Klass = TYPED_STATES[T]; if (!Klass) { diff --git a/lib/models/type-long.js b/lib/models/type-long.js new file mode 100644 index 0000000..27f4db8 --- /dev/null +++ b/lib/models/type-long.js @@ -0,0 +1,5 @@ +module.exports = require('./type-number').extend({ + _id: { + default: 'Long' + } +}); diff --git a/lib/models/type-number.js b/lib/models/type-number.js new file mode 100644 index 0000000..2dc68c5 --- /dev/null +++ b/lib/models/type-number.js @@ -0,0 +1,27 @@ +var _ = require('lodash'); + +module.exports = require('./type').extend({ + props: { + _id: { + default: 'Number' + }, + count: { + type: 'number', + default: 1 + }, + values: { + type: 'array', + default: function() { + return []; + } + } + }, + derived: { + unique: { + deps: ['values', 'count'], + fn: function() { + return _.unique(this.values).length; + } + } + } +}); diff --git a/lib/models/type-string.js b/lib/models/type-string.js new file mode 100644 index 0000000..7c4f346 --- /dev/null +++ b/lib/models/type-string.js @@ -0,0 +1,27 @@ +var _ = require('lodash'); + +module.exports = require('./type').extend({ + props: { + _id: { + default: 'String' + }, + count: { + type: 'number', + default: 1 + }, + values: { + type: 'array', + default: function() { + return []; + } + } + }, + derived: { + unique: { + deps: ['values', 'count'], + fn: function() { + return _.unique(this.values).length; + } + } + } 
+}); diff --git a/lib/models/type.js b/lib/models/type.js new file mode 100644 index 0000000..d905ae4 --- /dev/null +++ b/lib/models/type.js @@ -0,0 +1,40 @@ +var AmpersandState = require('ampersand-state'); +var _ = require('lodash'); + +module.exports = AmpersandState.extend({ + idAttribute: '_id', + props: { + _id: { + type: 'string' + }, + count: { + type: 'number', + default: 1 + }, + values: { + type: 'array', + default: function() { + return []; + } + } + }, + derived: { + unique: { + deps: ['values', 'count'], + fn: function() { + return _.unique(this.values).length; + } + } + }, + initialize: function(options) { + if (options.value) { + this.values.push(options.value); + } + }, + serialize: function() { + return this.getAttributes({ + props: true, + derived: true + }, true); + } +}); diff --git a/lib/models/value.js b/lib/models/value.js deleted file mode 100644 index a9eff81..0000000 --- a/lib/models/value.js +++ /dev/null @@ -1,15 +0,0 @@ -var AmpersandState = require('ampersand-state'); - -module.exports = AmpersandState.extend({ - props: { - _id: { - type: 'string', - required: true, - default: 'Unknown' - }, - count: { - type: 'number', - default: 0 - } - } -}); diff --git a/package.json b/package.json index 10b93e6..0c8aac4 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ ], "dependencies": { "ampersand-collection": "^1.4.5", + "ampersand-model": "^5.0.3", "ampersand-state": "^4.5.4", "bson": "^0.3.1", "debug": "^2.1.3", @@ -29,6 +30,7 @@ "event-stream": "^3.3.0", "flatnest": "^0.2.2", "is-integer": "^1.0.4", + "lodash": "^3.8.0", "lodash.find": "^3.2.0", "lodash.foreach": "^3.0.3", "mongodb": "^2.0.28" From 7773c1d925387462db0282522b121f8f857145f5 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 01:36:50 -0400 Subject: [PATCH 15/79] connect long types --- lib/models/type-collection.js | 3 ++- lib/models/type-long.js | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git 
a/lib/models/type-collection.js b/lib/models/type-collection.js index 9c7f25d..066014a 100644 --- a/lib/models/type-collection.js +++ b/lib/models/type-collection.js @@ -3,7 +3,8 @@ var AmpersandCollection = require('ampersand-collection'); // @todo: dedupe by finally breaking types.js out of mongodb-extended-json. var TYPED_STATES = { 'Number': require('./type-number'), - 'String': require('./type-string') + 'String': require('./type-string'), + 'Long': require('./type-long'), }; module.exports = AmpersandCollection.extend({ diff --git a/lib/models/type-long.js b/lib/models/type-long.js index 27f4db8..6998b9f 100644 --- a/lib/models/type-long.js +++ b/lib/models/type-long.js @@ -1,5 +1,7 @@ module.exports = require('./type-number').extend({ - _id: { - default: 'Long' + props: { + _id: { + default: 'Long' + } } }); From 19b442a31130a4c86ff18795367a8da34547c3bc Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 01:56:09 -0400 Subject: [PATCH 16/79] add example of how to integrate with scout --- examples/scout.js | 52 +++++++++++++++++++++++++++++++++++++++++++++++ lib/index.js | 3 +++ 2 files changed, 55 insertions(+) create mode 100644 examples/scout.js diff --git a/examples/scout.js b/examples/scout.js new file mode 100644 index 0000000..320e565 --- /dev/null +++ b/examples/scout.js @@ -0,0 +1,52 @@ +var scout = require('scout-client')(); +var schema = require('mongodb-schema'); +var _ = require('lodash'); + +var Schema = schema.extend({ + fetch: function(options) { + options = _.defaults(options, { + size: 5, + query: {}, + fields: null + }); + scout.sample(this.ns, options).pipe(this.stream()); + } +}); + +var FieldView = AmpersandView.extend({ + bindings: { + 'model.displayName': { + hook: 'name' + } + }, + template: require('./field.jade') +}); + +var FieldListView = AmpersandView.extend({ + template: require('./field-list.jade'), + render: function() { + this.renderWithTemplate({}); + this.renderCollectionView(this.collection, FieldView, 
this.queryByHook('fields')); + } +}); + +var CollectionView = AmpersandView.extend({ + model: Schema, + initialize: function(opts) { + this.model.ns = opts.ns; + this.model.fetch(); + }, + template: require('./collection.jade'), + subviews: { + fields: { + hook: 'fields-container', + prepareView: function(el) { + return new FieldListView({ + el: el, + parent: this, + collection: this.model.fields + }); + } + } + } +}); diff --git a/lib/index.js b/lib/index.js index ca17824..bca0747 100644 --- a/lib/index.js +++ b/lib/index.js @@ -75,7 +75,10 @@ var Schema = AmpersandState.extend({ module.exports = function() { return new Schema(); }; +module.exports.extend = Schema.extend; module.exports.stream = function() { return new Schema().stream(); }; + +module.exports.FieldCollection = require('./models/field-collection'); From bc46b663608e6e1f54e2b631e0d3ea0775734de4 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 19:36:53 -0400 Subject: [PATCH 17/79] fix(main): export Schema class --- lib/index.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/index.js b/lib/index.js index bca0747..bca7329 100644 --- a/lib/index.js +++ b/lib/index.js @@ -38,7 +38,8 @@ var Schema = AmpersandState.extend({ if (!field) { debug('`%s` is a new field', _id); field = schema.fields.add({ - _id: _id + _id: _id, + count: 1 }); field.types.add({ @@ -75,10 +76,11 @@ var Schema = AmpersandState.extend({ module.exports = function() { return new Schema(); }; + module.exports.extend = Schema.extend; module.exports.stream = function() { return new Schema().stream(); }; - +module.exports.Schema = Schema; module.exports.FieldCollection = require('./models/field-collection'); From 0ff2076b76570fe2edd61941d225eaa006c72fbb Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 19:37:34 -0400 Subject: [PATCH 18/79] fix(models): more types and dedupe --- lib/models/field.js | 6 +++++- lib/models/type-boolean.js | 7 +++++++ 
lib/models/type-collection.js | 2 ++ lib/models/type-null.js | 7 +++++++ lib/models/type-number.js | 20 -------------------- lib/models/type-string.js | 20 -------------------- 6 files changed, 21 insertions(+), 41 deletions(-) create mode 100644 lib/models/type-boolean.js create mode 100644 lib/models/type-null.js diff --git a/lib/models/field.js b/lib/models/field.js index 4013042..4d726c4 100644 --- a/lib/models/field.js +++ b/lib/models/field.js @@ -16,7 +16,11 @@ module.exports = AmpersandState.extend({ }, description: { type: 'string' - } + }, + count: { + type: 'number', + default: 0 + }, }, collections: { types: TypeCollection diff --git a/lib/models/type-boolean.js b/lib/models/type-boolean.js new file mode 100644 index 0000000..3983204 --- /dev/null +++ b/lib/models/type-boolean.js @@ -0,0 +1,7 @@ +module.exports = require('./type').extend({ + props: { + _id: { + default: 'Boolean' + } + } +}); diff --git a/lib/models/type-collection.js b/lib/models/type-collection.js index 066014a..a474d77 100644 --- a/lib/models/type-collection.js +++ b/lib/models/type-collection.js @@ -5,6 +5,8 @@ var TYPED_STATES = { 'Number': require('./type-number'), 'String': require('./type-string'), 'Long': require('./type-long'), + 'Boolean': require('./type-boolean'), + 'Null': require('./type-null'), }; module.exports = AmpersandCollection.extend({ diff --git a/lib/models/type-null.js b/lib/models/type-null.js new file mode 100644 index 0000000..b066770 --- /dev/null +++ b/lib/models/type-null.js @@ -0,0 +1,7 @@ +module.exports = require('./type').extend({ + props: { + _id: { + default: 'Null' + } + } +}); diff --git a/lib/models/type-number.js b/lib/models/type-number.js index 2dc68c5..b979c77 100644 --- a/lib/models/type-number.js +++ b/lib/models/type-number.js @@ -1,27 +1,7 @@ -var _ = require('lodash'); - module.exports = require('./type').extend({ props: { _id: { default: 'Number' - }, - count: { - type: 'number', - default: 1 - }, - values: { - type: 'array', - default: 
function() { - return []; - } - } - }, - derived: { - unique: { - deps: ['values', 'count'], - fn: function() { - return _.unique(this.values).length; - } } } }); diff --git a/lib/models/type-string.js b/lib/models/type-string.js index 7c4f346..09849f9 100644 --- a/lib/models/type-string.js +++ b/lib/models/type-string.js @@ -1,27 +1,7 @@ -var _ = require('lodash'); - module.exports = require('./type').extend({ props: { _id: { default: 'String' - }, - count: { - type: 'number', - default: 1 - }, - values: { - type: 'array', - default: function() { - return []; - } - } - }, - derived: { - unique: { - deps: ['values', 'count'], - fn: function() { - return _.unique(this.values).length; - } } } }); From 3c14d72d87a108e8fd480cbddeb4c19dbc1a91e7 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 19:38:03 -0400 Subject: [PATCH 19/79] fix(type): Add probability as a derived prop --- lib/index.js | 1 + lib/models/type.js | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/lib/index.js b/lib/index.js index bca7329..3327f59 100644 --- a/lib/index.js +++ b/lib/index.js @@ -62,6 +62,7 @@ var Schema = AmpersandState.extend({ existingType.values.push(value); existingType.count += 1; } + field.count += 1; } this.emit('data', field); }, this); diff --git a/lib/models/type.js b/lib/models/type.js index d905ae4..695fa23 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -24,6 +24,14 @@ module.exports = AmpersandState.extend({ fn: function() { return _.unique(this.values).length; } + }, + probability: { + deps: ['count'], + cached: false, + fn: function() { + var field = this.collection.parent; + return this.count / field.count; + } } }, initialize: function(options) { From 091392e81ec20175c500504bcfd370e5d5c91406 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 4 May 2015 19:45:49 -0400 Subject: [PATCH 20/79] explicitly skip nested arrays for now --- lib/index.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/index.js 
b/lib/index.js index 3327f59..fef17ee 100644 --- a/lib/index.js +++ b/lib/index.js @@ -15,6 +15,10 @@ function getValueType(value) { return T; } +function isArrayKey(_id) { + return /\[(\d+)\]/.test(_id); +} + var Schema = AmpersandState.extend({ collections: { fields: FieldCollection @@ -35,6 +39,10 @@ var Schema = AmpersandState.extend({ var T = getValueType(d[1]); var field = schema.fields.get(_id); + if (isArrayKey(_id)) { + return debug('@todo: smush nested arrays. skipping `%s`', _id); + } + if (!field) { debug('`%s` is a new field', _id); field = schema.fields.add({ From 8e2f6c72761182cb0417066d43229b5c153980b9 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Wed, 6 May 2015 06:26:10 -0400 Subject: [PATCH 21/79] simplifying code layout --- lib/models/type-boolean.js | 7 ------- lib/models/type-collection.js | 12 ++---------- lib/models/type-long.js | 7 ------- lib/models/type-null.js | 7 ------- lib/models/type-number.js | 7 ------- lib/models/type-string.js | 7 ------- lib/models/type.js | 34 +++++++++++++++++++++++++++++++++- 7 files changed, 35 insertions(+), 46 deletions(-) delete mode 100644 lib/models/type-boolean.js delete mode 100644 lib/models/type-long.js delete mode 100644 lib/models/type-null.js delete mode 100644 lib/models/type-number.js delete mode 100644 lib/models/type-string.js diff --git a/lib/models/type-boolean.js b/lib/models/type-boolean.js deleted file mode 100644 index 3983204..0000000 --- a/lib/models/type-boolean.js +++ /dev/null @@ -1,7 +0,0 @@ -module.exports = require('./type').extend({ - props: { - _id: { - default: 'Boolean' - } - } -}); diff --git a/lib/models/type-collection.js b/lib/models/type-collection.js index a474d77..b9e887c 100644 --- a/lib/models/type-collection.js +++ b/lib/models/type-collection.js @@ -1,13 +1,5 @@ var AmpersandCollection = require('ampersand-collection'); - -// @todo: dedupe by finally breaking types.js out of mongodb-extended-json. 
-var TYPED_STATES = { - 'Number': require('./type-number'), - 'String': require('./type-string'), - 'Long': require('./type-long'), - 'Boolean': require('./type-boolean'), - 'Null': require('./type-null'), -}; +var type = require('./type'); module.exports = AmpersandCollection.extend({ mainIndex: '_id', @@ -16,7 +8,7 @@ module.exports = AmpersandCollection.extend({ var val = attrs.value; var T = (val && val._bsonType) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, '$1'); - var Klass = TYPED_STATES[T]; + var Klass = type[T]; if (!Klass) { throw new TypeError('No value type for ' + T); diff --git a/lib/models/type-long.js b/lib/models/type-long.js deleted file mode 100644 index 6998b9f..0000000 --- a/lib/models/type-long.js +++ /dev/null @@ -1,7 +0,0 @@ -module.exports = require('./type-number').extend({ - props: { - _id: { - default: 'Long' - } - } -}); diff --git a/lib/models/type-null.js b/lib/models/type-null.js deleted file mode 100644 index b066770..0000000 --- a/lib/models/type-null.js +++ /dev/null @@ -1,7 +0,0 @@ -module.exports = require('./type').extend({ - props: { - _id: { - default: 'Null' - } - } -}); diff --git a/lib/models/type-number.js b/lib/models/type-number.js deleted file mode 100644 index b979c77..0000000 --- a/lib/models/type-number.js +++ /dev/null @@ -1,7 +0,0 @@ -module.exports = require('./type').extend({ - props: { - _id: { - default: 'Number' - } - } -}); diff --git a/lib/models/type-string.js b/lib/models/type-string.js deleted file mode 100644 index 09849f9..0000000 --- a/lib/models/type-string.js +++ /dev/null @@ -1,7 +0,0 @@ -module.exports = require('./type').extend({ - props: { - _id: { - default: 'String' - } - } -}); diff --git a/lib/models/type.js b/lib/models/type.js index 695fa23..afb3912 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -1,7 +1,7 @@ var AmpersandState = require('ampersand-state'); var _ = require('lodash'); -module.exports = AmpersandState.extend({ +var Type = 
AmpersandState.extend({ idAttribute: '_id', props: { _id: { @@ -46,3 +46,35 @@ module.exports = AmpersandState.extend({ }, true); } }); + +exports.String = Type.extend({ + props: { + _id: { + default: 'String' + } + } +}); + +exports.Number = Type.extend({ + props: { + _id: { + default: 'Number' + } + } +}); + +exports.Null = Type.extend({ + props: { + _id: { + default: 'Null' + } + } +}); + +exports.Boolean = Type.extend({ + props: { + _id: { + default: 'Boolean' + } + } +}); From 9415061c25e57b0f0a417a5528d28a3606caf2d5 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Wed, 6 May 2015 13:53:58 -0400 Subject: [PATCH 22/79] skip old tests for now --- lib/schema.js | 2 +- test/test.js | 50 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index e3cbce2..e392672 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,4 +1,4 @@ -var defs = require('./definitions'); +// var defs = require('./definitions'); var pkg = require('../package.json'); var find = require('lodash.find'); var BSON = require('bson'); diff --git a/test/test.js b/test/test.js index d9e1d0d..73f3545 100644 --- a/test/test.js +++ b/test/test.js @@ -1,45 +1,55 @@ -var schema = require('../lib/schema'), - defs = require('../lib/definitions'), - assert = require('assert'), - allTypes = require('../fixtures/all_types'), - BSON = require('bson'), - pkg = require('../package.json'); +var schema = require('../'), + assert = require('assert'), + allTypes = require('../fixtures/all_types'), + BSON = require('bson'), + pkg = require('../package.json'); -describe('mongodb-schema', function() { - var root = defs.ESCAPE + defs.ROOT; +describe.skip('mongodb-schema', function() { + // var root = defs.ESCAPE + defs.ROOT; - it('should import correctly', function () { + it('should import correctly', function() { assert.ok(schema); }); - it('should have a root object with the correct version', function () { + it('should have a 
root object with the correct version', function() { var result = schema(); assert.ok(result[root] !== undefined); assert.equal(result[root][defs.VERSION], pkg.version); }); - it('should have 0 count without any documents', function () { + it('should have 0 count without any documents', function() { var result = schema([]); assert.equal(result[root][defs.COUNT], 0); }); - it('should throw an error if documents is not an array or undefined', function () { - assert.throws(function () { schema("i'm not an array") }, TypeError); - assert.doesNotThrow(function () { schema() }); + it('should throw an error if documents is not an array or undefined', function() { + assert.throws(function() { + schema('i\'m not an array'); + }, TypeError); + assert.doesNotThrow(function() { + schema(); + }); }); - it('should parse documents of all types without error', function () { - assert.ok( schema(allTypes) ); + it('should parse documents of all types without error', function() { + assert.ok(schema(allTypes)); }); - it('should detect the correct type for every type', function () { + it('should detect the correct type for every type', function() { var result = schema(allTypes); console.log(JSON.stringify(result, null, 2)); }); - it('should create the correct type objects inside #schema tag', function () { - var result = schema([ {a: "foo"}, {a: 1, b: {c: BSON.ObjectId() }} ]); + it('should create the correct type objects inside #schema tag', function() { + var result = schema([{ + a: 'foo' + }, { + a: 1, + b: { + c: BSON.ObjectId() + } + }]); // @todo - }) + }); }); From edfa6c3d999bbd021cd80b0e93def1c6bc6a23c2 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Wed, 6 May 2015 13:54:35 -0400 Subject: [PATCH 23/79] refactor schema values for ease of UI --- lib/index.js | 13 +++++++------ lib/models/field-collection.js | 3 ++- lib/models/field.js | 11 ++++++++++- lib/models/type.js | 16 +++++++--------- lib/models/value-collection.js | 4 ++++ lib/models/value.js | 16 ++++++++++++++++ 6 
files changed, 46 insertions(+), 17 deletions(-) create mode 100644 lib/models/value-collection.js create mode 100644 lib/models/value.js diff --git a/lib/index.js b/lib/index.js index fef17ee..712214c 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,6 +4,7 @@ var debug = require('debug')('mongodb-schema:models:schema'); var es = require('event-stream'); var flatten = require('flatnest').flatten; var _ = require('lodash'); +var format = require('util').format; function getValueType(value) { var T; @@ -53,7 +54,7 @@ var Schema = AmpersandState.extend({ field.types.add({ _id: T, count: 1, - value: value + value: format('%j', value).replace(/"/g, '') }); } else { debug('`%s` is already a known field with %d type(s)', _id, field.types.length); @@ -63,11 +64,11 @@ var Schema = AmpersandState.extend({ field.types.add({ _id: T, count: 1, - value: value + value: format('%j', value).replace(/"/g, '') }); } else { debug('updating existing type %j', existingType); - existingType.values.push(value); + existingType.values.add(format('%j', value).replace(/"/g, '')); existingType.count += 1; } field.count += 1; @@ -76,9 +77,9 @@ var Schema = AmpersandState.extend({ }, this); schema.count += 1; }, function() { - debug('finalized schema is', JSON.stringify(schema, null, 2)); - this.emit('end'); - }); + debug('finalized schema is', JSON.stringify(schema, null, 2)); + this.emit('end'); + }); } }); diff --git a/lib/models/field-collection.js b/lib/models/field-collection.js index 55bc73c..1f8f15a 100644 --- a/lib/models/field-collection.js +++ b/lib/models/field-collection.js @@ -1,4 +1,5 @@ module.exports = require('ampersand-collection').extend({ model: require('./field'), - mainIndex: '_id' + mainIndex: '_id', + comparator: '_id' }); diff --git a/lib/models/field.js b/lib/models/field.js index 4d726c4..a6b76e5 100644 --- a/lib/models/field.js +++ b/lib/models/field.js @@ -1,5 +1,6 @@ var AmpersandState = require('ampersand-state'); var TypeCollection = 
require('./type-collection'); +var ValueCollection = require('./value-collection'); module.exports = AmpersandState.extend({ idAttribute: '_id', @@ -23,6 +24,14 @@ module.exports = AmpersandState.extend({ }, }, collections: { - types: TypeCollection + types: TypeCollection, + values: ValueCollection + }, + initialize: function() { + this.types.on('add', function(type) { + type.values.on('add', function(model) { + this.parent.values.add(model); + }.bind(this)); + }); } }); diff --git a/lib/models/type.js b/lib/models/type.js index afb3912..74fbbd2 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -1,5 +1,6 @@ var AmpersandState = require('ampersand-state'); var _ = require('lodash'); +var ValueCollection = require('./value-collection'); var Type = AmpersandState.extend({ idAttribute: '_id', @@ -10,19 +11,16 @@ var Type = AmpersandState.extend({ count: { type: 'number', default: 1 - }, - values: { - type: 'array', - default: function() { - return []; - } } }, + collections: { + values: ValueCollection + }, derived: { unique: { deps: ['values', 'count'], fn: function() { - return _.unique(this.values).length; + return _.unique(this.values.models).length; } }, probability: { @@ -35,8 +33,8 @@ var Type = AmpersandState.extend({ } }, initialize: function(options) { - if (options.value) { - this.values.push(options.value); + if (options.value && this.values) { + this.values.add(options.value); } }, serialize: function() { diff --git a/lib/models/value-collection.js b/lib/models/value-collection.js new file mode 100644 index 0000000..4f4980b --- /dev/null +++ b/lib/models/value-collection.js @@ -0,0 +1,4 @@ +var AmpersandCollection = require('ampersand-collection'); +module.exports = AmpersandCollection.extend({ + model: require('./value') +}); diff --git a/lib/models/value.js b/lib/models/value.js new file mode 100644 index 0000000..bd4df32 --- /dev/null +++ b/lib/models/value.js @@ -0,0 +1,16 @@ +var AmpersandState = require('ampersand-state'); + 
+module.exports = AmpersandState.extend({ + _idAttribute: '_id', + props: { + _id: { + type: 'any' + } + }, + initialize: function(val) { + if (val === '') { + val = ''; + } + this._id = val; + } +}); From f7663c211accdd775605b4cf12024b4e4b41dd1b Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Tue, 12 May 2015 14:47:31 -0400 Subject: [PATCH 24/79] cleanup before diving back in --- lib/flatten.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/index.js | 5 ++++- 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 lib/flatten.js diff --git a/lib/flatten.js b/lib/flatten.js new file mode 100644 index 0000000..f10ea84 --- /dev/null +++ b/lib/flatten.js @@ -0,0 +1,61 @@ +module.exports = flatten; + +function flatten(obj) { + var flattened = {}; + + var circlular = []; + var circLoc = []; + + function _route(prefix, value) { + var i, len, type, keys, circularCheck, loc; + + if (value === null) { + flattened[prefix] = null; + return; + } + + type = typeof value; + if (typeof value === 'object') { + circularCheck = circlular.indexOf(value); + if (circularCheck >= 0) { + loc = circLoc[circularCheck] || 'this'; + flattened[prefix] = '[Circular (' + loc + ')]'; + return; + } + circlular.push(value); + circLoc.push(prefix); + + if (Array.isArray(value)) { + len = value.length; + if (len === 0) { + _route(prefix + '[]', null); + } + for (i = 0; i < len; i++) { + _route(prefix + '[' + i + ']', value[i]); + } + return; + } + if (value.hasOwnProperty('_bsontype')) { + flattened[prefix] = value.toString(); + return; + } + keys = Object.keys(value); + len = keys.length; + if (prefix) { + prefix = prefix + '.'; + } + if (len === 0) { + _route(prefix, null); + } + for (i = 0; i < len; i++) { + _route(prefix + keys[i], value[keys[i]]); + } + return; + } + flattened[prefix] = value; + } + + _route('', obj); + + return flattened; +} diff --git a/lib/index.js b/lib/index.js index 712214c..5494be6 100644 --- a/lib/index.js +++ b/lib/index.js @@ -2,7 +2,7 @@ 
var AmpersandState = require('ampersand-state'); var FieldCollection = require('./models/field-collection'); var debug = require('debug')('mongodb-schema:models:schema'); var es = require('event-stream'); -var flatten = require('flatnest').flatten; +var flatten = require('./flatten'); var _ = require('lodash'); var format = require('util').format; @@ -30,6 +30,9 @@ var Schema = AmpersandState.extend({ default: 0 } }, + initialize: function(options) { + this.ns = options.ns; + }, stream: function() { var schema = this; return es.through(function(doc) { From 45b16c668d2b701d9f83a738a350b4dad643904a Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Tue, 12 May 2015 16:16:26 -0400 Subject: [PATCH 25/79] backout demo debugging --- lib/index.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/index.js b/lib/index.js index 5494be6..d894542 100644 --- a/lib/index.js +++ b/lib/index.js @@ -31,6 +31,7 @@ var Schema = AmpersandState.extend({ } }, initialize: function(options) { + options = options || {}; this.ns = options.ns; }, stream: function() { @@ -57,7 +58,7 @@ var Schema = AmpersandState.extend({ field.types.add({ _id: T, count: 1, - value: format('%j', value).replace(/"/g, '') + value: value }); } else { debug('`%s` is already a known field with %d type(s)', _id, field.types.length); @@ -67,11 +68,11 @@ var Schema = AmpersandState.extend({ field.types.add({ _id: T, count: 1, - value: format('%j', value).replace(/"/g, '') + value: value }); } else { debug('updating existing type %j', existingType); - existingType.values.add(format('%j', value).replace(/"/g, '')); + existingType.values.add(value); existingType.count += 1; } field.count += 1; From 0a85c12d7ba5dc8853ab7ff5bc5cd457e47e00c9 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 13:41:15 -0400 Subject: [PATCH 26/79] fix(test): remove old fixtures --- fixtures/all_types.js | 212 ------------------------------------------ 1 file changed, 212 deletions(-) delete 
mode 100644 fixtures/all_types.js diff --git a/fixtures/all_types.js b/fixtures/all_types.js deleted file mode 100644 index b80c57d..0000000 --- a/fixtures/all_types.js +++ /dev/null @@ -1,212 +0,0 @@ -module.exports = -[ - { - "_id": "5543129258b9383aab07d0fb", - "x": 123.123, - "comment": "new MongoDB.Double(123.123)", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d0fc", - "x": 456.456, - "comment": "456.456", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d0fd", - "x": "abc", - "comment": "abc", - "btype": 2 - }, - { - "_id": "5543129258b9383aab07d0fe", - "x": { - "z": 5 - }, - "comment": "{\"z\": 5}", - "btype": 3 - }, - { - "_id": "5543129258b9383aab07d0ff", - "x": [ - 9, - 8, - 7 - ], - "comment": "[9, 8, 7]", - "btype": 16 - }, - { - "_id": "5543129258b9383aab07d100", - "x": [ - { - "y": 4 - }, - { - "z": 5 - } - ], - "comment": "[{\"y\": 4}, {\"z\": 5}]", - "btype": 3 - }, - { - "_id": "5543129258b9383aab07d101", - "x": "YmluYXJ5", - "comment": "new MongoDB.Binary(\"binary\")", - "btype": 5 - }, - { - "_id": "5543129258b9383aab07d102", - "x": "5040dc5d40b67c681d000001", - "comment": "new MongoDB.ObjectID(\"5040dc5d40b67c681d000001\")", - "btype": 7 - }, - { - "_id": "5543129258b9383aab07d103", - "x": false, - "comment": "false", - "btype": 8 - }, - { - "_id": "5543129258b9383aab07d104", - "x": true, - "comment": "true", - "btype": 8 - }, - { - "_id": "5543129258b9383aab07d105", - "x": "2012-08-31T12:13:14.156Z", - "comment": "new Date(\"2012-08-31 12:13:14:156 UTC\")", - "btype": 9 - }, - { - "_id": "5543129258b9383aab07d106", - "x": null, - "comment": "null", - "btype": 10 - }, - { - "_id": "5543129258b9383aab07d107", - "x": {}, - "comment": "new RegExp(\"abc\")", - "btype": 11 - }, - { - "_id": "5543129258b9383aab07d108", - "x": {}, - "comment": "new RegExp(\"abc\", \"i\")", - "btype": 11 - }, - { - "_id": "5543129258b9383aab07d109", - "x": { - "$ref": "types", - "$id": "040dc5d40b67c681d000001", - "$db": "types" - }, - "comment": "new 
MongoDB.DBRef(\"types\", \"5040dc5d40b67c681d000001\", \"types\")", - "btype": 3 - }, - { - "_id": "5543129258b9383aab07d10a", - "x": { - "scope": {}, - "code": "function () { return 'test'; }" - }, - "comment": "new MongoDB.Code(\"function () { return ' test'; }\")", - "btype": 13 - }, - { - "_id": "5543129258b9383aab07d10b", - "x": "def15", - "comment": "new MongoDB.Symbol(\"def15\")", - "btype": 14 - }, - { - "_id": "5543129258b9383aab07d10c", - "x": { - "scope": { - "a": 4 - }, - "code": "function () { return a; }" - }, - "comment": " new MongoDB.Code(\"function () { return a; }\", {\"a\": 4})", - "btype": 15 - }, - { - "_id": "5543129258b9383aab07d10d", - "x": 123456, - "comment": "123456", - "btype": 16 - }, - { - "_id": "5543129258b9383aab07d10e", - "x": "8589934593", - "comment": "new MongoDB.Timestamp(1, 2)", - "btype": 17 - }, - { - "_id": "5543129258b9383aab07d10f", - "x": 1286608618, - "comment": "new MongoDB.Long(\"9876543210\")", - "btype": 18 - }, - { - "_id": "5543129258b9383aab07d110", - "x": { - "_bsontype": "MinKey" - }, - "comment": "new MongoDB.MinKey()", - "btype": 255 - }, - { - "_id": "5543129258b9383aab07d111", - "x": { - "_bsontype": "MaxKey" - }, - "comment": "new MongoDB.MaxKey()", - "btype": 127 - }, - { - "_id": "5543129258b9383aab07d112", - "x": null, - "comment": "undefined", - "btype": 10 - }, - { - "_id": "5543129258b9383aab07d113", - "x": null, - "comment": "Number.NaN", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d114", - "x": null, - "comment": "Infinity", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d115", - "x": null, - "comment": "Number.POSITIVE_INFINITY", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d116", - "x": null, - "comment": "Number.NEGATIVE_INFINITY", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d117", - "x": 5e-324, - "comment": "MIN_VALUE", - "btype": 1 - }, - { - "_id": "5543129258b9383aab07d118", - "x": 1.7976931348623157e+308, - "comment": "MAX_VALUE", - "btype": 1 - } -] From 
707d8bcf71f1afbc70f7f530b266ea7aee54c983 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 13:42:15 -0400 Subject: [PATCH 27/79] fix(flatten): proper date + bson type support --- lib/flatten.js | 79 ++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/lib/flatten.js b/lib/flatten.js index f10ea84..7884652 100644 --- a/lib/flatten.js +++ b/lib/flatten.js @@ -1,58 +1,67 @@ module.exports = flatten; +var debug = require('debug')('mongodb-schema:flatten'); function flatten(obj) { var flattened = {}; - var circlular = []; - var circLoc = []; - function _route(prefix, value) { - var i, len, type, keys, circularCheck, loc; + var i; if (value === null) { flattened[prefix] = null; return; } - type = typeof value; - if (typeof value === 'object') { - circularCheck = circlular.indexOf(value); - if (circularCheck >= 0) { - loc = circLoc[circularCheck] || 'this'; - flattened[prefix] = '[Circular (' + loc + ')]'; - return; + var type = typeof value; + + if (type === 'string') { + flattened[prefix] = value; + } + // booleans, null and undefined + else if (type === 'boolean' || obj === null || obj === undefined) { + flattened[prefix] = value; + } + // numbers + else if (type === 'number') { + flattened[prefix] = value; + } + // dates + else if (Object.prototype.toString.call(value) === '[object Date]') { + flattened[prefix] = value; + } else if (Array.isArray(value)) { + len = value.length; + flattened[prefix] = 'Array'; + + if (len === 0) { + _route(prefix + '[]', null); } - circlular.push(value); - circLoc.push(prefix); + for (i = 0; i < len; i++) { + _route(prefix + '[' + i + ']', value[i]); + } + } else if (type === 'object') { + if (value.hasOwnProperty('_bsontype')) { + debug('_bsontype is %s', value._bsontype); + flattened[prefix] = value; + } else { + var keys = Object.keys(value); + var len = keys.length; + if (prefix) { + flattened[prefix] = 'Object'; + } - if (Array.isArray(value)) { - len 
= value.length; + if (prefix) { + prefix = prefix + '.'; + } if (len === 0) { - _route(prefix + '[]', null); + _route(prefix, null); } for (i = 0; i < len; i++) { - _route(prefix + '[' + i + ']', value[i]); + _route(prefix + keys[i], value[keys[i]]); } - return; - } - if (value.hasOwnProperty('_bsontype')) { - flattened[prefix] = value.toString(); - return; - } - keys = Object.keys(value); - len = keys.length; - if (prefix) { - prefix = prefix + '.'; } - if (len === 0) { - _route(prefix, null); - } - for (i = 0; i < len; i++) { - _route(prefix + keys[i], value[keys[i]]); - } - return; + } else { + throw new Error('Unknown type for ' + JSON.stringify(value)); } - flattened[prefix] = value; } _route('', obj); From d7800bc3f8f927e9459fc92145ee901c89b65729 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 13:43:07 -0400 Subject: [PATCH 28/79] debugging proper handling of falsey values --- lib/models/type-collection.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/models/type-collection.js b/lib/models/type-collection.js index b9e887c..f9ee98c 100644 --- a/lib/models/type-collection.js +++ b/lib/models/type-collection.js @@ -7,14 +7,18 @@ module.exports = AmpersandCollection.extend({ if (attrs && Object.keys(attrs).length === 0) return; var val = attrs.value; - var T = (val && val._bsonType) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, '$1'); + var T = (val && val._bsontype) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, '$1'); var Klass = type[T]; if (!Klass) { throw new TypeError('No value type for ' + T); } + + if (!val) { + val = '' + val; + } return new Klass({ - value: val - }, options); + value: val + }, options); } }); From 1fd6325d0e9e6bc349f3dffe502fa0ec1fb2d708 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 13:44:32 -0400 Subject: [PATCH 29/79] fix(type): Support for Date, ObjectID, Undefined --- lib/models/type.js | 23 
+++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lib/models/type.js b/lib/models/type.js index 74fbbd2..cbd81c5 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -76,3 +76,26 @@ exports.Boolean = Type.extend({ } } }); + +exports.Date = Type.extend({ + props: { + _id: { + default: 'Date' + } + } +}); +exports.ObjectID = Type.extend({ + props: { + _id: { + default: 'ObjectID' + } + } +}); + +exports.Undefined = Type.extend({ + props: { + _id: { + default: 'Undefined' + } + } +}); From 02773ab7a2e321110882896a13c1d7073ec4d2e7 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 13:46:06 -0400 Subject: [PATCH 30/79] [wip]: sorting out nesting + wonky counters --- lib/index.js | 49 ++++++++++++++++++++-------------------- lib/models/field.js | 11 +++++++++ lib/models/type.js | 54 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 84 insertions(+), 30 deletions(-) diff --git a/lib/index.js b/lib/index.js index d894542..5d07b8f 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,12 +4,11 @@ var debug = require('debug')('mongodb-schema:models:schema'); var es = require('event-stream'); var flatten = require('./flatten'); var _ = require('lodash'); -var format = require('util').format; function getValueType(value) { var T; - if (_.has(value, '_bsonType')) { - T = value._bsonType; + if (_.has(value, '_bsontype')) { + T = value._bsontype; } else { T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); } @@ -43,45 +42,47 @@ var Schema = AmpersandState.extend({ var value = d[1]; var T = getValueType(d[1]); var field = schema.fields.get(_id); - - if (isArrayKey(_id)) { + var existingType; + if (value === 'Array') { return debug('@todo: smush nested arrays. skipping `%s`', _id); } + if (value === 'Object') { + return debug('@todo: smush nested objects. 
skipping `%s`', _id); + } + schema.count += 1; + debug(_id, T, value, '' + value); + value = '' + value; if (!field) { - debug('`%s` is a new field', _id); + debug('`%s` is a new field with type %s', _id, T); field = schema.fields.add({ - _id: _id, - count: 1 + _id: _id }); field.types.add({ - _id: T, - count: 1, - value: value - }); + _id: T + }).values.add(value); } else { - debug('`%s` is already a known field with %d type(s)', _id, field.types.length); - var existingType = field.types.get(T); + // debug('`%s` is already a known field with types', _id, field.types.map(function(d) { + // return d.getId(); + // })); + + existingType = field.types.get(T); if (!existingType) { - debug('new type `%s` for field `%s`', T, _id); + // debug('new type `%s` for field `%s`', T, _id); field.types.add({ - _id: T, - count: 1, - value: value - }); + _id: T + }).values.add(value); } else { - debug('updating existing type %j', existingType); + // debug('updating existing type %j', existingType); existingType.values.add(value); - existingType.count += 1; } - field.count += 1; } this.emit('data', field); }, this); - schema.count += 1; }, function() { - debug('finalized schema is', JSON.stringify(schema, null, 2)); + // debug('finalized schema is', JSON.stringify(schema, null, 2)); + console.table(schema.fields.serialize()); this.emit('end'); }); } diff --git a/lib/models/field.js b/lib/models/field.js index a6b76e5..08ffe10 100644 --- a/lib/models/field.js +++ b/lib/models/field.js @@ -1,6 +1,7 @@ var AmpersandState = require('ampersand-state'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); +var debug = require('debug')('mongodb-schema-field'); module.exports = AmpersandState.extend({ idAttribute: '_id', @@ -28,8 +29,18 @@ module.exports = AmpersandState.extend({ values: ValueCollection }, initialize: function() { + this.types.on('all', function(name) { + debug('got a collection event on types', arguments); + }); + + var 
field = this; this.types.on('add', function(type) { + if (!type) return console.log('WTF is type?', type); + + if (!type.values) return; + type.values.on('add', function(model) { + field.count += 1; this.parent.values.add(model); }.bind(this)); }); diff --git a/lib/models/type.js b/lib/models/type.js index cbd81c5..fb29e4e 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -1,6 +1,7 @@ var AmpersandState = require('ampersand-state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); +var TypeCollection = require('./type-collection'); var Type = AmpersandState.extend({ idAttribute: '_id', @@ -10,7 +11,7 @@ var Type = AmpersandState.extend({ }, count: { type: 'number', - default: 1 + default: 0 } }, collections: { @@ -18,14 +19,13 @@ var Type = AmpersandState.extend({ }, derived: { unique: { - deps: ['values', 'count'], + deps: ['count'], fn: function() { return _.unique(this.values.models).length; } }, probability: { deps: ['count'], - cached: false, fn: function() { var field = this.collection.parent; return this.count / field.count; @@ -33,9 +33,7 @@ var Type = AmpersandState.extend({ } }, initialize: function(options) { - if (options.value && this.values) { - this.values.add(options.value); - } + // this.values.add(options.value); }, serialize: function() { return this.getAttributes({ @@ -84,6 +82,49 @@ exports.Date = Type.extend({ } } }); + +exports.Object = AmpersandState.extend({ + idAttribute: '_id', + props: { + _id: { + type: 'string', + default: 'Object' + } + }, + // collections: { + // children: TypeCollection + // }, + derived: {}, + initialize: function(options) { + // this.values.add(options.value); + }, + serialize: function() { + return this.getAttributes({ + props: true, + derived: true + }, true); + } +}); + +exports.Array = AmpersandState.extend({ + idAttribute: '_id', + props: { + _id: { + type: 'string', + default: 'Array' + } + }, + // collections: { + // children: TypeCollection + // }, + serialize: 
function() { + return this.getAttributes({ + props: true, + derived: true + }, true); + } +}); + exports.ObjectID = Type.extend({ props: { _id: { @@ -99,3 +140,4 @@ exports.Undefined = Type.extend({ } } }); + From 2a96e2d1586b75656829faf7d1fbde3568c589c3 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 14:21:26 -0400 Subject: [PATCH 31/79] more cleanup --- scripts/read_types.js | 29 ------- scripts/write_types.js | 166 ----------------------------------------- 2 files changed, 195 deletions(-) delete mode 100644 scripts/read_types.js delete mode 100644 scripts/write_types.js diff --git a/scripts/read_types.js b/scripts/read_types.js deleted file mode 100644 index f10a830..0000000 --- a/scripts/read_types.js +++ /dev/null @@ -1,29 +0,0 @@ -// script to read documents from the test.types collection created by `write_types.js` -var BSON = require('bson').BSONPure.BSON; - -var MongoClient = require('mongodb').MongoClient, - assert = require('assert'); - -var url = 'mongodb://localhost:27017/test'; - - -MongoClient.connect(url, function(err, db) { - assert.ifError(err); - - // Get the documents collection - var collection = db.collection('types', {raw: false}); - - // find all documents and print them out - collection.find({}).sort({btype:1}).toArray(function(err, docs) { - assert.ifError(err); - - // don't use JSON.stringify here, it loses a lot of information like _bsontype - docs.forEach(function (doc) { - console.log(doc.x, doc.btype, doc.comment); - console.log() - // console.log(BSON.deserialize(doc)); - }); - db.close(); - }); -}); - diff --git a/scripts/write_types.js b/scripts/write_types.js deleted file mode 100644 index 783c5e0..0000000 --- a/scripts/write_types.js +++ /dev/null @@ -1,166 +0,0 @@ -// adapted from: https://realprogrammer.wordpress.com/2013/02/10/mongodb-and-node-js-part-1-list-of-documents-for-all-types/ - -// writes documents of each bson type to a MongoDB instance running at localhost:27027 in the test.types collection - 
-var MongoDB = require('mongodb'); - -/* - Type codes - ========== - 1 "\x01" e_name double Floating point - 2 "\x02" e_name string UTF-8 string - 3 "\x03" e_name document Embedded document - 4 "\x04" e_name document Array - 5 "\x05" e_name binary Binary data - - 7 "\x07" e_name (byte*12) ObjectId - 8 "\x08" e_name "\x00" Boolean "false" - 8 "\x08" e_name "\x01" Boolean "true" - 9 "\x09" e_name int64 UTC datetime - 10 "\x0A" e_name Null value - 11 "\x0B" e_name cstring cstring Regular expression - - 13 "\x0D" e_name string JavaScript code - - 15 "\x0F" e_name code_w_s JavaScript code w/ scope - 16 "\x10" e_name int32 32-bit Integer - 17 "\x11" e_name int64 Timestamp - 18 "\x12" e_name int64 64-bit integer - 255 "\xFF" e_name Min key - 127 "\x7F" e_name Max key - - Deprecated type codes - ===================== - 6 "\x06" e_name Undefined — Deprecated - 12 "\x0C" e_name string (byte*12) DBPointer — Deprecated - 14 "\x0E" e_name string Symbol — Deprecated - - */ - -var typeDocuments; - -typeDocuments = [ - {"x": new MongoDB.Double(123.123), - "comment": "new MongoDB.Double(123.123)", - "btype": 1}, - {"x": 456.456, - "comment": "456.456", - "btype": 1}, - {"x": "abc", - "comment": "abc", - "btype": 2}, - {"x": {"z": 5}, - "comment": "{\"z\": 5}", - "btype": 3}, - // this is not type:4 - {"x": [9, 8, 7], - "comment": "[9, 8, 7]", - "btype": 16}, - {"x": [ - {"y": 4}, - {"z": 5} - ], "comment": "[{\"y\": 4}, {\"z\": 5}]", - "btype": 3}, - {"x": new MongoDB.Binary("binary"), - "comment": "new MongoDB.Binary(\"binary\")", - "btype": 5}, - // t:6 deprecated (was 'undefined') - not implemented - {"x": new MongoDB.ObjectID("5040dc5d40b67c681d000001"), - "comment": "new MongoDB.ObjectID(\"5040dc5d40b67c681d000001\")", - "btype": 7}, - {"x": false, - "comment": "false", - "btype": 8}, - {"x": true, - "comment": "true", - "btype": 8}, - {"x": new Date("2012-08-31 12:13:14:156 UTC"), - "comment": "new Date(\"2012-08-31 12:13:14:156 UTC\")", - "btype": 9}, - {"x": null, - 
"comment": "null", - "btype": 10}, - {"x": new RegExp("abc"), - "comment": "new RegExp(\"abc\")", - "btype": 11}, - {"x": new RegExp("abc", "i"), - "comment": "new RegExp(\"abc\", \"i\")", - "btype": 11}, - // t:12 DBRef deprecated - still implemented - // this is not type:12 - {"x": new MongoDB.DBRef("types", "040dc5d40b67c681d000001", "types"), - "comment": "new MongoDB.DBRef(\"types\", \"5040dc5d40b67c681d000001\", \"types\")", - "btype": 3}, - {"x": new MongoDB.Code("function () { return 'test'; }"), - "comment": "new MongoDB.Code(\"function () { return ' test'; }\")", - "btype": 13}, - // t:14 Symbol deprecated - still implemented - {"x": new MongoDB.Symbol("def15"), - "comment": "new MongoDB.Symbol(\"def15\")", - "btype": 14}, - {"x": new MongoDB.Code("function () { return a; }", {"a": 4}), - "comment": " new MongoDB.Code(\"function () { return a; }\", {\"a\": 4})", - "btype": 15}, - {"x": 123456, - "comment": "123456", - "btype": 16}, - {"x": new MongoDB.Timestamp(1, 2), - "comment": "new MongoDB.Timestamp(1, 2)", - "btype": 17}, - {"x": new MongoDB.Long("9876543210"), - "comment": "new MongoDB.Long(\"9876543210\")", - "btype": 18}, - {"x": new MongoDB.MinKey(), - "comment": "new MongoDB.MinKey()", - "btype": 255}, - {"x": new MongoDB.MaxKey(), - "comment": "new MongoDB.MaxKey()", - "btype": 127}, - // ADDITIONAL POSSIBLE VALUES - // 'undefined' will be converted to 'null'; type will be 'null' (aka 10) also - {"x": undefined, - "comment": "undefined", - "btype": 10}, - {"x": Number.NaN, - "comment": "Number.NaN", - "btype": 1}, - {"x": Infinity, - "comment": "Infinity", - "btype": 1}, - {"x": Number.POSITIVE_INFINITY, - "comment": "Number.POSITIVE_INFINITY", - "btype": 1}, - {"x": Number.NEGATIVE_INFINITY, - "comment": "Number.NEGATIVE_INFINITY", - "btype": 1}, - {"x": Number.MIN_VALUE, - "comment": "MIN_VALUE", - "btype": 1}, - {"x": Number.MAX_VALUE, - "comment": "MAX_VALUE", - "btype": 1} -]; - -var Db = MongoDB.Db, - Server = MongoDB.Server; -var db = 
new Db('test', new Server("127.0.0.1", 27017, - {auto_reconnect: false, poolSize: 4}), {native_parser: false, safe: false}); - -db.open(function (err, db) { - "use strict"; - db.dropCollection("types", function (err, result) { - if (err) { - console.log(err.toString()); - } - console.log("dropped collection"); - db.collection("types", function (err, collection) { - collection.insert(typeDocuments, {safe: true}, function (err, res) { - if (err) { - console.log(err.toString()); - } - console.log("inserted all types into test.types"); - db.close(); - }); - }); - }); -}); From 87fcd1415128fc939a51e406d07bae81979d5a2e Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 14:21:37 -0400 Subject: [PATCH 32/79] more cleanup --- lib/schema.js | 199 -------------------------------------------------- 1 file changed, 199 deletions(-) delete mode 100644 lib/schema.js diff --git a/lib/schema.js b/lib/schema.js deleted file mode 100644 index e392672..0000000 --- a/lib/schema.js +++ /dev/null @@ -1,199 +0,0 @@ -// var defs = require('./definitions'); -var pkg = require('../package.json'); -var find = require('lodash.find'); -var BSON = require('bson'); -var isInteger = require('is-integer'); -var each = require('lodash.foreach'); -var stream = require('stream'); -var debug = require('debug')('mongodb-schema'); - -// these types have a _bsontype property -var bsontypeMap = { - 'ObjectID': 7, - 'Long': 18, - 'MinKey': 255, - 'MaxKey': 127, - 'Code': 15, // no differentiation to 13 - 'Binary': 5, - 'DBRef': 12, - 'Timestamp': 17 -}; - - -/** - * return the bson type of `value` - * @param {any} value value to get the type for - * @return {number} bson type as decimal number - */ -function _getType(value) { - if (typeof value === 'number') { - // could be int (16) or float (1) - return isInteger(value) ? 
16 : 1; - } - - if (typeof value === 'string') { - // could be symbol (14, deprecated) or string (2), assume string - return 2; - } - - if (typeof value === 'boolean') { - return 8; - } - - if (value === null) { - return 10; - } - - if (typeof value === 'object') { - // could be embedded document (3), array (4), binary (5), objectid (7), - // datetime (9), regular expression (11), dbref (12), code (13), - // code with scope (15), timestamp (17), minkey (255), maxkey (127). - - if (value.hasOwnProperty('_bsontype')) { - // objectid, dbref, binary, code, code with scope, timestamp, maxkey, minkey - return bsontypeMap[value._bsontype]; - } - - if (value instanceof Array) { - return 4; - } - - if (value instanceof Date) { - return 9; - } - - if (value instanceof RegExp) { - return 11; - } - - // if nothing matches, it's a nested document - return 3; - } - - // should not get here - throw Error('invalid type'); -} - - - -function _pushValue(value, data_obj) { - if (!data_obj.hasOwnProperty('values')) { - data_obj.values = []; - } - data_obj.values.push(value); -} - -function _addToSet(value, data_obj) { - -} - - -function _aggregate(name, value, type, data_obj) { - - switch (type) { - case 1: _pushValue(value, data_obj); break; - case 2: _pushValue(value, data_obj); break; - case 3: break; - // ... 
- - } - - if (type === 1) { // float - _pushValue(value, data_obj); - } - - if (type === 2) { - // @todo - } -} - -function _finalize(schema) { - -} - -/** - * analyse property and integrate it into the schema - * @param {array} documents array of sample documents to integrate into schema - * @return {object} resulting schema - */ -function _infer(obj, schema) { - - for (var name in obj) { - if (!obj.hasOwnProperty(name)) continue; - - var value = obj[name]; - - // create schema member if not present yet - if (!(name in schema)) { - schema[name] = {}; - schema[name][defs.ESCAPE + defs.SCHEMA] = []; - } - var tag = schema[name][defs.ESCAPE + defs.SCHEMA]; - - // get type of `value` - var bsontype = _getType(value); - - // find schema array element for correct type or create one - // @review should this be an object rather than array? at least while building the schema? - var type_obj = find(tag, function (el) { - return el[defs.TYPE] === bsontype; - }); - - if (!type_obj) { - // not found, create one - type_obj = {}; - type_obj[defs.TYPE] = bsontype; - type_obj[defs.COUNT] = 0; - type_obj[defs.PROB] = 0.0; - type_obj[defs.UNIQUE] = null; // should be determined at the end - type_obj[defs.DATA] = {}; - - tag.push(type_obj); - } - - // increase counts, add data, check uniqueness - type_obj[defs.COUNT] += 1; - _aggregate(name, value, bsontype, type_obj[defs.DATA]); - - // special handling for arrays (type 4) - - // recursive call for nested documents (type 3) - if (bsontype === 3) { - _infer(value, schema[name]); - } - } -} - -/** - * main schema function - * @param {array} documents array of sample documents to integrate into schema - * @return {object} resulting schema - */ -module.exports = function(documents) { - var schema = {}; - - // @todo: see above on moving this to a class. 
- - // add root tag and version - var root = defs.ESCAPE + defs.ROOT; - schema[root] = {}; - schema[root][defs.VERSION] = pkg.version; - schema[root][defs.COUNT] = 0; - - // ensure `documents` is array or undefined - if (documents === undefined) { - documents = []; - } - - if (!(documents instanceof Array)) { - throw new TypeError('`documents` must be an array.'); - } - // @todo: finish cleanup - // walk all documents - each(documents, function inspect_document(doc) { - // increase global counter - schema[root][defs.COUNT] += 1; - _infer(doc, schema); - }); - return schema; -}; From f4075086e291c5373bf29e38788b819a0738dc2f Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 14:22:09 -0400 Subject: [PATCH 33/79] more cleanup --- lib/models/field.js | 9 --------- lib/models/type.js | 35 ++++++++++------------------------- lib/models/value.js | 3 --- 3 files changed, 10 insertions(+), 37 deletions(-) diff --git a/lib/models/field.js b/lib/models/field.js index 08ffe10..467e9e1 100644 --- a/lib/models/field.js +++ b/lib/models/field.js @@ -1,7 +1,6 @@ var AmpersandState = require('ampersand-state'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); -var debug = require('debug')('mongodb-schema-field'); module.exports = AmpersandState.extend({ idAttribute: '_id', @@ -29,16 +28,8 @@ module.exports = AmpersandState.extend({ values: ValueCollection }, initialize: function() { - this.types.on('all', function(name) { - debug('got a collection event on types', arguments); - }); - var field = this; this.types.on('add', function(type) { - if (!type) return console.log('WTF is type?', type); - - if (!type.values) return; - type.values.on('add', function(model) { field.count += 1; this.parent.values.add(model); diff --git a/lib/models/type.js b/lib/models/type.js index fb29e4e..4a84115 100644 --- a/lib/models/type.js +++ b/lib/models/type.js @@ -32,9 +32,6 @@ var Type = AmpersandState.extend({ } } }, - 
initialize: function(options) { - // this.values.add(options.value); - }, serialize: function() { return this.getAttributes({ props: true, @@ -43,6 +40,16 @@ var Type = AmpersandState.extend({ } }); +exports.getNameFromValue = function(value) { + var T; + if (_.has(value, '_bsontype')) { + T = value._bsontype; + } else { + T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); + } + return T; +}; + exports.String = Type.extend({ props: { _id: { @@ -90,19 +97,6 @@ exports.Object = AmpersandState.extend({ type: 'string', default: 'Object' } - }, - // collections: { - // children: TypeCollection - // }, - derived: {}, - initialize: function(options) { - // this.values.add(options.value); - }, - serialize: function() { - return this.getAttributes({ - props: true, - derived: true - }, true); } }); @@ -113,15 +107,6 @@ exports.Array = AmpersandState.extend({ type: 'string', default: 'Array' } - }, - // collections: { - // children: TypeCollection - // }, - serialize: function() { - return this.getAttributes({ - props: true, - derived: true - }, true); } }); diff --git a/lib/models/value.js b/lib/models/value.js index bd4df32..ffab61f 100644 --- a/lib/models/value.js +++ b/lib/models/value.js @@ -8,9 +8,6 @@ module.exports = AmpersandState.extend({ } }, initialize: function(val) { - if (val === '') { - val = ''; - } this._id = val; } }); From 6be9e7a173c405b0bd99a8362c45eb405591bdfc Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 14:34:47 -0400 Subject: [PATCH 34/79] more cleanup + simplify layout --- README.md | 395 +-------------------------- lib/{models => }/field-collection.js | 0 lib/{models => }/field.js | 0 lib/index.js | 100 +------ lib/schema.js | 70 +++++ lib/{models => }/type-collection.js | 0 lib/{models => }/type.js | 0 lib/{models => }/value-collection.js | 0 lib/{models => }/value.js | 0 test/test.js | 55 +--- 10 files changed, 86 insertions(+), 534 deletions(-) rename lib/{models => }/field-collection.js (100%) 
rename lib/{models => }/field.js (100%) create mode 100644 lib/schema.js rename lib/{models => }/type-collection.js (100%) rename lib/{models => }/type.js (100%) rename lib/{models => }/value-collection.js (100%) rename lib/{models => }/value.js (100%) diff --git a/README.md b/README.md index 51c5d69..ae3ea9a 100644 --- a/README.md +++ b/README.md @@ -1,390 +1,11 @@ -mongodb-schema -============== +# mongodb-schema -Infer probabilistic schema of javascript objects or a MongoDB collection. +Infer probabilistic schema of javascript objects or a MongoDB collection. -**Warning**: This is a complete rewrite of the current master branch (0.6.0) and will introduce significant differences. +## Todo -## Specification - -Author: Matt Kangas, Thomas Rueckstiess
-Last Revised: 2015-04-29
-Status: Draft
- -### 0. Terminology - -Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON taxonomy ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. - -> ##### Example - -> An object with 2 members. The name of the first member is `foo` and the name of the second member is `bar`. Both member values are 1. - -> {"foo": 1, "bar": 1} - - - -### 1. Escape Character - -We shall define `#` (ASCII 0x23) as an _escape character_ to distinguish meta data members from members originating from sampled data. - -Rationale: - -- expressible in one byte of UTF-8 (ASCII) -- Non-numeric (not in `0`..`9`, ASCII range 0x30-0x39), because this conflicts with javascript objects/arrays) -- Not `$` (ASCII character 0x24), because it is not a valid prefix for member names in MongoDB - -We shall then encode member names as follows: - -- Member name begins with no escape character: -literal member name -- Member name begins with single escape character: -encoded metadata member -- Member name begins with double escape character: -literal member name which begins with single escape character - - -### 2. General Structure - -We define a _sample set_ as a number of MongoDB documents from a single collection. The documents may have been selected in random fashion, but this definition does not impose any restrictions on the method of acquiring the documents. The documents comprising the sample set are called _sample documents_. - -We define the _shape_ of a sample set as aggregated characteristics of all members of the documents in the sample set. These characteristics are further described below. - -We define a _schema_ as a JSON representation of the _shape_ of a sample set. - -The schema must be strict, valid [JSON](http://www.json.org/). 
MongoDB-specific types must be converted into strict JSON as per [MongoDB's extended JSON](http://docs.mongodb.org/manual/reference/mongodb-extended-json/) definition, "strict" variant. - -The schema follows the combined structure of all documents in the sample set. This means, that for every member in any sample document, a member with the same name exists in the schema at the same nesting depth. This rule applies to members at all nesting depths. The schema can thus be seen as a superposition of all sample documents. - -Within the schema, the value of any such member is an object. This is explicitly also true for leaf members in a sample document, i.e. values that are neither arrays (BSON type 4) nor nested documents (BSON type 3). Every such object contains an encoded meta-data member with the name `#schema` (note the escape character), in addition to potential nested children. This meta-data member with the name `#schema` is called a _tag_, and its value is an array that contains one element for each [BSON type](http://bsonspec.org/spec.html) encountered in the sample set for this particular member. - - -> ##### Example - -> Sample set: - -> {a: "foo"} -> {a: {b: 10, c: true}} -> {c: null} - -> Schema (with `...` placeholders for the tag arrays) - -> { -> "a": { -> "#schema": [...], // tag for a -> "b": { -> "#schema": [...], // tag for a.b -> }, -> "counts": { -> "#schema": [...], // tag for a.c -> } -> }, -> "counts": { -> "#schema": [...], // tag c -> } -> } - -### 3. Tags - -While the schema object itself describes the overall structure of the sample set, the aggregated characteristics of each member are contained within its tag. - -The tag array contains one element for each distinct type encountered in the sample set for the given field. The order of this array is not defined and considered an implementation detail. If a field is missing in a sample document, it is treated as type _undefined_, and we use the (deprecated) BSON type 6 to represent it. 
- -Each element in the array is an object with the following members: - -- `type`: integer representing the (decimal) BSON type, unique within each schema tag -- `number`: integer representing the number of documents encountered in the sample set that contain this field -- `prob`: float representing the (relative) probability of this field being present given its parent field is present -- `unique`: boolean representing whether or not the values of this field are unique under the given type -- `data`: object containing type-specific additional data - - -> ##### Example - -> Field with its tag (`...` is placeholder for type-specific data field) - -> "a": { -> "#schema": [ // tag for a -> { -> "type": 2, // "string" type -> "number": 160, // 160 encounters -> "prob": 0.8, // relative probability 0.8 means 200 parent objects -> "unique": false, // the values contain duplicates -> "data": {...} // placeholder, defined further below -> }, -> { -> "type": 3, // "nested document" type -> ... -> } -> ] -> } - - -### 4. Type-Specific Data - -Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `data` member, which carries additional information specific for the type. 
- - -#### Type 1: float - -The `data` object contains the following members: - -- `min`: The smallest value encountered in any sample document -- `max`: The largest value encountered in any sample document -- `avg`: The mean of all sample document values -- `med`: The median of all sample document values -- `values`: An array of all values encountered, in order of traversal - - -> ##### Example - -> "data": { -> "min": 0.0 -> "max": 32.8, -> "avg": 9.3499999, -> "med": 5.25, -> "values": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] -> } - - -#### Type 2: string - - -The `data` object contains the following members: - -- `min`: The smallest value encountered in any sample document -- `max`: The largest value encountered in any sample document -- `values`: Unique set of all values encountered, ordered by counts descending -- `counts`: count for each value, same order as above - - -> ##### Example - -> "data": { -> "min": "atlas", -> "max": "zoo", -> "values": [ "atlas", "song", "bird", "zoo", "breakfast" ], -> "counts": [ 15, 9, 7, 5, 2 ] -> } - - -#### Type 3: nested document - -The `data` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. - - -#### Type 4: array - -The `data` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. - -> ##### Example - -> This array contains only strings (there is only a single element with type `2` in the `#schema` array). This element follows the normal rules for string types, as described above. 
- -> "data": { -> "#array": [ -> { -> "type": 2, -> "number": 490, -> "prob": 1.0, -> "unique": false, -> "data": { -> "min": "AUH", -> "max": "ZRH", -> "values": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], -> "counts": [ 171, 110, 82, 40, 29, 23, 21, 14 ] -> } -> } -> ] -> } - - -#### Type 5: binary - -The `data` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `counts` member is an array of counts of the encountered sub-types. - -> ##### Example - -> "data": { -> "sub": [ 4, 3 ] -> "counts": [ 3004, 2554 ] -> } - - -#### Type 6: undefined (deprecated) - -The `data` object is empty. - - -#### Type 7: ObjectId - -The `data` object contains the following fields: - -- `min`: The smallest ObjectId value found, encoded as strict extended JSON. -- `max`: The largest ObjectId value found, encoded as strict extended JSON. - -Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `data` field further contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. - -> ##### Example - -> "data": { -> "min": {"$oid": "553f06eb1fc10e8d93515abb"}, -> "max": {"$oid": "553f06fbbeefcf581c232257"}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 86400, -> "values": [14, 4, 6, 23, ...], -> "labels": [] -> } -> } - - -#### Type 8: boolean - -The `data` field contains the distribution of `true` and `false` values. 
- -> ##### Example - -> "data": { -> "true": 48, -> "false": 13, -> } - - -#### Type 9: datetime - -the `data` field contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. - -> ##### Example - -> "data": { -> "min": {"$date": 1434933322}, -> "max": {"$date": 1434939935}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 30758400, -> "values": [14, 4, 6, 23] -> } -> } - - -#### Type 10: null - -The `data` object is empty. - -#### Type 11: regular expression - -The `data` object is empty. - -#### Type 12: DBPointer (deprecated) - -The `data` object is empty. - -#### Type 13: javascript code - -The `data` object is empty. - -#### Type 15: javascript code with scope - -The `data` object is empty. 
- -#### Type 16: 32-bit integer - -The `data` object contains the following members: - -- `min`: The minimum value encountered -- `max`: The maximum value encountered -- `med`: The median of all encoutered values -- `avg`: The mean of all encountered values -- `values`: Unique set of all values encountered, ordered by values -- `counts`: count for each value, same order as above - -> ##### Example - -> "data" : { -> "min": 3, -> "max": 72, -> "med": 20, -> "avg": 30.5, -> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] -> } - -#### Type 17: timestamp - -the `data` field contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. 
- -> ##### Example - -> "data": { -> "min": {"$date": 1434933322}, -> "max": {"$date": 1434939935}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 30758400, -> "values": [14, 4, 6, 23] -> } -> } - - -#### Type 18: 64-bit integer - -The `data` object contains the following members: - -- `min`: The minimum value encountered -- `max`: The maximum value encountered -- `med`: The median of all encoutered values -- `avg`: The mean of all encountered values -- `values`: Unique set of all values encountered, ordered by values -- `counts`: count for each value, same order as above - -> ##### Example - -> "data" : { -> "min": 3, -> "max": 72, -> "med": 20, -> "avg": 30.5, -> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] -> } - -#### Type 127: minkey - -The `data` object is empty. - -#### Type 255: maxkey - -The `data` object is empty. - - -### 5. Adaptive Binning - -Some data types contain a field `bins`, where the data is discretized into bins with a variablebin size, depending on the data distribution. - -A _bin_ is defined as ... @TODO - -The `bins` object consists of the following members: - -- `size`: this is the size of an individual bin. For numbers (types 1, 16, 18), this is a unitless number that describes the size of a bin. - - -> "bins": { // adaptive binning -> "size": 86400, // number of seconds per bucket -> "values": [14, 4, 6, 23, ...] // values per bin -> "labels": ["Apr 30", "May 1", "May 2", "May 3", ...] -> } +- [ ] another pass at improving data structures (rename fields -> children?) 
+- [ ] nested objects +- [ ] nested arrays +- [ ] get sample dataset for test fixures +- [ ] update bin/mongodb-schema.js to do something real diff --git a/lib/models/field-collection.js b/lib/field-collection.js similarity index 100% rename from lib/models/field-collection.js rename to lib/field-collection.js diff --git a/lib/models/field.js b/lib/field.js similarity index 100% rename from lib/models/field.js rename to lib/field.js diff --git a/lib/index.js b/lib/index.js index 5d07b8f..c4565ad 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,101 +1,7 @@ -var AmpersandState = require('ampersand-state'); -var FieldCollection = require('./models/field-collection'); -var debug = require('debug')('mongodb-schema:models:schema'); -var es = require('event-stream'); -var flatten = require('./flatten'); -var _ = require('lodash'); - -function getValueType(value) { - var T; - if (_.has(value, '_bsontype')) { - T = value._bsontype; - } else { - T = Object.prototype.toString.call(value).replace(/\[object (\w+)\]/, '$1'); - } - return T; -} - -function isArrayKey(_id) { - return /\[(\d+)\]/.test(_id); -} - -var Schema = AmpersandState.extend({ - collections: { - fields: FieldCollection - }, - props: { - count: { - type: 'number', - default: 0 - } - }, - initialize: function(options) { - options = options || {}; - this.ns = options.ns; - }, - stream: function() { - var schema = this; - return es.through(function(doc) { - debug('updating based on %j', doc); - _.each(_.pairs(flatten(doc)), function(d) { - var _id = d[0]; - var value = d[1]; - var T = getValueType(d[1]); - var field = schema.fields.get(_id); - var existingType; - if (value === 'Array') { - return debug('@todo: smush nested arrays. skipping `%s`', _id); - } - if (value === 'Object') { - return debug('@todo: smush nested objects. 
skipping `%s`', _id); - } - schema.count += 1; - - debug(_id, T, value, '' + value); - value = '' + value; - if (!field) { - debug('`%s` is a new field with type %s', _id, T); - field = schema.fields.add({ - _id: _id - }); - - field.types.add({ - _id: T - }).values.add(value); - } else { - // debug('`%s` is already a known field with types', _id, field.types.map(function(d) { - // return d.getId(); - // })); - - existingType = field.types.get(T); - if (!existingType) { - // debug('new type `%s` for field `%s`', T, _id); - field.types.add({ - _id: T - }).values.add(value); - } else { - // debug('updating existing type %j', existingType); - existingType.values.add(value); - } - } - this.emit('data', field); - }, this); - }, function() { - // debug('finalized schema is', JSON.stringify(schema, null, 2)); - console.table(schema.fields.serialize()); - this.emit('end'); - }); - } -}); - +var Schema = require('./schema'); module.exports = function() { return new Schema(); }; - -module.exports.extend = Schema.extend; - -module.exports.stream = function() { - return new Schema().stream(); -}; +module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; -module.exports.FieldCollection = require('./models/field-collection'); +module.exports.FieldCollection = require('./field-collection'); diff --git a/lib/schema.js b/lib/schema.js new file mode 100644 index 0000000..810f3c9 --- /dev/null +++ b/lib/schema.js @@ -0,0 +1,70 @@ +var AmpersandState = require('ampersand-state'); +var debug = require('debug')('mongodb-schema'); +var es = require('event-stream'); +var flatten = require('./flatten'); +var _ = require('lodash'); + +var FieldCollection = require('./field-collection'); +var Type = require('./type'); + +module.exports = AmpersandState.extend({ + collections: { + fields: FieldCollection + }, + props: { + count: { + type: 'number', + default: 0 + } + }, + initialize: function(options) { + options = options || {}; + this.ns = options.ns; + }, + 
stream: function() { + var schema = this; + return es.through(function(doc) { + debug('updating based on %j', doc); + _.each(_.pairs(flatten(doc)), function(d) { + var _id = d[0]; + var value = d[1]; + var T = Type.getNameFromValue(d[1]); + var field = schema.fields.get(_id); + var existingType; + if (value === 'Array') { + return debug('@todo: smush nested arrays. skipping `%s`', _id); + } + if (value === 'Object') { + return debug('@todo: smush nested objects. skipping `%s`', _id); + } + schema.count += 1; + + debug(_id, T, value, '' + value); + value = '' + value; + if (!field) { + debug('`%s` is a new field with type %s', _id, T); + field = schema.fields.add({ + _id: _id + }); + + field.types.add({ + _id: T + }).values.add(value); + } else { + existingType = field.types.get(T); + if (!existingType) { + field.types.add({ + _id: T + }).values.add(value); + } else { + existingType.values.add(value); + } + } + this.emit('data', field); + }, this); + }, function() { + debug('finalized schema is', JSON.stringify(schema, null, 2)); + this.emit('end'); + }); + } +}); diff --git a/lib/models/type-collection.js b/lib/type-collection.js similarity index 100% rename from lib/models/type-collection.js rename to lib/type-collection.js diff --git a/lib/models/type.js b/lib/type.js similarity index 100% rename from lib/models/type.js rename to lib/type.js diff --git a/lib/models/value-collection.js b/lib/value-collection.js similarity index 100% rename from lib/models/value-collection.js rename to lib/value-collection.js diff --git a/lib/models/value.js b/lib/value.js similarity index 100% rename from lib/models/value.js rename to lib/value.js diff --git a/test/test.js b/test/test.js index 73f3545..f834ec5 100644 --- a/test/test.js +++ b/test/test.js @@ -1,55 +1,10 @@ -var schema = require('../'), - assert = require('assert'), - allTypes = require('../fixtures/all_types'), - BSON = require('bson'), - pkg = require('../package.json'); +var schema = require('../'); +var assert = 
require('assert'); -describe.skip('mongodb-schema', function() { - // var root = defs.ESCAPE + defs.ROOT; - - it('should import correctly', function() { +describe('mongodb-schema', function() { + it('should work', function() { assert.ok(schema); - }); - - it('should have a root object with the correct version', function() { - var result = schema(); - assert.ok(result[root] !== undefined); - assert.equal(result[root][defs.VERSION], pkg.version); - }); - - it('should have 0 count without any documents', function() { - var result = schema([]); - assert.equal(result[root][defs.COUNT], 0); - }); - - it('should throw an error if documents is not an array or undefined', function() { - assert.throws(function() { - schema('i\'m not an array'); - }, TypeError); - assert.doesNotThrow(function() { - schema(); - }); - }); - - it('should parse documents of all types without error', function() { - assert.ok(schema(allTypes)); - }); - - it('should detect the correct type for every type', function() { - var result = schema(allTypes); - console.log(JSON.stringify(result, null, 2)); - }); - - it('should create the correct type objects inside #schema tag', function() { - var result = schema([{ - a: 'foo' - }, { - a: 1, - b: { - c: BSON.ObjectId() - } - }]); - // @todo + assert.ok(schema.Schema); }); }); From cf58216ec0fd72062e233eb52ba88c5672f6f536 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 14:50:22 -0400 Subject: [PATCH 35/79] Add basic tests for type detection --- lib/index.js | 1 + lib/type-collection.js | 2 +- test/test.js | 40 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/lib/index.js b/lib/index.js index c4565ad..9b6bf04 100644 --- a/lib/index.js +++ b/lib/index.js @@ -5,3 +5,4 @@ module.exports = function() { module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; module.exports.FieldCollection = require('./field-collection'); +module.exports.getType = 
require('./type').getNameFromValue; diff --git a/lib/type-collection.js b/lib/type-collection.js index f9ee98c..5dc5c91 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -7,7 +7,7 @@ module.exports = AmpersandCollection.extend({ if (attrs && Object.keys(attrs).length === 0) return; var val = attrs.value; - var T = (val && val._bsontype) || Object.prototype.toString.call(val).replace(/\[object (\w+)\]/, '$1'); + var T = type.getNameFromValue(val); var Klass = type[T]; if (!Klass) { diff --git a/test/test.js b/test/test.js index f834ec5..dcbd23b 100644 --- a/test/test.js +++ b/test/test.js @@ -1,10 +1,44 @@ var schema = require('../'); var assert = require('assert'); +var BSON = require('bson'); + describe('mongodb-schema', function() { - it('should work', function() { - assert.ok(schema); - assert.ok(schema.Schema); + describe('value type detection', function() { + it('should identify ObjectIDs', function() { + assert.equal(schema.getType(BSON.ObjectID()), 'ObjectID'); + }); + + it('should identify booleans', function() { + assert.equal(schema.getType(false), 'Boolean'); + assert.equal(schema.getType(true), 'Boolean'); + }); + it('should identify numbers', function() { + assert.equal(schema.getType(1), 'Number'); + assert.equal(schema.getType(0), 'Number'); + }); + + it('should identify nulls', function() { + assert.equal(schema.getType(null), 'Null'); + }); + it('should identify undefineds', function() { + assert.equal(schema.getType(undefined), 'Undefined'); + }); + it('should identify strings', function() { + assert.equal(schema.getType('Brian'), 'String'); + }); + it('should identify dates', function() { + assert.equal(schema.getType(new Date()), 'Date'); + }); + it('should identify arrays', function() { + assert.equal(schema.getType([]), 'Array'); + }); + it('should identify objects', function() { + assert.equal(schema.getType({}), 'Object'); + }); + it('should identify regexes', function() { + assert.equal(schema.getType(new RegExp('\d')), 
'RegExp'); + }); }); }); From 18138caae2f71782c29da9826e49888006659968 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 15:06:14 -0400 Subject: [PATCH 36/79] straighten out type + probability wonk [closes #16] [closes #15] [closes #17] --- lib/field.js | 1 + lib/schema.js | 7 ------- lib/type-collection.js | 13 +++---------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/lib/field.js b/lib/field.js index 467e9e1..3001caa 100644 --- a/lib/field.js +++ b/lib/field.js @@ -32,6 +32,7 @@ module.exports = AmpersandState.extend({ this.types.on('add', function(type) { type.values.on('add', function(model) { field.count += 1; + type.count += 1; this.parent.values.add(model); }.bind(this)); }); diff --git a/lib/schema.js b/lib/schema.js index 810f3c9..969fe27 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -11,12 +11,6 @@ module.exports = AmpersandState.extend({ collections: { fields: FieldCollection }, - props: { - count: { - type: 'number', - default: 0 - } - }, initialize: function(options) { options = options || {}; this.ns = options.ns; @@ -37,7 +31,6 @@ module.exports = AmpersandState.extend({ if (value === 'Object') { return debug('@todo: smush nested objects. 
skipping `%s`', _id); } - schema.count += 1; debug(_id, T, value, '' + value); value = '' + value; diff --git a/lib/type-collection.js b/lib/type-collection.js index 5dc5c91..7a38809 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -6,19 +6,12 @@ module.exports = AmpersandCollection.extend({ model: function(attrs, options) { if (attrs && Object.keys(attrs).length === 0) return; - var val = attrs.value; - var T = type.getNameFromValue(val); - var Klass = type[T]; + var Klass = type[attrs._id]; if (!Klass) { - throw new TypeError('No value type for ' + T); + throw new TypeError('No value type for ' + attrs._id); } - if (!val) { - val = '' + val; - } - return new Klass({ - value: val - }, options); + return new Klass({}, options); } }); From 5cd518ee66c3ecea9369ef4c5cd4f63c48c55e61 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 15:16:25 -0400 Subject: [PATCH 37/79] updating notes --- README.md | 6 +++--- lib/schema.js | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ae3ea9a..ac47811 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ Infer probabilistic schema of javascript objects or a MongoDB collection. ## Todo -- [ ] another pass at improving data structures (rename fields -> children?) -- [ ] nested objects -- [ ] nested arrays +- [x] another pass at improving data structures +- [ ] rename fields -> properties like json schema. +- [ ] nested objects and arrays (will essentially make the Object/Array type models what schema model is currently) - [ ] get sample dataset for test fixures - [ ] update bin/mongodb-schema.js to do something real diff --git a/lib/schema.js b/lib/schema.js index 969fe27..fdc39be 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -25,6 +25,7 @@ module.exports = AmpersandState.extend({ var T = Type.getNameFromValue(d[1]); var field = schema.fields.get(_id); var existingType; + if (value === 'Array') { return debug('@todo: smush nested arrays. 
skipping `%s`', _id); } From 10d2788ebe848b0ad05eea876615ac113852a015 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 15 May 2015 20:08:48 -0400 Subject: [PATCH 38/79] nested subdocuments working. --- lib/field-collection.js | 5 -- lib/field.js | 40 ---------- lib/flatten.js | 70 ------------------ lib/index.js | 1 - lib/schema.js | 158 ++++++++++++++++++++++++++++++---------- lib/type-collection.js | 2 - lib/type.js | 1 - lib/value-collection.js | 4 +- lib/value.js | 5 +- 9 files changed, 124 insertions(+), 162 deletions(-) delete mode 100644 lib/field-collection.js delete mode 100644 lib/field.js delete mode 100644 lib/flatten.js diff --git a/lib/field-collection.js b/lib/field-collection.js deleted file mode 100644 index 1f8f15a..0000000 --- a/lib/field-collection.js +++ /dev/null @@ -1,5 +0,0 @@ -module.exports = require('ampersand-collection').extend({ - model: require('./field'), - mainIndex: '_id', - comparator: '_id' -}); diff --git a/lib/field.js b/lib/field.js deleted file mode 100644 index 3001caa..0000000 --- a/lib/field.js +++ /dev/null @@ -1,40 +0,0 @@ -var AmpersandState = require('ampersand-state'); -var TypeCollection = require('./type-collection'); -var ValueCollection = require('./value-collection'); - -module.exports = AmpersandState.extend({ - idAttribute: '_id', - props: { - _id: { - type: 'string', - required: true - }, - displayName: { - type: 'string', - default: function() { - return this._id; - } - }, - description: { - type: 'string' - }, - count: { - type: 'number', - default: 0 - }, - }, - collections: { - types: TypeCollection, - values: ValueCollection - }, - initialize: function() { - var field = this; - this.types.on('add', function(type) { - type.values.on('add', function(model) { - field.count += 1; - type.count += 1; - this.parent.values.add(model); - }.bind(this)); - }); - } -}); diff --git a/lib/flatten.js b/lib/flatten.js deleted file mode 100644 index 7884652..0000000 --- a/lib/flatten.js +++ /dev/null @@ -1,70 
+0,0 @@ -module.exports = flatten; -var debug = require('debug')('mongodb-schema:flatten'); - -function flatten(obj) { - var flattened = {}; - - function _route(prefix, value) { - var i; - - if (value === null) { - flattened[prefix] = null; - return; - } - - var type = typeof value; - - if (type === 'string') { - flattened[prefix] = value; - } - // booleans, null and undefined - else if (type === 'boolean' || obj === null || obj === undefined) { - flattened[prefix] = value; - } - // numbers - else if (type === 'number') { - flattened[prefix] = value; - } - // dates - else if (Object.prototype.toString.call(value) === '[object Date]') { - flattened[prefix] = value; - } else if (Array.isArray(value)) { - len = value.length; - flattened[prefix] = 'Array'; - - if (len === 0) { - _route(prefix + '[]', null); - } - for (i = 0; i < len; i++) { - _route(prefix + '[' + i + ']', value[i]); - } - } else if (type === 'object') { - if (value.hasOwnProperty('_bsontype')) { - debug('_bsontype is %s', value._bsontype); - flattened[prefix] = value; - } else { - var keys = Object.keys(value); - var len = keys.length; - if (prefix) { - flattened[prefix] = 'Object'; - } - - if (prefix) { - prefix = prefix + '.'; - } - if (len === 0) { - _route(prefix, null); - } - for (i = 0; i < len; i++) { - _route(prefix + keys[i], value[keys[i]]); - } - } - } else { - throw new Error('Unknown type for ' + JSON.stringify(value)); - } - } - - _route('', obj); - - return flattened; -} diff --git a/lib/index.js b/lib/index.js index 9b6bf04..dac17d8 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,5 +4,4 @@ module.exports = function() { }; module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; -module.exports.FieldCollection = require('./field-collection'); module.exports.getType = require('./type').getNameFromValue; diff --git a/lib/schema.js b/lib/schema.js index fdc39be..0bdd8bb 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,13 +1,122 @@ var AmpersandState = 
require('ampersand-state'); -var debug = require('debug')('mongodb-schema'); +var AmpersandCollection = require('ampersand-collection'); var es = require('event-stream'); -var flatten = require('./flatten'); var _ = require('lodash'); +var debug = require('debug')('mongodb-schema'); -var FieldCollection = require('./field-collection'); var Type = require('./type'); +var TypeCollection = require('./type-collection'); +var ValueCollection = require('./value-collection'); + +var FieldCollection = AmpersandCollection.extend({ + mainIndex: '_id', + comparator: '_id' +}); -module.exports = AmpersandState.extend({ +var Field = AmpersandState.extend({ + idAttribute: '_id', + props: { + _id: { + type: 'string', + required: true + }, + displayName: { + type: 'string', + default: function() { + return this._id; + } + }, + description: { + type: 'string' + }, + count: { + type: 'number', + default: 0 + }, + has_children: { + type: 'boolean', + default: false + }, + }, + collections: { + types: TypeCollection, + values: ValueCollection, + fields: FieldCollection + }, + initialize: function() { + var field = this; + this.types.on('add', function(type) { + type.values.on('add', function(model) { + field.count += 1; + type.count += 1; + this.parent.values.add(model); + }.bind(this)); + }); + + this.fields.on('add', function() { + this.has_children = true; + }.bind(this)); + } +}); + +FieldCollection.prototype.model = Field; + +function add_value(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + var type; + + if (!field) { + debug('`%s` is a new field with type %s', _id); + field = schema.fields.add({ + _id: _id + }); + } + + type = field.types.get(type_id); + if (!type) { + type = field.types.add({ + _id: type_id + }); + } + + type.values.add({ + _id: value + }); +} + +function deflate(schema, _id, value) { + var type_id = Type.getNameFromValue(value); + var field; + + if (type_id === 'Array') { + field = schema.fields.get(_id); + + if (!field) { + debug('`%s` 
is a new field with type %s', _id); + field = schema.fields.add({ + _id: _id + }); + } + return; + } + if (type_id === 'Object') { + field = schema.fields.get(_id); + + if (!field) { + debug('`%s` is a new field with type %s', _id); + field = schema.fields.add({ + _id: _id + }); + } + _.each(_.pairs(value), function(d) { + deflate(field, d[0], d[1]); + }); + return; + } + add_value(schema, _id, type_id, value); +} + +var Schema = AmpersandState.extend({ collections: { fields: FieldCollection }, @@ -19,42 +128,9 @@ module.exports = AmpersandState.extend({ var schema = this; return es.through(function(doc) { debug('updating based on %j', doc); - _.each(_.pairs(flatten(doc)), function(d) { - var _id = d[0]; - var value = d[1]; - var T = Type.getNameFromValue(d[1]); - var field = schema.fields.get(_id); - var existingType; - - if (value === 'Array') { - return debug('@todo: smush nested arrays. skipping `%s`', _id); - } - if (value === 'Object') { - return debug('@todo: smush nested objects. 
skipping `%s`', _id); - } - - debug(_id, T, value, '' + value); - value = '' + value; - if (!field) { - debug('`%s` is a new field with type %s', _id, T); - field = schema.fields.add({ - _id: _id - }); - - field.types.add({ - _id: T - }).values.add(value); - } else { - existingType = field.types.get(T); - if (!existingType) { - field.types.add({ - _id: T - }).values.add(value); - } else { - existingType.values.add(value); - } - } - this.emit('data', field); + _.each(_.pairs(doc), function(d) { + deflate(schema, d[0], d[1]); + this.emit('data', doc); }, this); }, function() { debug('finalized schema is', JSON.stringify(schema, null, 2)); @@ -62,3 +138,5 @@ module.exports = AmpersandState.extend({ }); } }); + +module.exports = Schema; diff --git a/lib/type-collection.js b/lib/type-collection.js index 7a38809..b900d68 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -4,8 +4,6 @@ var type = require('./type'); module.exports = AmpersandCollection.extend({ mainIndex: '_id', model: function(attrs, options) { - if (attrs && Object.keys(attrs).length === 0) return; - var Klass = type[attrs._id]; if (!Klass) { diff --git a/lib/type.js b/lib/type.js index 4a84115..2327ee8 100644 --- a/lib/type.js +++ b/lib/type.js @@ -1,7 +1,6 @@ var AmpersandState = require('ampersand-state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); -var TypeCollection = require('./type-collection'); var Type = AmpersandState.extend({ idAttribute: '_id', diff --git a/lib/value-collection.js b/lib/value-collection.js index 4f4980b..41ff55c 100644 --- a/lib/value-collection.js +++ b/lib/value-collection.js @@ -1,4 +1,6 @@ var AmpersandCollection = require('ampersand-collection'); +var Value = require('./value'); + module.exports = AmpersandCollection.extend({ - model: require('./value') + model: Value }); diff --git a/lib/value.js b/lib/value.js index ffab61f..54436f2 100644 --- a/lib/value.js +++ b/lib/value.js @@ -7,7 +7,8 @@ module.exports = 
AmpersandState.extend({ type: 'any' } }, - initialize: function(val) { - this._id = val; + initialize: function(attrs) { + this.value = attrs._id; + this._id = '' + attrs._id; } }); From d8512f4f1a9046b1fe937cc3320e67b2bfa0ef28 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Sun, 17 May 2015 02:03:21 -0400 Subject: [PATCH 39/79] proper handling for embedded arrays and docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit just need to debug probability not being recalculated properly for polymorphic case, rewrite the docs, mj check, and we’re ready to publish. --- README.md | 9 +- bin/mongodb-schema.docopt | 10 -- bin/mongodb-schema.js | 7 - lib/index.js | 15 +- lib/schema.js | 197 +++++++++++++++------ lib/type.js | 28 +-- package.json | 18 +- test/fixture-basic-following.json | 1 + test/fixture-basic-users.json | 1 + test/fixture-embedded-documents-users.json | 1 + test/mocha.opts | 1 + test/test.js | 131 +++++++++++--- 12 files changed, 284 insertions(+), 135 deletions(-) delete mode 100644 bin/mongodb-schema.docopt delete mode 100644 bin/mongodb-schema.js create mode 100644 test/fixture-basic-following.json create mode 100644 test/fixture-basic-users.json create mode 100644 test/fixture-embedded-documents-users.json create mode 100644 test/mocha.opts diff --git a/README.md b/README.md index ac47811..170bcbe 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ Infer probabilistic schema of javascript objects or a MongoDB collection. ## Todo -- [x] another pass at improving data structures -- [ ] rename fields -> properties like json schema. 
-- [ ] nested objects and arrays (will essentially make the Object/Array type models what schema model is currently) -- [ ] get sample dataset for test fixures +- [ ] fix probabilities for "evolving case" + +### Punted + - [ ] update bin/mongodb-schema.js to do something real +- [ ] http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#enumerated-values diff --git a/bin/mongodb-schema.docopt b/bin/mongodb-schema.docopt deleted file mode 100644 index 0c9052f..0000000 --- a/bin/mongodb-schema.docopt +++ /dev/null @@ -1,10 +0,0 @@ -mongodb-schema - -Infer the probabilistic schema for a MongoDB collection. - -Usage: - mongodb-schema [] [] - -Options: - -h --help Show this screen. - --version Show version. diff --git a/bin/mongodb-schema.js b/bin/mongodb-schema.js deleted file mode 100644 index 9ce1386..0000000 --- a/bin/mongodb-schema.js +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env node - -var schema = require('../'); -var fs = require('fs'); -var docopt = require('docopt').docopt; -var pkg = require('../package.json'); -var argv = docopt(fs.readFileSync(__dirname + '/m.docopt', 'utf-8'), version: pkg.version}); diff --git a/lib/index.js b/lib/index.js index dac17d8..4c87616 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,6 +1,17 @@ var Schema = require('./schema'); -module.exports = function() { - return new Schema(); +var _ = require('lodash'); + +module.exports = function(ns, docs) { + if (!Array.isArray(docs)) { + docs = [docs]; + } + var schema = new Schema({ + ns: ns + }); + _.each(docs, function(doc) { + schema.sample(doc); + }); + return schema; }; module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; diff --git a/lib/schema.js b/lib/schema.js index 0bdd8bb..1e6f59d 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,5 +1,5 @@ -var AmpersandState = require('ampersand-state'); -var AmpersandCollection = require('ampersand-collection'); +var State = require('ampersand-state'); +var Collection = 
require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); var es = require('event-stream'); var _ = require('lodash'); var debug = require('debug')('mongodb-schema'); @@ -8,27 +8,24 @@ var Type = require('./type'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); -var FieldCollection = AmpersandCollection.extend({ +var FieldCollection = Collection.extend({ mainIndex: '_id', comparator: '_id' }); -var Field = AmpersandState.extend({ +var Field = State.extend({ idAttribute: '_id', props: { + /** + * The key in the `parent`. + */ _id: { type: 'string', required: true }, - displayName: { - type: 'string', - default: function() { - return this._id; - } - }, - description: { - type: 'string' - }, + /** + * Number of times this field has been seen in a sample of documents. + */ count: { type: 'number', default: 0 @@ -37,20 +34,60 @@ var Field = AmpersandState.extend({ type: 'boolean', default: false }, + /** + * Title, description and default from JSON Schema: + * http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata + */ + /** + * If using shortened keys to save space, it is expected this be the "real" + * name of the field that could be input by the user. For example, + * if `u` is the field's `_id`, `username` is the field's title + * and is much friendlier for humans. + */ + title: { + type: 'string', + default: function() { + return this._id; + } + }, + default: 'any', + description: 'string' + }, + derived: { + /** + * The most common type seen for this field. + * + * http://spacetelescope.github.io/understanding-json-schema/reference/type.html + */ + type: { + deps: ['types'], + fn: function() { + if (this.types.length === 0) { + return undefined; + } + if (this.types.length === 1) { + return this.types.at(0)._id; + } + return this.types.pluck('_id'); + } + } }, collections: { types: TypeCollection, + /** + * A sample of values seen for this field. 
+ */ values: ValueCollection, fields: FieldCollection }, initialize: function() { var field = this; this.types.on('add', function(type) { - type.values.on('add', function(model) { + type.values.on('add', function(value) { field.count += 1; type.count += 1; - this.parent.values.add(model); - }.bind(this)); + field.values.add(value); + }); }); this.fields.on('add', function() { @@ -59,20 +96,74 @@ var Field = AmpersandState.extend({ } }); -FieldCollection.prototype.model = Field; +/** + * A basic field has no descendant fields, such as `String`, `ObjectID`, + * `Boolean`, or `Date`. + */ +var BasicField = Field.extend({}); -function add_value(schema, _id, type_id, value) { +var EmbeddedArray = Field.extend({ + props: { + type: { + type: 'string', + default: 'array' + } + } +}); + +var EmbeddedDocument = Field.extend({ + props: { + type: { + type: 'string', + default: 'object' + } + } +}); + +FieldCollection.prototype.model = function(attrs, options) { + return new attrs.klass(attrs, options); +}; + +function onFieldSampled(schema, _id, value) { + var type_id = Type.getNameFromValue(value); + if (type_id === 'Array') { + onEmbeddedArray(schema, _id, type_id, value); + } else if (type_id === 'Object') { + onEmbeddedDocument(schema, _id, type_id, value); + } else { + onBasicField(schema, _id, type_id, value); + } +} + +function onEmbeddedDocument(schema, _id, type_id, value) { var field = schema.fields.get(_id); - var type; if (!field) { - debug('`%s` is a new field with type %s', _id); + debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ - _id: _id + _id: _id, + klass: EmbeddedDocument }); } + field.count += 1; + _.each(_.pairs(value), function(d) { + onFieldSampled(field, d[0], d[1]); + }); +} - type = field.types.get(type_id); +function onBasicField(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + if (!field) { + debug('`%s` is a new field with type %s', _id, type_id); + field = schema.fields.add({ + _id: _id, + 
klass: BasicField + }); + } else { + debug('`%s` is an existing field with type %s', _id, field.type); + } + + var type = field.types.get(type_id); if (!type) { type = field.types.add({ _id: type_id @@ -84,54 +175,46 @@ function add_value(schema, _id, type_id, value) { }); } -function deflate(schema, _id, value) { - var type_id = Type.getNameFromValue(value); - var field; - - if (type_id === 'Array') { - field = schema.fields.get(_id); - - if (!field) { - debug('`%s` is a new field with type %s', _id); - field = schema.fields.add({ - _id: _id - }); - } - return; - } - if (type_id === 'Object') { - field = schema.fields.get(_id); +function onEmbeddedArray(schema, _id, type_id, value) { + var field = schema.fields.get(_id); - if (!field) { - debug('`%s` is a new field with type %s', _id); - field = schema.fields.add({ - _id: _id - }); - } - _.each(_.pairs(value), function(d) { - deflate(field, d[0], d[1]); + if (!field) { + debug('`%s` is a new field with type %s', _id, type_id); + field = schema.fields.add({ + _id: _id, + klass: EmbeddedArray }); - return; } - add_value(schema, _id, type_id, value); + field.count += 1; + _.each(value, function(d) { + onFieldSampled(field, _id, d); + }); } -var Schema = AmpersandState.extend({ +var Schema = State.extend({ + idAttribute: 'ns', + props: { + ns: { + type: 'string' + }, + }, collections: { fields: FieldCollection }, - initialize: function(options) { - options = options || {}; - this.ns = options.ns; + sample: function(doc) { + var schema = this; + debug('--- begin sample'); + _.each(_.pairs(doc), function(d) { + onFieldSampled(schema, d[0], d[1]); + }); + debug('--- end sample'); }, stream: function() { var schema = this; return es.through(function(doc) { debug('updating based on %j', doc); - _.each(_.pairs(doc), function(d) { - deflate(schema, d[0], d[1]); - this.emit('data', doc); - }, this); + schema.sample(doc); + this.emit('data', doc); }, function() { debug('finalized schema is', JSON.stringify(schema, null, 2)); 
this.emit('end'); diff --git a/lib/type.js b/lib/type.js index 2327ee8..d9baec3 100644 --- a/lib/type.js +++ b/lib/type.js @@ -1,8 +1,8 @@ -var AmpersandState = require('ampersand-state'); +var State = require('ampersand-state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); -var Type = AmpersandState.extend({ +var Type = State.extend({ idAttribute: '_id', props: { _id: { @@ -89,38 +89,38 @@ exports.Date = Type.extend({ } }); -exports.Object = AmpersandState.extend({ - idAttribute: '_id', +exports.ObjectID = Type.extend({ props: { _id: { - type: 'string', - default: 'Object' + default: 'ObjectID' } } }); -exports.Array = AmpersandState.extend({ - idAttribute: '_id', +exports.Undefined = Type.extend({ props: { _id: { - type: 'string', - default: 'Array' + default: 'Undefined' } } }); -exports.ObjectID = Type.extend({ +exports.Object = State.extend({ + idAttribute: '_id', props: { _id: { - default: 'ObjectID' + type: 'string', + default: 'Object' } } }); -exports.Undefined = Type.extend({ +exports.Array = State.extend({ + idAttribute: '_id', props: { _id: { - default: 'Undefined' + type: 'string', + default: 'Array' } } }); diff --git a/package.json b/package.json index 0c8aac4..8388f78 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,6 @@ "bugs": { "url": "https://github.com/mongodb-js/mongodb-schema/issues" }, - "main": "index.js", "scripts": { "test": "mocha" }, @@ -22,24 +21,15 @@ ], "dependencies": { "ampersand-collection": "^1.4.5", - "ampersand-model": "^5.0.3", + "ampersand-collection-lodash-mixin": "^2.0.1", "ampersand-state": "^4.5.4", "bson": "^0.3.1", "debug": "^2.1.3", - "docopt": "^0.6.2", "event-stream": "^3.3.0", - "flatnest": "^0.2.2", - "is-integer": "^1.0.4", - "lodash": "^3.8.0", - "lodash.find": "^3.2.0", - "lodash.foreach": "^3.0.3", - "mongodb": "^2.0.28" + "lodash": "^3.8.0" }, "devDependencies": { - "mocha": "^2.0.1" - }, - "directories": { - "doc": "docs", - "test": "test" + "mocha": "^2.0.1", + 
"mongodb-extended-json": "^1.3.0" } } diff --git a/test/fixture-basic-following.json b/test/fixture-basic-following.json new file mode 100644 index 0000000..a942ec9 --- /dev/null +++ b/test/fixture-basic-following.json @@ -0,0 +1 @@ +[{"_id":{"$oid":"55581e0a9bf712d0c2b48d71"},"following_ids": [{"$oid": "55582407aafa8fbbc57196e2"}]}] diff --git a/test/fixture-basic-users.json b/test/fixture-basic-users.json new file mode 100644 index 0000000..29efcaf --- /dev/null +++ b/test/fixture-basic-users.json @@ -0,0 +1 @@ +[{"_id":{"$oid":"55581e0a9bf712d0c2b48d71"},"email":"tupjud@weigehib.gov","is_verified":false,"twitter_username":"@zaetisi","name":"Hunter Maxwell","stats_friends":2163,"apple_push_token":"d4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6","android_push_token":{"$undefined":true},"last_address_latitude":null,"last_address_longitude":null,"created_at":{"$date":"0115-07-26T05:51:21.284Z"}}] diff --git a/test/fixture-embedded-documents-users.json b/test/fixture-embedded-documents-users.json new file mode 100644 index 0000000..345fe7c --- /dev/null +++ b/test/fixture-embedded-documents-users.json @@ -0,0 +1 @@ +[{"_id":{"$oid":"55582407aafa8fbbc57196e2"},"name":"Brett Flowers","email":{"_id":"gohu@pum.io","is_verified":false},"twitter":{"username":"@lekbisova"},"stats":{"friends":7584},"push_token":{"android":{"$undefined":true},"apple":"4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c"},"last_address":{"latitude":null,"longitude":null},"created_at":{"$date":"0115-11-15T13:22:18.387Z"}}] diff --git a/test/mocha.opts b/test/mocha.opts new file mode 100644 index 0000000..ec648f2 --- /dev/null +++ b/test/mocha.opts @@ -0,0 +1 @@ +-R spec diff --git a/test/test.js b/test/test.js index dcbd23b..ea80852 100644 --- a/test/test.js +++ b/test/test.js @@ -1,44 +1,121 @@ var schema = require('../'); var assert = require('assert'); -var BSON = require('bson'); +var EJSON = require('mongodb-extended-json'); +var _ = require('lodash'); +var 
FIXTURES = { + basic: { + users: EJSON.deflate(require('./fixture-basic-users.json')), + following: EJSON.deflate(require('./fixture-basic-following.json')) + }, + embedded_documents: { + users: EJSON.deflate(require('./fixture-embedded-documents-users.json')) + } +}; describe('mongodb-schema', function() { - describe('value type detection', function() { - it('should identify ObjectIDs', function() { - assert.equal(schema.getType(BSON.ObjectID()), 'ObjectID'); + describe('using only basic fields', function() { + var users; + it('should work', function() { + assert.doesNotThrow(function() { + users = schema('users', FIXTURES.basic.users); + }); }); + it('should detect all fields', function() { + assert.equal(users.fields.length, 11); - it('should identify booleans', function() { - assert.equal(schema.getType(false), 'Boolean'); - assert.equal(schema.getType(true), 'Boolean'); - }); - it('should identify numbers', function() { - assert.equal(schema.getType(1), 'Number'); - assert.equal(schema.getType(0), 'Number'); + var field_ids = [ + '_id', + 'android_push_token', + 'apple_push_token', + 'created_at', + 'email', + 'is_verified', + 'last_address_latitude', + 'last_address_longitude', + 'name', + 'stats_friends', + 'twitter_username' + ]; + assert.deepEqual(users.fields.pluck('_id'), field_ids); }); - it('should identify nulls', function() { - assert.equal(schema.getType(null), 'Null'); - }); - it('should identify undefineds', function() { - assert.equal(schema.getType(undefined), 'Undefined'); + it('should detect the correct type for each field', function() { + assert.equal(users.fields.get('_id').type, 'ObjectID'); + assert.equal(users.fields.get('android_push_token').type, 'Undefined'); + assert.equal(users.fields.get('apple_push_token').type, 'String'); + assert.equal(users.fields.get('created_at').type, 'Date'); + assert.equal(users.fields.get('email').type, 'String'); + assert.equal(users.fields.get('is_verified').type, 'Boolean'); + 
assert.equal(users.fields.get('last_address_latitude').type, 'Null'); + assert.equal(users.fields.get('last_address_longitude').type, 'Null'); + assert.equal(users.fields.get('name').type, 'String'); + assert.equal(users.fields.get('stats_friends').type, 'Number'); + assert.equal(users.fields.get('twitter_username').type, 'String'); }); - it('should identify strings', function() { - assert.equal(schema.getType('Brian'), 'String'); + }); + + describe('using basic fields and embedded documents', function() { + var users; + it('should work', function() { + assert.doesNotThrow(function() { + users = schema('users', FIXTURES.embedded_documents.users); + }); }); - it('should identify dates', function() { - assert.equal(schema.getType(new Date()), 'Date'); + + it('should detect all fields', function() { + assert.equal(users.fields.length, 8); + + var field_ids = [ + '_id', + 'created_at', + 'email', + 'last_address', + 'name', + 'push_token', + 'stats', + 'twitter' + ]; + assert.deepEqual(users.fields.pluck('_id'), field_ids); }); - it('should identify arrays', function() { - assert.equal(schema.getType([]), 'Array'); + }); + + describe('embedded array of basic properties', function() { + var following; + it('should work', function() { + assert.doesNotThrow(function() { + following = schema('following', FIXTURES.basic.following); + }); }); - it('should identify objects', function() { - assert.equal(schema.getType({}), 'Object'); + // @todo: write more tests when not so tired... + }); + + describe('evolving schema', function() { + // The hardest case and really why this module exists at all: proper + // handling for polymorphic schemas. Consider the following scenario: + // + // 1. started out with schema in `only basic fields`. + // 2. then read a blog post about how awesome embedded documents are. + // 3. then realized what a pain embedded documents are. 
+ var users; + it('should work', function() { + assert.doesNotThrow(function() { + users = schema('users', _.union(FIXTURES.basic.users, FIXTURES.embedded_documents.users)); + //console.log('users schema', JSON.stringify(users, null, 2)); + }); }); - it('should identify regexes', function() { - assert.equal(schema.getType(new RegExp('\d')), 'RegExp'); + // @todo: figure out where we're not hitting a counter when not so tired... + it.skip('should have the correct probabilities for a field that was moved', function() { + var apple_push_token = users.fields.get('apple_push_token'); + assert.equal(apple_push_token.count, 1); + assert.equal(apple_push_token.has_children, false); + assert.equal(apple_push_token.type, 'String'); + assert.equal(apple_push_token.types.get('String').count, 1); + assert.equal(apple_push_token.types.get('String').unique, 1); + assert.equal(apple_push_token.types.get('String').probability, 0.5, + '`apple_push_token` only appeared in 50% of documents but thinks it ' + + 'has a probability of ' + + (apple_push_token.types.get('String').probability * 100) + '%'); }); }); }); - From cc5cd7c520d3f71d09e7704de446dbfec7641495 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 18 May 2015 14:17:56 -0400 Subject: [PATCH 40/79] debugging exports --- lib/index.js | 5 +++++ lib/schema.js | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/index.js b/lib/index.js index 4c87616..e48bef7 100644 --- a/lib/index.js +++ b/lib/index.js @@ -13,6 +13,11 @@ module.exports = function(ns, docs) { }); return schema; }; + module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; module.exports.getType = require('./type').getNameFromValue; +module.exports.FieldCollection = Schema.FieldCollection; +module.exports.BasicField = Schema.BasicField; +module.exports.EmbeddedArrayField = Schema.EmbeddedArrayField; +module.exports.EmbeddedDocumentField = Schema.EmbeddedDocumentField; diff --git 
a/lib/schema.js b/lib/schema.js index 1e6f59d..db69bd5 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -102,7 +102,7 @@ var Field = State.extend({ */ var BasicField = Field.extend({}); -var EmbeddedArray = Field.extend({ +var EmbeddedArrayField = Field.extend({ props: { type: { type: 'string', @@ -111,7 +111,7 @@ var EmbeddedArray = Field.extend({ } }); -var EmbeddedDocument = Field.extend({ +var EmbeddedDocumentField = Field.extend({ props: { type: { type: 'string', @@ -142,7 +142,7 @@ function onEmbeddedDocument(schema, _id, type_id, value) { debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, - klass: EmbeddedDocument + klass: EmbeddedDocumentField }); } field.count += 1; @@ -182,13 +182,14 @@ function onEmbeddedArray(schema, _id, type_id, value) { debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, - klass: EmbeddedArray + klass: EmbeddedArrayField }); } field.count += 1; - _.each(value, function(d) { - onFieldSampled(field, _id, d); - }); + debug('value is', value); + // _.each(value, function(d) { + // onFieldSampled(field, _id, d); + // }); } var Schema = State.extend({ @@ -223,3 +224,7 @@ var Schema = State.extend({ }); module.exports = Schema; +module.exports.FieldCollection = FieldCollection; +module.exports.BasicField = BasicField; +module.exports.EmbeddedArrayField = EmbeddedArrayField; +module.exports.EmbeddedDocumentField = EmbeddedDocumentField; From 06c1f52f452849a59c158cfda3912e747256f1f9 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 18 May 2015 20:03:04 -0400 Subject: [PATCH 41/79] so close... 
think fields need a derived prop probability --- lib/schema.js | 102 +++++++++++++++++++++++++--------------- lib/type-collection.js | 6 +-- lib/type.js | 5 +- lib/value-collection.js | 9 ++-- lib/value.js | 3 ++ test/test.js | 1 + 6 files changed, 81 insertions(+), 45 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index db69bd5..dacac64 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,5 +1,5 @@ -var State = require('ampersand-state'); var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); +var State = require('ampersand-state'); var es = require('event-stream'); var _ = require('lodash'); var debug = require('debug')('mongodb-schema'); @@ -30,10 +30,6 @@ var Field = State.extend({ type: 'number', default: 0 }, - has_children: { - type: 'boolean', - default: false - }, /** * Title, description and default from JSON Schema: * http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata @@ -51,7 +47,10 @@ var Field = State.extend({ } }, default: 'any', - description: 'string' + description: 'string', + }, + session: { + parent: 'state' }, derived: { /** @@ -66,8 +65,10 @@ var Field = State.extend({ return undefined; } if (this.types.length === 1) { + debug('%s has a single type `%s`', this._id, this.types.at(0)._id); return this.types.at(0)._id; } + debug('%s has %d types `%s`', this._id, this.types.length, this.types.pluck('_id')); return this.types.pluck('_id'); } } @@ -83,17 +84,28 @@ var Field = State.extend({ initialize: function() { var field = this; this.types.on('add', function(type) { + var T = field.types.at(0)._id; + field.trigger('change:types'); type.values.on('add', function(value) { field.count += 1; type.count += 1; field.values.add(value); }); }); - - this.fields.on('add', function() { - this.has_children = true; - }.bind(this)); - } + }, + serialize: function() { + var res = this.getAttributes({ + props: true, + derived: true + }, true); + if 
(this.fields.length > 0) { + res.fields = this.fields.serialize(); + } else { + res.values = this.values.serialize(); + res.types = this.types.serialize(); + } + return res; + }, }); /** @@ -106,7 +118,7 @@ var EmbeddedArrayField = Field.extend({ props: { type: { type: 'string', - default: 'array' + default: 'Array' } } }); @@ -115,7 +127,7 @@ var EmbeddedDocumentField = Field.extend({ props: { type: { type: 'string', - default: 'object' + default: 'Object' } } }); @@ -126,6 +138,7 @@ FieldCollection.prototype.model = function(attrs, options) { function onFieldSampled(schema, _id, value) { var type_id = Type.getNameFromValue(value); + debug('field `%s` sampled with value %j of type %s on schema', _id, value, type_id, schema); if (type_id === 'Array') { onEmbeddedArray(schema, _id, type_id, value); } else if (type_id === 'Object') { @@ -135,29 +148,14 @@ function onFieldSampled(schema, _id, value) { } } -function onEmbeddedDocument(schema, _id, type_id, value) { - var field = schema.fields.get(_id); - - if (!field) { - debug('`%s` is a new field with type %s', _id, type_id); - field = schema.fields.add({ - _id: _id, - klass: EmbeddedDocumentField - }); - } - field.count += 1; - _.each(_.pairs(value), function(d) { - onFieldSampled(field, d[0], d[1]); - }); -} - function onBasicField(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, - klass: BasicField + klass: BasicField, + parent: schema }); } else { debug('`%s` is an existing field with type %s', _id, field.type); @@ -166,7 +164,8 @@ function onBasicField(schema, _id, type_id, value) { var type = field.types.get(type_id); if (!type) { type = field.types.add({ - _id: type_id + _id: type_id, + }); } @@ -182,14 +181,38 @@ function onEmbeddedArray(schema, _id, type_id, value) { debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, - klass: 
EmbeddedArrayField + klass: EmbeddedArrayField, + parent: schema }); } field.count += 1; - debug('value is', value); - // _.each(value, function(d) { - // onFieldSampled(field, _id, d); - // }); + _.each(value, function(d) { + var type_id = Type.getNameFromValue(d); + if (type_id === 'Object') { + _.each(d, function(val, key) { + onBasicField(field, key, Type.getNameFromValue(val), val); + }); + } else { + onBasicField(field, '__basic__', type_id, d); + } + }); +} + +function onEmbeddedDocument(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + + if (!field) { + debug('`%s` is a new field with type %s', _id, type_id); + field = schema.fields.add({ + _id: _id, + klass: EmbeddedDocumentField, + parent: schema + }); + } + field.count += 1; + _.each(_.pairs(value), function(d) { + onFieldSampled(field, d[0], d[1]); + }); } var Schema = State.extend({ @@ -198,6 +221,10 @@ var Schema = State.extend({ ns: { type: 'string' }, + count: { + type: 'number', + default: 0 + } }, collections: { fields: FieldCollection @@ -205,6 +232,8 @@ var Schema = State.extend({ sample: function(doc) { var schema = this; debug('--- begin sample'); + debug('updating based on', doc); + schema.count += 1; _.each(_.pairs(doc), function(d) { onFieldSampled(schema, d[0], d[1]); }); @@ -213,7 +242,6 @@ var Schema = State.extend({ stream: function() { var schema = this; return es.through(function(doc) { - debug('updating based on %j', doc); schema.sample(doc); this.emit('data', doc); }, function() { diff --git a/lib/type-collection.js b/lib/type-collection.js index b900d68..84c1bfa 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -1,7 +1,7 @@ -var AmpersandCollection = require('ampersand-collection'); +var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); var type = require('./type'); -module.exports = AmpersandCollection.extend({ +module.exports = Collection.extend({ mainIndex: '_id', model: function(attrs, 
options) { var Klass = type[attrs._id]; @@ -10,6 +10,6 @@ module.exports = AmpersandCollection.extend({ throw new TypeError('No value type for ' + attrs._id); } - return new Klass({}, options); + return new Klass(attrs, options); } }); diff --git a/lib/type.js b/lib/type.js index d9baec3..2556960 100644 --- a/lib/type.js +++ b/lib/type.js @@ -24,10 +24,11 @@ var Type = State.extend({ } }, probability: { - deps: ['count'], + deps: ['count', 'collection.parent.parent'], fn: function() { var field = this.collection.parent; - return this.count / field.count; + var schema = field.parent; + return this.count / schema.count; } } }, diff --git a/lib/value-collection.js b/lib/value-collection.js index 41ff55c..1b8c919 100644 --- a/lib/value-collection.js +++ b/lib/value-collection.js @@ -1,6 +1,9 @@ -var AmpersandCollection = require('ampersand-collection'); +var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); var Value = require('./value'); -module.exports = AmpersandCollection.extend({ - model: Value +module.exports = Collection.extend({ + model: Value, + serialize: function() { + return this.pluck('value'); + } }); diff --git a/lib/value.js b/lib/value.js index 54436f2..abc9e39 100644 --- a/lib/value.js +++ b/lib/value.js @@ -5,6 +5,9 @@ module.exports = AmpersandState.extend({ props: { _id: { type: 'any' + }, + value: { + type: 'any' } }, initialize: function(attrs) { diff --git a/test/test.js b/test/test.js index ea80852..e7cb8f7 100644 --- a/test/test.js +++ b/test/test.js @@ -85,6 +85,7 @@ describe('mongodb-schema', function() { it('should work', function() { assert.doesNotThrow(function() { following = schema('following', FIXTURES.basic.following); + // console.log('following schema', JSON.stringify(following, null, 2)); }); }); // @todo: write more tests when not so tired... 
From 8255b9ff4a9f1045bc291e9d8432d010fdea5952 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 18 May 2015 20:19:26 -0400 Subject: [PATCH 42/79] debugging against mongo-perf raw schema --- lib/schema.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/schema.js b/lib/schema.js index dacac64..b8c2345 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -85,12 +85,14 @@ var Field = State.extend({ var field = this; this.types.on('add', function(type) { var T = field.types.at(0)._id; - field.trigger('change:types'); + console.warn('wtf? type has no values?', type); + if (!type.values) return; type.values.on('add', function(value) { field.count += 1; type.count += 1; field.values.add(value); }); + field.trigger('change:types'); }); }, serialize: function() { @@ -168,6 +170,7 @@ function onBasicField(schema, _id, type_id, value) { }); } + if (!type.values) return console.warn('wtf? type has no values?', type); type.values.add({ _id: value From fbdd51a0edf02e1fa40f772e07914ce4b16047be Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 17:05:22 -0400 Subject: [PATCH 43/79] formatting for fixture files --- test/fixture-basic-following.json | 13 ++++++++- test/fixture-basic-users.json | 22 ++++++++++++++- test/fixture-embedded-documents-users.json | 32 +++++++++++++++++++++- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/test/fixture-basic-following.json b/test/fixture-basic-following.json index a942ec9..807839e 100644 --- a/test/fixture-basic-following.json +++ b/test/fixture-basic-following.json @@ -1 +1,12 @@ -[{"_id":{"$oid":"55581e0a9bf712d0c2b48d71"},"following_ids": [{"$oid": "55582407aafa8fbbc57196e2"}]}] +[ + { + "_id": { + "$oid": "55581e0a9bf712d0c2b48d71" + }, + "following_ids": [ + { + "$oid": "55582407aafa8fbbc57196e2" + } + ] + } +] diff --git a/test/fixture-basic-users.json b/test/fixture-basic-users.json index 29efcaf..1929c70 100644 --- a/test/fixture-basic-users.json +++ 
b/test/fixture-basic-users.json @@ -1 +1,21 @@ -[{"_id":{"$oid":"55581e0a9bf712d0c2b48d71"},"email":"tupjud@weigehib.gov","is_verified":false,"twitter_username":"@zaetisi","name":"Hunter Maxwell","stats_friends":2163,"apple_push_token":"d4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6","android_push_token":{"$undefined":true},"last_address_latitude":null,"last_address_longitude":null,"created_at":{"$date":"0115-07-26T05:51:21.284Z"}}] +[ + { + "_id": { + "$oid": "55581e0a9bf712d0c2b48d71" + }, + "email": "tupjud@weigehib.gov", + "is_verified": false, + "twitter_username": "@zaetisi", + "name": "Hunter Maxwell", + "stats_friends": 2163, + "apple_push_token": "d4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6", + "android_push_token": { + "$undefined": true + }, + "last_address_latitude": null, + "last_address_longitude": null, + "created_at": { + "$date": "0115-07-26T05:51:21.284Z" + } + } +] diff --git a/test/fixture-embedded-documents-users.json b/test/fixture-embedded-documents-users.json index 345fe7c..47e9c5a 100644 --- a/test/fixture-embedded-documents-users.json +++ b/test/fixture-embedded-documents-users.json @@ -1 +1,31 @@ -[{"_id":{"$oid":"55582407aafa8fbbc57196e2"},"name":"Brett Flowers","email":{"_id":"gohu@pum.io","is_verified":false},"twitter":{"username":"@lekbisova"},"stats":{"friends":7584},"push_token":{"android":{"$undefined":true},"apple":"4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c"},"last_address":{"latitude":null,"longitude":null},"created_at":{"$date":"0115-11-15T13:22:18.387Z"}}] +[ + { + "_id": { + "$oid": "55582407aafa8fbbc57196e2" + }, + "name": "Brett Flowers", + "email": { + "_id": "gohu@pum.io", + "is_verified": false + }, + "twitter": { + "username": "@lekbisova" + }, + "stats": { + "friends": 7584 + }, + "push_token": { + "android": { + "$undefined": true + }, + "apple": "4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c" + }, + "last_address": { + "latitude": null, + 
"longitude": null + }, + "created_at": { + "$date": "0115-11-15T13:22:18.387Z" + } + } +] From d911e9f82b9ddff96f00eee3a01632800cf6da79 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 17:07:21 -0400 Subject: [PATCH 44/79] fix probability and unique [closes #15] [closes #6] --- lib/schema.js | 46 ++++++++++++++++-------- lib/type.js | 7 ++-- test/test.js | 96 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 116 insertions(+), 33 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index b8c2345..6a89f26 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -30,6 +30,10 @@ var Field = State.extend({ type: 'number', default: 0 }, + unique: { + type: 'number', + default: 0 + }, /** * Title, description and default from JSON Schema: * http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata @@ -65,12 +69,23 @@ var Field = State.extend({ return undefined; } if (this.types.length === 1) { - debug('%s has a single type `%s`', this._id, this.types.at(0)._id); return this.types.at(0)._id; } - debug('%s has %d types `%s`', this._id, this.types.length, this.types.pluck('_id')); return this.types.pluck('_id'); } + }, + probability: { + deps: ['count', 'parent.count'], + fn: function() { + var schema = this.parent; + return this.count / schema.count; + } + }, + has_duplicates: { + deps: ['unique', 'count'], + fn: function() { + return this.unique < this.count; + } } }, collections: { @@ -84,9 +99,15 @@ var Field = State.extend({ initialize: function() { var field = this; this.types.on('add', function(type) { - var T = field.types.at(0)._id; - console.warn('wtf? type has no values?', type); - if (!type.values) return; + if (!type.values) { + console.warn('wtf? 
type has no values?', type); + return; + } + + type.on('change:unique', function() { + field.unique = _.sum(field.types.pluck('unique')); + }); + type.values.on('add', function(value) { field.count += 1; type.count += 1; @@ -140,7 +161,7 @@ FieldCollection.prototype.model = function(attrs, options) { function onFieldSampled(schema, _id, value) { var type_id = Type.getNameFromValue(value); - debug('field `%s` sampled with value %j of type %s on schema', _id, value, type_id, schema); + //debug('field `%s` sampled with value %j of type %s on schema', _id, value, type_id, schema); if (type_id === 'Array') { onEmbeddedArray(schema, _id, type_id, value); } else if (type_id === 'Object') { @@ -153,14 +174,11 @@ function onFieldSampled(schema, _id, value) { function onBasicField(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { - debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, klass: BasicField, parent: schema }); - } else { - debug('`%s` is an existing field with type %s', _id, field.type); } var type = field.types.get(type_id); @@ -181,13 +199,13 @@ function onEmbeddedArray(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { - debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, klass: EmbeddedArrayField, parent: schema }); } + field.count += 1; _.each(value, function(d) { var type_id = Type.getNameFromValue(d); @@ -205,7 +223,6 @@ function onEmbeddedDocument(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { - debug('`%s` is a new field with type %s', _id, type_id); field = schema.fields.add({ _id: _id, klass: EmbeddedDocumentField, @@ -234,13 +251,12 @@ var Schema = State.extend({ }, sample: function(doc) { var schema = this; - debug('--- begin sample'); - debug('updating based on', doc); schema.count += 1; _.each(_.pairs(doc), function(d) { - onFieldSampled(schema, d[0], d[1]); + 
process.nextTick(function() { + onFieldSampled(schema, d[0], d[1]); + }); }); - debug('--- end sample'); }, stream: function() { var schema = this; diff --git a/lib/type.js b/lib/type.js index 2556960..ceb9eb8 100644 --- a/lib/type.js +++ b/lib/type.js @@ -20,15 +20,14 @@ var Type = State.extend({ unique: { deps: ['count'], fn: function() { - return _.unique(this.values.models).length; + return _.unique(this.values.pluck('value')).length; } }, probability: { - deps: ['count', 'collection.parent.parent'], + deps: ['count', 'collection.parent'], fn: function() { var field = this.collection.parent; - var schema = field.parent; - return this.count / schema.count; + return this.count / field.count; } } }, diff --git a/test/test.js b/test/test.js index e7cb8f7..48a293b 100644 --- a/test/test.js +++ b/test/test.js @@ -1,4 +1,4 @@ -var schema = require('../'); +var getSchema = require('../'); var assert = require('assert'); var EJSON = require('mongodb-extended-json'); var _ = require('lodash'); @@ -18,7 +18,7 @@ describe('mongodb-schema', function() { var users; it('should work', function() { assert.doesNotThrow(function() { - users = schema('users', FIXTURES.basic.users); + users = getSchema('users', FIXTURES.basic.users); }); }); it('should detect all fields', function() { @@ -59,7 +59,7 @@ describe('mongodb-schema', function() { var users; it('should work', function() { assert.doesNotThrow(function() { - users = schema('users', FIXTURES.embedded_documents.users); + users = getSchema('users', FIXTURES.embedded_documents.users); }); }); @@ -84,8 +84,7 @@ describe('mongodb-schema', function() { var following; it('should work', function() { assert.doesNotThrow(function() { - following = schema('following', FIXTURES.basic.following); - // console.log('following schema', JSON.stringify(following, null, 2)); + following = getSchema('following', FIXTURES.basic.following); }); }); // @todo: write more tests when not so tired... 
@@ -93,30 +92,99 @@ describe('mongodb-schema', function() { describe('evolving schema', function() { // The hardest case and really why this module exists at all: proper - // handling for polymorphic schemas. Consider the following scenario: + // handling for polymorphic schemas. Consider the followi;ng scenario: // // 1. started out with schema in `only basic fields`. - // 2. then read a blog post about how awesome embedded documents are. + // 2. then read a blog post about how awesome; embedded documents are. // 3. then realized what a pain embedded documents are. var users; it('should work', function() { assert.doesNotThrow(function() { - users = schema('users', _.union(FIXTURES.basic.users, FIXTURES.embedded_documents.users)); - //console.log('users schema', JSON.stringify(users, null, 2)); + users = getSchema('users', _.union(FIXTURES.basic.users, FIXTURES.embedded_documents.users)); }); }); - // @todo: figure out where we're not hitting a counter when not so tired... - it.skip('should have the correct probabilities for a field that was moved', function() { + it('should have the correct probabilities for a field that was moved', function() { var apple_push_token = users.fields.get('apple_push_token'); assert.equal(apple_push_token.count, 1); - assert.equal(apple_push_token.has_children, false); assert.equal(apple_push_token.type, 'String'); assert.equal(apple_push_token.types.get('String').count, 1); assert.equal(apple_push_token.types.get('String').unique, 1); - assert.equal(apple_push_token.types.get('String').probability, 0.5, + assert.equal(apple_push_token.probability, 0.5, '`apple_push_token` only appeared in 50% of documents but thinks it ' + 'has a probability of ' + - (apple_push_token.types.get('String').probability * 100) + '%'); + (apple_push_token.probability * 100) + '%'); + }); + }); + + describe('probability', function() { + var docs = [ + { + _id: 1, + registered: true + }, + { + _id: 2 + } + ]; + + var schema; + it('should load the schema', 
function() { + assert.doesNotThrow(function() { + schema = getSchema('probability', docs); + }); + }); + + it('should have a field level probability of 50% for `registered`', function() { + assert.equal(schema.fields.get('registered').probability, 0.5); + }); + it('should have a probability of 100% for `registered` to be a boolean', function() { + assert.equal(schema.fields.get('registered').type, 'Boolean'); + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, 1); + }); + }); + + describe('unique', function() { + var docs = [ + { + _id: 1, + registered: true + }, + { + _id: 2, + registered: true + } + ]; + + var schema; + it('should load the schema', function() { + assert.doesNotThrow(function() { + schema = getSchema('probability', docs); + }); + }); + + it('should have count of 2 for `_id`', function() { + assert.equal(schema.fields.get('_id').count, 2); + }); + + it('should have unique of 2 for `_id`', function() { + assert.equal(schema.fields.get('_id').unique, 2); + assert.equal(schema.fields.get('_id').types.get('Number').unique, 2); + }); + + it('should not have duplicates for `_id`', function() { + assert.equal(schema.fields.get('_id').has_duplicates, false); + }); + + it('should have count of 2 for `registered`', function() { + assert.equal(schema.fields.get('registered').count, 2); + }); + + it('should have unique of 1 for `registered`', function() { + assert.equal(schema.fields.get('registered').unique, 1); + }); + + it('should have duplicates for `registered`', function() { + assert.equal(schema.fields.get('registered').has_duplicates, true); }); }); }); From 0e91317d11bb2506c7b230c599ef3daac2b8f3e8 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 17:07:51 -0400 Subject: [PATCH 45/79] bump version to avoid unintentional breakage --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 8388f78..c657b6c 100644 --- a/package.json +++ b/package.json 
@@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "0.7.0", + "version": "1.0.0", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", From bc8b52ff4356854ca998c78af7890cd63b52e97d Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 17:14:24 -0400 Subject: [PATCH 46/79] Add Binary type support --- lib/type.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/type.js b/lib/type.js index ceb9eb8..08fbb03 100644 --- a/lib/type.js +++ b/lib/type.js @@ -105,6 +105,14 @@ exports.Undefined = Type.extend({ } }); +exports.Binary = Type.extend({ + props: { + _id: { + default: 'Binary' + } + } +}); + exports.Object = State.extend({ idAttribute: '_id', props: { From 6aadc0bb3f0c3cb8fe92faa118674945b8aa948e Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 18:51:51 -0400 Subject: [PATCH 47/79] make module.exports use streams + optional callback --- lib/index.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/index.js b/lib/index.js index e48bef7..230a9bd 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,16 +1,19 @@ var Schema = require('./schema'); -var _ = require('lodash'); +var es = require('event-stream'); -module.exports = function(ns, docs) { +module.exports = function(ns, docs, fn) { if (!Array.isArray(docs)) { docs = [docs]; } var schema = new Schema({ ns: ns }); - _.each(docs, function(doc) { - schema.sample(doc); - }); + + var stream = es.readArray(docs).pipe(schema.stream()); + if (fn) { + stream.on('end', fn); + } + return schema; }; From 2505123bdca4894d485d3ba018f1813cf3c35c78 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 18:55:06 -0400 Subject: [PATCH 48/79] adding test for mixed type probability synchronization --- test/test.js | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 
deletion(-) diff --git a/test/test.js b/test/test.js index 48a293b..57c8701 100644 --- a/test/test.js +++ b/test/test.js @@ -116,7 +116,7 @@ describe('mongodb-schema', function() { }); }); - describe('probability', function() { + describe('simple probability', function() { var docs = [ { _id: 1, @@ -143,6 +143,45 @@ describe('mongodb-schema', function() { }); }); + describe('mixed type probability', function() { + var docs = [ + { + _id: 1, + registered: 1 + }, + { + _id: 2, + registered: '1' + }, + { + _id: 3, + registered: true + } + ]; + + var schema; + it('should load the schema', function(done) { + assert.doesNotThrow(function() { + schema = getSchema('probability', docs, done); + }); + }); + + it('should have a field level probability of 100% for `registered`', function() { + assert.equal(schema.fields.get('registered').probability, 1); + }); + it('should have 3 types for `registered`', function() { + assert.equal(schema.fields.get('registered').types.length, 3); + }); + it('should have a probability of 33% for `registered` to be a boolean', function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, (1 / 3)); + }); + it('should have a probability of 33% for `registered` to be a number', function() { + assert.equal(schema.fields.get('registered').types.get('Number').probability, (1 / 3)); + }); + it('should have a probability of 33% for `registered` to be a string', function() { + assert.equal(schema.fields.get('registered').types.get('String').probability, (1 / 3)); + }); + }); describe('unique', function() { var docs = [ { From dce5b37825873c2ab4af13c5d074e4a4d3c31f42 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 21 May 2015 19:02:14 -0400 Subject: [PATCH 49/79] manually calculate derived props for Type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes mixed types probability issues and simplifies some of the “htf does this work?” factors. 
field.types will now also fire a `sync` event when probabilities are synchronized. --- lib/schema.js | 70 +++++++++++++++++++++++++++++---------------------- lib/type.js | 53 ++++++++++++++++++++++++-------------- 2 files changed, 74 insertions(+), 49 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index 6a89f26..628691d 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -112,8 +112,16 @@ var Field = State.extend({ field.count += 1; type.count += 1; field.values.add(value); + + field.types.map(function(type) { + type.set({ + probability: type.count / field.count, + unique: _.unique(type.values.pluck('value')).length + }); + }); + + field.types.trigger('sync'); }); - field.trigger('change:types'); }); }, serialize: function() { @@ -160,38 +168,41 @@ FieldCollection.prototype.model = function(attrs, options) { }; function onFieldSampled(schema, _id, value) { - var type_id = Type.getNameFromValue(value); - //debug('field `%s` sampled with value %j of type %s on schema', _id, value, type_id, schema); - if (type_id === 'Array') { - onEmbeddedArray(schema, _id, type_id, value); - } else if (type_id === 'Object') { - onEmbeddedDocument(schema, _id, type_id, value); - } else { - onBasicField(schema, _id, type_id, value); - } + process.nextTick(function() { + var type_id = Type.getNameFromValue(value); + if (type_id === 'Array') { + onEmbeddedArray(schema, _id, type_id, value); + } else if (type_id === 'Object') { + onEmbeddedDocument(schema, _id, type_id, value); + } else { + onBasicField(schema, _id, type_id, value); + } + }); } function onBasicField(schema, _id, type_id, value) { - var field = schema.fields.get(_id); - if (!field) { - field = schema.fields.add({ - _id: _id, - klass: BasicField, - parent: schema - }); - } + process.nextTick(function() { + var field = schema.fields.get(_id); + if (!field) { + field = schema.fields.add({ + _id: _id, + klass: BasicField, + parent: schema + }); + } - var type = field.types.get(type_id); - if (!type) { - type = 
field.types.add({ - _id: type_id, + var type = field.types.get(type_id); + if (!type) { + type = field.types.add({ + _id: type_id, - }); - } - if (!type.values) return console.warn('wtf? type has no values?', type); + }); + } + if (!type.values) return console.warn('wtf? type has no values?', type); - type.values.add({ - _id: value + type.values.add({ + _id: value + }); }); } @@ -253,10 +264,9 @@ var Schema = State.extend({ var schema = this; schema.count += 1; _.each(_.pairs(doc), function(d) { - process.nextTick(function() { - onFieldSampled(schema, d[0], d[1]); - }); + onFieldSampled(schema, d[0], d[1]); }); + }, stream: function() { var schema = this; diff --git a/lib/type.js b/lib/type.js index 08fbb03..b2e3d49 100644 --- a/lib/type.js +++ b/lib/type.js @@ -11,26 +11,19 @@ var Type = State.extend({ count: { type: 'number', default: 0 + }, + probability: { + type: 'number', + default: 0 + }, + unique: { + type: 'number', + default: 0 } }, collections: { values: ValueCollection }, - derived: { - unique: { - deps: ['count'], - fn: function() { - return _.unique(this.values.pluck('value')).length; - } - }, - probability: { - deps: ['count', 'collection.parent'], - fn: function() { - var field = this.collection.parent; - return this.count / field.count; - } - } - }, serialize: function() { return this.getAttributes({ props: true, @@ -73,6 +66,14 @@ exports.Null = Type.extend({ } }); +exports.Timestamp = Type.extend({ + props: { + _id: { + default: 'Timestamp' + } + } +}); + exports.Boolean = Type.extend({ props: { _id: { @@ -113,8 +114,23 @@ exports.Binary = Type.extend({ } }); -exports.Object = State.extend({ - idAttribute: '_id', +exports.MaxKey = Type.extend({ + props: { + _id: { + default: 'MaxKey' + } + } +}); + +exports.MinKey = Type.extend({ + props: { + _id: { + default: 'MinKey' + } + } +}); + +exports.Object = Type.extend({ props: { _id: { type: 'string', @@ -123,8 +139,7 @@ exports.Object = State.extend({ } }); -exports.Array = State.extend({ - 
idAttribute: '_id', +exports.Array = Type.extend({ props: { _id: { type: 'string', From 07c70fff4e4628228517d9dc26358200c79c41dd Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 22 May 2015 15:04:08 -0400 Subject: [PATCH 50/79] Failing tests @rueckstiess was about to write! --- test/test.js | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/test/test.js b/test/test.js index 57c8701..71ff765 100644 --- a/test/test.js +++ b/test/test.js @@ -156,6 +156,9 @@ describe('mongodb-schema', function() { { _id: 3, registered: true + }, + { + _id: 4 } ]; @@ -167,19 +170,22 @@ describe('mongodb-schema', function() { }); it('should have a field level probability of 100% for `registered`', function() { - assert.equal(schema.fields.get('registered').probability, 1); + assert.equal(schema.fields.get('registered').probability, 3 / 4); + }); + it('should have 4 types for `registered`', function() { + assert.equal(schema.fields.get('registered').types.length, 4); }); - it('should have 3 types for `registered`', function() { - assert.equal(schema.fields.get('registered').types.length, 3); + it('should have a probability of 25% for `registered` to be a boolean', function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, (1 / 4)); }); - it('should have a probability of 33% for `registered` to be a boolean', function() { - assert.equal(schema.fields.get('registered').types.get('Boolean').probability, (1 / 3)); + it('should have a probability of 25% for `registered` to be a number', function() { + assert.equal(schema.fields.get('registered').types.get('Number').probability, (1 / 4)); }); - it('should have a probability of 33% for `registered` to be a number', function() { - assert.equal(schema.fields.get('registered').types.get('Number').probability, (1 / 3)); + it('should have a probability of 25% for `registered` to be a string', function() { + 
assert.equal(schema.fields.get('registered').types.get('String').probability, (1 / 4)); }); - it('should have a probability of 33% for `registered` to be a string', function() { - assert.equal(schema.fields.get('registered').types.get('String').probability, (1 / 3)); + it('should have a probability of 25% for `registered` to be undefined', function() { + assert.equal(schema.fields.get('registered').types.get('Undefined').probability, (1 / 4)); }); }); describe('unique', function() { From 74306d230d35a0e04cde950700f7e29d043ce77a Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Fri, 22 May 2015 15:08:01 -0400 Subject: [PATCH 51/79] mroe test case updates for Undefined being added to values collection --- test/test.js | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/test/test.js b/test/test.js index 71ff765..c92b1f8 100644 --- a/test/test.js +++ b/test/test.js @@ -133,13 +133,11 @@ describe('mongodb-schema', function() { schema = getSchema('probability', docs); }); }); - - it('should have a field level probability of 50% for `registered`', function() { - assert.equal(schema.fields.get('registered').probability, 0.5); + it('should have a probability of 50% for `registered` to be a boolean', function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, 1 / 2); }); - it('should have a probability of 100% for `registered` to be a boolean', function() { - assert.equal(schema.fields.get('registered').type, 'Boolean'); - assert.equal(schema.fields.get('registered').types.get('Boolean').probability, 1); + it('should have a probability of 50% for `registered` to be undefined', function() { + assert.equal(schema.fields.get('registered').types.get('Undefined').probability, 1 / 2); }); }); @@ -168,10 +166,6 @@ describe('mongodb-schema', function() { schema = getSchema('probability', docs, done); }); }); - - it('should have a field level probability of 100% for `registered`', function() { - 
assert.equal(schema.fields.get('registered').probability, 3 / 4); - }); it('should have 4 types for `registered`', function() { assert.equal(schema.fields.get('registered').types.length, 4); }); From 74b99b4d9e6ec6b67c0f66370e87d2bcfd5a04cc Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 11:02:41 -0400 Subject: [PATCH 52/79] lots and lots of testing --- .zuul.yml | 12 ++ lib/collection.js | 7 + lib/index.js | 12 +- lib/schema.js | 197 ++++++++++-------- lib/state.js | 5 + lib/type-collection.js | 3 +- lib/type.js | 4 +- lib/value-collection.js | 2 +- lib/value.js | 5 +- package.json | 5 +- test/basic-embedded-array.test.js | 29 +++ test/basic-embedded-document-array.test.js | 28 +++ test/basic-embedded-documents.test.js | 55 +++++ test/basic-probability.test.js | 31 +++ test/basic-unique.test.js | 51 +++++ test/basic.test.js | 73 +++++++ test/fixture-basic-following.json | 12 -- test/fixture-basic-users.json | 21 -- test/fixture-embedded-documents-users.json | 31 --- test/mixed-type-evolving-schema.test.js | 72 +++++++ test/mixed-type-nested.test.js | 77 +++++++ test/mixed-type-probability.test.js | 51 +++++ test/test.js | 229 --------------------- 23 files changed, 617 insertions(+), 395 deletions(-) create mode 100644 .zuul.yml create mode 100644 lib/collection.js create mode 100644 lib/state.js create mode 100644 test/basic-embedded-array.test.js create mode 100644 test/basic-embedded-document-array.test.js create mode 100644 test/basic-embedded-documents.test.js create mode 100644 test/basic-probability.test.js create mode 100644 test/basic-unique.test.js create mode 100644 test/basic.test.js delete mode 100644 test/fixture-basic-following.json delete mode 100644 test/fixture-basic-users.json delete mode 100644 test/fixture-embedded-documents-users.json create mode 100644 test/mixed-type-evolving-schema.test.js create mode 100644 test/mixed-type-nested.test.js create mode 100644 test/mixed-type-probability.test.js delete mode 100644 
test/test.js diff --git a/.zuul.yml b/.zuul.yml new file mode 100644 index 0000000..0d48e29 --- /dev/null +++ b/.zuul.yml @@ -0,0 +1,12 @@ +ui: mocha-bdd +browsers: + - name: chrome + version: latest + - name: safari + version: latest + - name: firefox + version: latest + - name: ie + version: latest + - name: iphone + version: latest diff --git a/lib/collection.js b/lib/collection.js new file mode 100644 index 0000000..0b85d85 --- /dev/null +++ b/lib/collection.js @@ -0,0 +1,7 @@ +var lodashMixin = require('ampersand-collection-lodash-mixin'); +var Collection = require('ampersand-collection'); + +module.exports = Collection.extend(lodashMixin, { + mainIndex: '_id', + comparator: '_id' +}); diff --git a/lib/index.js b/lib/index.js index 230a9bd..aadfaf6 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,19 +1,14 @@ var Schema = require('./schema'); var es = require('event-stream'); +var assert = require('assert'); module.exports = function(ns, docs, fn) { - if (!Array.isArray(docs)) { - docs = [docs]; - } + assert(Array.isArray(docs), 'docs must be an array'); var schema = new Schema({ ns: ns }); - var stream = es.readArray(docs).pipe(schema.stream()); - if (fn) { - stream.on('end', fn); - } - + es.readArray(docs).pipe(schema.stream()).on('end', fn); return schema; }; @@ -24,3 +19,4 @@ module.exports.FieldCollection = Schema.FieldCollection; module.exports.BasicField = Schema.BasicField; module.exports.EmbeddedArrayField = Schema.EmbeddedArrayField; module.exports.EmbeddedDocumentField = Schema.EmbeddedDocumentField; +module.exports.TypeCollection = require('./type-collection'); diff --git a/lib/schema.js b/lib/schema.js index 628691d..aeec63b 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,20 +1,15 @@ -var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); -var State = require('ampersand-state'); var es = require('event-stream'); var _ = require('lodash'); -var debug = require('debug')('mongodb-schema'); - +var 
async = require('async'); +var Collection = require('./collection'); +var State = require('./state'); var Type = require('./type'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); - -var FieldCollection = Collection.extend({ - mainIndex: '_id', - comparator: '_id' -}); +var debug = require('debug')('mongodb-schema'); +var FieldCollection = Collection.extend({}); var Field = State.extend({ - idAttribute: '_id', props: { /** * The key in the `parent`. @@ -30,6 +25,10 @@ var Field = State.extend({ type: 'number', default: 0 }, + probability: { + type: 'number', + default: 0 + }, unique: { type: 'number', default: 0 @@ -74,11 +73,11 @@ var Field = State.extend({ return this.types.pluck('_id'); } }, - probability: { - deps: ['count', 'parent.count'], + total: { + deps: ['count', 'probability'], fn: function() { - var schema = this.parent; - return this.count / schema.count; + if (this.probability === 1) return this.count; + return (this.count / this.probability); } }, has_duplicates: { @@ -99,30 +98,51 @@ var Field = State.extend({ initialize: function() { var field = this; this.types.on('add', function(type) { - if (!type.values) { - console.warn('wtf? 
type has no values?', type); - return; + type.values.on('add', function(value) { + field.values.add(value); + }); + }); + }, + _updateUndefined: function() { + var newprob = this.count / this.parent.count; + if (newprob !== this.probability) { + this.probability = newprob; + } + var undef = this.types.get('Undefined'); + if ((this.total - this.count) <= 0) { + if (undef) { + debug('removing extraneous Undefined for `%s`', this.getId()); + this.types.remove({ + _id: 'Undefined' + }); } + return; + } - type.on('change:unique', function() { - field.unique = _.sum(field.types.pluck('unique')); + if (!undef) { + debug('adding Undefined for `%s`', this.getId()); + undef = this.types.add({ + _id: 'Undefined', + unique: 1 }); + } - type.values.on('add', function(value) { - field.count += 1; - type.count += 1; - field.values.add(value); - - field.types.map(function(type) { - type.set({ - probability: type.count / field.count, - unique: _.unique(type.values.pluck('value')).length - }); - }); + undef.count = (this.total - this.count); + undef.probability = (undef.count - this.count); + }, + commit: function() { + this._updateUndefined(); + this.types.map(function(type) { + type.probability = type.count / this.total; + type.unique = _.unique(type.values.pluck('value')).length; + }.bind(this)); + this.unique = _.sum(this.types.pluck('unique')); - field.types.trigger('sync'); + if (this.fields.length > 0) { + this.fields.map(function(field) { + field.commit(); }); - }); + } }, serialize: function() { var res = this.getAttributes({ @@ -167,46 +187,44 @@ FieldCollection.prototype.model = function(attrs, options) { return new attrs.klass(attrs, options); }; -function onFieldSampled(schema, _id, value) { - process.nextTick(function() { - var type_id = Type.getNameFromValue(value); - if (type_id === 'Array') { - onEmbeddedArray(schema, _id, type_id, value); - } else if (type_id === 'Object') { - onEmbeddedDocument(schema, _id, type_id, value); - } else { - onBasicField(schema, _id, 
type_id, value); - } - }); +function onFieldSampled(schema, _id, value, done) { + var type_id = Type.getNameFromValue(value); + if (type_id === 'Array') { + onEmbeddedArray(schema, _id, type_id, value, done); + } else if (type_id === 'Object') { + onEmbeddedDocument(schema, _id, type_id, value, done); + } else { + onBasicField(schema, _id, type_id, value, done); + } } -function onBasicField(schema, _id, type_id, value) { - process.nextTick(function() { - var field = schema.fields.get(_id); - if (!field) { - field = schema.fields.add({ - _id: _id, - klass: BasicField, - parent: schema - }); - } - - var type = field.types.get(type_id); - if (!type) { - type = field.types.add({ - _id: type_id, - - }); - } - if (!type.values) return console.warn('wtf? type has no values?', type); +function onBasicField(schema, _id, type_id, value, done) { + var field = schema.fields.get(_id); + if (!field) { + field = schema.fields.add({ + _id: _id, + klass: BasicField, + parent: schema + }); + } + field.count += 1; - type.values.add({ - _id: value + var type = field.types.get(type_id); + if (!type) { + type = field.types.add({ + _id: type_id, }); + } + type.count += 1; + + type.values.add({ + _id: value }); + + done(null, _id); } -function onEmbeddedArray(schema, _id, type_id, value) { +function onEmbeddedArray(schema, _id, type_id, value, done) { var field = schema.fields.get(_id); if (!field) { @@ -218,19 +236,22 @@ function onEmbeddedArray(schema, _id, type_id, value) { } field.count += 1; - _.each(value, function(d) { + var tasks = []; + _.map(value, function(d) { var type_id = Type.getNameFromValue(d); if (type_id === 'Object') { - _.each(d, function(val, key) { - onBasicField(field, key, Type.getNameFromValue(val), val); + return _.map(d, function(val, key) { + tasks.push(onBasicField.bind(null, field, key, Type.getNameFromValue(val), val)); }); } else { - onBasicField(field, '__basic__', type_id, d); + tasks.push(onBasicField.bind(null, field, '__basic__', type_id, d)); } }); + 
+ async.series(tasks, done); } -function onEmbeddedDocument(schema, _id, type_id, value) { +function onEmbeddedDocument(schema, _id, type_id, value, done) { var field = schema.fields.get(_id); if (!field) { @@ -241,9 +262,9 @@ function onEmbeddedDocument(schema, _id, type_id, value) { }); } field.count += 1; - _.each(_.pairs(value), function(d) { - onFieldSampled(field, d[0], d[1]); - }); + async.series(_.map(value, function(val, key) { + return onFieldSampled.bind(null, field, key, val); + }), done); } var Schema = State.extend({ @@ -260,23 +281,29 @@ var Schema = State.extend({ collections: { fields: FieldCollection }, - sample: function(doc) { + parse: function(doc, done) { + debug('parse'); var schema = this; schema.count += 1; - _.each(_.pairs(doc), function(d) { - onFieldSampled(schema, d[0], d[1]); - }); - + async.series(_.map(doc, function(val, key) { + return onFieldSampled.bind(null, schema, key, val); + }), function(err, res) { + schema.fields.map(function(field) { + field.commit(); + }); + debug('parse complete'); + done(err, res); + }); }, stream: function() { var schema = this; - return es.through(function(doc) { - schema.sample(doc); - this.emit('data', doc); - }, function() { - debug('finalized schema is', JSON.stringify(schema, null, 2)); - this.emit('end'); - }); + return es.map(function(doc, done) { + setTimeout(function() { + schema.parse(doc, function(err) { + done(err, doc); + }); + }, 500); + }); } }); diff --git a/lib/state.js b/lib/state.js new file mode 100644 index 0000000..4219d92 --- /dev/null +++ b/lib/state.js @@ -0,0 +1,5 @@ +var State = require('ampersand-state'); + +module.exports = State.extend({ + idAttribute: '_id' +}); diff --git a/lib/type-collection.js b/lib/type-collection.js index 84c1bfa..85f96a7 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -1,8 +1,7 @@ -var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); +var Collection = require('./collection'); 
var type = require('./type'); module.exports = Collection.extend({ - mainIndex: '_id', model: function(attrs, options) { var Klass = type[attrs._id]; diff --git a/lib/type.js b/lib/type.js index b2e3d49..fb416b2 100644 --- a/lib/type.js +++ b/lib/type.js @@ -1,9 +1,9 @@ -var State = require('ampersand-state'); +var State = require('./state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); +var debug = require('debug')('mongodb-schema:type'); var Type = State.extend({ - idAttribute: '_id', props: { _id: { type: 'string' diff --git a/lib/value-collection.js b/lib/value-collection.js index 1b8c919..e7cee5d 100644 --- a/lib/value-collection.js +++ b/lib/value-collection.js @@ -1,4 +1,4 @@ -var Collection = require('ampersand-collection').extend(require('ampersand-collection-lodash-mixin')); +var Collection = require('./collection'); var Value = require('./value'); module.exports = Collection.extend({ diff --git a/lib/value.js b/lib/value.js index abc9e39..4ff5629 100644 --- a/lib/value.js +++ b/lib/value.js @@ -1,7 +1,6 @@ -var AmpersandState = require('ampersand-state'); +var State = require('./state'); -module.exports = AmpersandState.extend({ - _idAttribute: '_id', +module.exports = State.extend({ props: { _id: { type: 'any' diff --git a/package.json b/package.json index c657b6c..c325138 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "url": "https://github.com/mongodb-js/mongodb-schema/issues" }, "scripts": { + "start": "zuul --local 3001 --open -- test/*.test.js", "test": "mocha" }, "keywords": [ @@ -23,6 +24,7 @@ "ampersand-collection": "^1.4.5", "ampersand-collection-lodash-mixin": "^2.0.1", "ampersand-state": "^4.5.4", + "async": "^1.0.0", "bson": "^0.3.1", "debug": "^2.1.3", "event-stream": "^3.3.0", @@ -30,6 +32,7 @@ }, "devDependencies": { "mocha": "^2.0.1", - "mongodb-extended-json": "^1.3.0" + "mongodb-extended-json": "^1.3.0", + "zuul": "^3.0.0" } } diff --git a/test/basic-embedded-array.test.js 
b/test/basic-embedded-array.test.js new file mode 100644 index 0000000..5fe9450 --- /dev/null +++ b/test/basic-embedded-array.test.js @@ -0,0 +1,29 @@ +var getSchema = require('../'); +var assert = require('assert'); +var BSON = require('bson'); + +describe('basic embedded array', function() { + var following; + var docs = [ + { + '_id': BSON.ObjectID('55581e0a9bf712d0c2b48d71'), + 'following_ids': [BSON.ObjectID('55582407aafa8fbbc57196e2')] + }, + { + '_id': BSON.ObjectID('55582407aafa8fbbc57196e2'), + 'following_ids': [BSON.ObjectID('55581e0a9bf712d0c2b48d71')] + }, + + ]; + + before(function(done) { + following = getSchema('following', docs, done); + }); + + it('should serialize correctly', function() { + assert.doesNotThrow(function() { + following.toJSON(); + }); + }); + // @todo: write more tests when not so tired... +}); diff --git a/test/basic-embedded-document-array.test.js b/test/basic-embedded-document-array.test.js new file mode 100644 index 0000000..7406c42 --- /dev/null +++ b/test/basic-embedded-document-array.test.js @@ -0,0 +1,28 @@ +var getSchema = require('../'); +var assert = require('assert'); +var BSON = require('bson'); + +describe('basic embedded document array', function() { + var following; + var docs = [ + { + '_id': BSON.ObjectID('55581e0a9bf712d0c2b48d71'), + 'following': [ + { + _id: BSON.ObjectID('55582407aafa8fbbc57196e2') + } + ] + } + ]; + + before(function(done) { + following = getSchema('following', docs, done); + }); + + it('should serialize correctly', function() { + assert.doesNotThrow(function() { + following.toJSON(); + }); + }); + // @todo: write more tests when not so tired... 
+}); diff --git a/test/basic-embedded-documents.test.js b/test/basic-embedded-documents.test.js new file mode 100644 index 0000000..f975d0e --- /dev/null +++ b/test/basic-embedded-documents.test.js @@ -0,0 +1,55 @@ +var getSchema = require('../'); +var assert = require('assert'); +var BSON = require('bson'); + +describe('basic embedded documents', function() { + var users; + var docs = [ + { + '_id': BSON.ObjectID('55582407aafa8fbbc57196e2'), + 'name': 'Brett Flowers', + 'email': { + '_id': 'gohu@pum.io', + 'is_verified': false + }, + 'twitter': { + 'username': '@lekbisova' + }, + 'stats': { + 'friends': 7584 + }, + 'push_token': { + 'android': undefined, + 'apple': '4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c' + }, + 'last_address': { + 'latitude': null, + 'longitude': null + }, + 'created_at': new Date() + } + ]; + + before(function(done) { + users = getSchema('users', docs, done); + }); + + it('should detect all fields', function() { + var field_ids = [ + '_id', + 'created_at', + 'email', + 'last_address', + 'name', + 'push_token', + 'stats', + 'twitter' + ]; + assert.deepEqual(users.fields.pluck('_id'), field_ids); + }); + it('should serialize correctly', function() { + assert.doesNotThrow(function() { + users.toJSON(); + }); + }); +}); diff --git a/test/basic-probability.test.js b/test/basic-probability.test.js new file mode 100644 index 0000000..5336e1c --- /dev/null +++ b/test/basic-probability.test.js @@ -0,0 +1,31 @@ +var getSchema = require('../'); +var assert = require('assert'); + +describe('simple probability', function() { + var docs = [ + { + _id: 1, + registered: true + }, + { + _id: 2 + } + ]; + + var schema; + before(function(done) { + schema = getSchema('probability', docs, function(err) { + if (err) return done(err); + if (!schema.fields.get('registered')) { + return done(new Error('Did not pick up `registered` field')); + } + done(); + }); + }); + it('should have a probability of 50% for `registered` to be a boolean', 
function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, 1 / 2); + }); + it('should have a probability of 50% for `registered` to be undefined', function() { + assert.equal(schema.fields.get('registered').types.get('Undefined').probability, 1 / 2); + }); +}); diff --git a/test/basic-unique.test.js b/test/basic-unique.test.js new file mode 100644 index 0000000..f3d71cf --- /dev/null +++ b/test/basic-unique.test.js @@ -0,0 +1,51 @@ +var getSchema = require('../'); +var assert = require('assert'); + +describe('unique', function() { + var docs = [ + { + _id: 1, + registered: true + }, + { + _id: 2, + registered: true + } + ]; + + var schema; + before(function(done) { + schema = getSchema('unique', docs, function(err) { + if (err) return done(err); + if (!schema.fields.get('_id')) { + return done(new Error('Did not pick up `_id` field')); + } + done(); + }); + }); + + it('should have count of 2 for `_id`', function() { + assert.equal(schema.fields.get('_id').count, 2); + }); + + it('should have unique of 2 for `_id`', function() { + assert.equal(schema.fields.get('_id').unique, 2); + assert.equal(schema.fields.get('_id').types.get('Number').unique, 2); + }); + + it('should not have duplicates for `_id`', function() { + assert.equal(schema.fields.get('_id').has_duplicates, false); + }); + + it('should have count of 2 for `registered`', function() { + assert.equal(schema.fields.get('registered').count, 2); + }); + + it('should have unique of 1 for `registered`', function() { + assert.equal(schema.fields.get('registered').unique, 1); + }); + + it('should have duplicates for `registered`', function() { + assert.equal(schema.fields.get('registered').has_duplicates, true); + }); +}); diff --git a/test/basic.test.js b/test/basic.test.js new file mode 100644 index 0000000..ef84ef9 --- /dev/null +++ b/test/basic.test.js @@ -0,0 +1,73 @@ +var getSchema = require('../'); +var assert = require('assert'); +var BSON = require('bson'); + 
+describe('using only basic fields', function() { + var docs = [ + { + '_id': BSON.ObjectID('55581e0a9bf712d0c2b48d71'), + 'email': 'tupjud@weigehib.gov', + 'is_verified': false, + 'twitter_username': '@zaetisi', + 'name': 'Hunter Maxwell', + 'stats_friends': 2163, + 'apple_push_token': 'd4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6', + 'android_push_token': undefined, + 'last_address_latitude': null, + 'last_address_longitude': null, + 'created_at': new Date() + } + ]; + + var users; + it('should work', function(done) { + assert.doesNotThrow(function() { + users = getSchema('users', docs, done); + }); + }); + it('should detect all fields', function() { + // assert.equal(users.fields.length, 11); + + var field_ids = [ + '_id', + 'android_push_token', + 'apple_push_token', + 'created_at', + 'email', + 'is_verified', + 'last_address_latitude', + 'last_address_longitude', + 'name', + 'stats_friends', + 'twitter_username' + ]; + assert.deepEqual(users.fields.pluck('_id'), field_ids); + }); + + it('should detect the correct type for each field', function() { + assert.equal(users.fields.get('_id').type, 'ObjectID'); + //assert.equal(users.fields.get('android_push_token').type, 'Undefined'); + assert.equal(users.fields.get('apple_push_token').type, 'String'); + assert.equal(users.fields.get('created_at').type, 'Date'); + assert.equal(users.fields.get('email').type, 'String'); + assert.equal(users.fields.get('is_verified').type, 'Boolean'); + assert.equal(users.fields.get('last_address_latitude').type, 'Null'); + assert.equal(users.fields.get('last_address_longitude').type, 'Null'); + assert.equal(users.fields.get('name').type, 'String'); + assert.equal(users.fields.get('stats_friends').type, 'Number'); + assert.equal(users.fields.get('twitter_username').type, 'String'); + }); + + it('should serialize correctly', function() { + assert.doesNotThrow(function() { + users.toJSON(); + }); + }); + it('should raise a TypeError for unknown types', function() { + 
assert.throws(function() { + new getSchema.TypeCollection({ + model: 'Image' + }); + }); + }); +}); diff --git a/test/fixture-basic-following.json b/test/fixture-basic-following.json deleted file mode 100644 index 807839e..0000000 --- a/test/fixture-basic-following.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "_id": { - "$oid": "55581e0a9bf712d0c2b48d71" - }, - "following_ids": [ - { - "$oid": "55582407aafa8fbbc57196e2" - } - ] - } -] diff --git a/test/fixture-basic-users.json b/test/fixture-basic-users.json deleted file mode 100644 index 1929c70..0000000 --- a/test/fixture-basic-users.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "_id": { - "$oid": "55581e0a9bf712d0c2b48d71" - }, - "email": "tupjud@weigehib.gov", - "is_verified": false, - "twitter_username": "@zaetisi", - "name": "Hunter Maxwell", - "stats_friends": 2163, - "apple_push_token": "d4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6", - "android_push_token": { - "$undefined": true - }, - "last_address_latitude": null, - "last_address_longitude": null, - "created_at": { - "$date": "0115-07-26T05:51:21.284Z" - } - } -] diff --git a/test/fixture-embedded-documents-users.json b/test/fixture-embedded-documents-users.json deleted file mode 100644 index 47e9c5a..0000000 --- a/test/fixture-embedded-documents-users.json +++ /dev/null @@ -1,31 +0,0 @@ -[ - { - "_id": { - "$oid": "55582407aafa8fbbc57196e2" - }, - "name": "Brett Flowers", - "email": { - "_id": "gohu@pum.io", - "is_verified": false - }, - "twitter": { - "username": "@lekbisova" - }, - "stats": { - "friends": 7584 - }, - "push_token": { - "android": { - "$undefined": true - }, - "apple": "4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c" - }, - "last_address": { - "latitude": null, - "longitude": null - }, - "created_at": { - "$date": "0115-11-15T13:22:18.387Z" - } - } -] diff --git a/test/mixed-type-evolving-schema.test.js b/test/mixed-type-evolving-schema.test.js new file mode 100644 index 0000000..72b2ba7 --- 
/dev/null +++ b/test/mixed-type-evolving-schema.test.js @@ -0,0 +1,72 @@ +var getSchema = require('../'); +var assert = require('assert'); +var BSON = require('bson'); + +describe('evolving schema', function() { + // The hardest case and really why this module exists at all: proper + // handling for polymorphic schemas. Consider the followi;ng scenario: + // + // 1. started out with schema in `only basic fields`. + // 2. then read a blog post about how awesome; embedded documents are. + // 3. then realized what a pain embedded documents are. + var users; + var apple_push_token; + var docs = [ + { + '_id': BSON.ObjectID('55582407aafa8fbbc57196e2'), + 'name': 'Brett Flowers', + 'email': { + '_id': 'gohu@pum.io', + 'is_verified': false + }, + 'twitter': { + 'username': '@lekbisova' + }, + 'stats': { + 'friends': 7584 + }, + 'push_token': { + 'android': undefined, + 'apple': '4e2e068cd281cfe924ff3174dfe363bd3108a5852ca5197f37c40c1bca6e1a4c' + }, + 'last_address': { + 'latitude': null, + 'longitude': null + }, + 'created_at': new Date() + }, + { + '_id': BSON.ObjectID('55581e0a9bf712d0c2b48d71'), + 'email': 'tupjud@weigehib.gov', + 'is_verified': false, + 'twitter_username': '@zaetisi', + 'name': 'Hunter Maxwell', + 'stats_friends': 2163, + 'apple_push_token': 'd4b4e7f3361cec05fae848575d7e6e1da2f0dccdf8ccc86a8ff2124d8b0542f6', + 'android_push_token': undefined, + 'last_address_latitude': null, + 'last_address_longitude': null, + 'created_at': new Date() + } + ]; + + before(function(done) { + users = getSchema('users', docs, done); + }); + it('should have the `apple_push_token` field', function() { + apple_push_token = users.fields.get('apple_push_token'); + assert(apple_push_token); + }); + it('should have seen `apple_push_token` 1 time', function() { + assert.equal(apple_push_token.count, 1); + }); + it('should have seen `apple_push_token` in 50% of documents', function() { + assert.equal(apple_push_token.probability, 0.5); + }); + it('should have seen 
`apple_push_token` 1 time as a string', function() { + assert.equal(apple_push_token.types.get('String').count, 1); + }); + it('should have seen 1 unique string value for `apple_push_token`', function() { + assert.equal(apple_push_token.types.get('String').unique, 1); + }); +}); diff --git a/test/mixed-type-nested.test.js b/test/mixed-type-nested.test.js new file mode 100644 index 0000000..e706386 --- /dev/null +++ b/test/mixed-type-nested.test.js @@ -0,0 +1,77 @@ +var getSchema = require('../'); +var assert = require('assert'); + +describe('mixed types nested', function() { + var docs = [ + { + _id: 1, + address: { + valid: 0 + } + }, + { + _id: 2, + address: { + valid: false + } + }, + { + _id: 3, + address: { + valid: 'None' + } + }, + { + _id: 4, + address: {} + }, + { + _id: 5, + address: { + valid: true + } + }, + ]; + + var schema; + var valid; + + before(function(done) { + schema = getSchema('contacts', docs, function(err) { + if (err) return done(err); + if (!schema.fields.get('_id')) { + return done(new Error('Did not pick up `_id` field')); + } + valid = schema.fields.get('address').fields.get('valid'); + if (!valid) { + return done(new Error('Did not pick up `address.valid` field')); + } + if (!valid.get('types').get('Undefined')) { + return done(new Error('Missing Undefined type on `address.valid`')); + } + done(); + }); + }); + + it('should see the `address` field is always present', function() { + assert.equal(schema.fields.get('address').probability, 1); + }); + it('should see the `valid` field in 80% of documents', function() { + assert.equal(schema.fields.get('address').fields.get('valid').probability, 0.8); + }); + it('should see there are 4 possible types for `valid`', function() { + assert.equal(schema.fields.get('address').fields.get('valid').get('types').length, 4); + }); + it('should see `Number` was used in 20% of documents', function() { + assert.equal(schema.fields.get('address').fields.get('valid').get('types').get('Number').probability, 
0.2); + }); + it('should see `Boolean` was used in 40% of documents', function() { + assert.equal(schema.fields.get('address').fields.get('valid').get('types').get('Boolean').probability, 0.4); + }); + it('should see `Undefined` was used in 20% of documents', function() { + assert.equal(valid.get('types').get('Undefined').probability, 0.2); + }); + it('should see `String` was used in 20% of documents', function() { + assert.equal(schema.fields.get('address').fields.get('valid').get('types').get('String').probability, 0.2); + }); +}); diff --git a/test/mixed-type-probability.test.js b/test/mixed-type-probability.test.js new file mode 100644 index 0000000..6a5d0a4 --- /dev/null +++ b/test/mixed-type-probability.test.js @@ -0,0 +1,51 @@ +var getSchema = require('../'); +var assert = require('assert'); + +describe('mixed type probability', function() { + var docs = [ + { + _id: 1, + registered: 1 + }, + { + _id: 2, + registered: '1' + }, + { + _id: 3, + registered: true + }, + { + _id: 4 + } + ]; + + var schema; + before(function(done) { + schema = getSchema('probability', docs, function(err) { + if (err) return done(err); + if (!schema.fields.get('registered')) { + return done(new Error('Did not pick up `registered` field')); + } + if (!schema.fields.get('registered').types.get('Undefined')) { + return done(new Error('Missing Undefined type on `registered`')); + } + done(); + }); + }); + it('should have 4 types for `registered`', function() { + assert.equal(schema.fields.get('registered').types.length, 4); + }); + it('should have a probability of 25% for `registered` to be a boolean', function() { + assert.equal(schema.fields.get('registered').types.get('Boolean').probability, (1 / 4)); + }); + it('should have a probability of 25% for `registered` to be a number', function() { + assert.equal(schema.fields.get('registered').types.get('Number').probability, (1 / 4)); + }); + it('should have a probability of 25% for `registered` to be a string', function() { + 
assert.equal(schema.fields.get('registered').types.get('String').probability, (1 / 4)); + }); + it('should have a probability of 25% for `registered` to be undefined', function() { + assert.equal(schema.fields.get('registered').types.get('Undefined').probability, (1 / 4)); + }); +}); diff --git a/test/test.js b/test/test.js deleted file mode 100644 index c92b1f8..0000000 --- a/test/test.js +++ /dev/null @@ -1,229 +0,0 @@ -var getSchema = require('../'); -var assert = require('assert'); -var EJSON = require('mongodb-extended-json'); -var _ = require('lodash'); - -var FIXTURES = { - basic: { - users: EJSON.deflate(require('./fixture-basic-users.json')), - following: EJSON.deflate(require('./fixture-basic-following.json')) - }, - embedded_documents: { - users: EJSON.deflate(require('./fixture-embedded-documents-users.json')) - } -}; - -describe('mongodb-schema', function() { - describe('using only basic fields', function() { - var users; - it('should work', function() { - assert.doesNotThrow(function() { - users = getSchema('users', FIXTURES.basic.users); - }); - }); - it('should detect all fields', function() { - assert.equal(users.fields.length, 11); - - var field_ids = [ - '_id', - 'android_push_token', - 'apple_push_token', - 'created_at', - 'email', - 'is_verified', - 'last_address_latitude', - 'last_address_longitude', - 'name', - 'stats_friends', - 'twitter_username' - ]; - assert.deepEqual(users.fields.pluck('_id'), field_ids); - }); - - it('should detect the correct type for each field', function() { - assert.equal(users.fields.get('_id').type, 'ObjectID'); - assert.equal(users.fields.get('android_push_token').type, 'Undefined'); - assert.equal(users.fields.get('apple_push_token').type, 'String'); - assert.equal(users.fields.get('created_at').type, 'Date'); - assert.equal(users.fields.get('email').type, 'String'); - assert.equal(users.fields.get('is_verified').type, 'Boolean'); - assert.equal(users.fields.get('last_address_latitude').type, 'Null'); - 
assert.equal(users.fields.get('last_address_longitude').type, 'Null'); - assert.equal(users.fields.get('name').type, 'String'); - assert.equal(users.fields.get('stats_friends').type, 'Number'); - assert.equal(users.fields.get('twitter_username').type, 'String'); - }); - }); - - describe('using basic fields and embedded documents', function() { - var users; - it('should work', function() { - assert.doesNotThrow(function() { - users = getSchema('users', FIXTURES.embedded_documents.users); - }); - }); - - it('should detect all fields', function() { - assert.equal(users.fields.length, 8); - - var field_ids = [ - '_id', - 'created_at', - 'email', - 'last_address', - 'name', - 'push_token', - 'stats', - 'twitter' - ]; - assert.deepEqual(users.fields.pluck('_id'), field_ids); - }); - }); - - describe('embedded array of basic properties', function() { - var following; - it('should work', function() { - assert.doesNotThrow(function() { - following = getSchema('following', FIXTURES.basic.following); - }); - }); - // @todo: write more tests when not so tired... - }); - - describe('evolving schema', function() { - // The hardest case and really why this module exists at all: proper - // handling for polymorphic schemas. Consider the followi;ng scenario: - // - // 1. started out with schema in `only basic fields`. - // 2. then read a blog post about how awesome; embedded documents are. - // 3. then realized what a pain embedded documents are. 
- var users; - it('should work', function() { - assert.doesNotThrow(function() { - users = getSchema('users', _.union(FIXTURES.basic.users, FIXTURES.embedded_documents.users)); - }); - }); - it('should have the correct probabilities for a field that was moved', function() { - var apple_push_token = users.fields.get('apple_push_token'); - assert.equal(apple_push_token.count, 1); - assert.equal(apple_push_token.type, 'String'); - assert.equal(apple_push_token.types.get('String').count, 1); - assert.equal(apple_push_token.types.get('String').unique, 1); - assert.equal(apple_push_token.probability, 0.5, - '`apple_push_token` only appeared in 50% of documents but thinks it ' + - 'has a probability of ' + - (apple_push_token.probability * 100) + '%'); - }); - }); - - describe('simple probability', function() { - var docs = [ - { - _id: 1, - registered: true - }, - { - _id: 2 - } - ]; - - var schema; - it('should load the schema', function() { - assert.doesNotThrow(function() { - schema = getSchema('probability', docs); - }); - }); - it('should have a probability of 50% for `registered` to be a boolean', function() { - assert.equal(schema.fields.get('registered').types.get('Boolean').probability, 1 / 2); - }); - it('should have a probability of 50% for `registered` to be undefined', function() { - assert.equal(schema.fields.get('registered').types.get('Undefined').probability, 1 / 2); - }); - }); - - describe('mixed type probability', function() { - var docs = [ - { - _id: 1, - registered: 1 - }, - { - _id: 2, - registered: '1' - }, - { - _id: 3, - registered: true - }, - { - _id: 4 - } - ]; - - var schema; - it('should load the schema', function(done) { - assert.doesNotThrow(function() { - schema = getSchema('probability', docs, done); - }); - }); - it('should have 4 types for `registered`', function() { - assert.equal(schema.fields.get('registered').types.length, 4); - }); - it('should have a probability of 25% for `registered` to be a boolean', function() { - 
assert.equal(schema.fields.get('registered').types.get('Boolean').probability, (1 / 4)); - }); - it('should have a probability of 25% for `registered` to be a number', function() { - assert.equal(schema.fields.get('registered').types.get('Number').probability, (1 / 4)); - }); - it('should have a probability of 25% for `registered` to be a string', function() { - assert.equal(schema.fields.get('registered').types.get('String').probability, (1 / 4)); - }); - it('should have a probability of 25% for `registered` to be undefined', function() { - assert.equal(schema.fields.get('registered').types.get('Undefined').probability, (1 / 4)); - }); - }); - describe('unique', function() { - var docs = [ - { - _id: 1, - registered: true - }, - { - _id: 2, - registered: true - } - ]; - - var schema; - it('should load the schema', function() { - assert.doesNotThrow(function() { - schema = getSchema('probability', docs); - }); - }); - - it('should have count of 2 for `_id`', function() { - assert.equal(schema.fields.get('_id').count, 2); - }); - - it('should have unique of 2 for `_id`', function() { - assert.equal(schema.fields.get('_id').unique, 2); - assert.equal(schema.fields.get('_id').types.get('Number').unique, 2); - }); - - it('should not have duplicates for `_id`', function() { - assert.equal(schema.fields.get('_id').has_duplicates, false); - }); - - it('should have count of 2 for `registered`', function() { - assert.equal(schema.fields.get('registered').count, 2); - }); - - it('should have unique of 1 for `registered`', function() { - assert.equal(schema.fields.get('registered').unique, 1); - }); - - it('should have duplicates for `registered`', function() { - assert.equal(schema.fields.get('registered').has_duplicates, true); - }); - }); -}); From 4180c5b5dc0c521b55d1e73387ffdcf70ad9934b Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 14:02:00 -0400 Subject: [PATCH 53/79] yay. can now remove async and everything is fast again. 
--- lib/schema.js | 51 +++++++++++++++++++++------------------------------ package.json | 1 - 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index aeec63b..c596e00 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,6 +1,5 @@ var es = require('event-stream'); var _ = require('lodash'); -var async = require('async'); var Collection = require('./collection'); var State = require('./state'); var Type = require('./type'); @@ -187,18 +186,18 @@ FieldCollection.prototype.model = function(attrs, options) { return new attrs.klass(attrs, options); }; -function onFieldSampled(schema, _id, value, done) { +function onFieldSampled(schema, _id, value) { var type_id = Type.getNameFromValue(value); if (type_id === 'Array') { - onEmbeddedArray(schema, _id, type_id, value, done); + onEmbeddedArray(schema, _id, type_id, value); } else if (type_id === 'Object') { - onEmbeddedDocument(schema, _id, type_id, value, done); + onEmbeddedDocument(schema, _id, type_id, value); } else { - onBasicField(schema, _id, type_id, value, done); + onBasicField(schema, _id, type_id, value); } } -function onBasicField(schema, _id, type_id, value, done) { +function onBasicField(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { field = schema.fields.add({ @@ -220,11 +219,9 @@ function onBasicField(schema, _id, type_id, value, done) { type.values.add({ _id: value }); - - done(null, _id); } -function onEmbeddedArray(schema, _id, type_id, value, done) { +function onEmbeddedArray(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { @@ -236,22 +233,19 @@ function onEmbeddedArray(schema, _id, type_id, value, done) { } field.count += 1; - var tasks = []; - _.map(value, function(d) { + _.each(value, function(d) { var type_id = Type.getNameFromValue(d); if (type_id === 'Object') { - return _.map(d, function(val, key) { - tasks.push(onBasicField.bind(null, field, key, Type.getNameFromValue(val), val)); + 
_.each(d, function(val, key) { + onBasicField(field, key, Type.getNameFromValue(val), val); }); } else { - tasks.push(onBasicField.bind(null, field, '__basic__', type_id, d)); + onBasicField(field, '__basic__', type_id, d); } }); - - async.series(tasks, done); } -function onEmbeddedDocument(schema, _id, type_id, value, done) { +function onEmbeddedDocument(schema, _id, type_id, value) { var field = schema.fields.get(_id); if (!field) { @@ -262,9 +256,9 @@ function onEmbeddedDocument(schema, _id, type_id, value, done) { }); } field.count += 1; - async.series(_.map(value, function(val, key) { - return onFieldSampled.bind(null, field, key, val); - }), done); + _.each(value, function(val, key) { + onFieldSampled(field, key, val); + }); } var Schema = State.extend({ @@ -282,18 +276,15 @@ var Schema = State.extend({ fields: FieldCollection }, parse: function(doc, done) { - debug('parse'); var schema = this; schema.count += 1; - async.series(_.map(doc, function(val, key) { - return onFieldSampled.bind(null, schema, key, val); - }), function(err, res) { - schema.fields.map(function(field) { - field.commit(); - }); - debug('parse complete'); - done(err, res); - }); + _.each(doc, function(val, key) { + onFieldSampled(schema, key, val); + }); + schema.fields.map(function(field) { + field.commit(); + }); + done(); }, stream: function() { var schema = this; diff --git a/package.json b/package.json index c325138..8c5218b 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,6 @@ "ampersand-collection": "^1.4.5", "ampersand-collection-lodash-mixin": "^2.0.1", "ampersand-state": "^4.5.4", - "async": "^1.0.0", "bson": "^0.3.1", "debug": "^2.1.3", "event-stream": "^3.3.0", From 67976e8cf0514e7ac538d055d74a4e172a63c5ba Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 15:59:13 -0400 Subject: [PATCH 54/79] switch setTimeout -> raf lots of docs --- lib/schema.js | 77 ++++++++++++++++++++++++++++++++++++++++++++------- package.json | 3 +- 2 files changed, 69 
insertions(+), 11 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index c596e00..3f74cc2 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,5 +1,7 @@ var es = require('event-stream'); var _ = require('lodash'); +var raf = require('raf'); + var Collection = require('./collection'); var State = require('./state'); var Type = require('./type'); @@ -61,7 +63,7 @@ var Field = State.extend({ * http://spacetelescope.github.io/understanding-json-schema/reference/type.html */ type: { - deps: ['types'], + deps: ['types.length'], fn: function() { if (this.types.length === 0) { return undefined; @@ -95,12 +97,65 @@ var Field = State.extend({ fields: FieldCollection }, initialize: function() { - var field = this; - this.types.on('add', function(type) { - type.values.on('add', function(value) { - field.values.add(value); - }); - }); + this.listenTo(this.types, 'add', this.onTypeAdded); + this.listenTo(this.types, 'remove', this.onTypeRemoved); + this.listenTo(this.types, 'reset refresh', this.onTypeReset); + }, + /** + * When new types are added, trigger a change event to recalculate `this.type` + * and add listeners so any operations on `type.values` are relfected on + * `this.values`. + * + * @oaram {Type} type that's being added. + * @oaram {TypeCollection} collection the type was added to. + * @param {Object} options + */ + onTypeAdded: function(type) { + /** + * Currently have to manually trigger events on collections so + * derived properties are recalculated at the right time. + * In this case, triggering `change:types.length` will cause + * the `type` property to be recalculated correctly. + */ + this.trigger('change:types.length'); + this.listenTo(type.values, 'add', this.onValueAdded); + this.listenTo(type.values, 'remove', this.onValueRemoved); + this.listenTo(type.values, 'reset', this.onValueReset); + }, + /** + * @see Schema#onTypeAdded + * + * @oaram {Type} type being removed. + * @oaram {TypeCollection} collection it was removed from. 
+ * @param {Object} options + */ + onTypeRemoved: function(type) { + this.trigger('change:types.length'); + this.stopListening(type.values, 'add', this.onValueAdded); + this.stopListening(type.values, 'remove', this.onValueRemoved); + this.stopListening(type.values, 'reset', this.onValueReset); + }, + onTypeReset: function() { + this.trigger('change:types.length'); + }, + /** + * @oaram {ValueCollection} collection the value was added to. + * @oaram {Value} value being added. + * @param {Object} options + */ + onValueAdded: function(collection, value) { + this.values.add(value); + }, + /** + * @oaram {ValueCollection} collection the value was removed from. + * @oaram {Value} value being removed. + * @param {Object} options + */ + onValueRemoved: function(collection, value) { + this.values.remove(value); + }, + onValueReset: function() { + this.values.reset(); }, _updateUndefined: function() { var newprob = this.count / this.parent.count; @@ -284,16 +339,18 @@ var Schema = State.extend({ schema.fields.map(function(field) { field.commit(); }); - done(); + if (_.isFunction(done)) { + done(); + } }, stream: function() { var schema = this; return es.map(function(doc, done) { - setTimeout(function() { + raf(function() { schema.parse(doc, function(err) { done(err, doc); }); - }, 500); + }); }); } }); diff --git a/package.json b/package.json index 8c5218b..227a30b 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,8 @@ "bson": "^0.3.1", "debug": "^2.1.3", "event-stream": "^3.3.0", - "lodash": "^3.8.0" + "lodash": "^3.8.0", + "raf": "^3.0.0" }, "devDependencies": { "mocha": "^2.0.1", From 662585869a68533eaf17f837f336ea1a89e646fb Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 15:59:43 -0400 Subject: [PATCH 55/79] add initial test for events make sure change:type fired correctly --- test/events.test.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 test/events.test.js diff --git a/test/events.test.js 
b/test/events.test.js new file mode 100644 index 0000000..3471100 --- /dev/null +++ b/test/events.test.js @@ -0,0 +1,18 @@ +var Schema = require('../').Schema; +var assert = require('assert'); + +describe('events', function() { + it('should fire a change:type event', function(done) { + var schema = new Schema(); + schema.fields.on('add', function(field) { + assert.equal(field.getId(), '_id'); + field.on('change:type', function(field, newType) { + assert.equal(newType, 'Number'); + done(); + }); + }); + schema.parse({ + _id: 1 + }); + }); +}); From f6ecc27d7a09a83689761b432c224c977f48a473 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 16:32:17 -0400 Subject: [PATCH 56/79] fix(schema): undefined values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test that values added to the field’s synched copy aren’t always undefined. Just an args out of order bug. HT @rueckstiess --- lib/schema.js | 4 ++-- test/values.test.js | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 test/values.test.js diff --git a/lib/schema.js b/lib/schema.js index 3f74cc2..a63ba66 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -143,7 +143,7 @@ var Field = State.extend({ * @oaram {Value} value being added. * @param {Object} options */ - onValueAdded: function(collection, value) { + onValueAdded: function(value) { this.values.add(value); }, /** @@ -151,7 +151,7 @@ var Field = State.extend({ * @oaram {Value} value being removed. 
* @param {Object} options */ - onValueRemoved: function(collection, value) { + onValueRemoved: function(value) { this.values.remove(value); }, onValueReset: function() { diff --git a/test/values.test.js b/test/values.test.js new file mode 100644 index 0000000..364e569 --- /dev/null +++ b/test/values.test.js @@ -0,0 +1,19 @@ +var Schema = require('../').Schema; +var assert = require('assert'); + +describe('values', function() { + it('should keep a simple collection in sync', function(done) { + var schema = new Schema(); + schema.fields.on('add', function(field) { + assert.equal(field.getId(), '_id'); + field.values.on('add', function(value, collection) { + assert.equal(value.getId(), 1); + assert.equal(value.value, 1); + done(); + }); + }); + schema.parse({ + _id: 1 + }); + }); +}); From 9284c3868369e6eeb841ea7d7d3f38023fcebf01 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 16:55:51 -0400 Subject: [PATCH 57/79] fix(schema): stub test for @rueckstiess --- ...sion-strings-have-same-probability.test.js | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test/regression-strings-have-same-probability.test.js diff --git a/test/regression-strings-have-same-probability.test.js b/test/regression-strings-have-same-probability.test.js new file mode 100644 index 0000000..e9e08cb --- /dev/null +++ b/test/regression-strings-have-same-probability.test.js @@ -0,0 +1,23 @@ +var getSchema = require('../'); +var assert = require('assert'); + +describe('regression', function() { + describe('strings have same probability', function() { + var docs = [ + // Add some object literals here + ]; + var schema; + before(function(done) { + schema = getSchema('probability', docs, function(err) { + if (err) return done(err); + done(); + }); + }); + + // Replace X, Y, and Z in `it` + it('should have a probability of `X%` for the field `Y` to be Z', function() { + // Replace X, Y, and Z below and uncomment it + // 
assert.equal(schema.fields.get('X').types.get('Y').probability, Z); + }); + }); +}); From 5d314cbb6056b37da30e7eaa483770d7e7afad9f Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 17:11:36 -0400 Subject: [PATCH 58/79] fix(schema): don't dedupe values. use cid instead --- lib/value.js | 5 ++++- test/events.test.js | 1 + ...sion-strings-have-same-probability.test.js | 21 ++++++++++++++----- test/values.test.js | 1 - 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/value.js b/lib/value.js index 4ff5629..735f5c4 100644 --- a/lib/value.js +++ b/lib/value.js @@ -11,6 +11,9 @@ module.exports = State.extend({ }, initialize: function(attrs) { this.value = attrs._id; - this._id = '' + attrs._id; + this._id = this.cid + '-' + attrs._id; + }, + valueOf: function() { + return this.value; } }); diff --git a/test/events.test.js b/test/events.test.js index 3471100..8f31ccc 100644 --- a/test/events.test.js +++ b/test/events.test.js @@ -5,6 +5,7 @@ describe('events', function() { it('should fire a change:type event', function(done) { var schema = new Schema(); schema.fields.on('add', function(field) { + assert.equal(field.getId(), '_id'); field.on('change:type', function(field, newType) { assert.equal(newType, 'Number'); diff --git a/test/regression-strings-have-same-probability.test.js b/test/regression-strings-have-same-probability.test.js index e9e08cb..a824ce5 100644 --- a/test/regression-strings-have-same-probability.test.js +++ b/test/regression-strings-have-same-probability.test.js @@ -4,8 +4,20 @@ var assert = require('assert'); describe('regression', function() { describe('strings have same probability', function() { var docs = [ - // Add some object literals here + { + _id: 1, + value: 'DUPE' + }, + { + _id: 2, + value: 'DUPE' + }, + { + _id: 3, + value: 'DUPE' + } ]; + var schema; before(function(done) { schema = getSchema('probability', docs, function(err) { @@ -14,10 +26,9 @@ describe('regression', function() { }); }); - // Replace 
X, Y, and Z in `it` - it('should have a probability of `X%` for the field `Y` to be Z', function() { - // Replace X, Y, and Z below and uncomment it - // assert.equal(schema.fields.get('X').types.get('Y').probability, Z); + + it('should not dedupe values but return all 3 of them', function() { + assert.equal(schema.fields.get('value').types.get('String').probability, 1); }); }); }); diff --git a/test/values.test.js b/test/values.test.js index 364e569..de68186 100644 --- a/test/values.test.js +++ b/test/values.test.js @@ -7,7 +7,6 @@ describe('values', function() { schema.fields.on('add', function(field) { assert.equal(field.getId(), '_id'); field.values.on('add', function(value, collection) { - assert.equal(value.getId(), 1); assert.equal(value.value, 1); done(); }); From 3146d0a3d80e2b1f539cad654df8138418012b58 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 17:27:18 -0400 Subject: [PATCH 59/79] fix(test): real assert --- test/regression-strings-have-same-probability.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/regression-strings-have-same-probability.test.js b/test/regression-strings-have-same-probability.test.js index a824ce5..690bbf7 100644 --- a/test/regression-strings-have-same-probability.test.js +++ b/test/regression-strings-have-same-probability.test.js @@ -28,7 +28,7 @@ describe('regression', function() { it('should not dedupe values but return all 3 of them', function() { - assert.equal(schema.fields.get('value').types.get('String').probability, 1); + assert.equal(schema.fields.get('value').types.get('String').values.length, 3); }); }); }); From 6a20cc888904762e50af6e6f8d703b5f4fa203eb Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Thu, 28 May 2015 17:46:47 -0400 Subject: [PATCH 60/79] fix sort order of types --- lib/type-collection.js | 5 ++++ test/mixed-type-order.test.js | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 
test/mixed-type-order.test.js diff --git a/lib/type-collection.js b/lib/type-collection.js index 85f96a7..928c20c 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -10,5 +10,10 @@ module.exports = Collection.extend({ } return new Klass(attrs, options); + }, + comparator: function(a, b) { + if (a._id === 'Undefined') return 1; + if (b._id === 'Undefined') return -1; + return a.probability - b.probability; } }); diff --git a/test/mixed-type-order.test.js b/test/mixed-type-order.test.js new file mode 100644 index 0000000..72c27a0 --- /dev/null +++ b/test/mixed-type-order.test.js @@ -0,0 +1,43 @@ +var getSchema = require('../'); +var assert = require('assert'); +var _ = require('lodash'); + +describe('mixed type order', function() { + var docs = [ + { + _id: 1, + registered: 1 + }, + { + _id: 2 + }, + { + _id: 3, + registered: '1' + }, + { + _id: 4, + registered: '1' + } + ]; + + var schema; + before(function(done) { + schema = getSchema('type.order', docs, function(err) { + if (err) return done(err); + if (!schema.fields.get('registered')) { + return done(new Error('Did not pick up `registered` field')); + } + if (!schema.fields.get('registered').types.get('Undefined')) { + return done(new Error('Missing Undefined type on `registered`')); + } + done(); + }); + }); + it('should have 3 types for `registered`', function() { + assert.equal(schema.fields.get('registered').types.length, 3); + }); + it('should return the order of types as ["String", "Number", "Undefined"]', function() { + assert.deepEqual(schema.fields.get('registered').types.pluck('_id'), ['String', 'Number', 'Undefined']); + }); +}); From 4e97b4d289b3db3523f7e36abffcbc5c6d274732 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 18:05:30 -0400 Subject: [PATCH 61/79] 2.0.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 227a30b..dc06c3c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { 
"name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "1.0.0", + "version": "2.0.0", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", From 9fbf436c72a7eda6eb885c0c16143dc2aba0c5da Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 18:28:55 -0400 Subject: [PATCH 62/79] fix(schema): sort types, yo. --- lib/schema.js | 6 ++++++ lib/type-collection.js | 10 ++++++---- test/mixed-type-order.test.js | 11 ++++++----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index a63ba66..6d2c809 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -184,6 +184,11 @@ var Field = State.extend({ undef.count = (this.total - this.count); undef.probability = (undef.count - this.count); }, + /** + * We've finished parsing a new document! Finalize all of the probabilities + * and make sure all of our child collections are nicely sorted. + * If we have any subfields, call `commit()` on each of those as well. + */ commit: function() { this._updateUndefined(); this.types.map(function(type) { @@ -191,6 +196,7 @@ var Field = State.extend({ type.unique = _.unique(type.values.pluck('value')).length; }.bind(this)); this.unique = _.sum(this.types.pluck('unique')); + this.types.sort(); if (this.fields.length > 0) { this.fields.map(function(field) { diff --git a/lib/type-collection.js b/lib/type-collection.js index 928c20c..66c27aa 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -11,9 +11,11 @@ module.exports = Collection.extend({ return new Klass(attrs, options); }, - comparator: function(a, b) { - if (a._id === 'Undefined') return 1; - if (b._id === 'Undefined') return -1; - return a.probability - b.probability; + /** + * Sort by probability descending, with Undefined always last. 
+ */ + comparator: function(model) { + if (model.getId() === 'Undefined') return 0; + return model.probability * -1; } }); diff --git a/test/mixed-type-order.test.js b/test/mixed-type-order.test.js index 72c27a0..ed2e906 100644 --- a/test/mixed-type-order.test.js +++ b/test/mixed-type-order.test.js @@ -4,10 +4,6 @@ var _ = require('lodash'); describe('mixed type order', function() { var docs = [ - { - _id: 1, - registered: 1 - }, { _id: 2 }, @@ -18,6 +14,10 @@ describe('mixed type order', function() { { _id: 4, registered: '1' + }, + { + _id: 1, + registered: 1 } ]; @@ -37,7 +37,8 @@ describe('mixed type order', function() { it('should have 3 types for `registered`', function() { assert.equal(schema.fields.get('registered').types.length, 3); }); - it('should return the order of types as ["String", "Number", "Undefined"]', function() { + it('should return the order of types as ["String", "Number", "Undefined"]', function(done) { assert.deepEqual(schema.fields.get('registered').types.pluck('_id'), ['String', 'Number', 'Undefined']); + done(); }); }); From 1974b13df0fa5296e5f47887880b9e759b47afe4 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 18:29:29 -0400 Subject: [PATCH 63/79] 2.0.1 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index dc06c3c..6f4c9ca 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "2.0.0", + "version": "2.0.1", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", From d0f8cf20bf4b45d3a9123aaeb9beb874a68bbf45 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 18:39:59 -0400 Subject: [PATCH 64/79] test for @rueckstiess to state what probabilities should be for embedded arrays --- test/basic-embedded-array.test.js | 13 ++++++++++++- 1 file changed, 12 
insertions(+), 1 deletion(-) diff --git a/test/basic-embedded-array.test.js b/test/basic-embedded-array.test.js index 5fe9450..2257aff 100644 --- a/test/basic-embedded-array.test.js +++ b/test/basic-embedded-array.test.js @@ -11,7 +11,10 @@ describe('basic embedded array', function() { }, { '_id': BSON.ObjectID('55582407aafa8fbbc57196e2'), - 'following_ids': [BSON.ObjectID('55581e0a9bf712d0c2b48d71')] + 'following_ids': [ + BSON.ObjectID('55581e0a9bf712d0c2b48d71'), + '55581e0a9bf712d0c2b48d71' + ] }, ]; @@ -20,6 +23,14 @@ describe('basic embedded array', function() { following = getSchema('following', docs, done); }); + it('should have 33% String for following_ids', function() { + assert(following.fields.get('following_ids').fields.get('__basic__').types.get('String'), 1 / 3); + }); + + it('should have 66% ObjectID for following_ids', function() { + assert(following.fields.get('following_ids').fields.get('__basic__').types.get('ObjectID'), 2 / 3); + }); + it('should serialize correctly', function() { assert.doesNotThrow(function() { following.toJSON(); From 8953f6a3b7e11e1897e53c126429c807de7d9d52 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 19:27:09 -0400 Subject: [PATCH 65/79] fix(schema): fix embedded array probability --- lib/schema.js | 69 +++++++++++++++++++------------ test/basic-embedded-array.test.js | 22 ++++++++-- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/lib/schema.js b/lib/schema.js index 6d2c809..c5779db 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -78,6 +78,10 @@ var Field = State.extend({ deps: ['count', 'probability'], fn: function() { if (this.probability === 1) return this.count; + var parentIsArray = this.collection.parent.lengths !== undefined; + if (parentIsArray) { + return _.sum(this.types.pluck('count')); + } return (this.count / this.probability); } }, @@ -157,40 +161,35 @@ var Field = State.extend({ onValueReset: function() { this.values.reset(); }, - _updateUndefined: function() { - 
var newprob = this.count / this.parent.count; + /** + * We've finished parsing a new document! Finalize all of the probabilities + * and make sure all of our child collections are nicely sorted. + * If we have any subfields, call `commit()` on each of those as well. + */ + commit: function() { + var newprob; + var parentIsArray = this.collection.parent.lengths !== undefined; + newprob = this.count / this.parent.count; if (newprob !== this.probability) { this.probability = newprob; } var undef = this.types.get('Undefined'); - if ((this.total - this.count) <= 0) { - if (undef) { - debug('removing extraneous Undefined for `%s`', this.getId()); - this.types.remove({ - _id: 'Undefined' + if ((this.total - this.count) <= 0 && undef) { + debug('removing extraneous Undefined for `%s`', this.getId()); + this.types.remove({ + _id: 'Undefined' + }); + } else { + if (!undef) { + debug('adding Undefined for `%s`', this.getId()); + undef = this.types.add({ + _id: 'Undefined', + unique: 1 }); } - return; + undef.count = (this.total - this.count); + undef.probability = (undef.count - this.count); } - - if (!undef) { - debug('adding Undefined for `%s`', this.getId()); - undef = this.types.add({ - _id: 'Undefined', - unique: 1 - }); - } - - undef.count = (this.total - this.count); - undef.probability = (undef.count - this.count); - }, - /** - * We've finished parsing a new document! Finalize all of the probabilities - * and make sure all of our child collections are nicely sorted. - * If we have any subfields, call `commit()` on each of those as well. 
- */ - commit: function() { - this._updateUndefined(); this.types.map(function(type) { type.probability = type.count / this.total; type.unique = _.unique(type.values.pluck('value')).length; @@ -230,6 +229,20 @@ var EmbeddedArrayField = Field.extend({ type: { type: 'string', default: 'Array' + }, + lengths: { + type: 'array', + default: function() { + return []; + } + } + }, + derived: { + average_length: { + deps: ['lengths'], + fn: function() { + return _.sum(this.lengths) / this.lengths.length; + } } } }); @@ -294,6 +307,8 @@ function onEmbeddedArray(schema, _id, type_id, value) { } field.count += 1; + field.lengths.push(value.length); + field.trigger('change:lengths'); _.each(value, function(d) { var type_id = Type.getNameFromValue(d); if (type_id === 'Object') { diff --git a/test/basic-embedded-array.test.js b/test/basic-embedded-array.test.js index 2257aff..cd16420 100644 --- a/test/basic-embedded-array.test.js +++ b/test/basic-embedded-array.test.js @@ -1,9 +1,11 @@ var getSchema = require('../'); var assert = require('assert'); var BSON = require('bson'); +var _ = require('lodash'); describe('basic embedded array', function() { var following; + var following_ids; var docs = [ { '_id': BSON.ObjectID('55581e0a9bf712d0c2b48d71'), @@ -20,15 +22,29 @@ describe('basic embedded array', function() { ]; before(function(done) { - following = getSchema('following', docs, done); + following = getSchema('following', docs, function() { + following_ids = following.fields.get('following_ids').fields.get('__basic__'); + done(); + }); + }); + it('should have 2 lengths for following_ids', function() { + assert.deepEqual(following.fields.get('following_ids').lengths, [1, 2]); + }); + + it('should have an average length of 1.5 for following_ids', function() { + assert.equal(following.fields.get('following_ids').average_length, 1.5); + }); + + it('should have a sum of probability for following_ids of 1', function() { + assert.equal(_.sum(following_ids.types.pluck('probability')), 
1); }); it('should have 33% String for following_ids', function() { - assert(following.fields.get('following_ids').fields.get('__basic__').types.get('String'), 1 / 3); + assert.equal(following_ids.types.get('String').probability, 1 / 3); }); it('should have 66% ObjectID for following_ids', function() { - assert(following.fields.get('following_ids').fields.get('__basic__').types.get('ObjectID'), 2 / 3); + assert.equal(following_ids.types.get('ObjectID').probability, 2 / 3); }); it('should serialize correctly', function() { From 2932251c46ebda539c593b5cff45e07df885da8b Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 19:31:51 -0400 Subject: [PATCH 66/79] 2.1.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6f4c9ca..7adb621 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "2.0.1", + "version": "2.1.0", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", From b7920589904d21fa7af02b1fd3ce95e4e34838fe Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 28 May 2015 19:41:37 -0400 Subject: [PATCH 67/79] update todos --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 170bcbe..e439e77 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,6 @@ Infer probabilistic schema of javascript objects or a MongoDB collection. 
## Todo -- [ ] fix probabilities for "evolving case" - ### Punted - [ ] update bin/mongodb-schema.js to do something real From d65ca1135c75a8b04f9b94452874019f5e3e898c Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Tue, 9 Jun 2015 13:29:52 -0400 Subject: [PATCH 68/79] fix(type): Add Long type closes #18 --- lib/type.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/type.js b/lib/type.js index fb416b2..8113e62 100644 --- a/lib/type.js +++ b/lib/type.js @@ -58,6 +58,14 @@ exports.Number = Type.extend({ } }); +exports.Long = Type.extend({ + props: { + _id: { + default: 'Long' + } + } +}); + exports.Null = Type.extend({ props: { _id: { From 5537c9abf826f8bdae112a77eb026e3199038911 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Tue, 9 Jun 2015 13:30:15 -0400 Subject: [PATCH 69/79] 2.1.1 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 7adb621..74aea30 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mongodb-schema", "description": "Infer the probabilistic schema for a MongoDB collection.", - "version": "2.1.0", + "version": "2.1.1", "author": "Thomas Rueckstiess ", "license": "MIT", "homepage": "http://github.com/mongodb-js/mongodb-schema", From c52a58ffd6eb711c9258043b06fefa25d482af49 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Tue, 16 Jun 2015 17:32:10 -0400 Subject: [PATCH 70/79] fix(project): license -> apache 2 --- LICENSE | 222 ++++++++++++++++++++++++++++++++++++++++++++++----- package.json | 2 +- 2 files changed, 202 insertions(+), 22 deletions(-) diff --git a/LICENSE b/LICENSE index a346f50..8dada3e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,201 @@ -The MIT License (MIT) - -Copyright (c) 2015 Thomas Rueckstiess - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation 
the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/package.json b/package.json index 74aea30..0c26802 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "description": "Infer the probabilistic schema for a MongoDB collection.", "version": "2.1.1", "author": "Thomas Rueckstiess ", - "license": "MIT", + "license": "Apache2", "homepage": "http://github.com/mongodb-js/mongodb-schema", "repository": { "type": "git", From bc06be9cbd60b8ca47140cb760fec0464c57a4cf Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 11:15:26 -0400 Subject: [PATCH 71/79] fix(schema): more docs + fix perf --- README.md | 28 +++++++++++--- lib/schema.js | 103 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 101 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index e439e77..1ce2b76 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,28 @@ # mongodb-schema -Infer probabilistic schema of javascript objects or a MongoDB collection. +[![build status](https://secure.travis-ci.org/mongodb-js/mongodb-schema.png)](http://travis-ci.org/mongodb-js/mongodb-schema) +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -## Todo +Infer a probabilistic schema for a MongoDB collection. 
-### Punted +## Example -- [ ] update bin/mongodb-schema.js to do something real -- [ ] http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#enumerated-values +```javascript + +``` + +## Installation + +``` +npm install --save mongodb-schema +``` + +## Testing + +``` +npm test +``` + +## License + +Apache 2.0 diff --git a/lib/schema.js b/lib/schema.js index c5779db..df0f9eb 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -2,14 +2,26 @@ var es = require('event-stream'); var _ = require('lodash'); var raf = require('raf'); +var debug = require('debug')('mongodb-schema'); + var Collection = require('./collection'); var State = require('./state'); var Type = require('./type'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); -var debug = require('debug')('mongodb-schema'); -var FieldCollection = Collection.extend({}); +/** + * Container for a list of Fields. + */ +var FieldCollection = Collection.extend({ + model: function(attrs, options) { + return new attrs.klass(attrs, options); + } +}); + +/** + * Like a property. + */ var Field = State.extend({ props: { /** @@ -26,18 +38,20 @@ var Field = State.extend({ type: 'number', default: 0 }, + /** + * Probability of this field being set. + */ probability: { type: 'number', default: 0 }, + /** + * Number of unique values seen. + */ unique: { type: 'number', default: 0 }, - /** - * Title, description and default from JSON Schema: - * http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata - */ /** * If using shortened keys to save space, it is expected this be the "real" * name of the field that could be input by the user. For example, @@ -50,7 +64,16 @@ var Field = State.extend({ return this._id; } }, + /** + * Place holder for future annotation of the default value for this field. 
+ * @see http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata + */ default: 'any', + /** + * User input annotation about what this field does. + * + * @see http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata + */ description: 'string', }, session: { @@ -60,7 +83,7 @@ var Field = State.extend({ /** * The most common type seen for this field. * - * http://spacetelescope.github.io/understanding-json-schema/reference/type.html + * @see http://spacetelescope.github.io/understanding-json-schema/reference/type.html */ type: { deps: ['types.length'], @@ -74,6 +97,9 @@ var Field = State.extend({ return this.types.pluck('_id'); } }, + /** + * The total number of documents we would see if always set. + */ total: { deps: ['count', 'probability'], fn: function() { @@ -85,6 +111,11 @@ var Field = State.extend({ return (this.count / this.probability); } }, + /** + * Does this field contain any duplicate values? + * @returns {Boolean} + * @prop + */ has_duplicates: { deps: ['unique', 'count'], fn: function() { @@ -93,13 +124,22 @@ var Field = State.extend({ } }, collections: { + /** + * Types seen for this field. + */ types: TypeCollection, /** * A sample of values seen for this field. */ values: ValueCollection, + /** + * Child fields. + */ fields: FieldCollection }, + /** + * @constructs Field + */ initialize: function() { this.listenTo(this.types, 'add', this.onTypeAdded); this.listenTo(this.types, 'remove', this.onTypeRemoved); @@ -110,8 +150,8 @@ var Field = State.extend({ * and add listeners so any operations on `type.values` are relfected on * `this.values`. * - * @oaram {Type} type that's being added. - * @oaram {TypeCollection} collection the type was added to. + * @param {Type} type that's being added. + * @param {TypeCollection} collection the type was added to. 
* @param {Object} options */ onTypeAdded: function(type) { @@ -129,8 +169,8 @@ var Field = State.extend({ /** * @see Schema#onTypeAdded * - * @oaram {Type} type being removed. - * @oaram {TypeCollection} collection it was removed from. + * @param {Type} type being removed. + * @param {TypeCollection} collection it was removed from. * @param {Object} options */ onTypeRemoved: function(type) { @@ -143,16 +183,16 @@ var Field = State.extend({ this.trigger('change:types.length'); }, /** - * @oaram {ValueCollection} collection the value was added to. - * @oaram {Value} value being added. + * @param {ValueCollection} collection the value was added to. + * @param {Value} value being added. * @param {Object} options */ onValueAdded: function(value) { this.values.add(value); }, /** - * @oaram {ValueCollection} collection the value was removed from. - * @oaram {Value} value being removed. + * @param {ValueCollection} collection the value was removed from. + * @param {Value} value being removed. * @param {Object} options */ onValueRemoved: function(value) { @@ -168,17 +208,19 @@ var Field = State.extend({ */ commit: function() { var newprob; - var parentIsArray = this.collection.parent.lengths !== undefined; newprob = this.count / this.parent.count; if (newprob !== this.probability) { this.probability = newprob; } var undef = this.types.get('Undefined'); - if ((this.total - this.count) <= 0 && undef) { - debug('removing extraneous Undefined for `%s`', this.getId()); - this.types.remove({ - _id: 'Undefined' - }); + if ((this.total - this.count) <= 0) { + if(undef){ + debug('removing extraneous Undefined for `%s`', this.getId()); + this.types.remove({ + _id: 'Undefined' + }); + } + // No undefined types to manage') } else { if (!undef) { debug('adding Undefined for `%s`', this.getId()); @@ -221,9 +263,16 @@ var Field = State.extend({ /** * A basic field has no descendant fields, such as `String`, `ObjectID`, * `Boolean`, or `Date`. 
+ * @class + * @implements Field */ var BasicField = Field.extend({}); +/** + * An embedded array of Fields. + * @class + * @implements Field + */ var EmbeddedArrayField = Field.extend({ props: { type: { @@ -247,6 +296,10 @@ var EmbeddedArrayField = Field.extend({ } }); +/** + * @class + * @implements Field + */ var EmbeddedDocumentField = Field.extend({ props: { type: { @@ -256,10 +309,6 @@ var EmbeddedDocumentField = Field.extend({ } }); -FieldCollection.prototype.model = function(attrs, options) { - return new attrs.klass(attrs, options); -}; - function onFieldSampled(schema, _id, value) { var type_id = Type.getNameFromValue(value); if (type_id === 'Array') { @@ -337,6 +386,10 @@ function onEmbeddedDocument(schema, _id, type_id, value) { }); } +/** + * The top level schema state. + * @class + */ var Schema = State.extend({ idAttribute: 'ns', props: { From d64bb524dc4b0c8c91ca0ab8994cb082950c1f79 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 11:22:53 -0400 Subject: [PATCH 72/79] working on examples --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ce2b76..5654291 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,25 @@ Infer a probabilistic schema for a MongoDB collection. 
## Example ```javascript - +var schema = require('mongodb-schema')(); +var connect = require('mongodb'); + +var parser = schema.stream() + .on('error', function(err){ + console.error('Error parsing schema: ', err); + }) + .on('data', function(doc){ + console.log('schema updated for doc', doc); + }) + .on('end', function(){ + console.log('schema looks like:', schema); + }); + +connect('mongodb://localhost:27017/test', function(err, db){ + if(err) return console.error(err); + + db.test.find().stream().pipe(parser); +}); ``` ## Installation From 85bfbaa19182b3e0116d5ecbea5833be3f42df87 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 15:26:57 -0400 Subject: [PATCH 73/79] just lots of cleanup and docs --- .coveralls.yml | 1 + .travis.yml | 2 + README.md | 120 ++++++++-- docs/mongodb-schema_diagram.md | 36 +++ docs/mongodb-schema_diagram.png | Bin 0 -> 71622 bytes docs/new_schema_repr.js | 252 --------------------- docs/schema_spec.md | 387 -------------------------------- examples/parse-schema.js | 13 ++ examples/scout.js | 52 ----- examples/simple.js | 24 -- lib/field-collection.js | 11 + lib/{schema.js => field.js} | 162 +------------ lib/index.js | 28 ++- lib/parser.js | 82 +++++++ lib/type-collection.js | 7 +- lib/value.js | 3 - package.json | 7 +- 17 files changed, 278 insertions(+), 909 deletions(-) create mode 100644 .coveralls.yml create mode 100644 docs/mongodb-schema_diagram.md create mode 100644 docs/mongodb-schema_diagram.png delete mode 100644 docs/new_schema_repr.js delete mode 100644 docs/schema_spec.md create mode 100644 examples/parse-schema.js delete mode 100644 examples/scout.js delete mode 100755 examples/simple.js create mode 100644 lib/field-collection.js rename lib/{schema.js => field.js} (68%) create mode 100644 lib/parser.js diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..aa4361b --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1 @@ +repo_token: zcUOJV55Tjfuf82xwdXBlaztt3H1RawBf diff --git 
a/.travis.yml b/.travis.yml index 12476a9..be3513f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,3 +8,5 @@ notifications: on_success: change on_failure: always on_start: false +script: "npm run-script ci" +after_script: "cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js" diff --git a/README.md b/README.md index 5654291..477c9ea 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,98 @@ # mongodb-schema [![build status](https://secure.travis-ci.org/mongodb-js/mongodb-schema.png)](http://travis-ci.org/mongodb-js/mongodb-schema) +[![Coverage Status](https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg)](https://coveralls.io/r/mongodb-js/mongodb-schema) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Infer a probabilistic schema for a MongoDB collection. ## Example -```javascript -var schema = require('mongodb-schema')(); -var connect = require('mongodb'); - -var parser = schema.stream() - .on('error', function(err){ - console.error('Error parsing schema: ', err); - }) - .on('data', function(doc){ - console.log('schema updated for doc', doc); - }) - .on('end', function(){ - console.log('schema looks like:', schema); +`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples so we'll install the node.js driver. As well, we'll need some data +in a collection to derive the schema of: + +1. `npm i mongodb mongodb-schema`. +2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test` +3. 
Paste the below into `parse-schema.js`: + ```javascript + var parseSchema = require('mongodb-schema'); + var connect = require('mongodb'); + + connect('mongodb://localhost:27017/test', function(err, db){ + if(err) return console.error(err); + + parseSchema('test.test', db.collection('test').find(), function(err, schema){ + if(err) return console.error(err); + + console.log(JSON.stringify(schema, null, 2)); + db.close(); + }); }); + ``` +4. When we run the above with `node parse-schema.js`, we'll see something + like the following: + ```json + { + "//": "The number of documents sampled", + "count": 4, + "//": "A collection of Field objects", + "//": "@see lib/field.js", + "fields": [ + { + "_id": "_id", + "//": "Just as we expected, all 4 documents had `_id`", + "probability": 1, + "//": "All 4 values for `_id` were unique", + "unique": 4, + "//": "The only type seen was a Number", + "types": [ + { + "_id": "Number", + "probability": 1, + "unique": 4 + } + ] + }, + { + "_id": "a", + "//": "Unlike `_id`, `a` was present in only 3 of 4 documents", + "probability": 0.75, + "//": "Of the 3 values seen, all 3 were unique", + "unique": 3, + "//": "As expected, Boolean, String, and Number values were seen", + "//": "A handy instance of `Undefined` is also provided to represent missing data", + "types": [ + { + "_id": "Boolean", + "probability": 0.25, + "unique": 1 + }, + { + "_id": "String", + "probability": 0.25, + "unique": 1 + }, + { + "_id": "Number", + "probability": 0.25, + "unique": 1 + }, + { + "_id": "Undefined", + "probability": 0.25, + "unique": 0 + } + ] + } + ] + } + ``` -connect('mongodb://localhost:27017/test', function(err, db){ - if(err) return console.error(err); +### More Examples + +`mongodb-schema` is quite powerful and supports all [BSON types][bson-types]. +Checkout [the tests][tests] for more usage examples. 
- db.test.find().stream().pipe(parser); -}); -``` ## Installation @@ -44,3 +109,22 @@ npm test ## License Apache 2.0 + +## Contributing + +Under the hood, `mongodb-schema` uses [ampersand-state][ampersand-state] and +[ampersand-collection][ampersand-collection] for modeling [Schema][schema], [Field][field]'s, and [Type][type]'s. + +A high-level view of the class interactions is as follows: + +![](./docs/mongodb-schema_diagram.png) + + + +[bson-types]: http://docs.mongodb.org/manual/reference/bson-types/ +[ampersand-state]: http://ampersandjs.com/docs#ampersand-state +[ampersand-collection]: http://ampersandjs.com/docs#ampersand-collection +[tests]: https://github.com/mongodb-js/mongodb-schema/tree/master/test +[schema]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/schema.js +[field]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/field.js +[type]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/type.js diff --git a/docs/mongodb-schema_diagram.md b/docs/mongodb-schema_diagram.md new file mode 100644 index 0000000..0205032 --- /dev/null +++ b/docs/mongodb-schema_diagram.md @@ -0,0 +1,36 @@ +``` +[FieldCollection] +[Schema|ns: String The collection's namespace|parse(doc) Figure out the the fields for this sampled doc|fields: FieldCollection] --> [mongodb-schema#field] + +[ mongodb-schema#field| + [Field| + _id: String; + count: Number; + probability: Number; + unique: Number; + title: String; + default: *; + description: String + | + values: ValueCollection + | + types: TypeCollection + | + fields: FieldCollection + ] + [Field]-->[Field#values] + [BasicField] -:> [Field] + [EmbeddedDocumentField]-:> [Field] + [EmbeddedArrayField|lengths: int;average_length: int]-:> [Field] + + [Field#values| + [ValueCollection]+-> 1..*[Value|_id: String|value: *] + ] + [Field#values] o-> [Field#types] + [Field#types| + [Type|_id: String;count: Number;probability: Number;unique: Number|values: ValueCollection] + 
[TypeCollection] + [TypeCollection]+-> 1..*[Type] + ] +] +``` diff --git a/docs/mongodb-schema_diagram.png b/docs/mongodb-schema_diagram.png new file mode 100644 index 0000000000000000000000000000000000000000..c7003b46ffa0c016c59f5c18899977dc1920b274 GIT binary patch literal 71622 zcmZVmbyQZ(`v(e(ARyh{DM|{`-Q7roba%H1h;)Z^mr_#F4bpIvQqo9w*O}Yr`+MKB z)>->6cHJ|xXRf(EaS^7hD20kdg!Js$GgKLAan)zfU^1UQgS9|}2WNI=Le8H(qj)AG zE~4&fu&s4%5aIUuIR6xhkvBXL8KvkF3T$e zU*}$c_}`g=e4l?0!AMXHU?@Xm-j>u+|MwagTGA!he-FKSF7x?gJlaQVjn|5y*gCX? zyYq42T?Gnod3l@%e2yFWwLCZUHSZxX%5~Pg3W^;FR$kP97jeNmtuW>qo*dHzJtogL zqL|EM6_K|;Tkb5rvU1P5%mYoZf)5l6jp_VW**>f(LqEdkvyZI-nNY|9m%_q6#*YU4 zEh{i7GEkGDXEWrp)5Vl-MtrsNYDawG;q^|zQn#>01tQH0&?V?0b5dkQ4jN4H@0M3P ze~qB8fi^uF9E$O+_6!aAYQ>{#k3k5pG=69eW(G$uQi4vafWzsFCfg2z)Ap#od?p+E zP@8sy{MWZ#v(0?V-Zg`2uwmihqDTH>C$l^3sl+^JJCH)T+fM74Qg|>*;K)=z<6RKM z#2P4>z^*o5qN}z4O|U&?D8Ag{HC|+95>dd;=*U>{=NchUthLi3$fPm#Kd>vyn}Up za3!<~$skAP!b#Hg>_PdiQ=^h~wA_no==&&Sd+kqLq?jFpRGE|%5OChf=h>=53l~Mi zqB{L^qFcFTE<)>S3GXIqGvt2Er04kYmFmwAM0#}LP$Dt(9R&FsssRa2FB zg!8qwpT29?2qjnaMx@p|@dg;+qu0@Ye=;>FuDavUSMjo9oTl^;8}=jv0r! zPrrUgV6xVdD`s#W#>AEyYr0@w+|0K8;YS^6k}852$$! 
ze{(7RD^;dMYgHre$9-r>-usc5mBX!gNlh9%dENhp$*yG&Z`wSa^GMv$67KL%z(@QD zoUT-ifLgmhuot_&@G0ho1MG?6B?fm7G1#eZDGBHmvp)Ou3lb3i4c&Y*^I@o7-_d^N zSqig0=73pG)gyPnDppsYQg_SO)$8r-bj!_o(ntT+9&_=Rd-HX~1nkzV_J0~9x}~C5 z&v#t3ewxxEwZt0wYhLK@E+zgq;YzO_>aEOal9&a2o$kjHs0v+o1v{vAGWY^i5Lc&m zY8(&7hs1e>_LtE;H1Xq1-_co$;48z5NDP7nqIX^R zc8lI?^A~%T3uO##`ZX}Wf0%1Ps;#C~?P>q}I*E6WeLfpkhf6_n>CBGyx45T5+4#9e z2~J1Le&ULxQPKLh$e8E5tASDJ_4yKKpS7wiLog)dAvgBxc-6nJJw3YRSn&Gd@`cu0 zi5!g1lF75EGF7PWk9BS|Sxv7k^?rJ4XJBRMdi3>^w?@oo2Sk$j!3)LJt3$%Bnu>xE zl3WwPq1u&hz0IcD^p)F;6OZZUy-pEkd%z#&S69136Kw=zSpvj#QRIPP2^K;UyfmTD zU!3MUJQ-b|^)Oh4URWrQN0cwPf0Yh3a46Sr?xr9WgAfWhW6&vN2KCBm4%CDow`eVo=HWFy%%(;2AV?u@;{I!hh zO@>^r8K<2I-xlEC#W6knhE&Wr$%FU@JxI=$rc@Z7mty_jlD)CsE5L#27yoY=_Oty_ zAWfo*eU15FK#)nQtXGu9bo}pef?`V~L_T!Z$*?dV1uH+2wbVvRAcZ?^wnV(=^W zwSzA18N6n2FPcHfDx!DjP%F3YZHL*YMoXFb7qcmhKv<$dX;(jStt-Q2fxTwAH7QF? zQWOOtn*d`43~=shMdD*9pOG3?_VU|59^r7Z|@ijC@7nKR^pBNQG}N% zgRrSBPT(r7z7QJ6g=@dyzux)#Ss5}R@Go?RfAp?=K&X`EeZ51oJ5&_)=6U!tbqL!6 z9CRqX>7b8LQRl=*%koatr_Q@#-9$}p;RbR}n0l9j=Q{I)i{P-?Bf1nPHW->CNxqB&9U2l~ycpk67obspKl{UnUp%4*$!s;~?eUozSHFeTq5x%+Yye+vZ`oeMV z(k|nEkAv6UGAij?TaZnlWW%w8dwin%Lv$tS*M@y=wKb#FD=c*w)~SQMX109nb4iGz zboIVOy_&??bnJ6S+^Q&iWwIZSf!)F>#>;$<_S~E|w{Og3^Sr((#uM=}L#De!(N(_L zBQt&#ddcIm)ln#?>3DLQ z6|eEx3*@$su@pj$UM|#Lckmx&I`0uKyL27YJG*+{oE-8wX{hvj{$ApPxKzks_jnb>vIMusI4V*)Bc8&iFM*JZ>HC ze(DNAUIC$4H9LMSwTj^srGNKmR*t6OfbYt9a*H&+#BT3$>F>q_kz4!su)C`xlV8Vd z1RD$3niYBy0qw#Ak2{Os10yG-3;J-T#bEn#L~nj*)t>A68k~u_?(M%$9nETnV>f4t zKXJXZjo_^l>byZtDG)p{kyM6mB{~@EEna7-m&mhaN>91hcSkmF$&{{=Aw`rgT+kAz zKNZ^5_csn_w#ye7A1!CnYG)KaY6r|m-lR5vXq+Mxa{nn^KQl97PhZAqiu*6(q|1b= ziVnH!hnV`dG*dGArjaZ%kF<;0G>{$d&X7qU;dg=fr<`{k;b=AaLhzv}*EmE|!LKT6 z#(rtj@n~rh`)Jk^d03UlYgccv(FcEoXcpFvY(KOeopcJ}1@cB{1DFsZEGf*Gdj#`7 zmy*calAM)85YR|_3yv|Vw0meWiJumO)1@5b3GhEIHad5X)Zl+cJ$!?}I_;F( z85Aq%$GgAmgK^aM0MF~X8xcqCt5TpWV*C3Nmf5f|oRf)|n!+-JWaYuzqpP!9itVlI z^DW8GCU>I(1n<1_zT<18sF$8x=`}k4BH<1Y)Xno$;V3ag?vH*5A50odmk!-n*_uF( 
zBASvsU=ftOGJrKxF!U5mD=`C6?#LWS-BK@JCZilA9|!AwM`}% zkITX6`XyE*jw)njv9@hLs)BT3;P!k+2<6?f7pz08wnkMtJ2i@CrM}c?#%~Zl^wpjW zrz=AQVL#`N;%Bc!=#cB~2b|65fAYHiYXnWx;+Xw-7n|JfzV2CGBu*i@OF%c9esc&L z+CE`|m^f4IQwJtUILU|I#?l`K+DAK@J?Ot#zh3^^uKz&VMK8Vv(yQ)fB@1YZ_p8kA z8y)LyZg98mYjjB$hQIz)`{j&dfAlMpWh!enf%IGc1}&hmn#sf)2yMp*x-D) znzpf=C_>&ELBX$E7)ft9alf$kbbJytAJKn}@GTdM&;Opr`kGyV@^HfIIQU++Z-d>Wr)W$;Qi|Uk&2G55l zC0qp6Ly79MWz@DQG)GA+Pu=w6hNT@X#tb?NN^3%fzhtXq9}v8jFyNQYh^xU1D-e&* z!x4YCdW)(nJ}o1>O^MA`RlRGZDk*P~LZk)LT)_ln$9h2lOOod%~ei*9Lf+t1&)jl=C zO`pQT=7*e-Zf?C|1(Y~qFL_8c2mErh&*e$~gh~4^-`(k(;esF4lDzP_Cm%R&v#>BD zSH@rF5&0J1?&au2b6q-Dw%Mv8>%8sjzmLKq?I-4;oub+~HgEGjko?))_={4pPIX|d zx5{b=EknSAGA$}19`_|1-42JH9=?J;KbxN_;^~&3;_>cyBROwVL%djekN8`zhBsg3 zvU@@|MBcqWNeQQNOl^>%wQB22Z2RllIVAGHo1H7Mz_Ry!S`!lhQ@o>YSBxR%cl+I~)?3#4MKe`SL4HBP2goYr^>$eM#$}85!PQ)&O}1yD ziqO-gu7*mEu)i94^A(BsCCP8)CR4Wk-#*f|ge?+Be2(J#TmT12WNs`r)eMZ=Bkvq{ z6qbM9B`aHzP4;}*Yv6st;PF8&KAIMi8yE09+-B zV_0gzB_o8gem}xSN-Cm4;9&zRmgfu`m#VPQzYT^i6jrP$FIYAdMb=eaz0v_9 zbSj2nhoAr(6Ypc+2p`1YT z5vW{>h=|ynTkg4{7HJkTh4!)W9q@>1N-wFjwbNh^G}U=V(wcjMU!wl4HBs4= z82A)Yu_XUp%H%Ovrl!YSR7sHNmZ6uxT7c{aqyIy5eqXc0AZd&B*3s<+G2|(BHFvEXrw_-ZBgKcfC)n(Qn z<(>;t#kRcG8<9pL`|2VRne_E8?|k*o?7yAA-XS8NNX>lLX1dH46odcia~aqAtE*M( zhdA3e5RnT&LE3Gq~W}zgr3G4hgW2=*~3t}Ke@|Git;s4?g`JM;>(UILc;bqj1fB) zNpjbzocy4|b^#lbWdwM1dK4s z-Rc3l+)D33DT+3T!(g`8Zbl$9qc5Y%oVrQd{f-kj74$zgWY;U(jV}CS_rD*)wo7>b zO~>iYMw~(>#0?)Dl}z)OjPlnw?De;QJGg+Hti|kMBw20>ZLnOxF#R+qW#UC%DhI1n&W?E!|a9E8=^Ps#D4IF`b z3g(VRE1gPCt!0z-Kbk2v6|sc6GvM#*oo9fC8lF{Hcn_#9D7Rwi06Ia21O;5F|D$$j zwup=D0H_h>_#e1CSN=cZ3w%r?nNXvHMiC+3rvpos1j2~Do&mLZE_&vPL>B#~PePCU zVY?2JN=BKc4!UpMa12`*QB!z=Q{|(19M;rQQx5^$_bk7E^DCKYk3IcMdF9XH?TLJ; z_)i&xvT{j`c-fRvK6JTfO2^M^Z7G-us%3O{7gYO`R~Y~=#m%@Atdtt)>vbk=ZF*+R zU{cJKo=G#)VpeR)P<%t;GAKT35NWsIAp@xdX-EpsPN*sYCN7=_BC;afcS3zYxD^!W( z7(HrvXrf6pv9_kuH#x1@|6L>VgG1^Ki3VD-sEtQSY%pqf4 zNo!lWQg)X53ZXm8gBjRIe*@1_%Q)^BF6J|fitKFvM}_!xvtO~427bTKa#IJczSr3y 
z7l9tlpg&bW*ADv*>&xT@rH=(jS??^WO<%rqp0QnM(^XYcgR|%xWhPx%b5c*73MiCk zkdY9uKUaq0!pMkJ=9goslnDq;H#JMmKUX8ho-BCxJzX{q5>*FRa&xb5QvQSlr}jh= zzH$KM5Sx+OF~Bd7qIN?T^>S4av^p#HEtA7foAJ69_j9YFH0ceGA00FuVr%F6U~EmN zTtaQLtEmb5vv_!C!h8SGKM>0HaC6ywe9vq+r1hn{s-t?Y)~vX8XJ$tGxHk9% zAgvW4Ms*rib5S#R^-&h^O^MkpSUzidc&KcjtV^{a%bVF(jBfJby!FNcutb3wx%>Xk zcq}WZx{VL_O=5yal(to7I;Suo4X^j6v?=Wc$4>13R3ls+{YklSw7SIAY2xSt`SdUU zbYdRG+tf6kC?q_+>>B6^jpT5hZTW|YgCxyu{ zh*~P3FI6R|dTrsNMD^y~#%L0Dmi}#iObWdLgPN+U7<7tX(ZJ$$E zJ%Hc0-VD4T7gMY~!E%VV4p)+25RA@ZyN?L+Xm(1N@z*H>s6}$?AMmHn&=VG+CCO&-P3v zE$z)=D*`5)6`i>}TJH|~_QZk@;w$YqZW&%X9ZvtP+XP0Usmc(Oet(Hg*-fT3-@1%a{r0e#b?5^${_0Y3 zjo#-)zRS2F{UZ6V@&#g_BEnc$Tyjn5g)S6N^f?1q7+XEQ*SOu;M`*==X9x_1M;8TH z(bTRH#MERn;Ro)i!T#Kqn|DqpJ&^;Hd00FFk5uz+$6fcoF`c=|1TNoLzS@e)#zH`U zH?ytK?wI@BIrUVcOg%Rv7U2Mn=1m(0QzkKtqM|eQ)x{xQ=-9;`*LYWB?jpArcQn$u z*hF54LJvR)UrJUYFJ;I`lxCwVzGSpwT(o@;yBe9Y{M^*@6DF-)FOdiUF!(}Qy#A$T zkBM)g2)+mi$YJA_%k$-YTjJ5(BLVh#V%z@4_`DvESn#NXAq?1ACFR31{l3fcjr+@` zm5ZWY9c)RQ)1II8dng2CW$wv`BDEtngjWJ|cY~}mmLEPN39?Pw^hDHSldaXV^bEJj zSiJ^d&NE-9&?0&W%Rt>r#d>uP^+LmtMX8ZSOGipJQ<6eMR`XR_H^|E$eL_s?7C8&)HWd}{!tBirRXD0IcbUb5p%4Ezb!Df z#=^fOHFJ`yIQ8{anm>yy7M zqMxd9lUxA_o={-opBm>n@39~Bo4sp=ofUBh!jtR2G{(m1Ted?tayv=)nL@LI1Tr?F z+rZ}V_;%Q^_Bo{pERYAEEvy+Nl?)0(x0IcQp9Nws4dE-rz5ev{;u0(kih-d45KK8-Ly!5c_AixPIef2!lE8Tuq zl6;XKkjFSJj7GMK`)s7BwwXc;T$&c;clPX`#OONe>|8jk)amE>`uqfpAdLG$o^USa zk=XI!2TQASUY#|;q)+A;HAYuRC}=4|21$ds0KmxmGLY6=E?ks)3}(0*`y5I00;Wo* zC%&LNV!MV!0m}8SiSs6MTeqAQ&Cy7c5Odk}?G?k~b%_q_;D>#q}IIX{?I;S@qQ zT<={)jY8m}MQF3sc%3A*quL8-W5-f8fL>1`2ftE>&e=cz)Gwi?-}=6dQHSG6cAu<; zP_=dIcU^n!WLqXX8p*v%0d^Y>GF~Md>Z}NiAh*j_#<|>c-rLE_JiHu%UwV)*hIgq6 zXy`a)hL#qqauUS22eMoa%c9u$=qj|P0~v&NOFrSZ)LSi;Kpu=rtA`37CGYE~uK%Eu z@ncMfso=5b(;B>2k2*f8AZ@{5?a|#wNxsb)V!OCw*%%A0B=#p{65v8V1nf^l!+QW> zSI&28=x{T6oH%TcL#CZ3zJ4-Uz1hBM>zj5@X+5_^*vvDDv|EH0hqx3n5H!wapBO z-v13}g7uJ1{}|6!ZgAIH@&)0}`ztF2j|;j}mTwCD-Q_3EKb`OYWX@GBoOBBYM`eEo zl)HqPOf#Xtb9ReCQ=PM~F3J2MEbq{1qEl1*)GM{SGToAy4NaWdA@w~Tj>gxBUi9t< 
zJQSQd|F&ps3rH^g_$swfn0nWUA}P1w-dSt0==!V^?!)RVZNLd|&R|Z9os**>Q_vAAe3 z^IIk4+LIPs0Z%7UXv(~E{ES^aWu{MpzdfgOywDdcnrVk3ro2$G*(L>cQ$M?{&(VQLc;lfev{(YqSh+ zwUMp8Ym5wU{b;tenlZnhGa{5WXj^aY@RMqeQn75#-}oiUWj`JnqfJT+=56pZauTOp4RhO2_{41u_p`${^R<$jHp3CLxk-5t^)0reJ$Sl?ud;UcQ z*r+hHP+l$9U?_tpfk1ae-?5Zz9d$HKms!Bm6PumImlH{)=+DiMhO2#4prsg_n&pah z1%;v)qCMF?Iv)#De(l*`xW5WTCl5=r%~3+XshXPk;d*0D!tFY>F(q92O4hRPO04>V zw8k&`E6cY+CnC47DC~@2-w`=mzp9b`nEb3C635!Qv)#!~gk?%Lsl9qqHWwpLWDAQ? zyw&AGcN+%3VpoM6IxH@1%E{%%=6M6o+=dg`@_Ri3&sGdL3{7-rUvJF~>(4Q@YR(+{ z&G4C_dOQknGG^D&y$0~EC(O0Yei??nI6C@bA^a!F+nHU+SqK-Mca=8vTN}HRBXzs+ zq)EMy4?`~J(Pw`lN-Y~&ugsJo)t3^jOm;S;5)_n(RtW zcqr2$D%nwEsJ~UV^VVfa5w+seNc_y9=*)?VRO-(-uOJWu9c}+l%)BF=o&aJd(tik< zxJjo@b^!zTg1LP-dS^Ga%)Lz!Y7L2h0VDZ7CzN=o?ICpXTskD}Br2_S!bVMVL7fpWq zgP~C=l4oqiCLMZG3yda0P+LhWrn1zhCk!B6W0}n_rzFX#35Ga*iAez~e7WtTKJ@zn1ditzwFoZGMk#SF@BD>5G?1Zhq#WS%VZa5pN43OT~ zxvs@d5v;4AiiP~sc*C%F2yQe}e?@xPy|26z)Dh)4>+>vOH{@k^?i*fdH2rNoz=M_Y z_b1BF0hG9aEbb7gVb-q<*dW90DpatL(d+HePDXP1bWX~OGK}nyg%~f&l_oa{F1Z-?59<^TjSh?+Q$i#g0h4Kg9-8TV-NwrIikpwLuH?3Z? 
zvT4>)!gMtKHwIt>8=lv$684`eePCb)tVKjhWe$*9O$1U+%r`iAP3(Q>jM*Xk^QZBw z44T;ZO4sLm;rwHpece(qH01X{;LjJY&Z{oc{U8f_Mq@5%I9#=}*u+j3jhyJ(x8#G` zHElD?mg9iG*y^KsIub+9w`knFC9}*(Ygc0?2y_wuM=e%9(c_xW#Y}eAq!Jwy|3<9( zt1GANA{|4)8E_pG%B6HzWcyN-CK2XGedU#kMCl(TzsxlqFpf{VXp+ExV^Pw)Y*pG=%Lbu=RL`Sfk4EM7HH_lsLYA?v*6nFYU>`F6kio z9aGO0EdVrQQSuk+kNS>tSgpSK;ByhC^X#y${_bI@q0yyg4|}}4a~h69L74UUAruzrCY6{NxCUz%VNz z)&_mB$H6=*c9uz3(aPNjpZKovBbjVOA_te z3&PE#_VQ&nPn|!^emnbGJ3U{H(9%>V71tHwcmlyqeD#>O5o57yLG8zZAOl%Fk`GLO zV}Vwk%}UF;kjZOAV}I|04L~NFk=`@s?ZFpZV+I7AKUn}XZW-^|_NNxsq+dzw`fu)B zy$QPcu+FQ3?nXwvAEyk{$&{!nKfdWU(}A-T+T!#fF|j0P8DKPfI6ng_ML!FbTmS{d zGZm;+{Ql5bGbcbqF;DcS#&&VIc@@wXq!7Kbz+C2RUF#((H*wp>VR;ILfgnZ8}DH{k+u z6c_0*O@ZPP{FjGdQ=<)EdZQZ@q~~W-K39i0-}L)3TrHK1iJ=(|CsXdhU)ixK>Hk4lGu7faq6(+1-yK z2D}KuiwkbYq78}_fM6gT4-BkleN^l^+}pg7UsQcyD8iOk`Hzhca~+;G+bWp+ZYEXh zu+4Z$M)n!V{TL3+VvyY3Zz)$8)rQKMruLIYeq`63l-Mit^*m`DWhN)IZ>Mb7i=#y9 zziSsppQMYBe4DioZcdLVnJr0vt=*DZGKvDpb%hkD=mUhZX;~olJO{~cH3Km&G7@X@ zWgx`h?0qK2KS3$B#W*@wygCKw@L#0ew$N~GeSRHJ01K2B2?+{+I%h>h5p!0mAcJ^; z)Y@dOi;%0U18tcomVliR8w(7>?#4uCk@ml+gC)Y6sFg(PBn_ezfqSvQ1TI$~D{no{Wg=dyr6cc5M3XZrvY~?Fv7FjY;ew&F|d{ z4jK}H&!1kjaMBZW9~wUr7e1SpkrwuiUUuSf+309=!5<&vdgemiZv;|KV83`r)%W7d z1B2z1r;3uc_9nn!s7;qGg$N$i)Jn67`iAIG0;{g?CNh*D-bt3^G|alD$bX5t-uK zbPeSePZcLR%e@&j#82%2^~1HN=XzUJRnDq$lFAbD7cj-435-)&Fk$`pkXyjcLq!1W zT3o+fftw83V@wc2ejtCIlg=+FJC`@+0=mLOvJ$*?|O#AMQg{akbiWIy^X zRdZ4}d9gfdMOvZ5PDEsmY;T+x2_eP!SR6?GvZvt(UsLTsV=mKa<+d!(IMf4^;cZIW znD%nqaA2hVle8-e)=P->`=NHE=)nIphG>(P2wo%!3Iz(Nm;rWqIs>=N0;&~)|B?9I zea$8L%5V_4fBsE8nNV`2x5H8Y=XrW26o;6N!Ju%s5WuZqVXHWqz{Qyq$1|0cF2A7q zF9E><&o!|edrybM=06h(ff}iB0>vdioP%Vinbw&caAbhmZsq6YmXd(l7F0;m)tX$% z50-o!6Y`_gKwG(qz$>Ebsd%$+K${iAwdhC-U_nl@{xKp?%-I(VAW1-!Yj8AhqDGrk zL_`mEVghIe5uw;^^t(gH_c+eoJ;s3v6&050&rKFX$p5E&>`DUnty^W~s3NpM|NpBA zsu-SY=11zbe;&o`R0mVtGQvMngP~bR{FY-P?Sx1W4Yxy9{O!l~N@oy*{d3+A5)@yd znHk+m-9lm%Fmx5}UrbKWlY=G?gtlI0d;(jtOlL%~n9_UD(0E+`1O7tL_5@W;#s17i 
zFvc`jNS03wAcxAtX|(0FL=XneguxRDjXlLEd0)QvR-6a-Ps5Y)W+In2X*E@>FZ2=e z%lR50(EwvE8hDPQVT0G`p9d`-9sIe`ZQ!HWi3pS)HXJ2EG`rYO#kpc2O(OxK*!TLr^2sD8t%1b#rUXhlj_*GkW3FAs$Ds5ZymTLAYm8Mr?zw7fne z9CV>>?d7xHmEfQlvckJpeog(FJOV}EqPNikS?aKs(liT*vN-0|d4ubXEi7FFs z6m=4d)5^Od6pg$aAaN0y{!>oqhY))9R=_)>;v+9c!H%W`J-HMdNMQFgN1{y5bOZ{i zFnD}00i65}CMUjfe>o!eE6@qFCe{G~Mlh=kU=9p1r8A)eI_joy{5{bX3niBmItlT> z{GYHhg1RG@2%t~}8aim;%Sv+;faSXC3-1ew4j4@7qylVQ+>k#{+=0MfymqqwDude# z2{@r(=;_mo?uN~H{kBM)w|+%0HoDO6IcEuYz!WQGciU_liqzX`XY(YtKec^c@Hkun zx_sVrbHJ0x_c`!0d#Bdh;lgDS9Ytz$zv5F>TV_2wW^Cs0BDNGeHs5GM?S;eH@5rE5 z@Nv=jVP5wG^jXdkBw->OH{< z{+E?!Yn9{zVI(79hUhKU!HcK-JGrggC_wt=ZilM z%Yjhe{GZAcX=E^@y+tE@$A2fNZI{}JGx>G$4^!Sjg^EZ5!f4Ny9aLI`dUBwHak8tq%;p))r4j3+PJ_J<}FL>wgbUEze4WG zrdRB2)P}8fz9HhWzYofZMckkF6VBROXr}%ggD5tz#$t5fk?wT1#RP|Tz~ZvKD)c|u z>WrAp_t{{(4!jFKV~Ts*!cW|2 zt0Q`X&r#XY4(}@x+(w>!4fLAsxpeBTiP+7Vfzy9%b-DE}m0ys+XuY@#@a#~RD3E$h zJe3Jbc=!J2&iAcx7GO5K(UlJML?I&z&kndDy*b+!>x(30_4194SM*&b^MeBSwMgMl zGTS`^la6P9L(CO+*SoLE@?Fr$O@~u4jdI0U0hYmGP(=h1iKOs2_jr8=?9<9>J^>-G zh_umvJ~=9=%W%x4QDUk4Pdl)ZY~_rV(^o=eR#t33b5UdZLTPQN5Ue zG-A#;EQWiOX^L%pmzX>0Hi#tLjG-2Kj0BE*6xul<4b(4EniT?!`>Af8^C|dF6en=x4&4YG@pC!w7O$DeGiYYmxc4PRfenC?yw&Yr$5ZCI~1To-tDLg9l9MAScjDqggin9ao zA#5{&aDSGk34yRG{viD}rqo+qhJN%YUJx2xI4M5hLw2Ue4AHcRc1zKWe`bW^8uS7+ zYBLs%NbuQ+j=MOp&We)pZ~VW30JoCp>QVWUcTQzWF-AX_`#FIA$9!v!HQ1tulXn4+>z%UBIUE~H#q4;@1`P2My-1r` z(yHr4NDOU;>r7BNf1yEiV?V?aQo_9Kn}4bXrd%9>=eXK@&Xh33 z$hZ1@2a@}t6Tbjm_JcYu9#9hCZ_i`d#xi*4P2aHD^7h?F!e(FQ0iPYHT%lMleQY{c zvO1dnQ3wL^aq%&$bU%gVXthOlr=y2UXV4wGco`8sFAmd!TW_5WmwZ{>`OYLYl`S_K z`OWX(uRJ@p%T34tcqO`*7*OXXGa8_z*Y79Z0y#tCEIG5MEs4dDEQ8nYeF0P54$ZXy z1RlC`O2Pr4)E|>AF7)TwtSpcrtM?oVfDp3a&Gpvr;Gp{I-X z&GGewiSKnMA87P5AR#01yUM(;uB!p$rg$VW2A}eH$;rf&O{=%(7dSC2P_iRYEkGDz z>J02^y*wWgoMyB9sR|v3fejTKsDj4Z6%Tp-`BS(FuF_>g@4t6fEZt!e@k{U1Brd}s~OhCql73r6hrCBuZv0Vw{Fs+j<~M4{wp%sPWI zMZ4$s&EYuMJucgKrL)p+E0b6v@J8CZ3QAoZ&*v&ZCB})_gf(;vc7%4GO+#|4KunYf zi(67%f?wEbyxncDK63tlaOAteoxtBnVd|@ZcySZp>J9R>5u<+S6qDNIDQogf)wK?F 
z#{@{oc|6(Y!t9t2@6zYs7WJq5Us}lU%^AMRPe+m9kZWp-QIZL1U}fRp8P;xcbH{y$ zryt&YevDa4>2qmotV>8nV6I9|gz#7!Mk&hS|41F8tUZTr_eRiPr0ABdnhK`YenN|e z=1S2|yo|wMU&3@WL;K5z>9nYy?gzYSTy`IC4I8?@TbabY(TuiC{hFXyp~n+a1zfRI zCOJ^lrghBDx|PA}>Wvll1WdTdinZ;f-b|}33H8v}F$31Zj@A*CcNBnDRC39~W3Gl< z9-Otsk;DQxy%D&$xO)8+%c(+qqWra5#x|kt79(lBYgJhnT!O;RtE=ghVz-uKXCLS> z=Tr}u+R^E`2Lj1wjZ1pgZuN$5ZRwFzeq}Hu^cIl``9}hWz~G-72f05irPLy#nj+a? zV}_AKq~Y0_`+<_^`S-Paicvm6d>qr5O=d9Eb>)as_W~?dPa>F58d8(gmXI@D9Gmz! z=I;*xF;oi`%|lICUxZRG;vrotTG??kRZs`lyWsO9@O?4J0hm||(joGc1eIxwKNM9k zrQxCY0_Hq%yf9u-R*RN2p9(;TFW5W4X>ypzB}VGhg7MSmiurK4wcp}MN@7)~g=iua zEEEa_%9V}ah`UR2e{)3tZv|>~*3%G0&uiT@QAK)?KttVQ7%XrZc^UHu0*;o}w&=RF zq=J^*Sgbm=$+aQRV8iEcqg3ZbB`u&gZy~Ze3q<_?5S+d=eIbq0WI6O}K<8yGqVKH) zJ8fn`h@KCio58OiK+%xM zJG&}GMk1nIO$AOB>p94eVQbc`W%Lni9qqp)k4Ax_so5X9wm5-_F&}w$BFTwkPw5l; zELg42;qEW*vH_A?Cl)ELMC5+`e5;_wXQ3gQSDrK_3Q<|3>1hR%mX!De@^AI^KoelL zm9WLkcttAM?;}tTTo)8268J}7*2YKYmw$z)Y@WNrGyDDbzM}f+T{NW+m^+d$(k9IDF0ex+6PSNj-c}9`tCv$$dy_L=?KLZnhqKKUPNzFMunSP zcfJdn7zFxF-~ZsrNay@0b(dTuRIMK~`fxm3%Ffv!$7Ku>VOC(-YEdbFJ15z*@%7sM z0v_sa0gRM?MxVLO<`T$kY_HWfe#MbC*e^+*7)wk5v9`^6?e(7t*HcTA>eA~`eVP85 zqzP~6++jr2q9$Fh^1sUDQZ5r`yFVLK02XX?+VFN$w-)|61QZgO<=y^hqURKY=4-1p z=;VT;t&(=*Rh{^jv$rykYEX*SJuvXHEhxxo_gq^(lND0Lsoh|t#wAa}^*rfKQVN^j z@0=uQBQryWsGS5+TkHPa0uyzumUCUWXcT^+) zW`DlNaklz+NIo=WuV<$>j00TZYjZmAwA}4M+|bosPODRA8OgRzX9B8#M2YC1c!NY= z7-B#gU2l+@u83Y8)Epgy0@LcQZ>MnuKE;ARUA8NNgMR}fP?#&UvgE<{^ZF))))81C zSOaD}Thwb26(8M*47og{qCA%23KMD1=sKsnk!j^{x}(W22hS`G$r{qdPTedMMP`#)+hl6q>JS;otP~$qm-x-NzltuhG!3Af;+|YWIE@MD<5=)Z*0^ zN0jmzyu?P7V#ff!A)-ZfeEN-DWjyUYoY9Inl0KCpmMty{VbH8V;q@T@7$a0h>5fs> z`-8GM~&z>X=~4PPg*c6+{lHGaP5 zEt&<2US72Nc;@-9yNmLItiZd2@iqx#Ft;(q$CldI@Qp`7FX*qN}vs4!@aX29&sgTLFH6{`B+9xRRc9f|_( zJuRVqVD%1hd; z=YLQ`2>~zm?p{UlfoK=6qosxqC5_h8_ZUmO%>(nwMER!OO*LE0m{e?iU>Qw|?^45z zWbneb-(QBt)~I|Liw*P}&l$iLz4;sDQP3`nJGffT_04$nzVKK2HjlU>VHCWAULjg$ z#FYg0|9JskF^(+7TdRN)NkhN^LqT5n$MOPp3rPY{y<=qkn4Yv3?E0Q1n?Y3=m_y`I 
z$SFhVHLPg}f?Tg1$YLW+6|hcrXJ8P}9=?<>cp$?7^D7L%tUbh)nv%a#!?-;j^QN`~ zpEfni^WxC%@>Q}zc?U+h_iM}t7~RR|eMAnO`3IBUbUY-4Q$crksgpK zp47K~6&x~Qv9FsoY&ek=X@-FOnA}b%Zm!4+YOnCn+an3tsXPudO5PXd$$%7@erqww zb**Wkt_`{=@I9}OM=SgUp^aF7d_c%EpKsmnm@GJ}+#Out68-|pg3D%KP3X1VbG6;L zlbXvDq7u!`eqRXwNcqrnaI=z|fBh%)$oRfTT4gLQ)qk$lkOp7&HLYA*u))LUFAA^Z zW*GB6t#bpZc6D#-Kw?DPEh_;VL>{&4`b3TaQ#IiPp7`^ev{*7{ zMiTY~_Fp#(g>j;EY%<$tY2>*g>+aWUXED8l`|s{%C_wY!Cs_uDDHZoo)%o`)lVj~e zt7DhiB}fHGHro}g#?(eu%e63|U>h$#2%&4i0ZuQ?kCu}p8Q3kt3nURf!uU0V;??MC zS>E$9V$#c=+|F1_Bs^b{J{V=$-Dm#k;-z7xBD%9xm_}9Nyr0j-Ey9hc`_d>5XPeo< z!jYB^bxV6!O1`(K7D)}mH81=W7vI6altl8oCU79?+L%Ver~+%PTmrM|MIa~%H;GkJ z0?21JA3BGQYF201@GKK&^Wf z>%`1cD0eln+={@}`?)J%aL8gs9n_A$dh~iIqp$Eu4$n0JV%0rraY4d0L2)vftpMvj zk0C9oPu|yiyWdTrAaDIpZmW;tgDxS{zx<_W`7+)lCM{v~$_Bs4z;W`|aFZ8mPcF3- zDT{ucEN@i2EixzyC!Atv%io|Nm3)DSNR+ZxJ^4~x>SOgbfd*xLUc4gKe(Hv2E)Nr+ z^|cn&Arj8}6LouBa>BAqrO1J&n}fglE3Lv^|2F$?9)0_jP9GDTTvewfXD@yP{z1Zx zSYGvM(A%@q?>&kGC!+LpJN$135}34v-jr(SVS}1rr1k>;Liru;@;at<*B;)POG{(= zl^^tUTf22$?EMQhR(5X)F$nlwaiSmYbqCcd3By9KS8>}RxxJ{ZaD|XeE8^kZg!&^Skbd*l{y$uObyQW+x2_;cNOyO4Nq2XHgn)F1 zaOiHSLw8F_NrN=fNFBOC`q14VZ}YqNj`!Xj{^8KU-mJZ1t~tN?&2O4l45A@U;I(U67xuSxeF=g;;YlN0J=$3WJsb)6T{ggP0R zSZpu6i`J3*bpi-@2w&^^CJ_;keIck#Sg2u%%mEh2V9&~~!>Vi^85y6DT6X;CLfXYD zjwkVGSf9gGqS0nimmPfX*3`IXM%!42Bdd7)69}Wj--8p>$krWJDCH~gsiG%6Gwi}F z9L+rw{VbduPHLZ57Yv?a0&{cTb8rQG;1XHu@*p!5Y%!gjN*T@!Wa_-EzXFQPg(sE% zHQ5XA5raW7f_cOV&p2eflJ|1qeyBy&S?Gb9{Nh7Rf2hFG6T6WfMOlxt_-a$ z@9F`oP@nKOa$8(}V<=8^>+6i!{R>c7PmMrcxX`Ilf=xN_UhWrKNxKCyQ~0|LmX+!N zG<04}|Idl~!XF0TMu0_9zA!qi?bV#LvEZ|_qqDW2wM)(H0EBQLt)J{>czaBIFLwe- zFaw}bi$B$Mo6Bw`e41JbobX6_qCO|EzVBn>-R}gkzJBd|66WiMFP+FvYNuN<2*|w~ zv4!QJU9Ah144eNW62|tHWv=X*08qQYCe~{O*+&2{W>SYGsmUS+rt_j}9OM21*4VZa z(@x@e^C2}noMt5_^{)inCV=4rowP$=YI2Oa#A8(;A*}cT{yQ>k;JrUZ6aRAG{ymE8 zd*jKXhZ-unjFGU%#`jRsfUCPnt|TJuvL`}>FY87*YFGUd(1pgXZOgqN^9CQF?!S$W zx9+ydi+2Ahn#*anLEBHe8W$)pRhmh>Vl@3LL7ENl00RTAHUtwrLNPou_@aPfJtV3- 
z&cst7t8hM+xNm*BD5xS|W!n1{nlJKWxn@igu>^mr!~Hq6?j$Br&~FzOkq#!-#pYvL0sr;BDd|E(W>B=^E&Ob>l0R!%~d)s61292JH9ougfKUv4}BinBzKD>%+h+-1keOfJck4JRR8Hzw46 zn_vDX+~=E>LIr^vDX#lvg>#@P`IQ)5^L5%?osNBEJuEuy>)Dz5q@QXw=Ldi=BcakB zb}_eHCTTbGwEZ?oN}`O`Wt>v}fZMHyKB*vsVmvQp1qx7$q?CJOLPqI+hg2z#(B2(Q zzevPs9@KgO7TJl&0C4mR1u#u>q#pfa4={`8<$R?%O-|y9Ka?P#B3FzH5ep#@gw4rm zNaLjC1=Pri1GN&H$uo>R#IfJnIIxrwV)Pr1P3z{zGMceBu&7uPQE7Oo%I4D=MP#fO zur(bMvZP-+l|7SGjjm0u6*e{Hrd#LOQLrWzEl5${&5G4%>i zlHf?`a276Ta>3kSH>-6abGAc5!XteoN=->cMJgY0&w)?r_fUkM#1&kiAH~vwaV$nn z)70Oax<#*D&c|sn`Xvz*(>s+r9DCH^uvuvIcv?eZ@dq-*kNK8rFkD<l|Ap`dZZ-a3XBfN|62$#8p>*Jb@qaAj=8E zsCTZk2$B6V0@Ji$%tZD?=ZuKiIF=w?`{51|&n|)YXebHK_(`KKJAWvAm}vLYm(fTl z0CRf#Q}{Us5($@-CX2X)=#fR%R?q(adkYsP z^=;L1R4$-)T5U`^3k8cz-Qwo_#r7`%^kYeWeFXbSO>MyR;H4Kyrl8Xhm52Q_>;gaC z=gB^R_N8vg3|y!P9Ww&5`G@7dh(2Gm#gn)e5FG%wn&X0SOF{>4E-;vJ;kFhkkDu9` zyu0WGLd;j|lINmR6e`Gzjw%A-%B27Sjhzm@c%t}2X})&bPae9)%^*S|Fi>wWGA3#n zTn5NheE&D^ApTY`N$(w89M&}<8@fUBG3;I+hp_LN8a4T_G5{RR1bo)q;!NI=0sEyzVy)O&~f1$7Q0ky#ExIQqw;hu|1n(pR3uAI8Rye-v-WYFEtY)(cB#w~ z)?{<`p^Z#?>6~^0jc7)mK-rgN>!_}G?zRle*oXVe{^1Y2Kh1!$e}+_0j6ppN`K9+I zdcni*4l}YAH^Oiy1xm8^Plk^5%um8D3O}Bf8VDMn*G5p<%h0sp1hfa7o{+aZga@r1N35IpL~eT~8!s-O=+0qD-)dfYKNC z55wI;0|7=Gg?yo|HT*rG4E2gS12Y_jFZg*S)2ufE(`FP-^ZL*?d_Um>yT3*eA2<8& zx!``L9oSlr7YYj8LN%7dwQ*^LC@7EbQP(th5enTdXsM}bB3{rTD_Jgom17^BBmeL*IXH>U}J=9 zPk=`Aexc&W>&l7rCZ=(XcEV&;uN-42!*bRmQvLkTF&2c&@~k8BM6(Zh z=MRZbcrDSgCK&(|QjX&`G++ERu5}T9P!@$hEHgd5%`Z%>nRD#w`RylH9JWUOm|f zDh|MvHJiOj#0%P9Cn;Vl7R%kN? 
z>ZrbM^jN5`YqpY|tbfY=h}2a|$zSzZ;O%RMoC2n>m}Mzg(2a~7^Dd?91s5J&VeTrN zX1{pA^pF&Qe1m730pDI28}P@eGQF1-!NRso`wh0|9|^hLQ=rr|itV8Qjc8pl4CK-g z??MUEF)@$`2pd`Ut)PMUgq@c?DQ+s7*Y?CHl~193=Q&u=yKC=7yZ9JQP&1Sib50EhOp^` z;x&Tt`rk7!Cl>)Pf^rR_&R;5TH1N>;LEf6`abq6>UVV_&OiPf$9nYnAJh=iD8$7K& zN9?o!Fwt)W^ect!X3+#qnP@oEc+_LAEB~TWTdXuxQ>^dx$X%jLhIM>DqRe!uU9-R~ zMUulq_|+J2={NM=suxtdkecD|NDXPcxrN6v2DfT;3E)DQRO~IAjTJRYI67?(T;8&W zX@3n7Sos#&ssu0%_gDw6ndI`fU=cHyAAE1pJB#lvQ>D~P2gzOrArcdeF>bPPpL(-6 z3Mv#;`TWfts&6m_NPR?4Yl`X}pDpkIai4>VBMkI5lU-SrxfBp;kcTv!s+9%l!)Xr<}ej30wOltMKzD{Fhprmu#82t7Q zX&IOloI`!svAg^(6p= zUi@}7mE!`FHVqa43XXXH^c;u|O0Fw*Y3I=!*3D%C6SD6)$fl9c3Po9lBg9OTR5T`z z=GVcmOoy+FV1g9tPO}Z!T4eYEG?OjDrU>fU(igR4R&C>c4mnBkxdBhmZQ*f6SRqHi4$mkpN6qG{J_Slr(KDski_rV95yB8cI5zg5U`=F7PZ*J<>cC8&awFF&G4 zeq%5%W_2j0LAbREFOEl%X4gYxnsSfIbV_MIA`)hwl_af37?9rTGm8uB<6z??W?6D; z#BYkwz@_5kemGFnM8Xr{HJbzU$Ql#n1Y@#ane)_+CIFX%dK*m=fuNRYad7HhGhWc+ zX|wk~LImUcn_^UetYvn8jf}vn(vvv%ea?p*Sp2KB_4%3ddD!}r*pzMe(KM~CW8O3G zMuifkiqVMK1`F93Qhxlde-9C%B_Rzcfo!Pm^_UXC z*=gXn2WnN1(Q!FQJRH}>0nTcIpa*Lt4jz|1+Td8W&gjF)5JGse1?k^pcUl>@Z};}g zI?C+sPu!Yh4T6K86f|GeIlSSi3*ysIKzmYi9j=@8+`3jsEStOQo(4!k zPp9<*5`+d#CsIQT!PK2Vd~?@wEO-G)vis;tr@r?~83$@-H=YTd zHrAfu6C+dQpX3CZ!jDbf>s`t%1LbT;%e}u`Q@{1x>3qY$zx!%$Kf&o6C;Z)u=~XD= z|63m|Rpks3wC6`q!jCZ^M??eXd^e`gPfvK2Uoe1F_4eG~+W^h>~ zrdt7?D@Rz##^^1X5aoNP!nK1&qA*bU(k2iX!>VkByGg~$sB*tYKp{S0S+mxv;OODj z)|^NLAy0Yj_fvYlbR2lz{e|JQ{PPVEfssxl72%0EHd)K_`jEDj7vLC)ZArRIXSqOX zAUCLS^-e(sInnkiA%3;smBt(YoMfA)4Xi=2AMQSWImKQH3U%@rB#?t%3;K?NgtRj@ zXLYAO8lZf50dHVNsD1T8&F)-#djPA5qyLq~GKwAf`uktfl24 z)+SU!H3FUoM1SbD5e$GY1`V8a`hf%1mlP8u#e#GBZ93*#jl&=*AZ|xZ7Rvb@w)X~f zwDiI-70F_crXAfN42aLb0U4Oks{$|BB86w{;q)r1ImmBFY)uXDfa~$+s7Wrjy^~oFX8vi(MTBWUVlqC|n(iYp-E(sW6lbuKUL@Rp zZ&B5#7(`y~)j~!7Em<0t&O<^4wN{$qkob3|w@bnKke@8Wz$ha9YQ01rX=gnSjhoWrk=Vk!GFs#l+j;hB_c4Cuf0ms@a!h3Zr-k}q0k2&ZXCN{Db;(38-W(oy3Z z)Z#WpOk~9I*6Oeq_`M42QqQd^O&&7>`}1N^LO`BFzl4(vprudM*PD-I;!M-VN9lIzI5z`{Lj8$HZ1*0+?;&-Jlgf$_>ZP 
zKtaGgNRLRK0m!uT%_-1)k{B$KG64j_Xdc7?Dc-+}ofaDIIop1dZrKbydD_6}ef1i8 zw*eJVI{r!3EI(Nwi3ikvDfYb$KbAj~mZz&a4dmtt0Lau$e^J>DX>y>^0O$KnmPSN8 z=rx&7GPg911&|MtK{fufN%?{LQ?Ddv8>g`(A0P;M{WJWGOKbK5{EgsGc&rbsJFxPx zoln_K0Lj?s$vm?am=a)8I6Hj7kg7y5Ri(rdVaR{|-;(!U;g}9Uj!Eb%l%1rsQcw%%E41vKF{k&E}tbdh9{wv?-Tui4{2%dtf8`qR9OO4P2U}`gzo}}sB07HD z4?jX@dwX$x8&nuyRjE^0euSt-)b4y<0wt)IhwluV7BfugHT!b~OHi6`0(X*l5bQmP z@5sT~Q7kZ|4vg8G+^}vSmjG0xvLUsf30W$WReUM_IJFVg)^HN@0gp|Qdgfcrn^ia) z18QR)+YdAdj#)sR&VRmfdr zLcV2~v;)6QHi*y9^&=5Y?q4xunp|!dKH%dM!c%~;ZAJaPDW9Fh|Fi$Va3*7v4R@^__3gy`$W{DanylJP_+%3BRo*C|eVDtMt&A z2qqBUXV0ICJsv9cnxfD$y~|>kyhyKS*M|`$*M@()%$ry^x6P7Fwx`FWATWnM)s;on z?T5o{+wOZctO2NYCOB%DHg&2dgGSjoJ-%-93WfN3AJEoyMi6&~Z_tt@>5dkYKRb?W zq)nQ!(BNRi}eNX$5bqziSF16^%@K;$qV@Zn?~e_|94WE}~$_n+|;(pXT6UE73IFAx7k zvBFU7Djr?j|B(a>lj3+3QJ1K~tb3t$K|~zWE9;*~_&`zi%wy=%U@rHuVV>iCD#bIT z(Jdq6Dv8)^T7(4e^_ean1pys)lsxpv;*q?&wP`sy!MFkY`WG3cBV4m3C+`~q=8gE7 zAoAJ-)t2K5P!gXV;8wo<$stNzY?ZaURSUxhUt!9J?R+dJ!F(Y0_e=Eln_l&O+OK#3 z1zyIt1;a~FMVTz|f{14AW?BK@Jm95P>)K_q(tHBSoNX)~I8|i?b>xL_Xg=-Jo*Tmh zRnlLS1koY~@8Xl)NeN3dZB25~wwMl*1>i|MR?x{e)LN%8Nq})DI?4ib`qr+OeoR)= zrCZJHdBrzC6x69TMiL~Q6LM6)qVBTIN@J0K3m<7a)82~C3be-Y1SY)bYFp{zT zWTHH8021mqI?M-M1+g#dK~!hZCu+`+#PKAY!sLl>kZ`}a@X?Pr~@zKD)q!JP%cb$vI`@*|D~HKnMyVX`1jVPpr_`G)u~dqS$n7Gzv5thyNV^d zGCw=@;&LrT#czh+s3~jB;}*=)bR{Yysi~XnP1omf9UQd%6>KW94ApJd3!m~_uX0|f zGpM;ZxV)@(SN6`yz1G`&PX$k{#yj?M%|;-TjUH5U%HzT{uq0G=s_^nuc>Q`(-D*ML z1UlpX*+oWBU#t4MBm;!wceXR_?_rb7kTj!^&wMLUOHpEbFIL+<{Uc9#Q8<5!GfM#3 z)4tw_xT)vQ4z3y60rn^YrmN=HQrj!1?*wQym73uHhYK)eG%v1k;ix$+68BMJZ@X+7 zpV#Jn?(-v?<9g?ZiJVuHn;A7Onuxfkz|**=H)N{3+5A2*ost8= zsbkrXppe_Y{;vd(E0>GDn=wRB77xeK+-8JB9!IN+h|NazgOEg;sH_cn4nwRtR1$?w zd>%msENwBNX{B4{4%;!@!RGlBuzhasQypZA`ZF<>1dPfgVIN(xZt| z+kaAsjzwSK482>;)VGnt<>rk@7g}f#ttJe+uQD)nG4LX7xTqF1ka9TRk8FK1xt)Kk z^)rLL*d8w2Q>wV+v{nkSKBNnc$6>LbE3d&9X3lXT zJd4-qcbUhhhWgVGWxKSEs;OeI7}cortU4KJQKMsK00h4`qWr}- z``l{2&K4V*?$rs+(wphoS-75UUg7kkDtr>+|IgGH^tQP6n#B{7RRzK6i5?3IQrURH 
z(Y~J;h#1Stw~Q?Zx*Je4YWwb&?e36;t_cfrS;K}acc)&D0AyEIIB<5K4}Sx))qa0w ze&I-Kfh#YF-XQ7$2OYW%A+~`>E_S|bp#E|aQZ)BIC!I##^X7!5g{)r@xtF*J3PaIQK@QZ@2C zvk@2i&CRUcin7!e6S|#z@upgGIyx4!cLz9VcEPCp7@iN{-Tc^w(eK#$l#2Q-FdOxf zXT6b{nQ|k4rt|@ci^a$)g7&=mkqY3&u= z*~=pvWO$=RiHJRUJW5Rj%@x}&7~JVC9Pp2Ln=*&DnuM)jpHVeWL>g$#a9uS;`Lo{q zbz~TE>1#RIP$)~vK5LQFSM1+^2a~#-sdka@ZKEL^tIXwXmWrWZUETx#lDcKVAQkr% zuHSwMAGQ_@qL7YHAtG@Mmx~`jO6-x0RZ~;>RCBQkWYkn3#e6e3j95Ks;Mi<0T9$Lc z!;+YdXSW{KVFSOj@MLcYzXJ=3FDXq0$gz-@mBEH1fntQdGX5OL%$LmYqxv45#M>|4smx{r3{Q{Jv0JEo?Yrk}BQ#<=%u9 zjDj3qeZ?CR-;Rg%XhW1#FzaDB^U2Sg*)$Kvw#*T~R$x=<4kd}NBhbN3a2|+jEKb+P z5A#a!&Nh?+lY~J#6I=oO1Z2Ay1z%wusZp{MUjRD@To9!PNTCJ-^PT$mgkQf0Ok254 zcq=qS&{YSU{ufQkCq#Tk2uE*dSy5zmIPhbPUg%m7lprt?Erp;VxoXi}U!yM*2{nxq zndDnwQtJxaE!L7WAsB}PoOyVfH~RTz3U11uJ%DxTMMgelg>an1mHl|ge*KM%-fy&K zaDu>5v{ZtI1tCS)&BsgYym#)Bud7p6_{(C=OyzaQWK4W(@n(T=q)pM<(>ZiQn)3Wc zQQt2`Za_1yiJA(JOIbczk;fpCD6pIzo?59jy3i~v>bcumVX0_?wH;WYXKDF=^;}M9 z2QBrVQ9giXL}k5i*^k9Fjspuqd}Y37+PerLWQl+eM^QoJ*}`aew{Ps%@#BCTlg-1; z*#=dE?J|jAGqpu}!0tw&Y&uyjK44V$m~$pE3g`_=xB(zPNwuIWS2UotcIrM+by<*7 zz?bk~uB@yX*Tuy4-6#ynl9|?bQnkaYe%bNWp`>&jm>_V9Fcvm@vWlmsI%?w21dsF~3uz2T8i#$R zmZr2J!F{YEP;9W>JFB>{0Y)-K29Wbly!EFyl<1kdUko1K>G~NwB@u^)^oB+%tN9Cu z&d;^b_t&;tuVpTC*I(^nXlSb?D^kbQ3Vih(bN_PCfJFX1-x|lD5q_7?znRVTLm(6- z>5u9N@^-T&_v}Fc7@io7b{!$1IEUy&!qHnUv1D9u9;a))>4*i^3;C#tiSvb;5l#5Q zJ@C~XH-tC-uIO8|W4`xO6`wI~jD4eWR|PYrofNJ6v>@_!t7@5D35Dx{ z6!_4SzIj|mE%Y3p)~)-?01m5N>SShh`RC=Q`0aQ^Q^R!o@|XYI*p>(@u)owYnF8Ga{2)dBjK#8UE?Q*cJC34-F=g-b--fP7&GOfOq4((yTol(X7DJl^EFAVF_(9yy7lI@Gx(-K<|M zCpV?+7aHCc0`h^bgYIV+Xgz`d{O|=DBBnd*xi)bpfC@m1yBCtS7Tf%-eZ0O$`4-nb z8P9D@Y!-dE*E@Zc1m5)WVF#H5cDV`V98*=r4i`95wT@caX!U-_u&h(s>x)c6tUHNn#_l31sR{1j5<(tyQAE^k$BPvaIM&y-7Qdzv4bSil1AtNbsKNx(& zX%FfHja@JaGhu6sjjwKR$LKjN!;&24EF;UxjKjgBkxmj*&RXopUndHFavbv_ERRVY zn#ne~8cr%5O8MYjYU=-R#n7d@A85TEcw9-9Iyt;-GH*9)t$m;GH7ggQHh0u6U<73Q%eLEn|JUHCi1$v#ZEC0@<@7|7dkvd z0cIPK3fNHme2(5|d6&g_^Q7bPTRKs2=4{1z3*k~o0Xh$`?EomsD_$}%S`!UyosSJ0 
zqA61yaO+aNm0|SmDV1lB^V$cEaVmAP{o9~_Zx<^}+A6-(AfQcd54mcv2k8PwkJrY|cXN4$3`XL*q+MbaB~7 zB~_VYOgKcw!f>y`2SIK21rP4#LK2-)t7h;S31_OwOv~2Zk(~HE);H*0m55{bEfKGI zMTOnojdP7tG$7nc+nW|h;e?pl63`OL=*21W)IyhJ ziAlkzBXX-py`A^gzeBx9j>3MqHo|N=!9w=ysk(jLa}*f33|S>*r2i95@oy^~rsS6*XU;C~C3E;(b!@jl;J&0Hd$=qtqMEW)s7! zlSP6XLk3oZ-D6wHi1wLeT_-1OmJ71GO;6PbN~7tEth_YA6?imhXfpYYZ6S?VRO&Mz zs}h5dnP`!Bo%@E%=m!dkJ}EUk3@kE?>?idpBi@ zMtr7CTtImH3FrXlW0Xk<6~>ewp;D_e|9W>9Cti<6STFr|WgTx-pm~pmuQ^Ea?)2YJ zFo8rwV(0b2VdQM&Byq~$$t^l!`R>{X$yQ;KeoD$WqnT~-X;2~fU){4UrAEiEo0hdu z*CDUrAG*Kfz(z>PX@Nl+s`YXKZ;usx;r`?4#!XI}oL6Q~024+N*V|%3G6q8*fu(0^ zr;o7evy4tJw;nVfQ6(SD-@Q)y7tDIW(9wL!aTqR3Sx$r>^jW9a1K~HaKdi}msO1tK zIdEJYN}dElgdR$w$(4QBu}70!PU4xa3VL6*E-B0@FhZLgVpJPTB>}&g^;H1&#P7Xn zypQqK!6f2CLl&-pxnBZ~j0)NeZP@nx;h&F9mZ_|nv6RwCTUmetjxXq&%o2pb>=mo- zNQJ8xBr9oGB@$?;)U7&D`~?xxu6k(VLrvH4EfY+Eg(V^KC$GCOqOuuMjP0DYJ2in! z7`2NrMpGX*ZlBAHXz1aTGdZoj-0F-RcpG-jpA>c;msH7qq9;Qdxx5?30@GOSN66L_ z`LGCHQrv!n&0qkQZ(~oA47yMwr1KFJ1lCqAl}3}vH=CTkwawxE2!h6ZI9^q|viAD} z@Eg4rS?Z~sA3hhV8tk^pusq{H)9nU|b1DJ*Xz=S0EH z42#VZkqt|43ZuugeSh;jxn=JMA;5R3B899Y0zOP%1Hr}D5Eo#**^;b zXFfnL6%0+fIEll!EY~a8DAK=>+MI!RZ7NlS*}PVt)!yum8ixXeq|2DQ$6s4pAk@h9k zD~0`JO~yy%f4%&LIh?NXcc89X={75@mkgq!1;BjM?0oRbA1=ZqcMZWQa+fu&nd(o` z9lOwY+D5$wIgC(cGNq*;loA_3s9Hn z6AJjp*5y0A>N);|4=$8Rh3zN`DIMEtm|Z^-4UM$>N3z)1Jc-+>brtvWqNg4& zbr9+4d|o$m^hAy*CJ^P1fsw3TU^RO9kW@O}f1`{@%n%6cYfHvcWmw<(9vO~0YNCmq z5*rW~|9xXe6icO6;h`jz3E97{h;_|=3|N9V^?qX8GfZA zxIZj9;P7gqx{EsW}*9SA^^Ok0wV(fQYM+D8D8i{dM3TtySOKE+7OoHKHY!@lrtj*8Y?bg)Q}=C zd(Smr_4*g;3Z4n`D9rf52A61rR5UvT#I0>612Y_a&&zqlzsaef@rqnIF9mpf{FL!2 zkd_ERcL({+kfP-G;*sCu-2ouZ6U7D4GP|F25+esfrrr@JZp<`U=Q9o4EK2>-F!1*{ z_vE(%ZJ@KZ!gkdLJ^`h@bo!~^iN8{Q1)Rd@H(5u~scP1EE+UB(HryV*<-4D@5vj2q zrG#*lWNOS;0Jnh`&NcJXT4jDA36U})2AAsrAKw*p5MF5CnZ~QM(ZhKlb6eWCJIQF% zZ`GHB`wRMbc1OOKNs`tTN^WP1D?~X@9pUV}JE>H))^CqA9NheZuRKJoV6$45E9ZBR9`Agl!6njT zz|P2bjr;c*o(03`-yxC#6Oj(fKM0}fr&U#;7@b{g0&Mh}SE+{>l%AfN4yJ0gR zI=f~D0yi5O3bUb0MM3^7SV}_*XZj~(mTCe*(ui&uows_V*xA#a(M$$v+lII0(x1|a 
zB@afem5LO9erETq3~x}LM*<;hEB)ksy^l2+>|m}fE%vPiF?GA0HQW*^3qx`25{Stx z&Vtkxi7G^ZOA=a`_}L5t6!>qQ~s-E~@{K<6g@DPWlCz^G>kFliV*jytB$}QxAXc?jM*H>spWM`@N z^h_^2*Y<(OT<(t4?{IYg5C~6&-%fRY2TV?Q-a9ji2y^O)w*q%IF~Z4}C1I%(L+{#p z@f;R50;8IiU36}(ZhRzrZrc?EciTVZo2fJwSB(M}`oh^|0s&|!ER8vWpNsBd;HlE< z!}+~c)tO2o;}N6M#@Fwrn|zcoEqqChwL&Hz3A^S4(E|TnT{@ki5J#7)YA*OxN}Zb$ zFOpMw6X-YF51h{qDcTKG$|)0JJ4Ph-*+hg<;MqnQK{_h@%AMKaNLo`G_6AGKP?4&Y4ULx%53=17hkVAp$4P2Oih_V5R5@5 zSNw5iKme4_4Z-&Na=BzduUivXp358rE-&dV5u^gVoc)PEKFW{8QKr%}mFg&l-rmkq zdp5rdIb7Yb--ZTzYa~P87^8`7%T2;P@ZWCNaEAEF7FzN8s2Ep$mSpp|hzB%X)OcHa zdVHV9D2!y5>-@bKBi_`>kOI1rLS9C_KxZs*Blk5JapKJq=K_hcaR~H4u-~n4!KO6e z`E4}rXm^Gy-hy)G(qi2+Qx5P2jzl?z-$AQ@7Y-|n=QNu2k-+Qt;%tIJDL%x{0Fb34 zaCi68x;94yX?-N&StSSpqo@Yxtl24KTGxS+@pyCMY}OYuCdwiA_)7+npMPafEYg>r z4o^XOzaY3fs%)mQ>7gcjiwVY{0HGH8PR-;-f5u$Yqhn( ziX1xaQEUvN4ez<&1D#I5i+Y>_f+lf-6qLC2ZL@@(FfMi`A|UbP zMoUwrr%P_ugWA*JW*N=u&{GOkzPj?{GhuB1h|0_W$F&(TaD#>!2K2{Q9s7D!Sd!&z z4^lTvNQ=j*NT;*2j6DedVs9iOS0%)gAS!jffeI5EtTJ{m%SR7QmZt*xJ)$&u)6c#M zdnxtHODgr>Xn247A&#RlO=Y!?UoRl9oIKN zZ{qTVhS`~s`H_oOrzedF3wgjKqQg4VQ_`+{^Kng@5;A`CKP78H^zr*alIw@a>yjfe z#KPW-NTj&@LXcm1LSEky|1m&n$0wp<{&)gGPfTfX|7=m*2*|R)p^mTyGu3jWshW?Z z0aG;w)gf_K%;YO{eTmK1EIiCBrIX2Qgx+@|8cz;yj*dy#<>I!kJvHe-ad`=Nk^P+) z1YTeu-shS!zg9F0MW3mYQ#$Rz7Q%)KtgX|w9lKp=7LMcd8d^=gJQ+=ZX+6d3W5EPmRYLQ$syQKm=k40{?6)#mZsFR; zEZ_4@Om+N?f}$FGsJkil08Kpl1KO)(gC=m#AV@brMZZ`na6G^~7${+a7`_+?$y5XZ(bDzn|G1FO-tmKD`g}f9o*Ei3!zR-<#Kv zrH0hbWN=AzafN_g94!o8n33>UixBlB_bjaMN9ev(?jFUs3{B0pzz@G z+JEd~)@1aBmrQWE&+uJ?p?~#+zN1v&)~6I2in6TdJneJS#vpFnCEp}p%{HJ(;U;7L z316c6@GUt0=3os`Q(~{sz3KpZa2AH4RX@rICy>hTG`C{OX?)L4~(UlqsBpJRxw?7oxFE*cT-n*`$eW!n~cicfgVw*I8wnM{hc+Iru z+fY!LGcG2Wj1pE+knXHyt5qEZJ=-sjeyZH#CfMN&0;`)RE5zgjw>1K#zAcU(Kc<>B z!5?kSE+dxgE@!>^cK4lwb?EGTQt$8hkF_S3Dn>>`+Z!<=Ki90MTi?m%sL0E2h~)wC zj)h$~Y6y?b)r*e+w$E|ebXVbyEZN`cuAc?X(Rgf0I7~O;;}6$i<9|Y};Z(Dt_g+L~ zmsLe&?k_C(5`&j0MYAqjFrew04mM0&z>ep6@-=r;`EQ@Cj>xrbLS&BF3#J8V%LPX) z@FXfFt279-f#xek77r4WsGS>nml>60_UX>M#Y9RC0a%qjA6|v~$5SF_(D~G7LFO;) 
z-k^H;oSTUs{;g>eM`}KQE;2E_Wotn?bgY}Yj>%d-#h;h>ypLoZsrW@S8rAcapYf4E zN>w#8)zyHb<(E>|b66LQilT>TQE2DIT>mx?JD9dQC&!4*3+bbz*ViE;l{WKRF$NZt zrpD6{AbRA#E8UHmEP=yj)q)^daUN0tvDlp4Pxd(AZI{r>KRQ=IU`D}Q>Cz#7lj~L% zgeHTn__|h_EeF(4ilKgwIjtK&2-1t|Y3Jd{_HWdog;swZs zzE<x#Yib>2gs9&>r8jtli&@=4 zzW}MSo)#BQ#*GBUyW;gKMfi}%DiQ^&ode}xF4y|0>SYXnVEF0P3jD{ic~U2^<>jjZ zi;;`e2t;z~BTCz1qIJRJ;mU&SZZdp>(tDe1RxY)dbTXnwd7A`tKLx1zzH>2=5maGEDoXn#j9y1;4-O=oCZbZ%X z%We^DOU~&ejDSp2-$P0?{}YNd5p-?VC`mY(Ifr3-&y~@haXS>f%Uq;f+fNWaSt}q3 zmgIUOq3TL4-;9Z?$}BrWJR7Kz3Q)i&+4l8q!>50P1R{8v)haA&5H|Ji_-%Y`-f-mI^C2u)e+0^lyma!pAghQQ z>Wb&k4XP{GlSn`m_3O&Vy6;n#Xf#muzZ2WDL>N%sH^7+d_UrjFg#Q4r+`s$^dnhbP zit;!9wj=^j)9Fg|s?RNj{e!1FI*=;d{ACmN0Vh~fluSj~@V|gz_}I0o!AIdgAOBzy zw=im61-k*6cKNOM$kfLOuvb1bE3vR70*7XDGqwlI)1?f!DgD#gu9XcmkYU8xVr4^M z``F*L3?U0_yZGK5y^Fy=NI?~FeESwdfaaTUkF{PjC2F$Q;h2_~9$_sGR80 z{lgpExgX_0V8!(l>wn^AB60!?R@esYOAv~Y9YD09EI4H8jGMp$_>#gQ`V@R|;KIX@ zYM5aJP%aifjL-BlD8;DGpqtH|#^&{(flVadRKD))nVm1BfFSrtu4RL9p)SWM&ySH2 zYak`Uhm3OEk<4&OYaly2831}}f(8`Kh-Dez-Wj`WPtMi$S|R6j{rqfpYloybU1>u#?pn`miP~iUsDPyqAtqD1 z`s%ii9j2N9!ph&kk*dw5X1$tLXOMRS?-glz)8&Bgjtp%KwXMG}D&$b(BQlI&y5{$k zpN9ah)J0KZh`&sRJi{CMXTib-_(hZuGsLB=wm4W4S0>hDi`S%icpi#hzFF^u?kw<1 zosT)V!uUa8Y1Q=#w>2P7i1mJN)|<*B>%StslvWgVv3#>{`a14aosdahWEJ=ANvXg! 
z{5%tG7~8yyi*~(oIJW+M*SG?|jT!;FW^#K+ZlCQiWugeQEM9R-3NL7Gf?3H~&`i*Y zABw|N<@^WGC}T5sYUF<=P6#;&EMnqZM@8c`kn`W1hmGGVOE32|jc+@bQb)yQuUiHo zooY`!s$G*u^0H+Z^hgqI6Z#Yt@j}V<5tMYvOpmDr%9+>RL14X|)B!kw3uLGUITI8g z^L+#}dcOv_Q*#QUI{l@NjnrVDiQW_AYYw!JEoVXpNg6*tO`y1#bdwi7I;{!-H{Kix zI^+9v@%Qoj4mo7DuL`X^kefezzWH9bdAbGGVacMhl*e3`voPxGX^D>Czg&23HOk5KJ>&ke0OuY-+ide-IMkQ$L!$-u;n#yRCC`P6>%m_1|Ko*W ziA130hAU-76m!&{{{_qq8UD~-B)vXt4$F7)VndS7?l;vK>9|5s(>j{?WDm{mXBrn% zhbMgKK!|2V?mJcf&ZylpmysH+KZS?~0Io?y!U3E6T`6MDAEn1NSA{cIG(#iOIdpe@Ywq9uJoocG z@At=dy#C?gjC*GGUVH7euIs$c^Sn-63mnykBW4ZDIR#_RyrMiX9q;=MiP>~2zSSh- zP-$tt6J0-^PVok9nYdHl%EhP+XUHrpjfu1e`?0e_vLGIFkCCKM%=nk6W6aa48T-Nt zFVlA4K=$JhcfEeP{(4t~gSv?{?SjEgV(v7NII&Oo*>JrK8=t@JSEExDkR#|%;U3=h zlShXf;;V(BzT3z%V{%TT@4^j`>XWY;lUB4k2(c-1lwcw2dYa}V2GbP$G*L9)JDjz_ zm_i1bR>wlP5(ikMn-5qw@UFqzm6@RpH!yLmLa8&g4z%f?3EEtT+t-IS%CG3`y1z$90k5wvtq>ea2sVX4@f!@)d39vf8PEajX$9F_Wi5UxoltxwQ=7A}Tz^iCFd*3Ve{{!SqZEMxE(H;yM;k>Dtv~!r zV4`BFrZ#SNCO(ZQ7Azulgvj=^t*0wm~_Az3RpybpBJetXlkgBV+^ zWG5bL>}}Yr2Pl`VOm@@1c|qEa#sF5~m6mE>0Bbw%n*_hFo3EZR_${99{Kmz9a{^BZ zOHNK!dFEF#6urgx+$dX3#O)}@;mL0XHzXk$<0_&J(0RI3#psa`sg}(zlb*Ie=6FVI zaJT4O$z8r2aF+q&@9l>TG+GyYE08DGe%j!wG(gFrdBWI|$ZLkbho{ak>2z3Cc>PgE zgFmsv{-1UB1#(wt{}v3`!mVN(j5h@np;Wn1?2Mr^$`dLoBz`I?d7O5euaO501x|y% zHQpYk1KuWQId57n_)(F-imtx#p5KSS#mp(L8omh0lE$U1@K*txGyjL0&<%d04P-uZNnaA(A^ zl`5oV9%5Cb?_xo1Eh|Rx$93W9weNgV1cz7?UDWd|qN4z}eT5e*Y8f^b=)84(+~<&Y zprWPXQp_&`@Ct9#EN?G>{UZvaNR2Gm5o+h&^DRyZWAL2sp)Z`kY7G-~GxFHsv-f`y z4f%cHyqkjLT-ki=EC9y8JMJ}^+Q@=qPB2G{Ux1uiwlgCL&f?h~eES%1|ChGiZ`1Xd zrz)>wX+WjgWvJ6`hPFtrc;C z5ZL+o0kQJKUqG{ytMDjyud>|vgvqeZT^X&%*yoBEgtF40NV#Dk6EukhHPVyJZ+V5x zdY)!6YX2mHA`DJf@?GGINBl`26C@7__NlryGKP{GTk?~vA}QaJ<*Lvj1uNr@1T(G@2fn{+heg*Q@>WX zbFw0T2x?M8D93#O@p~d}(|Y*Y3-=I#vwrJ6DA<)flDAsDP z@(Fi>5w_IU*}Skr-?5xkHTJ<~($}MTE+6pcKWuD7{NZ6rbFXIGKc)^J`dhQcJ5>*e zFgH?ifckc5#@#ZD_1ASEckUKj9Z)r4)18t4K*IMA6^cWaDG2^h%38 z?|!Uro#L*{-wRtu&b8v`9)5o3KoSUYuuu4o5~Xy>YH2N7l)W;9E{I`zoV6fd_CWOE 
zz6-NNAy@kv2rv&OisgLMdfm-9RWFKfynCUIhGf}LAQn5|>M>|oy2m3;|L>Zp?JOY- zZZ%OwWSCT+yGfuYK2Lvx!ayu{IE3|CL#mzV*C#qqy?m>+B*_w^B70J*9P!rf=kgQv zCPPZ)o9RBktjZA!|El11$DrcmH+1lkR4;NJTm9crR~u_1IC}#2UjW$cFLid?Fr!=R zn67z4hR8e7u=Kcjh?ts7vXM=qgj&|+eJd20F?4?i$f`>5!lBK72oCA|4C`FdbYsi~3z z376L}x4=%WI?*Y<<~QKn(%~{kumx^{H4g>jc(2`2Cru;Iq*XHltEzdt+(xX(fHW-4 zX_W8zcny2?R`w-2z^o*6{(y{&Es@aWBl9yk|I*WKBv^;*w?j?5e{6+4J$<&wSn-1~ zti8OHnduvSgg-itWlW*U-Q@$%ps$@lT47Gx=d53oUj|4>PN2I%i&eg^HGV0aEa53C zF}k77R*3OUPM-DA?kAap2|68;znuCty1l_tf9-tf3V%(x(@e;VSXFSO*cJ-q@C%FO zZ0{6#L<;TR)K4j+VphHJ&;KdM;H*x=B@-D<@|zb6_&rM4jw$V;q2x&+{6F9BvJc(&5953eViaTW7Prv~Pvv`aahTAb$cnxb_BB zsR%TzXA=zXR4@%Zjl3mrqg)HL&6z`4f67+K)Vz0lfkY%gmbQON{{+sx9xOZUZU*-B zQug#!?eC<}C^H&kRRN>R42OC7XOpHz9q&FGo1mWZyq$*%J#Xegq#9~5-vS$+fBA)i@bi*y^F(D zGuQ)X18r=S?sQY3U1~vkwe;0`z4T>09Kh$I99NA%xp{!MY9`@Kqx-W8v(C70z|>=Z zA>DlAYZ0Bb)Nd|yBOh8P#Wg46#_NEo7a2AriWbVRl^{0i?6iG1N%{su=#UBiOpLG0 z&k*l5NgG*u*t)kLAwb(naV}U@;I7vJ>h)_K?A22SN5U^g+25IyJ~moyWqia8h2q^L zp87{go{iLLzwt_&UmZwvZ>aBz)SKriNEMG1^Vyu?*ZfuDt(J=Qt{ys|AmFE>joq_1 zkPQMBLxsY!-c1i`jbUD^j%nAjb0PbmMwYbkG-wR1s!vG3AT_BCw>o z7w)Q^ku8WH1vQd?M%8!5rVL>KNETSV6EpFP*imW{hkabr-+4lc$ny$PniBJi!0Ob} z8~+zzpm##66SqWkgXX{31!*q4=$$NAFp{2+TBbQNj-jaeR)L*BxgA7vdU|*6~P7$ zyHcyao|z}WdO=k4ftFU^zico0!NkTm&)^} zmP+>vLFHPdR`yQpv|U~^|8);L*-vq~`S>4ON}&D&8^bQFz5SXd#nk~KMG#kY&ckSZ zuHEo~CgHPjFOtrDuk0_Om4X`V-i|MYh5pKGr@a#;0LP z0@0OTBYodq3;z7T?a_6=%JNpU>GqV+fWnI7L4hein*lA1np0)=4+OEni>XL9u!fud zUaq)Z)o+6w^ zrzA5ySo+q1>V-vn8*i*o1Fe-<*5rES`B$Ka0SIgdh{&HiZ);9ugL2we%;S~!u31{6 zOnZ;nnDEW|qGXj;l%m+Q+7@yZ(oD4?fHD@R;Wf_Ot;AG4 zD;=l3H-%6yJtWx?#F>m+*0-S2FetmbIlls4Z!oI41kEX zlW-p8eBtKodxJT#`$pHvLEr91UAxRd|1zl(r1az62M~og0}d zLCzduB^>g))gO6w=Ng9;WsGhm99=d#C+>0d2tr+@04JF}#31DAj4sVAEa%f9FFF!T zoWOI!OyMWD7%~#??jX|JHOmUb?>PtjcS5R~dx?`ArYx6kXX~+_{tXx@YTa)JB!F^z zGxnsbt8|$oCVgjgHWciq`ycQ=7~j~l*O#Ffh;Q2~RZSU+F)7B3WsLm?LCeO2n_+rDIe%*Gv3)MD zgon58P1q3S#c&13(0ln>u(Fo**#v_;fI&57vdrwS z4lX^|#$O737;?mbN(5hy&$UuYJh^$!2NkVxP!Z=`Q9ys?uR>4mP^6%bDQBU2?sg*g 
zr@~>e^Ju+6OsgKs>Nz)zCErI5I!ylJ+j;h(RNkm#f~b0aa)4Y44Z{qR_LrX@?jPWb ze^5cikvZFZcdyBBnZg9at z-?n<-HQza5NXele^~E$ap3)2ZPvymzRaTk`ERQ(EtTZy{KhDT5cR2G2F56fxzgsw? zIeEFm)WnJSo^(XcF54FR0kJ%Vjv`8D%Lji!zQZ`2qTsni1rHOpa(gP3sk}wD{pI_Q zA8mv8-gm+)W&?3zxdm(=&avpTj2^Q$c z=;z~xu>ax$xRoMnncp;6dry$f3_8C5xn4$AEI|HzzOgkJZ=P4qv6i}DkLJjM_Z>}F#HnS4=jhU=C%IXoxQ3FT~wiZX0idR~u2+3tlYS}xEAIq8NGR5M# zK4NkK1PbOl8CCXUngkNTrxdTGa zpl>A(yE#9!|6Y#N=#qr>YwqYfH=@!irig59{uYPTuZ^Y+gQEl3V#BmROev=|`gRoP zp{MqbkzwK?5=NV}rq36b+u?cNc8dj7Vkzk@0QsV=q;0JbDs8;yM+?sV%dMbiks|9n z?SCVMa5WZWF8g|@zb&<>UpW88Zr}V_D1-6@NGAcH7NbdOcl&GSJ7glB@x=Ht!3&)w z!;f{QqH_L8-DnIlJ{+;lr7V1L5g{U!`7W@6zv%M`L9%}dQ|a`hn1~bA>P<=|I(W;peZ50QWxPWUx3pTq(`eipRX; zHZa9t^MxWcirdjeU1s%jBQZ;KM0xm0z233nIBQuoC>GPKJ)|u zkJUAH6Byc=!NUog(FF0vySzJ-s_MOP#J&|}PJPab(`3mTIp4iL_9TUioim>N3b)sn zI~#9t92XXd23L9BYFT?4~@3rH_OWv z{Xpd$E)FBCUeVV;-n#61wo)M;1R&j`g5p(2(Z=$@>0eTDt_@(qoMtqe0O3I2IqsV# z3;z-7Tj6Pb{Cbj+n30`&h38@9QLa;hRj-{!{%bd+QldaJKPr8^%D$a{5MM!ly$UC} zi+)AqA@unnkh>wK`_SzER!IY;si2^~;Sv>lSbQdrV8ZDXQm~uqTWtL8HDV8}Hu65P!b_I}_+eD$X@C2mrwU z4ikNJe>zo)`%P+?)yb%G%OpWL8!rv}Mu`-ayxS9az->RGq{2(i+ZKCOV}7Hp@=O_i zvA)Lfm5PTn*ZVeF!%So8WyKgMABQqzK0{;H6MX_xS%nDw`PB&sUxdEVxGB@{^!pA` zJ3lMDM?O7XS6&^PgL*-gXkt1`m-Z#ghu`kw&^*w;T>hFL$+JH+@HhV$7cZ=(q||6r z*{WpTVTLd&I?7vCdnBclz+~!sD`I$jZedko1Qc7)>2;q``U4p@(+i>Y?*5T^}}Yhc?J-26=lD0Msh_9EV`jX60Yh6xHEk}tIe1l!6>52OZJ z45b`6lSt$OP6h(>ui-B>K$?0Si4tU6e*_@dsJ~ox#pgUP1!`IdBc^ISC-srJa#wz;3J)|db(uN%0O#+}Us}*&rsIVA&cq1{T8rY# z&Q9~a&Te74v6L@WRCV><&?(hMPw~z?aQi#qxH}38Yah3#!vq((bz^C8AP0Lp^q+SU ziOW7**|UEQmowZJPP*Mut@*-2K0HQwL_|y(^qMiu^zWbul}LW-{j3G7_Jd|qvB#gL zg>B!^!{AuJa}b<I1FOMBf5uUE+6d(6yk%5F@YhtVqArgN|Jcn}CSrB7J-Qc~^CxT+K z?TFe0%6+3tKY`I_fbbz9_AaIIikn%O3~e@9+(|CBtF-O-`y<2JPx0x?Ya?NAW#Lb4 z!ZtI_xKZgE8xr~K?vv;4<|pLz=lwTO_>ER9BtL!ei-E8z4}a%Kx;v)P*`A1aADQyO ziWN2)mw{g~^%5bkJp^#6)^FKltc>+yG}vEkT7V#T216pc=&9U#D^h^6KKN$k0Dc=) zF(YU-1IIn9eBrSMSZ(_uRpFw5mTH>vA>uVXf>7G>o2;AsL(uD^_V9-R2n7$karD?7 
zbP9Y`W{veB#yfw4D=oN8I@C%v;prl@`U#n4x#t~xx+H>Vcw3Xhe+1m+zflR1_8bg1 zJdl*}a4-V_(NwJ)GpeWX{>p~NN?#&5C>?gidNi3KYEO4iu1ZQ8gymw;-QaG(Bc(BJ zDqH>$%s+m#D_g$+h$o5u2_3nT3IBrzhOYLdBYA9k3fS9+)9Lm*2@O#({DA`1mNy6HmqcdVpD;283hXLIv!|C!T2c<$nLN^FmR%lRhS`?G5A_4F&76P+;-du} z(1jsYphNb_W&b68*T{he#YpHQzLSdgeH_D>bp^d`Kx&T(qMc#(Qlj@%ym@%vyuApC zs!5PWMT|HW<$aX^f1Z0_eYF8Q2U@`IXp#WsGXXHEQAK99_&;&rvSyzRFaKToGT;8R z&pzh6OmqL&Wg}rMt6_Vln1EH_WLGJyTk-R-EVXj^ul4lkW`)zg>G{UigpM7}hT z8q?f+&iHAPWSvFugNBRJzQ~1xpUlXxRYD@r_b2chO3BG;=^0Oyc*O{+$eZK&otz87 z0G=2o8g&A)9%ZrUn`bgZKsE`#Twa4uB&A6aW;YfD;Xw_8<3;R2(f_*L@_z&Z>(GiH zK@S5~t1G2ag!2gk*v^LcddA}$8me&I(kY*%;<&{(e)7ZJ%EY^lin6%&xOCMQu-L?L ztF}7hE`*G3N`8P7VtG!>OrGcOR!y2^>u4XP4FMah%e<$PaZ(^05!BSSqAtazjmB53 z&qlr>OlA$rx0x~V-G1JfP-pRD8td0cmAqo*1~OjSmtp`F*&cEL>=_n%Wuekm4}HhF zVuo|4*RmDrBPVM4;w~oi{UKv&1fDP&AehCy$Dzy+he`^vTxN^;rN%b?5pMnu+?e`Fs^M8 zt|kjQB3O3CKDl*|lJ$nr`*eL{_F2puwG+FVA`42g{z}hp<6oe~_GhNntYs5{7z%w8!y^w3Exu3?^3Fr#& zY8MaBsiJZ_JR{j>*m>e$bg-Eu>K|c$LCx?jMhb|d<71pc!3&l&RPZHiarZa;FGwEk zRfn`zE^Lp{z7g_y+Z2?|s4h-o0{;K-(Nq-b(kl~n&(Oo@_BJXmVderoW66!lmshe8NA$9UxPqvvR)LPCfxjbfk; zexk3E%ak0vP;co6pX_I^<&CYxhA+b3X8JE~kKqA8%%z5Bq@Kkj0Q|SD4p%}yic(an&N3s2!h6+iQA8?6l;T|JS%YU<^e&ucl$@^#x*)N zz;aLYyYI>PC4nFRIm+{BCs7kc0j7*1ip6r!*H&)dUZWW=#jKPPh|HsbOwe@XJe_GL zOp_Cg_``jT6qIK=zJsgBAC8vQwToY40ke-dg5Zbm3$%+n?qw_q!eug43@$1kwQB?; z=`*HuS>#NbCW!8y^Me(xnKtHPQRzBo2hi3S^@|Znsnv&@&CZGj^EScn1u`fy$f=K` zUD{H@)!Yi~mlB+Q6!LOZ4twKA*QC~T?2D@wPD)^~VCNy3g;eGemnP2WNc1P*?$OVh za?nyaG`|6*W?W51@UG?yBJ1t0REt52 zT7rzOTl4W;pxb-mcEU7ZhRo|tRfCJ{`}WUHw#Iu;8=W&*ObU@V?890mg{%HIu1MBK zROl`<9jeEFaJA0q(5;~Oq18DnI zICakDNvIaJ_`xvnE$}T#f0X&ZIr#c+Sg4fVdf5IDz(=mfM| zLqJecMMf?li+TC+`YDq4aeH*Uq48OXCS*m6nvj|{Xr|6(x%)0+xo`GrVJeeYHat1h zWu0p=**cor6=`3jXE>^|lQ7N=_qg9?5u9c@-1a|wL!9<*(Uh>`10Bxj*E}{7-gSn) z12t~UMCySXqjQ>=-a(cr(OL!PUGs7AkDwDO~02ZII`(0n8|5n579|8YEE) zgbd|?CIZM6a_zVdKdu0I&7Gx^oIWQH%qgEm91oY@n{w*Vs#XFH5Y6>WKn|(3U;FehY z3#6?;?Qt^78Nh>ND!CNcN!Xd8Cj*|)+pcXE&yh{3nR>4WO)2)>KiBzcULyiUY@yG0 
z=RJI5I1-k6MoHL>YETUXIv6Zxph5VJ+gErL{>h0yDX8w3+xW~cm{lQ#?SR1j2lDs} z9`%d3bbjEUHOF?t1OVz*<{kHUqd*`Cd`f^9**CFU;b6eL=4rY74Kc^0)hBE4zM-#$ zoQVS<+wwp15)+Um0!Aao8`=kH!@o~}<~IfZC2t_Wr{DqXZ?uxZRg5c#Kp;`1P{eBT zN4IdJ+i7^dZWE2Z_c2OJc2)~#DJ`-CVsncVZZgO^Tl;)?KiVp+F!GwI<^l(#_TfLz z#z*wiCDAE$oA12Ie&+OdJtA1Yi(<7`ZWQyC`^in&`W70nBeP~hj1?K-oxbTA z2{DJW?hqp5_qL%zvECRj9Ig~Gi^3f9aBF0I2=Y%!fN`>iCiw-OY<_{PE+*N91duJzfj2oRX6BYyEGn+_cXv z7FTKHxYSvd&0lO_YGra$(22|7tc$Gq09_mmgmjjGN^=_I;=Ju&1F#Xmgohd))Mco(`Zp-aX=4i#s4=X6UOgx*2XbQ|5_vxU zS_kwV9!8SKY3=r^+vn*692Q9zRhT%eC0L%59K8yJciqcr8h$_mUuvtGA}mv4XrlW3O`z~7CJSi#+6t@&`5cFT zdDlnmW{PRb$;$jr_PM)>mB@_(o>L5^$!`j-^5-uOH+Cz2iCWBL_g0a`S%lsjdyVF1z)LLBuZ)Ln^L1-!ncn4f zUU|~Vhq9z;qj!v`Z}qP@HXF{O?N3JA2+rVG&8eYdld=2a6_QiCCmM2L$0>s^|C3cy z6x49-FCHwCts8?1&9NK^m5CX*+9*~=?pgU86Fy~2jXBNQ*0nFps9{+;uI;F_ABI$?=dhscHchs5zrgO1a--D6 z2`?{tAeCmScVyPrNQ#&`+{S?xnV$mIJx{c@7@=RpxT@1B1|Zy=Li_T6gAg^lq3XYw zw~<@ukQlyURNB4-E`!}-aafGSJZ7KG{I<~NdGFXG;T+(MO3oR_8K3kq5lFpKC7~MFX%bJ9X2l zs6jS*Q1Z|2k2~f;s9#GGIO13MBvd;SWs27Q+QX-~Yznp$1O9k?L&fNWDR{(&y>= zg95nQ!4f*KD?s zNpJR6=>wNK=iz#jr^z0`dXMDU(qkvUrXFYxBVo;Q{i_LU_|mDHOcD;PEf+yAw@w0> zYP!XlN_XO<%APxtZWFC(f&$Kq-QZq2SFB5|cb^6YcqP7DUP}hf3fEtf#7KdkiM+te|sb>o=~AdLd}pw zu>LrHjGjUP1_mn*15yF|>wwyAGk?zTLi{%?dPxUvSoZsBS^v>PRV6OA@?df~B zFvU+`KT>^=3=ATk+m5{#R(1=*l0ea|Gv~plzd-3Zz*$%o!vM&8@V8uQeutZfbT=MP zINx%#SBAB(BWB8JRg$g)x0DRYBAk8#(+Pv3DQaOAR4R!m_BNGMyTz?lzczP%~z(E`i!!p~S15ml)O}@Xm)ei9GDL*+{k9WbP)qkD2%4+7O?1wMG zW-Ka%2i52GNiEIS_>Do;tU~}i5v9rnLJ#K#w%x);b{<5!_=o)#oA#N*u;&!{^AH#Y z2LNI(66M@TtAq~Ie0p$yaj{(vX(TPRIXORQ;@4LbPQ-_3qDZ%c zEUe;v?pS8w8#7#<-0;;`_@-UKhfQi7GvA?CszCD*JeC0VE!|ODDkt_X|A>l?iQ^kr z5W>-avf2UmT8oKZ^9=8&g^_;I$a z?f%J3U`enxQ7nW@TPjgv`$5g^_F9n3sm}errAfLtti71PK+Wn5N}{_mluGaA;VQwo z;tzO%yzZrRGY{|-AJC$|pp=mFi~9I`ArgPVI{pTlGY&;{f+5VFj z{p);XJ)XTsSIc^jdY?m?;FnDHsvKC$6S9pB(Krb|8+^o zd2>Ul>vaX~6ay~jK$9W|lo!C4i~xY%E%~nGz;hH;JuJo)H2I)-;rS4#+XApmQJsJ* z&h~7L$sEbG+nOf?(ZLl@Fwe03{i=Egm?YWeYe+`i8=4WG{5_M0kv;;b{2G1 
zxf0$ZHaZ>bkH4clU5|G*$NkPh&1qa01fuqg`84hfx>WA=j;b`9)RISKuZ=AWb}E^| z_pj)gWs(!zkE^#3h&XJ>{ROZ#VQYbN=hkDF7_cF}-#@^j?$B2O{<6=_ z#|X#+OedLJ=N?6ekmyCvE!V@ons zZc{pyP~^s&NIH1yg(AVA(fRurUQd9sG!y8vNc~*zUI<-5u43-QD~Bho;fcSvC%8cW z(ot2hwwHI2sGf7T=^6MCxvH1u316~iM6Nr2lN4{L;21S}OF8^bz#|?TO5iK_@+#Oi z0+(Rrm0YWjfUWx)_H*cq$!@0k?++mxSg$mULqbYr?n{ep2`Ne?-a{xR&&gkX$*=5? zehGQmHaEvP4y%eVwXirFj9De;GT!tO6QSPlDqs%I%seFxpXKGt{d20^oGF!lIY_78 zT2xbLLr}UgwUU}QKH1~Qbkon4=4@?n z?w@6b#abjL2wj& zN^7D89B4rKzYf$3RO!SvlhL;D&eHKc>>sXt#D=;zSz7$dPsZt4Ig33h;GaiOMz)Sn zxAIk>K4b-7^6FK!mMIGq0^Q+K896fbMn{;~0eubyWGyzq;CUuE1)k8tl>)rL!P|i5 zFV|KIpAh1kXCC;L%EgBH4KIZYJ_l!X8_f#k*s#e#6!stgeFX;i6AePEMGe6sFY2YP zCp#SQ*8l-rHd-xj~{mz)I| z#c*jEJ%(y*Ef7;N&3L=T329QvbdWZB9~!X>mAei2Y7CxYq|PF%Ck5yaYv(=O3&yf& z`(7P?DL*3XS9*)h$^x$HW#71%)y_KvOQ(h{vs5t5*9d(KgLZSJUl6C<{D#cVN0Ee! zErW(n7?+ms(=h*K3%ndp%%U#V`ZGQ){!M)&j#0BW$8k4{jY|xAr87!UL^N&70Cf=< zMw5KoEg3$^VBwb&4iE8fq0)x!Wv-Ku@t4u*nN&IjO;2!X$A3@N;unxY0E1C_?E2+c zz7}nmX%01W1g;`j9}pz`@}*$P&{6PL=19?wi|2yC=u3D+RL50M59|5cJrv%%C5`t( zDQ|v2D)q6Xmt|nglrkirCn$49=KS}$ntl*@~Khgy#_ z(KTrAK)n1h@#BL)(|~l~jzL-PKDq0KnR47*ou1}9Y&n?|`2X>pI8X${F4eaO zl_gPty>rdMH6;9g!UkWeuiCcQCkkb;8_dz1`&hqnva(F#Ebay2ZvJG2f))y)9UaoR z2m75F=%bPC!zhD;ZXBY*6cDo5Bx>H*;BG~>bA1e#)A;raBo2{ygNlZw)9J>GRRFAE@GH1JA`V(^1GTL~sPV^`-P~R9T!-rjSBa)M8 z=bF5pH57Wp;@e@Salk=$^1!RKCd;uhQRfxMj!5Hgi{pjT+<3WHm4-nNq~thLOoU!& z_@@7im*l^!1CyTfIz!7<*yGf&RN%wtqUUcr0VcWr;igzFW25b<2GmHNHJn%I58N5p zolDO3RAQUBp|-~aT7I+U2o|_i%yT_ExP0QaMrOMH5APRH_5+<(yI1M=q~x$kwRC~V ztzCyY&+)r+EJ;3iFVdc;dDuDtpEYOivA`_e{WYC&PY%Z{Q&GD*b1PC4#H+$wYio@s z&=GqU_^fyu6vhpW+LicQo{C(q2VbiW;3XeV=vKVXLN(4Q!ZetG$=eG`$vMl{N%DYU z0gJan>BJ1sT90Q#2XL$BX=1?_;A*ALSJr^%z{W#7XU6~=!jb3`6f=<6tBGzkaCk~U zO8NqtH6CrF5d)4fyXbp2p^ps#A%q{;gvqvI`KU*(%ekRIaA^;mali_&gA!je1dp8K z#--ihTcwK51=r;!TKT0Dyf!mOyeeg#%o7IdR_{@~3_Ezu#Umx>u3RU(3eHeHWr+P9 zH+UUS2^q2JL2Cc>L`U^2|27N5a1v$=kSXttx|N(NGg)UfPeds2PV;IX1U|7CNOT1 z<$|_7$#guLpam@DkaW`li7vapS>W@DP>;1_ZB!_WH5O)Q%b(^UQ>|-Z(A$J~+5d^t 
z&6*v1NPI-2*zIR49`Y}PWE`(4G;+U60d+u{-(VH%GPh+OLH!4-u!x=JwiK^mLOvhX zhcPm(t*vxNrc33flSb+(C6)HGn_>NdeG7OIP^BN=a0;ct04dP0eh`SdN4t3hwmOr| zujr4hSvBgPy4T_c6e9<*k@h)9y6U{Iq3=BMGGH3Y)VL zV!G~n_1vNLHA>lz(3#0aZ}=BXoR)0GNkZC6T&Hp#wHzh+%xrT61qUs;W+b>&QbCR0 z5*Bnw0xQ2jY)PJ0A(iJfv+1yZK8sEbZiAV3QM>O4YKL_M|klv}MkEBuh(^p?oQ6 zEyJHmq4G(>AIMVqt)8}+ZMVa@wFNnInZPu+KFs;-rT!2K68FzdXtGN?f8P_W5*o6M zguF?5ltJze@`{LeGB~CRdCec)!zcV|?CU8`~UuO=rsTQIa~ded*b~&r^WNL z5iNC5)s&fc0e%`OGqQ#KaBh7FXWnD*D{Rg=QE$Mnyx2VJi!JRKi)(C5zDHzNsB(p9 zl3HN6XGM4UmCiphpFj$xiC))0g&f`1FmprzXrqB)^|jk(959|kj8H+t5a)+?;F_*?r)ECg{!hVHZ9u@esG-K=1k1lj( zrBCk}c(zGi4=?qpGVVa6=_Fx4c<-~1f#COaHPMYFP< zfZ&V6kSMZX{1r2?gj=aYMm!cw;#MV4~jTWa1QSU8%CyZs$R!C@cIQ0G;1=- zMl%IGC#x6zkIBI6*!c7(jX8#yhhXGchAha+b>-pd+r#TzhLWIvf{BBJ1CiHPtIz4C zU#$&g?Z7XLt*M7xK9NHh)_da7WulhzzePz(I<)jqAg}p=cyW(f8}`^()S?laFs{aS zHXIRmw>yZ1MHMDXKk;C;^C#%Wy3!xNob0U0^wgHDH`apyN?Y@P-hG<%qfM>HpdJuB zU0skJYWd^G4|r}WGnuSh4o>o@#b7J*BN7*jARs1>1wAiCMIELo=rX(BA{LsKo;EiH zKW3?KDV9FSU_wqsJ;@SrbmaV5kamGeM8NCZw(zx!W-uMNT%R)Mnk@dfZ#!Lq!)K$} zS$9NU?tbC9GLZNHbUV7jvK1 z?|?Wb!#X-NBjUoP{QzWqYNhrZQC!9teiB5;=Nh!?#P19v?<}$+R;yK$P`l61>Pc2& z)X3ngTlMPgc?50H_MggJXFMqw&6DUY%>;03vfcV6PkV_VpevWGxZ{0ou5VpU^ggm< z{@HP2%(%hcx1_B={=cMPg!8B(u4vQ;T2Wowik43@r+Md?{)(@+xcbfI`Gb zDOo*BN=F^^9>(tJ>A}P~kUDrLLr6ix`Z&Eey0`tq0t-q!SSI2GeDd!_EA9=ZZDWHi zRAyjP>-pFyy{TN?a;7Za+yRLcOfn!gF|rz*p5AYmdshz;ghc&39H+hD+$|1b>h}c# z%xJL_jJ6hEAeZHlWTPjH#?9UVJ3>A!*Hf#Omm6}AD-P%dWZug>LOf++$_VarBw7S% zy^^8B&JI1cK&*Y9ezyNPyY~V6a2GvH2k4pGy0F-^Ps>9C$aR5##krL54@eWK zMnrLGx56##^+dsKU0KG_(|hcSIL8P|mzz92FMBHzke*3&-aEcU@nVL+MqBy`w3h~j z6c6hDJG^Bi5KtJlkMW@h9=s_i%F78Zl3bNLj)Fb-?lW9hS1$|ea8Xl4xAm7wZf#^l z&A7NaS45MZDb!d4y2FBrLd-o(biTU37sv17_fa|cW4_l+9!e~m5V=U=FHD-z{Z z%3ER9*A*Et^FZ*g1u0lLwIko$hJ#so_-Ytj+w_=+?H-i0!aXW`&r;V68NVMt_eu-c z^S>O4t5-_ShkX3x&e3=1j6w?5LG^TvY{ZZ1G^t@3wQgR}xL`Zkp8ZRLKPYR;4~z6{ zzEW_1mUDV~d;T-;=e<`g>8$(_(5FA+pS}L?-CY+4GWKBF?DTZU;QgLdXtD9MXWzQgp~E zyZGRb-dlTw=WxOA?f~;J)$^=-ymuKPS#K;5uh^J%%w@s-Hz(2Q(MqI-?9zF^JY6?N 
zOr?`TsvWZ5=i}DQ+AUb781d+Gc&itKuN&LcBfc`TN8r*feiMsGCCR;ca~0<;kq|Ol z{mN>Nej`w^xzeuN@o}T`& zmYO7QP*x#3TX}Wx9GayTxuu50V6r{nN~hphE~6&tU{18Mw;ybSbhIpFnCfp$mIeMg zZJoY}dS-y3nkph!3Nl6!;T({9Ym6;dSgV6Aj7(Kwh9Fy+P$26*2*5&z{9aZGWq-dEWrlUe2|2xz$lJ9G^Fo{RAtWhLet6~Ey96(Ra* zA9Lq#O^2ha4oW28zAXOi!c#AJ?^OKmeg%qDLuYZ;@k-B6>cXXU2G?Cfru<$wx4bB4 z(L-eXy<0IFNt-Z?RgZ$X&%sHBbXoyvTBOjna|tEd}mH(f*YOyPp2!@ts@#rxr-&N`c(cHG1Wxc z;3@fC#Do991>ha`AGl}xng^#X3o>C6cZH)=pn!5Wi&h3j zC^?t4x)Vr*q7$iq=7UKT59yjV*8}Ysaz=gjDNw$UI=OrYOK$y%vz#sTDWGEcN=SJj zt^2Sc$MyuVZ{tKn?hCrCwA?!@f2Y6Oc&3nUGsN&Ej>h+ldaQMsSye(Ecf8VU%ii;! zMf}~z9!iLLY~24-+FOQ26@7ifQlfNs36g?H>mc2o(v5V>2!e!wgftciA`FevDIg&d z3Ian2NQffJ(4e%$v&O&A{XWn8-XHGkdi~@zoSAdZKKtyw)-Tp#*1rcQBe?p3iN^v? zR-u@d^vK9tn`oDA)QMSYO@4JAABR)AWi|J%8?t!fdG>FIPg4VC4I$YXRQb{21Hl9E zL}DJG#Oe%n85W~s*vDvBRE*`U!<^S+QI6ixO<~*sq!~n}m)YglqaN9$yDer$MTN#B z917dEDGolr_9ZAbpqZPeZT!zO?u*VCGeswFrFHDWv3}vwXI@i`nFg6gCM;W=T&8)x z@QjH9bDKWG+EFR^KS^2t$8WoqLN8}_nmHG79_MD*@zg_$`hW4SWxLCN7yo{zrXbQ> zb@L$qacjE8BYOdm2KCGQrmom@!Nfr*5%x~LAUZ1qiaN~v=P*ZK+hyS*%SF@q9z^_c?Y;%D&oFRAB-ZV z8#f#vx`%J$Vo{A*ITnhT!#kuZAT0mkCT`hHRH6^(C(7=#4mOQ|to$4ra#e=&{du{^ zr`_=!&Nldb%im&Y?Fc?r0^)eY<>zFr zzfKjudQ!gd(;R17AEFxOu!bd@%_H+z;sYRzaTrDTEt=zK!ThYg z-!~KUiDrK(eeQ|~GR8Hh!%$?P2PZxN*caaMppYJt+RW^kE0^pp!OZmR9+jNZn~GLr zK8GwG7dJ%HSk8sLaCn(iJn4zN=i4Y$0oRE|8lRfT2ad{jjxhz?ee$-D`#vY|TbsnGl3FLm&mDJgui8me*L&ep$#H}X~AP?Du4&a4jJrKc+P zodrt{{y3~;N!j%F{9>pb5G$|YEt0{Ro$GjsY1 zo8vHIQS1onN&J4ksy}~@GbAsmPMdvCk(F29H0u{1EU&^Xy9cB64)1LdjV}bwQa-bk zp+!0bA(FjCGeg-=BH&6GuSV!pJ;x=cyWA_46_^Jh7I^!LelY!fYKCKSkBgNJuc5f3 znbZGYm+f=8?|I1j8W0dK=u<9 z_6&$<9nl>4&u_PWiCWwkxM&e&?l3h87;WS4n(J|jaI<`4ex42XL}(OxNsK;M<#*9M zTXr29rIc5f?=?v=5h$w9JdWGmO;m+z4!h_h+WLL#ZvNC=_o%tktEkylNw-MyH^)2= zR`Emhjr}T|qG$o)R6`{@){+|CQt*5>xUz6Rhy0XB_Ip82 z%+d#+YTP1hW_v^7vhZC3aZ`p=R5TG?2Z&=0!0rJuJiSR>Q)Z}2rM!D=hi8r>fnD{Y z=$ptH1vVESjBf6d>+EIgvbOcW#%huVvso|~MgW)7pL*O>-e0I*R_iKZ*3R)ey2)f1 zhv}_)oxR7u%7-u4*Xdw3_*e=z9xNB-7Zd 
zq>LcSvri0Gw3O8Lzo&fXtw(EU1}E?v7bu1D7a>(WTmtsTU#pSKkwjt&eZw9T_}b&Lm;|+;pF`#t+sl z0hTk*6c#Z)y_9U%=rw%i=7;lnH}`hsla`-+L->jZc$;77c(|*fp%F0+PDqh5IoYFJ zo0lo3zz1O0y1GR>^j$IM-p+ z4kI|-GtGiG^rBHv>UNrd!S| zn|RJHPOr829Ult${rH_FrQtd|OfcPMp1jtwdyavWb;k6JN8Ms>U|*)Cbxd*-?axE= zh%zi9pJ@|+XPN?|e_shbe4XZrV7z7YSAfk<=D8JlhB~nQ1PQhCopn{AN6VFuVrP`j z?ofWc3Q^OYWX6<<%DFg!!JB5mrir+*8^yRU|1~BM)xx<~cWQb9^ z^@q*?CTk);k5$TaRWX{SQ$#7e3&B6f)MjgXbp7Vn@Q!Bxy|xmM#c}+bN|JT!lk~*- z#q-7R*L@Rgr4@opQj&Eg{sTw+6~{b_?+0IgXhDepYUpnW9>d=>XaB) zvjhjM+@lW9?)kH`LL|Qg(JC#O@A|_$?1DBGdYA;Y;^(}lp}h(H*97MrtVwf|Wt@F? zH-J;qyj_4U`!1O(HUdW--G#!2MRB45o;GAD2H)|K=hTnS-u>26{;JEEMPU^*K@P!d zdvdxQ?r#l0=1^be__-!wEUy#je)-xp0@jutHT6`fTR%TW49ilh99uSo5l)rKdnj$| z-&At)U+jBeZjeAwcaC4nv`ndML4M}7A2CoWT*yfBHrMe5*01`k|MzwGD8!BN3a*oM~yZ<}X5vdmwD?KYc78mK&M2vE7Tc>bz^| z(zFEy88pvATKpKT4)4zQ4`szk{JbC;~A=(ZOAxlYc-ygSu-zDu9X@5PKePTgMTJyRcaOoheOGtUWL z4v0BpJz=9nK`o);sk_NSeShbYqdu_8ekuykAsZAApu+Y&=#wB}Sx#cjGL35v3f75^ z%4{iMf+gUk-;YzoLq;UB_?>v6cp8}sH~4C4IEJlo(8Vq;cgQx#Q-;6SwM?9&s5?KDLDdW{_%5y?nXpN!3%Usy;K7J%$7kCOaEI7P?0nAe)0b5)Gu&Eyb}s3x z1MT}HUu@Q&x+YIoiC}jN zlsUj>Ty;j)eY=fBu%_)i_+@L`D2b}R?m6;y)zix~H}>X+o%SnMoEiPB9E}Yf;__=8 zcq*4}stARdnqtKm-8ZQJ5q^SXmpGbWI6iRKheZQA_(#5!4$$!BYd?VfSp>`FMfpQ*E>UGi2|_%t+J`cqrZD*&n%>{lh=AOd;ZcSz36WucHVIOPm@Wa z9Wi7XN3&+;O-xBwn&>)|z33Sf85j3ZSX`=Ignnju)z1D+NE%oD%7*SbdyJX3dzKgE z3W^h!#0b`A(vE?w#v3Iiw_C=3t_X|R&oCq~yoEjdqvY&47~b_pI;5=Uh-$REMY|Tw zQrE|Xs<;aztO@TuN0YD*AUA^TJxP8brZiaEusnOB~#j6 zJv|;ATb$a-inDAuOBjxb2ejW|GPTVWgbTs1LbrZ#W7Gb0T`zR)-Mec;cr(Eao@XNx z$lf8YU3>fa^Z7>aS%o>Ae009$U6N&=LjqUqJ@wk{{Z9C&cDP1g+mn7n^>4I=$tYCU z)qQ&s6}tt*gN5WN2}lMMY=0TbFT%2mS(8+JjV*k4dSTFZpMJ%(d%0XrM1=D4v09$d z9}~<-Y5ndZ$Ql?|EnemWvR&shZMgk?XGO}aHjEhDlGwg+%vO5A!WQpRNLU}u_RvBO zaM!Wp?;xms$B8HT$J;6o5YgH(T>nUTd>EkCyKDPxCiLENF}+`c&+q`_`LR$ zlj41cJ%;5NDGiMbcIp)9;~PKpMwP5*!ZtJ+lX1uM^C2rw;%~)Ada^V*vrTu&v2~Uz zX5}2lXH@0`p~jJl?^jpQ*UR54QfjG0m-{oX4FSUQ9nwdU3&^w3*8Rel6o5!@4}O1x 
zSCNuQ*7B&$D~s-G_=&BEkeXgT`HJK7Q}x81?EA@UZHkbx`YnHy%V0p@rLf6F}Gqs)XnNC`_n(ZJ(|43auta$)OJLo_d>QiEx%XZDVg<@ zxZl)US=sE&W$We`Ooo#gl^82fBYyiBb~9Qz@`GR3JZ{KhsrN%2MUcBGMjh4@glMJ< zL}0Gj`!b6kvSR8yMsZuMWzeg&DZ{1jCK#kHP_c`bMB#YLKvYWu>hK4bAw9_?oy!Z& zqQeaUEv>?YuGi-0;-s!^rU-2#y_1lGE4jAhf>t$DkP_&9lB!vP0u1|qauUoar3k?} z<$y8!x)GICRRM3^%CgVXad47lnoJ+e%6@!atFJ5zK{0j@J$IfBBL2m1N6}hlRMOVU zs>f%8-a@bp6d14FSg|QD`tj5{I~j+lCrdzBBfT8^$r>xhbM{2Wfvy~V@}kbX<-}6Z z`YJVBes3#YHcyfq&9KP;(~T{KsHw>+x7fH_eDRDw896#kTP>WvH5L?H++JFQd$Y{H zI}w1^{!ED!-}%RMpEo-^8)L7!fDgKF%W0|jb&v#J@n6OttGJPba(vo;uEJOsc7}xo z(g&V@>En?5ZO3ZyI~2i@Zoy@KN^9aQp&vYQzl+qd>z>?JRTjtRfL3=`uU^enXtgk_ z@@=|jFjn@V#H2p2WMv>r3gk=|K3Z1Mcu>=IeBe)+q-^+>wFbE~T}<+(7Ci!BS9y?v z0uL!Hd7eht0zWGDPf=vlSy5KkaTGHpD$`~ccn&X0nMb8n7b`J%#Yid1mt-p3;(Pxxg|>EFF=T5j=)g6*2AvdE*z zUiacoo1$`SzsCQ?nj8@RB5EEy^QE} z{gzkSVmrz!d=X(U@_3mruFX3)5}X;t+^?|7gyH_B<4>g+v3NAVMPDEDHfFeI0as;m zK9XGL_CShtjFCrDCm%hhU%wPe5$Sp8|Bb zxRwp;bRO#4=$?oN-+S#oj7Y$2YBA%@l#{Y5`=g)jwt^}V6znoByW}Qmgi%E_j|OCt znY~MSlO`W+drowPN=eO`EUl9Tww1Km9_MGWhtqzE#L3DSeD;vB5*+cC8B8ws8Vfck z0%ED?;m;rart_!8$Dy}wiP~`UbPm6zgs^`~R6KNV7hI2D^z=5$#*`LQS8JvV-Svzr zi(Xh=^KYw_780|ygrzn6~{G@_(+{fR~6uCpq?rQ_rH za8u`5Qb=YWmug_Q1YV=YO=2D_j$t5wjjpjL{Yf<%b&DpN!VKzIyM^2gwYJtt-1O~nb_<+@u_Yb zK{+Ywfk3`?KZugHnJvq0vH6}_3~*;lo_zhHawGimEeAb`ERhZ6Sz4xUXC?d!94Vi9 zHB-W2vpzzY_tt&Kgy9i-9Sg?JX93<-GX@l@jktrr(4AN|duuzbmrDZHo?Bu0qmH!iH9(Y?EkA zBY{R6A0a?8zIv7cQJQ_Y{n;P6I>o+i8%nNnjb=@M%y9+u9ph^E4*m?GMcRAld?HYf zEPka{J)3O%-F`o3Nye+~#BgTS?Y}$hmJa90M;5l&!n;5fMf)VBp zv_^TUo;)8g^v_Y~r`L(OT6+s$rJ=a8pISW%!$zVkj3n7(x~)*77=dI?MHM%erF@rQ z{UTc4>4SH0`vHneE+@m2Qfblpyx3Ihc&#O2BBMmt&Dl@-IBE#@&Ac~%04r<^PpnCX zTlcK-Qe@a9$OU8R~1!NcZ_I5E11x@(^U87`mPRe7V2En_!VXBExWv42?v znK+TG#Z&~-_)0d-4xJKjXyDrPsn}6Z^aFZGvsLd^kM^k?ozi!^px$-(4u1aPm- zXhZH!T;+629KB>^iju~lVydPf%!GD|o8#Ou$sbubqpty(tF#D^)RyEm#!oMeA9qHZ zyoy3ON-TL5*~6jM185Oa1c*${+Wjtt9Up_^I7JO$iP7>aB3lwB<)81bNco_kqST$k zZAO3%$RJ5lNrsbf&KB|IGa}+|rQ7$2A`@c=0fE7c_)~b#wk1FzMUKk>sxfFlhZKN0 
z>FTbeOll^-r4T;B#`sj^W5Re)0?Ltf)~oaeTte#rT4T)^k^g&jNi5AzelC;DNia87 zHvG@75LR!$`g!e2$jvhiUhzW|9F;cQd3V_ZarlEVGehG^{i(cq-v7*?Vyy2}Sckd4NP^!sfK_jd_zN8)hmhe9)SPvd@MU zV|BDAs+!1_!;cgfY7P5+R$aqj)grlworD$dq+_4<5lohhi#gYc#gK-7R6t1zc&6X{ z0^^oqe$0;F!i3}eBIrnKh**P?@0VG2qdzn|TXA~8vQSf$&iTa|QxITeQmZ|hTvvPg zJl4mKL9)yy4pzdiAmS=BZzr&pu8(MKZQXS#te7p3N`(irPidZXSP zS4{ltA3I|Av0rPTWNm8`nGyv!a%tOop@;r;;W#ij+c~WC;#krnLXm$-0{-Z;G_Bt; zVJwDxyxmxS7DW2)n+}<>P-Lxn%=6D6yK#y9p*14#8x>ZiWs{rjqN#^|c4M*rpgAaJ z#gt{~<_&`|xmVZ`dzpdzMT!Pt+ptWgN%=0ti{BU45~TD;bai>PUD6j(WEraY*|azf zf?d*$OVI7}kr>UTqKfi%=ci_VpXS;^>083Da;yTMG#L64yvz9lfhQBsAYT0sr+`xA zeyznsaB|&`35*l~<5QVH;^laoUfl;){a~FZHjrK|u^Fcvld$gW8%|Th9M5)lNM}Is z5WVl_26-0Ov&X+mhx4MX_xVwuFNdI^kc9rU(SxO0=n`%80Fi8*S1UP3tM0iQ|T72Mylk z<~i?p%wsjMp;a9fA05uDcKN)QGSEjnR`W64o0{zuO0|AWsOh0qS`ddSC8M z>x>^1>i67tZaITqE$O}*{*4L_Jv3QXhftCWPr@YL=ybMcB6ypj` z$IfGw!uJLtM5Xja-OSh~d(s5*eZ}IB5r6FHL#FnXj{Hw-jRjBHK&V-PN$W%#)crAb zx~`;lWBn$ELYqw{IP(3b{Mx*n7ZYYW{2~hrOBHbKBGE+=$vwy7*^OnQCkSTBsb9h9igzXO_VtI>O4I!{yXbkU%df~+hM=U*PMElr2C>d*@xRlj}uCBp* z9fW1pXvxn0_b(maD|D$rE79}p?1fRB2IX4>u$IJ(x|~q%u*uZP%pTHrmv`m8zJZx8 zCbs&6d@@5;kqQuXi7yEQE~|yrDW*0+U{xlNUyNY)w93GfU;?%YQKJ+&sT=UFmJ;<< zy}5wCP%8(xj@jvtMIC+M#29z)>pawq0N1YlP@59Vk~%lvgkv3CV}9WypSSm3WDf#PBrRRe^D`56FZR*>f6F{S-38nQfn)@qxnr7ePOZ%i+qRJCXf3KPYYC9K z^(4OMpb@sAA!oa;2n;}1dA3^)I5CjxA`Er!l0}t&J3faKe@A{d9Ki~bY>Zw_xawlw z1V4!ra~VPfl)IO5X8ub~qleR$6kXue1K;4e9>T;-HGJWxaUK1|AZ4??s2f%Dl}jq7&ZeMy_Ux9A%^4Hv+?e0i+Q zfEF5mbS?k*4b?{2%I02^=7@X88J>m@h`n_Ll;jQs8*f@;+=W%AK|92w_blq@AY0`k z^;c=!@4VV7p9E-CSV1>#HycyF$tFgkmm{lW)@3zTVNS{{>7|CX@j>>qy1MF%DbCLj z^?I@C{CQ6^-|2S|X7c0`ZScfvY=}3Sq@{t)PJ?yIZSt?7{GqcHKE9gW&M(*1KU*%! 
zLT2#?{++&o^ELHLCfV(@ai4R#(gAC<2AV4-wjEL&+7~k}V$e*wt;iW!pZ=60S zgbcw|><*@~?ofTepZyzw>Q2vjuu@_wl+sgMJalDD?2HHXxv-52N)5i+yq93o>a%_+ z`4rI9J+OGRMc8A8>t$p;&4%nSt_vq&LpXvq%@!RA%YOb#MbTsm&P$!5Z>RkWFrhbJNDL(2!H@ga#)gd>X`CLUsxFT*VEh30PhIn*I2fV2x9+EdAsm&Wqf>o!o};|1lCSr!}?b z7A0>uVtEyTC^(D@(`)&*y0%tR#;#)7dvZNGHMcs?6CwkF8-lTHvr{fjm+R?Z+4)r4 zw?dD#+4PS&7@?Gg3D#uqZeuiFHb9YXp>aD5NAYvEsps<2Gnn>mbPRqxa>2P+Lc$Sa zW)%9rgMz41^aTyR;14fkm{WzOPiZtY(sq)ox+W^qNI!phkT>GLwKC!3yY%aZpS(nt zx7>}ZJbwcxMOf(!O1_JVS`uM0Ea?yzgQ3$x8X0eXELnbX7R7@!HavKCI$Hv>_YSs~ z{2!5rk2GC4-TF2qH2HJAR$|5F?=@s`u|7$`;g*k;yoi35b^DT|Xrd@aX6l*hbn#LM z88`KDz_d3uX#ns!V?pEPUBi7r@X`#X4W3?V;`1)NdaArw1Y3uMhVA8#L>MCwhR`Mx zm*&<2&6^>6>8#HjPlC!JNaK3}(d?p^(9sD!{qJSgmUerhx^X`y4CwR?}iOlXmRnJ zL~>by5f7jrPI7*Q%z42o=DGLbQ87Sn3Rqw6(A}#bv!#YkBwP(9fvyu&>6KQ!t{dlF z=Nh<8DrXZ@WIZ`XHqjQT&gpEQ)pN)L18GBvNOOcl-A41fe<*pvx`YqgUZ!*UD@daI^Vl>fLh$wycHmPkFni)wk zR_KpS30H2~vEze`G3FC-wac8ILzbk1+Mc#v@S0(mRAgTXC7waGqPe|PeSVfnEXRUW z7!jhugTLk?S~VWTl-&&;?S9`}M_yl8o_HmcR{{75@mIq`)IC6;K0AE}6xLKk*V)WI7_j1Hwc&y53!z)WdF9wX%9g?HgST55FD~EW!M<%i3 z@q9u2g^K{vo=LaKPLryn=DVv1a!d5fABzkt^pW-FMO3}k8h+~k=jF(nKpM-QEy%Fp z!l3Q-ns3SfMh9g;j|_)N3e&Au0K7jC#Q*KAM>oNw|D`^X;J4$%I=hPb;jiKnPOo_8 z;FMgY&xQQUs8!q;`9(lXVQ}fs9s$HffVd z_++}+r5K4)`b=_`bce2k&$xsGz(9xT-HqYW?-LtkORk)pxJpRvHyL?cxjW0l!?^)s zkz_jQ1{DoMnc|VB`~1)<0fCKJn|E2@9m2WOuE2Ud>=%vCmp5-8_39ZFelr+B+hh1X zSS~k!D_8MU6NgpPQcMb=TksgAR=eMZJU~>-DyFPo(2k@DbsJT(%<>p7R1;%X%{)_gAGY?3G>=}1YQW~M@#{nF6(zM7 zVbw)9z-sw&$ZG{QLW6I2Qo(s^_XF#IeL@g(9k_R8Km{4UTz7vj9aHC28o1yftki4B zwixiwlS*`Cbs2r&{9D&A&M0vDcJQr{9s)!nq9k0{uSjk0bMx_CPgsfQ26Bk>Dmk`v zft)}$_1bBtoKviS`NVhcOQ@fep6YbvPQA|BWxYv`hC6?fS$W1$eEqv-?fT012|D@p zyz|xMRYOIa4Y`N`WTHt<&;V5-xJb)wQC7K1vC7a47&rZkwJ2?LTdtcaBTgY$bV|Rp zL)x@H(~`1$-tJcA@(vV!S?#4cO{o<14NgNoSh0!o};+x^Q>~JVFVLz==S_5xJTv`NS2UsDUWQ5 zQzvKwH&n*n#V58S_naU1_F>YrvScI`cf9@j8}3{yuR7|a^%~Tzy4k}NM_!d}1w!Ga z`|Yo^Os|J;QPnP9rfSdME53jBT%3O2bD|$#-hImt|JwJ7nPEeYN=Np90w^DynKh+S z7x7q~S0?3g{AmUZD-tv!#OaQE 
z-*A7v25z)1_OhgBP^T5v>b;hV*}tcAwR1TS6QvOgfnSqP z%wtN~sy&Q&)TP&Mt&d@KHcQ(KM9Y3qS z*Y1)UXjkwF2?^7(?cI(npdpRoq*i0b^2(@iY&i9Cvh8(dcs(M0zTQmp%j%HvA5=_a z5;-UF@`mEUuMH8T@xGLMKcuN8=s$)EYPL2tZbm)%>91nrd^Ml)6=lbd9EBfJv0oiy zhESf$cE2vQ!hH0Z=xsJ=`W6S`NV=d&J{-*4sTD;UY5Bx{N>-$|^>n--A)u+8^@Cko zj=t+h++Mm8UIqDhtOzxwSluP!8DP@lv=7M`q9IMI4q6EWy!M&oZ9R)Xk6f`_TfK$8 zXVfojzEe`u;%wa07fejpT^N=n(ry#@oVon(6L|8mH`J;qSe@ zeCPKeY1`agefijtW!GPI+}y1yIbmERk;zfOFJFX?>1Qtv$o^qqmRzG4MIxn3+SH^* zP)j25+Apq$@^Fg8x0TFI_{>nDIVpA_J>letuRxCe#{A3}y(ypj>+rB3Lbvo`_war` zxpFHF_a_LfA#Kqrd}geah~j$k&?==A-f-<3`Y}HWBa?ZA%^&G8#RGM`p||%LW5b&p zLE;o%v_i;7Eb+rLUw8&2vB^QiPvbpE`PN7aD^at~E~m07Ffs{lQklAWC3b!v#kZov zPv)IIzL2GaD2v+dmaB#5Mv9yZ!x9>q4aTs(dhjq|A zkmcH8>}y2l+qX8#_wi9c`CUQY+?VR+(ZIUi5kgKzkD!3~ZK`O%OL~NBBQCS)oG~xOPSEtfRf|ofWi9;cc&?#npZ8@S_ zWYi$<&hSU=p>6-rrq7~61-})8LpiN$E?7SOlli1!$vQ^Yg!cR10BCr7t@$Sqo~~+I9M#4A_ngEJQwV1_1k9dLffUxCwZ!^TCuu-Mkzud zopus4RZRS)&3#yWHrqW$^6`^xnV506wLJ#J`0km;N~~L|v)YanlbTrf?Own`jp~?( z&n#UJ6C3OrJx>?`*NC6E-6-T@BAY){`>?I|ae$!t)4%fR^2EOcW(W-2RHw8Y58ET9 zknYYS&|ajamw}~$lXURUjz^R?Wuc`M{|{mnW^8qj*5>*y9CU5C?Y+}|U%vr=kjd%i z%7>+W2D13meu1A-nr%{mn~F-W+jK78BW#+Qu$)&k865mtRmw64Q3U0_GZJo0LpN#4UTR4%}b7LkL;zA#fBjSM48b= z1eIw~Dluo5)m-;It4C8tCeZ;~AUyxze)F%(FH158iyA-}HPuQ-!4mPQKfgP@l_wrr zZdTx_oqvx{klgd;K#M@$DmakNJdgj+884wCyaFKXl#hcYR6difwNcq_W(H#Vts6-- zt@DM2KI(xGC))2)G;1?s0Y4MAt{&vU)!$oMbFBk1z4{+BB8;Z-RPAqdSv{9N6?WFm z=4eo?G^$IjiVj8|PaV zw*Lw&rW>NM!Rddl{=ZCF2=@G}h-c#_SdYKIWaj7|jT=^cT!2Ce+s3m1(vc$&$OOp{ z1$Vaja5`Pg;8)CDaU(nE^-8;o{+tVPp*-s>I^TdnkJL4$7K}DY;SW^VS>)#L4%EBg z(LH_JbBPt9&5<>chMZ#{h0ev#aXeP6lDyDUl0^KJA(DhutK_2>Lmo_jiYGL?$uwvb z-~ERnBW_{wSHumJr<9%@8n&UElX)Vzop2}X|ET%dCh^$2qY+)})P_kW|aaU<%UPVyCv)4lPFtMZ9z9*aQJavOwpit zOTV~Gz{<3aeC(|(14Vt!XQP|P&3Cr}K!!PJT*)b+%N*J?dAU!5EE% zM@^MMppjy5!^$}70g^9mDb4m2oz)hbs}?j*WfIQStdd6Nbe&`R_M?1@o_=$WXs+zC zrEZ@rZ26Ldx_XNNLwPmF5Y7D8_}A36!|!C2~@FJlFM4X?eO8T>s%K>D#?z zu^P(Jad(M+QpCmk&fjL;^Cs`SBVlD`KO?#fZY|Zp)IOSPifS+0{A7Bt*@~{zW$soO 
zwk3)BV9+JLYLO2(3me!XRhGnGO{EJh0`xyBOC#LICHR3F`OpSZs{01{HVeS^*7i`L zf#!j+8-3nY{C@IF9M}RX4+x$p-HwruZBUjvtSe{v=)^2o*6~&*t4qNm6r>5AgH>;T zVqv4^3uu&g_+&v6CKP<~-AOq|C8jvd$2;R|(jcau=#F1vR8kYE5S>B)7gle72k)q8 zp606rYj=}&{aH{!U@ZQ7y5av1#|Ky%x|9cC7RS7MR#u`gpbs_pbK(U_S^HiE0QiNz zG2R^b#mq7dTL-keKb_CI?}*TLX=W4xM|mK2QlCHPl$L=lf-PyLO%&Q(hB}mLyKlX@19jNY?Ojz=9d|I(cW=o=#26ZSyXN9sPsT+ZBX%R*MZ5^ zaVSL7`y9i4_0~2+-xDFyp>KFy)(VGgk(S`>?V_FCTbC2pBwrqaW;Xg9;j$E6ik;4= zkfyz_KOKUd&%`7@&QDtr#X`&c%OC2>3Q_q@GU9709)^AScOUVq7;m#2?_4b!ut>eU zI%2c^l=d#0To6%X1hlFdO&M--^Z;_5XuRUo#OiACMu7h8N(H-Yu)|oPH?}1sbbG5$ zy$*yPW_g3EJHfdd*}5T$)P96lt zW@e?Jv+*&r_T1fFRw@JKJ0)^_dXpd82lJF#o=z+cP*!}p!+dhoh z|I#jRS?fmoOF&wN6N=ItXcOgdi|-%48WM3Z3kUwhqjGCJS~qw{4wRIm&fRf*%lYKY z9pyfzB)>^$cBx&T3O|d2eE?ccS%46av%;q3!r|SWWl}7m@F=o}NC@h9;r2;ZdF;tj zWN^LfK|vz8K?1tX=O>C@(;&?wI`$Djs#6_`O8f7!C`R_oL$!s?Z6Yd4f>~UP|I5~V z72L7zSmaknn34f`x^ud)Bu2vaGXV+@EcgfXeElYSE@vU-l4*tGXk!ajXE@BJ@a-## z%Z#D%SN5fsOPb4HHAcH>=t74Bz;*=|>$lfzZ9J#m7@Yx7u-Bo_%d0a4DuF2zvjQum zjIUsqf2jd4l1up*za>NE5=HwRN$`FX%yoRo%dt=MHz;@K_Ump~SL(P0#y%8vsD8EL zwL5=e^9~ku(5)Vpt!RzPldq{ud^cXcR)2YL{3UAtwE^DhOg*`q()C~aDxj`? 
zw#pB7!YU{Oyex7UCLbwil}icmyht2%^vUI&LZ2e_?SF{_l=#_9f0&vxs_Wl?&U{upoo za9#Bk|F!5J^XFVRWfF1~P1NN7L$lEXUnKp6tGoRtzoA);Cn=Uh=cJ5BBW|s1f zdT3H%Bg+oT78PRF$S80tL1>KRz)8r9g>Ic+zo&fZ%>q2VDwuk_EKb!-nFd=4Eg0#M z)5jKn4=b|;sp0${ey6!Vz5U6IFEo%n*wry}NLI(^-y78IkWi*7%F3FFW zJBle}bLvEvx3||bA~d<2KozjNJd8uSJkb*sn}TjURmaf*yGtp^DoQrVtx=F3 zgO%}T3|Z(BDdT#AjSM2*?bR0C(V(Dk4h59DBW0a#RQzWhj~FDMcX!7%HZ-JyQc~2m zT{O2>kGzFv2wPlzD9iOL$}$340*-kc=()dwn~p$m+lvUjoi3j)rbaf?x_5?Dt|S49 zGT=@Xjt6$A*SQ}wWyEK))|)BIH>Z(>Mo`Oxb?t?#PubkXEi#2-C;)>MoOjwl{!eLM zW>w1wp(|C|i}mC;zz@xjueIcbZ$FpyCj*M;Jq5yTMcB3?G0w`QYe1nbWYEIYa28*_ z*+iZmAwFBwVh~j{jIBcQi@TN*;gn2Q)SOo6-8u#-S#&stXbseosAMrIWC=(|$sK<^ zzJ`nILG7@4IN*U?`H?({QKu0wI|E>2$6PO7(5=N0UDIp`F+moVAZ#3u+gYr7KGmx2 z7VXLR;&W$)E%4U_`oXUWHAhjtC|t11(Hndm+uWMhIvHDP@6OZ%rsMQ^%ehPee$q+r=6|M-H9)02aP z@*-%j1vsx*+;tz{cARMoC7%um5guL>=zU=24Hm4Y{#pRApGa_gV5il}A17 zo%MS3N4FNpjrmU5T)%ff#6iJG7Y72+PF}m3Gen`Pn{116RP0j0?AU&~FIr=(t~xW7 zgzDf!67`L}mIzDjKot=HzsA+4GS%y~*X{AJ(Gd-=t6=govdD80NiT-7B1Mu`Ok zD(o;sb za^I(M?O3ZCH_8gVcaF@EwH6yq!WJMrYAS?W(M;2gxPj|Nfuq8sCVnw&P1V{CtJb#w zosc}$ye13+)9;tG%=^&Qk4I*7o#A6qBlUdkgBZv2mZU_oNg2Tf&nM`OyVu zqRf~8>fn^VFiM_bK{Pw8bbq+nqk!St|vZ=}vGBLSm(qhCutqC}U$!G#U6AG-|MeqGE7C-^Z9 zW~}!wadm@!-TSq5?Z=1gPkBeOmr|j^Mz%KhJU!OJ4sPrM&^lwmF!DOY6Vy};Q1yXvY8 zY&xbfKwABu%L0`H$-mQ>VKsK$1wUi{-9H$`&i?zaAXbd)VCb*(?_N&{ql){kT+CFq QI|KhTRCJXq6zwAa7fi^U1poj5 literal 0 HcmV?d00001 diff --git a/docs/new_schema_repr.js b/docs/new_schema_repr.js deleted file mode 100644 index 8aaa50e..0000000 --- a/docs/new_schema_repr.js +++ /dev/null @@ -1,252 +0,0 @@ -"#schema" [ - { - "t": 3 // type - "n": 880 // number - "p": 0.2 // probability relative to parent - "u": true // unique flag - "d": { ... } // type-specific data - }, - ... -] - -// ### d for each type - -// 1 float -"d" : { - "min": 0, // some stats - "max": 1434933322, // .. - "avg": 1002484, // .. - "med": 998433, // .. - "v": [ ... 
] // list of values in order of traversal -} - -// 2 string -"d" : { - "min": "a", - "max": "z", - "v": [ ... ], // unique set of values, order by counts - "c": [ ... ] // counts of values -} - -// 3 subdocument -"d" : { - // sub fields are handled in the main structure of the schema doc -} - -// 4 array -"d" : { - "#schema": [ - ... // array introspection, distribution of elements in array like regular #schema - ] -} - -// 5 binary -"d" : { - "sub": 4, // subtype -} - -// 6 undefined -"d" : { -} - -// 7 ObjectId -"d" : { - "min": {"$oid": "553f06eb1fc10e8d93515abb"}, - "max": {"$oid": "553f06fbbeefcf581c232257"}, - "weekdays": [1, 19, 23, 4, 6, 43, 1], - "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], - "bins": { // adaptive binning - "size": 86400, // number of seconds per bucket - "values": [14, 4, 6, 23, ...] // values per bin - } -} - -// 8 boolean -"d" : { - "true": 48, // counts - "false": 13, // .. -} - -// 9 datetime -"d" : { - "min": {"$date": 1434933322}, - "max": {"$date": 1434939935}, - "weekdays": [1, 19, 23, 4, 6, 43, 1], - "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], - "bins": { // adaptive binning - "size": 30758400, // number of seconds per bucket - "values": [14, 4, 6, 23] // values per bin - } -} - -// 10 null -"d" : { -} - -// 11 regex -"d" : { -} - -// 12 dbpointer -"d" : { -} - -// 13 javascript code -"d" : { -} - -// 15 javascript code with scope -"d" : { -} - -// 16 int-32 -"d" : { - "min": 3, - "max": 883, - "med": 145, - "avg": 168, - "v": [ ... ], // unique set of values, order by values - "c": [ ... ] // counts of values -} - -// 17 timestamp -"d" : { - -} - -// 18 int-64 -"d" : { - "min": 3, - "max": 883, - "med": 145, - "avg": 168, - "v": [ ... ], // unique set of values, order by values - "c": [ ... 
] // counts of values -} - -// 127 minkey -"d" : { -} - -// 255 maxkey -"d" : { -} - - -// --------------------------------------------- - - -// Example: parsing these 3 documents ... -{ bla : 4 } -{ foo : "hello world" } -{ foo : { bar: 1, baz: [1, 2, 3] } } - - -// ... produces this schema -{ - "#root": { - "n": 3, // total count - "v": "0.7.0", // schema representation version - }, - "bla": { - "#schema": [ - { - "t": 16, - "n": 1, - "p": 0.33333333, - "u": true, - "d": { - "min": 4, - "max": 4, - "med": 4, - "avg": 4, - "v": [4], - "c": [1] - } - }, - { - "t": 6, - "n": 2, - "p": 0.6666666667, - "u": false, - "d": {} - } - ] - }, - "foo": { - "#schema": [ - { - "t": 3, // type "sub-document" - "n": 1, - "p": 0.33333333, - "u": true, - "d": {} - }, - { - "t": 6, // type "undefined" - "n": 1, - "p": 0.33333333, - "u": true, - "d": {} - }, - { - "t": 2, // type string - "n": 1, - "p": 0.33333333, - "u": true, - "d": - "min": "hello world", - "max": "hello world", - "v": ["hello world"], // unique set of values, order by counts - "c": [1] // counts of values - } - } - ], - "bar": { // note, this is inside the "foo" document - "#schema": [ - { - "t": 16, // type "int-32" - "n": 1, - "p": 1.0, // this is relative to its parent "foo" being a subdocument - "u": true, - "d": { - "min": 1, - "max": 1, - "med": 1, - "avg": 1, - "v": [1], - "c": [1] - } - } - ] - }, - "baz": { - "#schema": [ - { - "t": 4, - "n": 1, - "p": 1.0, - "u": true, - "d": { - "#schema": [ - { - "t": 16, // type "int-32" - "n": 3, - "p": 3.0, // here p is equivalent to the average number of array elements - "u": true, // this indicates that it could be a set, rather than an array - "d": { - "min": 1, - "max": 3, - "med": 2, - "avg": 2, - "v": [1, 2, 3], - "c": [1, 1, 1] - } - } - ] - } - } - ] - } - } -} diff --git a/docs/schema_spec.md b/docs/schema_spec.md deleted file mode 100644 index 2df4e9c..0000000 --- a/docs/schema_spec.md +++ /dev/null @@ -1,387 +0,0 @@ -### Scout Specification -# Schema 
Representation - -Author: Matt Kangas, Thomas Rueckstiess
-Last Revised: 2015-04-29
-Status: Draft
- -## Specification - - -### 0. Definitions - -Whe talk about _documents_ when we mean the data stored in MongoDB (a collection has many documents), but we talk about an _object_, when we mean the JSON representation of a document. For both documents and objects, we will adopt the JSON taxonomy ([json.org]()), where the document/object consists of _members_ and each member is a _name_/_value_ pair. - -> ##### Example - -> An object with 2 members. The name of the first member is `foo` and the name of the second member is `bar`. Both member values are 1. - -> {"foo": 1, "bar": 1} - - - -### 1. Escape Character - -We shall define `#` (ASCII 0x23) as an _escape character_ to distinguish meta data members from members originating from sampled data. - -Rationale: - -- expressible in one byte of UTF-8 (ASCII) -- Non-numeric (not in `0`..`9`, ASCII range 0x30-0x39), because this conflicts with javascript objects/arrays) -- Not `$` (ASCII character 0x24), because it is not a valid prefix for member names in MongoDB - -We shall then encode member names as follows: - -- Member name begins with no escape character: -literal member name -- Member name begins with single escape character: -encoded metadata member -- Member name begins with double escape character: -literal member name which begins with single escape character - - -### 2. General Structure - -We define a _sample set_ as a number of MongoDB documents from a single collection. The documents may have been selected in random fashion, but this definition does not impose any restrictions on the method of acquiring the documents. The documents comprising the sample set are called _sample documents_. - -We define the _shape_ of a sample set as aggregated characteristics of all members of the documents in the sample set. These characteristics are further described below. - -We define a _schema_ as a JSON representation of the _shape_ of a sample set. - -The schema must be strict, valid [JSON](http://www.json.org/). 
MongoDB-specific types must be converted into strict JSON as per [MongoDB's extended JSON](http://docs.mongodb.org/manual/reference/mongodb-extended-json/) definition, "strict" variant. - -The schema follows the combined structure of all documents in the sample set. This means, that for every member in any sample document, a member with the same name exists in the schema at the same nesting depth. This rule applies to members at all nesting depths. The schema can thus be seen as a superposition of all sample documents. - -Within the schema, the value of any such member is an object. This is explicitly also true for leaf members in a sample document, i.e. values that are neither arrays (BSON type 4) nor nested documents (BSON type 3). Every such object contains an encoded meta-data member with the name `#schema` (note the escape character), in addition to potential nested children. This meta-data member with the name `#schema` is called a _tag_, and its value is an array that contains one element for each [BSON type](http://bsonspec.org/spec.html) encountered in the sample set for this particular member. - - -> ##### Example - -> Sample set: - -> {a: "foo"} -> {a: {b: 10, c: true}} -> {c: null} - -> Schema (with `...` placeholders for the tag arrays) - -> { -> "a": { -> "#schema": [...], // tag for a -> "b": { -> "#schema": [...], // tag for a.b -> }, -> "counts": { -> "#schema": [...], // tag for a.c -> } -> }, -> "counts": { -> "#schema": [...], // tag c -> } -> } - -### 3. Tags - -While the schema object itself describes the overall structure of the sample set, the aggregated characteristics of each member are contained within its tag. - -The tag array contains one element for each distinct type encountered in the sample set for the given field. The order of this array is not defined and considered an implementation detail. If a field is missing in a sample document, it is treated as type _undefined_, and we use the (deprecated) BSON type 6 to represent it. 
- -Each element in the array is an object with the following members: - -- `type`: integer representing the (decimal) BSON type, unique within each schema tag -- `number`: integer representing the number of documents encountered in the sample set that contain this field -- `prob`: float representing the (relative) probability of this field being present given its parent field is present -- `unique`: boolean representing whether or not the values of this field are unique under the given type -- `data`: object containing type-specific additional data - - -> ##### Example - -> Field with its tag (`...` is placeholder for type-specific data field) - -> "a": { -> "#schema": [ // tag for a -> { -> "type": 2, // "string" type -> "number": 160, // 160 encounters -> "prob": 0.8, // relative probability 0.8 means 200 parent objects -> "unique": false, // the values contain duplicates -> "data": {...} // placeholder, defined further below -> }, -> { -> "type": 3, // "nested document" type -> ... -> } -> ] -> } - - -### 4. Type-Specific Data - -Inside a tag, each element is specified uniquely by its type, represented in the `t` member and its decimal value which corresponds with the BSON type. For each BSON type, this section defines a structure for the `data` member, which carries additional information specific for the type. 
- - -#### Type 1: float - -The `data` object contains the following members: - -- `min`: The smallest value encountered in any sample document -- `max`: The largest value encountered in any sample document -- `avg`: The mean of all sample document values -- `med`: The median of all sample document values -- `values`: An array of all values encountered, in order of traversal - - -> ##### Example - -> "data": { -> "min": 0.0 -> "max": 32.8, -> "avg": 9.3499999, -> "med": 5.25, -> "values": [ 0.0, 1.4, 6.4, 3.2, 8.6, 18.3, 32.8, 4.1 ] -> } - - -#### Type 2: string - - -The `data` object contains the following members: - -- `min`: The smallest value encountered in any sample document -- `max`: The largest value encountered in any sample document -- `values`: Unique set of all values encountered, ordered by counts descending -- `counts`: count for each value, same order as above - - -> ##### Example - -> "data": { -> "min": "atlas", -> "max": "zoo", -> "values": [ "atlas", "song", "bird", "zoo", "breakfast" ], -> "counts": [ 15, 9, 7, 5, 2 ] -> } - - -#### Type 3: nested document - -The `data` object for nested document types is empty. All information about child members is tracked in the respective nested member tag. - - -#### Type 4: array - -The `data` object for arrays contains an `#array` member. It follows the structure of a regular `#schema` tag, but applies to elements inside arrays only. This concept is called _array introspection_. - -> ##### Example - -> This array contains only strings (there is only a single element with type `2` in the `#schema` array). This element follows the normal rules for string types, as described above. 
- -> "data": { -> "#array": [ -> { -> "type": 2, -> "number": 490, -> "prob": 1.0, -> "unique": false, -> "data": { -> "min": "AUH", -> "max": "ZRH", -> "values": [ "NYC", "CDG", "FRA", "LHR", "ZRH", "AUH", "BKK", "LAX" ], -> "counts": [ 171, 110, 82, 40, 29, 23, 21, 14 ] -> } -> } -> ] -> } - - -#### Type 5: binary - -The `data` object contains a distribution of subtypes under the type binary. The `sub` member is an array of sub-types, and the `counts` member is an array of counts of the encountered sub-types. - -> ##### Example - -> "data": { -> "sub": [ 4, 3 ] -> "counts": [ 3004, 2554 ] -> } - - -#### Type 6: undefined (deprecated) - -The `data` object is empty. - - -#### Type 7: ObjectId - -The `data` object contains the following fields: - -- `min`: The smallest ObjectId value found, encoded as strict extended JSON. -- `max`: The largest ObjectId value found, encoded as strict extended JSON. - -Additionally, because ObjectId has a timestamp encoded into its first 6 bytes, the `data` field further contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. - -> ##### Example - -> "data": { -> "min": {"$oid": "553f06eb1fc10e8d93515abb"}, -> "max": {"$oid": "553f06fbbeefcf581c232257"}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 86400, -> "values": [14, 4, 6, 23, ...], -> "labels": [] -> } -> } - - -#### Type 8: boolean - -The `data` field contains the distribution of `true` and `false` values. 
- -> ##### Example - -> "data": { -> "true": 48, -> "false": 13, -> } - - -#### Type 9: datetime - -the `data` field contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. - -> ##### Example - -> "data": { -> "min": {"$date": 1434933322}, -> "max": {"$date": 1434939935}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 30758400, -> "values": [14, 4, 6, 23] -> } -> } - - -#### Type 10: null - -The `data` object is empty. - -#### Type 11: regular expression - -The `data` object is empty. - -#### Type 12: DBPointer (deprecated) - -The `data` object is empty. - -#### Type 13: javascript code - -The `data` object is empty. - -#### Type 15: javascript code with scope - -The `data` object is empty. 
- -#### Type 16: 32-bit integer - -The `data` object contains the following members: - -- `min`: The minimum value encountered -- `max`: The maximum value encountered -- `med`: The median of all encoutered values -- `avg`: The mean of all encountered values -- `values`: Unique set of all values encountered, ordered by values -- `counts`: count for each value, same order as above - -> ##### Example - -> "data" : { -> "min": 3, -> "max": 72, -> "med": 20, -> "avg": 30.5, -> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] -> } - -#### Type 17: timestamp - -the `data` field contains aggregated date and time information: - -- `weekdays`: An array of 7 elements, counting the ObjectIds created on respective week days, starting with Monday. -- `hours`: An array of 24 elements, counting the ObjectIds created in respective hours, starting with (00-01h, or 12am-1am). -- `bins`: This is an adaptive binning object, containing information about the bin size and the value distribution per bin. See below under `adaptive binning` for more information. 
- -> ##### Example - -> "data": { -> "min": {"$date": 1434933322}, -> "max": {"$date": 1434939935}, -> "weekdays": [1, 19, 23, 4, 6, 43, 1], -> "hours": [1, 2, 3, 4, 5, 3, 4, 3, 4, 2, 2, 5, 7, 9, 0, 6, 4, 2, 1, 2, 3, 4, 5, 6], -> "bins": { -> "size": 30758400, -> "values": [14, 4, 6, 23] -> } -> } - - -#### Type 18: 64-bit integer - -The `data` object contains the following members: - -- `min`: The minimum value encountered -- `max`: The maximum value encountered -- `med`: The median of all encoutered values -- `avg`: The mean of all encountered values -- `values`: Unique set of all values encountered, ordered by values -- `counts`: count for each value, same order as above - -> ##### Example - -> "data" : { -> "min": 3, -> "max": 72, -> "med": 20, -> "avg": 30.5, -> "values": [ 19, 21, 24, 25, 28, 29, 30, 31, 36, 45, 58, 59, 72], -> "counts": [ 3, 4, 8, 12, 13, 15, 21, 20, 19, 20, 16, 12, 7 ] -> } - -#### Type 127: minkey - -The `data` object is empty. - -#### Type 255: maxkey - -The `data` object is empty. - - -### 5. Adaptive Binning - -Some data types contain a field `bins`, where the data is discretized into bins with a variablebin size, depending on the data distribution. - -A _bin_ is defined - -The `bins` object consists of the following members: - -- `size`: this is the size of an individual bin. For numbers (types 1, 16, 18), this is a unitless number that describes the size of a bin. - - -> "bins": { // adaptive binning -> "size": 86400, // number of seconds per bucket -> "values": [14, 4, 6, 23, ...] // values per bin -> "labels": ["Apr 30", "May 1", "May 2", "May 3", ...] 
-> } diff --git a/examples/parse-schema.js b/examples/parse-schema.js new file mode 100644 index 0000000..42e1e8a --- /dev/null +++ b/examples/parse-schema.js @@ -0,0 +1,13 @@ +var parseSchema = require('../'); +var connect = require('mongodb'); + +connect('mongodb://localhost:27017/test', function(err, db){ + if(err) return console.error(err); + + parseSchema('test', db.collection('test').find(), function(err, schema){ + if(err) return console.error(err); + + console.log(JSON.stringify(schema, null, 2)); + db.close(); + }); +}); diff --git a/examples/scout.js b/examples/scout.js deleted file mode 100644 index 320e565..0000000 --- a/examples/scout.js +++ /dev/null @@ -1,52 +0,0 @@ -var scout = require('scout-client')(); -var schema = require('mongodb-schema'); -var _ = require('lodash'); - -var Schema = schema.extend({ - fetch: function(options) { - options = _.defaults(options, { - size: 5, - query: {}, - fields: null - }); - scout.sample(this.ns, options).pipe(this.stream()); - } -}); - -var FieldView = AmpersandView.extend({ - bindings: { - 'model.displayName': { - hook: 'name' - } - }, - template: require('./field.jade') -}); - -var FieldListView = AmpersandView.extend({ - template: require('./field-list.jade'), - render: function() { - this.renderWithTemplate({}); - this.renderCollectionView(this.collection, FieldView, this.queryByHook('fields')); - } -}); - -var CollectionView = AmpersandView.extend({ - model: Schema, - initialize: function(opts) { - this.model.ns = opts.ns; - this.model.fetch(); - }, - template: require('./collection.jade'), - subviews: { - fields: { - hook: 'fields-container', - prepareView: function(el) { - return new FieldListView({ - el: el, - parent: this, - collection: this.model.fields - }); - } - } - } -}); diff --git a/examples/simple.js b/examples/simple.js deleted file mode 100755 index cad4420..0000000 --- a/examples/simple.js +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env node - -var schema = require('../')(); -var es = 
require('event-stream'); - -var docs = [ - { - _id: 1, - username: 'Adam' - }, - { - _id: 2, - username: 'Brian' - }, - { - _id: 3, - username: 'Cat' - } -]; - - -es.readArray(docs).pipe(schema.stream().on('end', function(){ - console.log('Finalized schema has fields: ', JSON.stringify(schema.fields, null, 2)); -})).pipe(es.stringify()).pipe(process.stdout); diff --git a/lib/field-collection.js b/lib/field-collection.js new file mode 100644 index 0000000..6eee95c --- /dev/null +++ b/lib/field-collection.js @@ -0,0 +1,11 @@ +var Collection = require('./collection'); + +/** + * Container for a list of Fields. + */ +var FieldCollection = Collection.extend({ + model: function(attrs, options) { + return new attrs.klass(attrs, options); + } +}); +module.exports = FieldCollection; diff --git a/lib/schema.js b/lib/field.js similarity index 68% rename from lib/schema.js rename to lib/field.js index df0f9eb..484bf78 100644 --- a/lib/schema.js +++ b/lib/field.js @@ -1,23 +1,9 @@ -var es = require('event-stream'); var _ = require('lodash'); -var raf = require('raf'); - -var debug = require('debug')('mongodb-schema'); - -var Collection = require('./collection'); var State = require('./state'); -var Type = require('./type'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); - -/** - * Container for a list of Fields. - */ -var FieldCollection = Collection.extend({ - model: function(attrs, options) { - return new attrs.klass(attrs, options); - } -}); +var FieldCollection = require('./field-collection'); +var debug = require('debug')('mongodb-schema:field'); /** * Like a property. @@ -259,21 +245,15 @@ var Field = State.extend({ return res; }, }); +module.exports = Field; /** * A basic field has no descendant fields, such as `String`, `ObjectID`, * `Boolean`, or `Date`. - * @class - * @implements Field */ -var BasicField = Field.extend({}); +module.exports.Basic = Field.extend({}); -/** - * An embedded array of Fields. 
- * @class - * @implements Field - */ -var EmbeddedArrayField = Field.extend({ +module.exports.EmbeddedArray = Field.extend({ props: { type: { type: 'string', @@ -296,11 +276,7 @@ var EmbeddedArrayField = Field.extend({ } }); -/** - * @class - * @implements Field - */ -var EmbeddedDocumentField = Field.extend({ +module.exports.EmbeddedDocument = Field.extend({ props: { type: { type: 'string', @@ -308,129 +284,3 @@ var EmbeddedDocumentField = Field.extend({ } } }); - -function onFieldSampled(schema, _id, value) { - var type_id = Type.getNameFromValue(value); - if (type_id === 'Array') { - onEmbeddedArray(schema, _id, type_id, value); - } else if (type_id === 'Object') { - onEmbeddedDocument(schema, _id, type_id, value); - } else { - onBasicField(schema, _id, type_id, value); - } -} - -function onBasicField(schema, _id, type_id, value) { - var field = schema.fields.get(_id); - if (!field) { - field = schema.fields.add({ - _id: _id, - klass: BasicField, - parent: schema - }); - } - field.count += 1; - - var type = field.types.get(type_id); - if (!type) { - type = field.types.add({ - _id: type_id, - }); - } - type.count += 1; - - type.values.add({ - _id: value - }); -} - -function onEmbeddedArray(schema, _id, type_id, value) { - var field = schema.fields.get(_id); - - if (!field) { - field = schema.fields.add({ - _id: _id, - klass: EmbeddedArrayField, - parent: schema - }); - } - - field.count += 1; - field.lengths.push(value.length); - field.trigger('change:lengths'); - _.each(value, function(d) { - var type_id = Type.getNameFromValue(d); - if (type_id === 'Object') { - _.each(d, function(val, key) { - onBasicField(field, key, Type.getNameFromValue(val), val); - }); - } else { - onBasicField(field, '__basic__', type_id, d); - } - }); -} - -function onEmbeddedDocument(schema, _id, type_id, value) { - var field = schema.fields.get(_id); - - if (!field) { - field = schema.fields.add({ - _id: _id, - klass: EmbeddedDocumentField, - parent: schema - }); - } - field.count += 
1; - _.each(value, function(val, key) { - onFieldSampled(field, key, val); - }); -} - -/** - * The top level schema state. - * @class - */ -var Schema = State.extend({ - idAttribute: 'ns', - props: { - ns: { - type: 'string' - }, - count: { - type: 'number', - default: 0 - } - }, - collections: { - fields: FieldCollection - }, - parse: function(doc, done) { - var schema = this; - schema.count += 1; - _.each(doc, function(val, key) { - onFieldSampled(schema, key, val); - }); - schema.fields.map(function(field) { - field.commit(); - }); - if (_.isFunction(done)) { - done(); - } - }, - stream: function() { - var schema = this; - return es.map(function(doc, done) { - raf(function() { - schema.parse(doc, function(err) { - done(err, doc); - }); - }); - }); - } -}); - -module.exports = Schema; -module.exports.FieldCollection = FieldCollection; -module.exports.BasicField = BasicField; -module.exports.EmbeddedArrayField = EmbeddedArrayField; -module.exports.EmbeddedDocumentField = EmbeddedDocumentField; diff --git a/lib/index.js b/lib/index.js index aadfaf6..9bd1496 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,22 +1,30 @@ var Schema = require('./schema'); var es = require('event-stream'); -var assert = require('assert'); +/** + * Convenience shortcut for parsing schemas. + * @param {String} ns The namespace of the collection being parsed. 
+ * @param {Cursor|Array} docs An array of documents or a Cursor returned by `.find()` + * @param {Function} fn Callback which will be passed `(err, schema)` + * @returns {Schema} + */ module.exports = function(ns, docs, fn) { - assert(Array.isArray(docs), 'docs must be an array'); var schema = new Schema({ ns: ns }); + var src; - es.readArray(docs).pipe(schema.stream()).on('end', fn); + if(docs.stream){ + src = docs.stream(); + } + else{ + src = es.readArray(docs); + } + + src.pipe(schema.stream()).on('end', function(){ + fn.call(null, null, schema); + }); return schema; }; -module.exports.extend = Schema.extend.bind(Schema); module.exports.Schema = Schema; -module.exports.getType = require('./type').getNameFromValue; -module.exports.FieldCollection = Schema.FieldCollection; -module.exports.BasicField = Schema.BasicField; -module.exports.EmbeddedArrayField = Schema.EmbeddedArrayField; -module.exports.EmbeddedDocumentField = Schema.EmbeddedDocumentField; -module.exports.TypeCollection = require('./type-collection'); diff --git a/lib/parser.js b/lib/parser.js new file mode 100644 index 0000000..cbc3336 --- /dev/null +++ b/lib/parser.js @@ -0,0 +1,82 @@ +var _ = require('lodash'); +var getTypeId = require('./type').getNameFromValue; +var Field = require('./field'); + +function onFieldSampled(schema, _id, value) { + var type_id = getTypeId(value); + if (type_id === 'Array') { + onEmbeddedArray(schema, _id, type_id, value); + } else if (type_id === 'Object') { + onEmbeddedDocument(schema, _id, type_id, value); + } else { + onBasicField(schema, _id, type_id, value); + } +} + +function onBasicField(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + if (!field) { + field = schema.fields.add({ + _id: _id, + klass: Field.Basic, + parent: schema + }); + } + field.count += 1; + + var type = field.types.get(type_id); + if (!type) { + type = field.types.add({ + _id: type_id, + }); + } + type.count += 1; + + type.values.add({ + _id: value + }); +} + +function 
onEmbeddedArray(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + + if (!field) { + field = schema.fields.add({ + _id: _id, + klass: Field.EmbeddedArray, + parent: schema + }); + } + + field.count += 1; + field.lengths.push(value.length); + field.trigger('change:lengths'); + _.each(value, function(d) { + var type_id = getTypeId(d); + if (type_id === 'Object') { + _.each(d, function(val, key) { + onBasicField(field, key, getTypeId(val), val); + }); + } else { + onBasicField(field, '__basic__', type_id, d); + } + }); +} + +function onEmbeddedDocument(schema, _id, type_id, value) { + var field = schema.fields.get(_id); + + if (!field) { + field = schema.fields.add({ + _id: _id, + klass: Field.EmbeddedDocument, + parent: schema + }); + } + field.count += 1; + _.each(value, function(val, key) { + onFieldSampled(field, key, val); + }); +} + +module.exports.parse = onFieldSampled; diff --git a/lib/type-collection.js b/lib/type-collection.js index 66c27aa..dad3cd8 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -1,14 +1,11 @@ var Collection = require('./collection'); var type = require('./type'); +var assert = require('assert'); module.exports = Collection.extend({ model: function(attrs, options) { var Klass = type[attrs._id]; - - if (!Klass) { - throw new TypeError('No value type for ' + attrs._id); - } - + assert(Klass, 'No value type for ' + attrs._id); return new Klass(attrs, options); }, /** diff --git a/lib/value.js b/lib/value.js index 735f5c4..e2e3232 100644 --- a/lib/value.js +++ b/lib/value.js @@ -12,8 +12,5 @@ module.exports = State.extend({ initialize: function(attrs) { this.value = attrs._id; this._id = this.cid + '-' + attrs._id; - }, - valueOf: function() { - return this.value; } }); diff --git a/package.json b/package.json index 0c26802..c045996 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "description": "Infer the probabilistic schema for a MongoDB collection.", "version": "2.1.1", "author": "Thomas 
Rueckstiess ", - "license": "Apache2", + "license": "Apache-2.0", "homepage": "http://github.com/mongodb-js/mongodb-schema", "repository": { "type": "git", @@ -14,7 +14,8 @@ }, "scripts": { "start": "zuul --local 3001 --open -- test/*.test.js", - "test": "mocha" + "test": "mocha", + "ci": "./node_modules/istanbul/lib/cli.js cover _mocha -- -R spec ./test/*.test.js" }, "keywords": [ "mongodb", @@ -31,6 +32,8 @@ "raf": "^3.0.0" }, "devDependencies": { + "coveralls": "^2.11.2", + "istanbul": "^0.3.15", "mocha": "^2.0.1", "mongodb-extended-json": "^1.3.0", "zuul": "^3.0.0" From 821a93cdd75122dcabc6788c7d712b4e0d2f1b66 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 15:28:46 -0400 Subject: [PATCH 74/79] stupid github.app --- lib/schema.js | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 lib/schema.js diff --git a/lib/schema.js b/lib/schema.js new file mode 100644 index 0000000..c3076b5 --- /dev/null +++ b/lib/schema.js @@ -0,0 +1,49 @@ +var es = require('event-stream'); +var _ = require('lodash'); +var raf = require('raf'); +var State = require('./state'); +var parser = require('./parser'); +var FieldCollection = require('./field-collection'); + +/** + * The top level schema state. 
+ * @class + */ +var Schema = State.extend({ + idAttribute: 'ns', + props: { + ns: { + type: 'string' + }, + count: { + type: 'number', + default: 0 + } + }, + collections: { + fields: FieldCollection + }, + parse: function(doc, done) { + var schema = this; + schema.count += 1; + _.each(doc, function(val, key) { + parser.parse(schema, key, val); + }); + schema.fields.map(function(field) { + field.commit(); + }); + if (_.isFunction(done)) { + done(); + } + }, + stream: function() { + var schema = this; + return es.map(function(doc, done) { + schema.parse(doc, function(err) { + done(err, doc); + }); + }); + } +}); + +module.exports = Schema; From 6fa624d0806170e84e544a1039529795f925caad Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 15:33:26 -0400 Subject: [PATCH 75/79] bump bson to latest --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index c045996..e786ba5 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "ampersand-collection": "^1.4.5", "ampersand-collection-lodash-mixin": "^2.0.1", "ampersand-state": "^4.5.4", - "bson": "^0.3.1", + "bson": "^0.4.0", "debug": "^2.1.3", "event-stream": "^3.3.0", "lodash": "^3.8.0", From 1e4b5238cff246b54cc838b11667e4602541cb97 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Mon, 22 Jun 2015 15:34:38 -0400 Subject: [PATCH 76/79] readme formatting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 477c9ea..7825595 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ in a collection to derive the schema of: ``` 4. When we run the above with `node parse-schema.js`, we'll see something like the following: + ```json { "//": "The number of documents sampled", From 1b1ddf103edcca815738a1e6a0f92d1f58ed3cbc Mon Sep 17 00:00:00 2001 From: Thomas Rueckstiess Date: Wed, 24 Jun 2015 10:56:17 +1000 Subject: [PATCH 77/79] field sort order: _id on top, case-insensitive. 
--- lib/field-collection.js | 9 +++++++++ test/field-order.test.js | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 test/field-order.test.js diff --git a/lib/field-collection.js b/lib/field-collection.js index 6eee95c..c93fff2 100644 --- a/lib/field-collection.js +++ b/lib/field-collection.js @@ -4,6 +4,15 @@ var Collection = require('./collection'); * Container for a list of Fields. */ var FieldCollection = Collection.extend({ + comparator: function (a, b) { + // make sure _id is always at top, even in presence of uppercase fields + var aId = a.getId(); + var bId = b.getId(); + if (aId === '_id') return -1; + if (bId === '_id') return 1; + // otherwise sort case-insensitively + return (aId.toLowerCase() < bId.toLowerCase()) ? -1 : 1; + }, model: function(attrs, options) { return new attrs.klass(attrs, options); } diff --git a/test/field-order.test.js b/test/field-order.test.js new file mode 100644 index 0000000..9bdd676 --- /dev/null +++ b/test/field-order.test.js @@ -0,0 +1,26 @@ +var getSchema = require('../'); +var assert = require('assert'); +var debug = require('debug')('mongodb-schema:test:field-order'); + +describe('order of fields', function () { + it('should have _id fields always at top, even with uppercase fields', function(done) { + var docs = [ {FOO: 1, _id: 1, BAR: 1, zoo: 1} ]; + getSchema('field.order', docs, function (err, schema) { + assert.ifError(err); + assert.deepEqual(schema.fields.map(function (field) { + return field.getId(); + }), ['_id', 'BAR', 'FOO', 'zoo']); + done(); + }); + }); + it('should sort in case-insensitive manner', function (done) { + var docs = [ {cb: 1, Ca: 1, cC: 1, a:1, b: 1} ]; + getSchema('field.order', docs, function (err, schema) { + assert.ifError(err); + assert.deepEqual(schema.fields.map(function (field) { + return field.getId(); + }), ['a', 'b', 'Ca', 'cb', 'cC']); + done(); + }); + }); +}); From 1606e15afbc7afc3dc542e13f547ecb53ecb7b6f Mon Sep 17 00:00:00 2001 From: Lucas 
Hrabovsky Date: Wed, 24 Jun 2015 14:22:37 -0400 Subject: [PATCH 78/79] cleanup --- index.js | 2 +- lib/schema.js | 1 - lib/type.js | 2 -- package.json | 3 +-- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/index.js b/index.js index fb88a3c..bb0a047 100644 --- a/index.js +++ b/index.js @@ -1 +1 @@ -var schema = module.exports = require('./lib'); +module.exports = require('./lib'); diff --git a/lib/schema.js b/lib/schema.js index c3076b5..2ba15c6 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,6 +1,5 @@ var es = require('event-stream'); var _ = require('lodash'); -var raf = require('raf'); var State = require('./state'); var parser = require('./parser'); var FieldCollection = require('./field-collection'); diff --git a/lib/type.js b/lib/type.js index 8113e62..351dba3 100644 --- a/lib/type.js +++ b/lib/type.js @@ -1,7 +1,6 @@ var State = require('./state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); -var debug = require('debug')('mongodb-schema:type'); var Type = State.extend({ props: { @@ -155,4 +154,3 @@ exports.Array = Type.extend({ } } }); - diff --git a/package.json b/package.json index e786ba5..362868f 100644 --- a/package.json +++ b/package.json @@ -28,8 +28,7 @@ "bson": "^0.4.0", "debug": "^2.1.3", "event-stream": "^3.3.0", - "lodash": "^3.8.0", - "raf": "^3.0.0" + "lodash": "^3.8.0" }, "devDependencies": { "coveralls": "^2.11.2", From e5f1b00c1b39b20e9c8243e171253080cd1cd7f2 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Wed, 24 Jun 2015 15:03:24 -0400 Subject: [PATCH 79/79] updates from code review --- README.md | 72 ++++++++++++--------------- docs/mongodb-schema_diagram.md | 8 +-- lib/collection.js | 7 --- lib/field-collection.js | 6 ++- lib/field.js | 30 +++++------ lib/parser.js | 48 +++++++++--------- lib/schema.js | 2 +- lib/state.js | 5 -- lib/type-collection.js | 10 ++-- lib/type.js | 39 +++++++-------- lib/value-collection.js | 6 ++- lib/value.js | 11 ++-- 
test/basic-embedded-documents.test.js | 4 +- test/basic.test.js | 4 +- test/mixed-type-order.test.js | 2 +- test/values.test.js | 2 +- 16 files changed, 120 insertions(+), 136 deletions(-) delete mode 100644 lib/collection.js delete mode 100644 lib/state.js diff --git a/README.md b/README.md index 7825595..ec87c1f 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,12 @@ Infer a probabilistic schema for a MongoDB collection. ## Example -`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples so we'll install the node.js driver. As well, we'll need some data +`mongodb-schema` doesn't do anything directly with `mongodb` so to try the examples we'll install the node.js driver. As well, we'll need some data in a collection to derive the schema of: -1. `npm i mongodb mongodb-schema`. +1. `npm install mongodb mongodb-schema`. 2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test` -3. Paste the below into `parse-schema.js`: +3. Create a new file `parse-schema.js` and paste in the following code: ```javascript var parseSchema = require('mongodb-schema'); var connect = require('mongodb'); @@ -31,57 +31,49 @@ in a collection to derive the schema of: ``` 4. 
When we run the above with `node parse-schema.js`, we'll see something like the following: - - ```json + + ```javascript { - "//": "The number of documents sampled", - "count": 4, - "//": "A collection of Field objects", - "//": "@see lib/field.js", - "fields": [ + ns: 'test.test', + count: 4, // The number of documents sampled + fields: [ // A collection of Field objects @see lib/field.js { - "_id": "_id", - "//": "Just as we expected, all 4 documents had `_id`", - "probability": 1, - "//": "All 4 values for `_id` were unique", - "unique": 4, - "//": "The only type seen was a Number", - "types": [ + name: "_id", + probability: 1, // Just as we expected, all 4 documents had `_id` + unique: 4, // All 4 values for `_id` were unique + types: [ { - "_id": "Number", - "probability": 1, - "unique": 4 + name: "Number", // The only type seen was a Number + probability: 1, + unique: 4 } ] }, { - "_id": "a", - "//": "Unlike `_id`, `a` was present in only 3 of 4 documents", - "probability": 0.75, - "//": "Of the 3 values seen, all 3 were unique", - "unique": 3, - "//": "As expected, Boolean, String, and Number values were seen", - "//": "A handy instance of `Undefined` is also provided to represent missing data", + name: "a", // Unlike `_id`, `a` was present in only 3 of 4 documents + probability: 0.75, + unique: 3, // Of the 3 values seen, all 3 were unique + // As expected, Boolean, String, and Number values were seen. 
+ // A handy instance of `Undefined` is also provided to represent missing data", "types": [ { - "_id": "Boolean", - "probability": 0.25, - "unique": 1 + name: "Boolean", + probability: 0.25, + unique: 1 }, { - "_id": "String", - "probability": 0.25, - "unique": 1 + name: "String", + probability: 0.25, + unique: 1 }, { - "_id": "Number", - "probability": 0.25, - "unique": 1 + name: "Number", + probability: 0.25, + unique: 1 }, { - "_id": "Undefined", - "probability": 0.25, - "unique": 0 + name: "Undefined", + probability: 0.25 } ] } @@ -91,7 +83,7 @@ in a collection to derive the schema of: ### More Examples -`mongodb-schema` is quite powerful and supports all [BSON types][bson-types]. +`mongodb-schema` supports all [BSON types][bson-types]. Checkout [the tests][tests] for more usage examples. diff --git a/docs/mongodb-schema_diagram.md b/docs/mongodb-schema_diagram.md index 0205032..2969c3f 100644 --- a/docs/mongodb-schema_diagram.md +++ b/docs/mongodb-schema_diagram.md @@ -4,7 +4,7 @@ [ mongodb-schema#field| [Field| - _id: String; + name: String; count: Number; probability: Number; unique: Number; @@ -21,14 +21,14 @@ [Field]-->[Field#values] [BasicField] -:> [Field] [EmbeddedDocumentField]-:> [Field] - [EmbeddedArrayField|lengths: int;average_length: int]-:> [Field] + [EmbeddedArrayField|lengths: Array;average_length: int]-:> [Field] [Field#values| - [ValueCollection]+-> 1..*[Value|_id: String|value: *] + [ValueCollection]+-> 1..*[Value|id: String|value: *] ] [Field#values] o-> [Field#types] [Field#types| - [Type|_id: String;count: Number;probability: Number;unique: Number|values: ValueCollection] + [Type|name: String;count: Number;probability: Number;unique: Number|values: ValueCollection] [TypeCollection] [TypeCollection]+-> 1..*[Type] ] diff --git a/lib/collection.js b/lib/collection.js deleted file mode 100644 index 0b85d85..0000000 --- a/lib/collection.js +++ /dev/null @@ -1,7 +0,0 @@ -var lodashMixin = require('ampersand-collection-lodash-mixin'); -var 
Collection = require('ampersand-collection'); - -module.exports = Collection.extend(lodashMixin, { - mainIndex: '_id', - comparator: '_id' -}); diff --git a/lib/field-collection.js b/lib/field-collection.js index c93fff2..8ff84fa 100644 --- a/lib/field-collection.js +++ b/lib/field-collection.js @@ -1,9 +1,11 @@ -var Collection = require('./collection'); +var Collection = require('ampersand-collection'); +var lodashMixin = require('ampersand-collection-lodash-mixin'); /** * Container for a list of Fields. */ -var FieldCollection = Collection.extend({ +var FieldCollection = Collection.extend(lodashMixin, { + mainIndex: 'name', comparator: function (a, b) { // make sure _id is always at top, even in presence of uppercase fields var aId = a.getId(); diff --git a/lib/field.js b/lib/field.js index 484bf78..6b9cfee 100644 --- a/lib/field.js +++ b/lib/field.js @@ -1,19 +1,20 @@ var _ = require('lodash'); -var State = require('./state'); +var State = require('ampersand-state'); var TypeCollection = require('./type-collection'); var ValueCollection = require('./value-collection'); var FieldCollection = require('./field-collection'); var debug = require('debug')('mongodb-schema:field'); /** - * Like a property. + * Describes a single field in the schema based on sampled values. */ var Field = State.extend({ + idAttribute: 'name', props: { /** * The key in the `parent`. */ - _id: { + name: { type: 'string', required: true }, @@ -41,13 +42,13 @@ var Field = State.extend({ /** * If using shortened keys to save space, it is expected this be the "real" * name of the field that could be input by the user. For example, - * if `u` is the field's `_id`, `username` is the field's title + * if `u` is the field's `name`, `username` is the field's title * and is much friendlier for humans. 
*/ title: { type: 'string', default: function() { - return this._id; + return this.name; } }, /** @@ -67,7 +68,7 @@ var Field = State.extend({ }, derived: { /** - * The most common type seen for this field. + * Type of the values. String for single type, array of strings for multiple types. * * @see http://spacetelescope.github.io/understanding-json-schema/reference/type.html */ @@ -78,19 +79,21 @@ var Field = State.extend({ return undefined; } if (this.types.length === 1) { - return this.types.at(0)._id; + return this.types.at(0).name; } - return this.types.pluck('_id'); + return this.types.pluck('name'); } }, /** * The total number of documents we would see if always set. + * This differs from `#count` as it is the value used to calculate + * the probability of this field's children. @see #commit() */ total: { deps: ['count', 'probability'], fn: function() { if (this.probability === 1) return this.count; - var parentIsArray = this.collection.parent.lengths !== undefined; + var parentIsArray = this.collection.parent.type === 'Array'; if (parentIsArray) { return _.sum(this.types.pluck('count')); } @@ -100,7 +103,6 @@ var Field = State.extend({ /** * Does this field contain any duplicate values? * @returns {Boolean} - * @prop */ has_duplicates: { deps: ['unique', 'count'], @@ -133,7 +135,7 @@ var Field = State.extend({ }, /** * When new types are added, trigger a change event to recalculate `this.type` - * and add listeners so any operations on `type.values` are relfected on + * and add listeners so any operations on `type.values` are reflected on * `this.values`. * * @param {Type} type that's being added. 
@@ -203,15 +205,15 @@ var Field = State.extend({ if(undef){ debug('removing extraneous Undefined for `%s`', this.getId()); this.types.remove({ - _id: 'Undefined' + name: 'Undefined' }); } - // No undefined types to manage') + // No undefined types to manage } else { if (!undef) { debug('adding Undefined for `%s`', this.getId()); undef = this.types.add({ - _id: 'Undefined', + name: 'Undefined', unique: 1 }); } diff --git a/lib/parser.js b/lib/parser.js index cbc3336..656c10a 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,48 +1,48 @@ var _ = require('lodash'); -var getTypeId = require('./type').getNameFromValue; +var getTypeName = require('./type').getNameFromValue; var Field = require('./field'); -function onFieldSampled(schema, _id, value) { - var type_id = getTypeId(value); - if (type_id === 'Array') { - onEmbeddedArray(schema, _id, type_id, value); - } else if (type_id === 'Object') { - onEmbeddedDocument(schema, _id, type_id, value); +function onFieldSampled(schema, name, value) { + var typeName = getTypeName(value); + if (typeName === 'Array') { + onEmbeddedArray(schema, name, typeName, value); + } else if (typeName === 'Object') { + onEmbeddedDocument(schema, name, typeName, value); } else { - onBasicField(schema, _id, type_id, value); + onBasicField(schema, name, typeName, value); } } -function onBasicField(schema, _id, type_id, value) { - var field = schema.fields.get(_id); +function onBasicField(schema, name, typeName, value) { + var field = schema.fields.get(name); if (!field) { field = schema.fields.add({ - _id: _id, + name: name, klass: Field.Basic, parent: schema }); } field.count += 1; - var type = field.types.get(type_id); + var type = field.types.get(typeName); if (!type) { type = field.types.add({ - _id: type_id, + name: typeName, }); } type.count += 1; type.values.add({ - _id: value + value: value }); } -function onEmbeddedArray(schema, _id, type_id, value) { - var field = schema.fields.get(_id); +function onEmbeddedArray(schema, name, 
typeName, value) { + var field = schema.fields.get(name); if (!field) { field = schema.fields.add({ - _id: _id, + name: name, klass: Field.EmbeddedArray, parent: schema }); @@ -52,23 +52,23 @@ function onEmbeddedArray(schema, _id, type_id, value) { field.lengths.push(value.length); field.trigger('change:lengths'); _.each(value, function(d) { - var type_id = getTypeId(d); - if (type_id === 'Object') { + var typeName = getTypeName(d); + if (typeName === 'Object') { _.each(d, function(val, key) { - onBasicField(field, key, getTypeId(val), val); + onBasicField(field, key, getTypeName(val), val); }); } else { - onBasicField(field, '__basic__', type_id, d); + onBasicField(field, '__basic__', typeName, d); } }); } -function onEmbeddedDocument(schema, _id, type_id, value) { - var field = schema.fields.get(_id); +function onEmbeddedDocument(schema, name, typeName, value) { + var field = schema.fields.get(name); if (!field) { field = schema.fields.add({ - _id: _id, + name: name, klass: Field.EmbeddedDocument, parent: schema }); diff --git a/lib/schema.js b/lib/schema.js index 2ba15c6..730691a 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -1,6 +1,6 @@ var es = require('event-stream'); var _ = require('lodash'); -var State = require('./state'); +var State = require('ampersand-state'); var parser = require('./parser'); var FieldCollection = require('./field-collection'); diff --git a/lib/state.js b/lib/state.js deleted file mode 100644 index 4219d92..0000000 --- a/lib/state.js +++ /dev/null @@ -1,5 +0,0 @@ -var State = require('ampersand-state'); - -module.exports = State.extend({ - idAttribute: '_id' -}); diff --git a/lib/type-collection.js b/lib/type-collection.js index dad3cd8..606d3a4 100644 --- a/lib/type-collection.js +++ b/lib/type-collection.js @@ -1,11 +1,13 @@ -var Collection = require('./collection'); +var Collection = require('ampersand-collection'); +var lodashMixin = require('ampersand-collection-lodash-mixin'); var type = require('./type'); var assert = 
require('assert'); -module.exports = Collection.extend({ +module.exports = Collection.extend(lodashMixin, { + mainIndex: 'name', model: function(attrs, options) { - var Klass = type[attrs._id]; - assert(Klass, 'No value type for ' + attrs._id); + var Klass = type[attrs.name]; + assert(Klass, 'No value type for ' + attrs.name); return new Klass(attrs, options); }, /** diff --git a/lib/type.js b/lib/type.js index 351dba3..2e72514 100644 --- a/lib/type.js +++ b/lib/type.js @@ -1,10 +1,11 @@ -var State = require('./state'); +var State = require('ampersand-state'); var _ = require('lodash'); var ValueCollection = require('./value-collection'); var Type = State.extend({ + idAttribute: 'name', props: { - _id: { + name: { type: 'string' }, count: { @@ -22,12 +23,6 @@ var Type = State.extend({ }, collections: { values: ValueCollection - }, - serialize: function() { - return this.getAttributes({ - props: true, - derived: true - }, true); } }); @@ -43,7 +38,7 @@ exports.getNameFromValue = function(value) { exports.String = Type.extend({ props: { - _id: { + name: { default: 'String' } } @@ -51,7 +46,7 @@ exports.String = Type.extend({ exports.Number = Type.extend({ props: { - _id: { + name: { default: 'Number' } } @@ -59,7 +54,7 @@ exports.Number = Type.extend({ exports.Long = Type.extend({ props: { - _id: { + name: { default: 'Long' } } @@ -67,7 +62,7 @@ exports.Long = Type.extend({ exports.Null = Type.extend({ props: { - _id: { + name: { default: 'Null' } } @@ -75,7 +70,7 @@ exports.Null = Type.extend({ exports.Timestamp = Type.extend({ props: { - _id: { + name: { default: 'Timestamp' } } @@ -83,7 +78,7 @@ exports.Timestamp = Type.extend({ exports.Boolean = Type.extend({ props: { - _id: { + name: { default: 'Boolean' } } @@ -91,7 +86,7 @@ exports.Boolean = Type.extend({ exports.Date = Type.extend({ props: { - _id: { + name: { default: 'Date' } } @@ -99,7 +94,7 @@ exports.Date = Type.extend({ exports.ObjectID = Type.extend({ props: { - _id: { + name: { default: 'ObjectID' } } 
@@ -107,7 +102,7 @@ exports.ObjectID = Type.extend({ exports.Undefined = Type.extend({ props: { - _id: { + name: { default: 'Undefined' } } @@ -115,7 +110,7 @@ exports.Undefined = Type.extend({ exports.Binary = Type.extend({ props: { - _id: { + name: { default: 'Binary' } } @@ -123,7 +118,7 @@ exports.Binary = Type.extend({ exports.MaxKey = Type.extend({ props: { - _id: { + name: { default: 'MaxKey' } } @@ -131,7 +126,7 @@ exports.MaxKey = Type.extend({ exports.MinKey = Type.extend({ props: { - _id: { + name: { default: 'MinKey' } } @@ -139,7 +134,7 @@ exports.MinKey = Type.extend({ exports.Object = Type.extend({ props: { - _id: { + name: { type: 'string', default: 'Object' } @@ -148,7 +143,7 @@ exports.Object = Type.extend({ exports.Array = Type.extend({ props: { - _id: { + name: { type: 'string', default: 'Array' } diff --git a/lib/value-collection.js b/lib/value-collection.js index e7cee5d..076d81a 100644 --- a/lib/value-collection.js +++ b/lib/value-collection.js @@ -1,7 +1,9 @@ -var Collection = require('./collection'); +var Collection = require('ampersand-collection'); +var lodashMixin = require('ampersand-collection-lodash-mixin'); var Value = require('./value'); -module.exports = Collection.extend({ +module.exports = Collection.extend(lodashMixin, { + mainIndex: 'id', model: Value, serialize: function() { return this.pluck('value'); diff --git a/lib/value.js b/lib/value.js index e2e3232..943ac5e 100644 --- a/lib/value.js +++ b/lib/value.js @@ -1,16 +1,17 @@ -var State = require('./state'); +var State = require('ampersand-state'); module.exports = State.extend({ + idAttribute: 'id', props: { - _id: { - type: 'any' + id: { + type: 'string' }, value: { type: 'any' } }, initialize: function(attrs) { - this.value = attrs._id; - this._id = this.cid + '-' + attrs._id; + this.value = attrs.value; + this.id = this.cid + '-' + attrs.value; } }); diff --git a/test/basic-embedded-documents.test.js b/test/basic-embedded-documents.test.js index f975d0e..a4a5444 100644 
--- a/test/basic-embedded-documents.test.js +++ b/test/basic-embedded-documents.test.js @@ -35,7 +35,7 @@ describe('basic embedded documents', function() { }); it('should detect all fields', function() { - var field_ids = [ + var field_names = [ '_id', 'created_at', 'email', @@ -45,7 +45,7 @@ describe('basic embedded documents', function() { 'stats', 'twitter' ]; - assert.deepEqual(users.fields.pluck('_id'), field_ids); + assert.deepEqual(users.fields.pluck('name'), field_names); }); it('should serialize correctly', function() { assert.doesNotThrow(function() { diff --git a/test/basic.test.js b/test/basic.test.js index ef84ef9..ab8ebda 100644 --- a/test/basic.test.js +++ b/test/basic.test.js @@ -28,7 +28,7 @@ describe('using only basic fields', function() { it('should detect all fields', function() { // assert.equal(users.fields.length, 11); - var field_ids = [ + var field_names = [ '_id', 'android_push_token', 'apple_push_token', @@ -41,7 +41,7 @@ describe('using only basic fields', function() { 'stats_friends', 'twitter_username' ]; - assert.deepEqual(users.fields.pluck('_id'), field_ids); + assert.deepEqual(users.fields.pluck('name'), field_names); }); it('should detect the correct type for each field', function() { diff --git a/test/mixed-type-order.test.js b/test/mixed-type-order.test.js index ed2e906..72a82b2 100644 --- a/test/mixed-type-order.test.js +++ b/test/mixed-type-order.test.js @@ -38,7 +38,7 @@ describe('mixed type order', function() { assert.equal(schema.fields.get('registered').types.length, 3); }); it('should return the order of types as ["String", "Number", "Undefined"]', function(done) { - assert.deepEqual(schema.fields.get('registered').types.pluck('_id'), ['String', 'Number', 'Undefined']); + assert.deepEqual(schema.fields.get('registered').types.pluck('name'), ['String', 'Number', 'Undefined']); done(); }); }); diff --git a/test/values.test.js b/test/values.test.js index de68186..0a26c7a 100644 --- a/test/values.test.js +++ 
b/test/values.test.js @@ -6,7 +6,7 @@ describe('values', function() { var schema = new Schema(); schema.fields.on('add', function(field) { assert.equal(field.getId(), '_id'); - field.values.on('add', function(value, collection) { + field.values.on('add', function(value) { assert.equal(value.value, 1); done(); });