From f6baf06c26d3528e176ddc0b41a1cfb58bb47e68 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 19 Mar 2020 16:59:13 -0400 Subject: [PATCH 1/6] fix(export): Pick up nested objects to include in projection --- src/modules/export.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/modules/export.js b/src/modules/export.js index 5eed9aa..2a21177 100644 --- a/src/modules/export.js +++ b/src/modules/export.js @@ -13,6 +13,7 @@ const createProgressStream = require('progress-stream'); import { createLogger } from 'utils/logger'; import { createCSVFormatter, createJSONFormatter } from 'utils/formatters'; +import dotnotation from '../utils/dotnotation'; const debug = createLogger('export'); @@ -346,7 +347,9 @@ export const sampleFields = () => { return onError(findErr); } - const fields = Object.keys(docs[0]).sort().reduce((obj, field) => { + // Use `dotnotation.serialize()` to recurse into documents and + // pick up all possible paths. + const fields = Object.keys(dotnotation.serialize(docs[0])).sort().reduce((obj, field) => { obj[field] = 1; return obj; From 40783eac8fe5512be8a2e062fb9d549b1b34ae1f Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 19 Mar 2020 16:59:47 -0400 Subject: [PATCH 2/6] chore(import): :art: Missed a console.groupEnd() --- src/modules/import.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/import.js b/src/modules/import.js index 539a27f..3d19b6c 100644 --- a/src/modules/import.js +++ b/src/modules/import.js @@ -256,9 +256,7 @@ export const startImport = () => { progress, dest, function(err) { - debugger; console.timeEnd('import:start'); - console.groupEnd(); /** * Refresh data (docs, aggregations) regardless of whether we have a * partial import or full import @@ -311,6 +309,8 @@ export const startImport = () => { transform.length > 0 ) ); + console.groupEnd(); + console.groupEnd(); } ); }; From 751265e50753af1555eff0ed05083a84e0da02f5 Mon Sep 17 00:00:00 2001 From: Lucas 
Hrabovsky Date: Thu, 19 Mar 2020 17:00:10 -0400 Subject: [PATCH 3/6] chore(import): remove console.log from test --- src/modules/import.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/import.spec.js b/src/modules/import.spec.js index 0a810f4..6413a2d 100644 --- a/src/modules/import.spec.js +++ b/src/modules/import.spec.js @@ -123,7 +123,7 @@ describe('import [module]', () => { // source: undefined, // dest: undefined // }; - console.log('subscribe touched', { args: arguments, actions: test.store.getActions()}); + // console.log('subscribe touched', { args: arguments, actions: test.store.getActions()}); const expected = { isOpen: false, progress: 0, From a0939331a59e9709b90777081f46c326e85a82f6 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 19 Mar 2020 17:02:10 -0400 Subject: [PATCH 4/6] fix(import): Make type detection/casting work right. --- src/utils/bson-csv.js | 65 +++++++++++++++++++++++++++++------ src/utils/bson-csv.spec.js | 58 ++++++++++++++++++++++++++----- src/utils/dotnotation.spec.js | 4 +-- 3 files changed, 106 insertions(+), 21 deletions(-) diff --git a/src/utils/bson-csv.js b/src/utils/bson-csv.js index 8ad188e..2f99511 100644 --- a/src/utils/bson-csv.js +++ b/src/utils/bson-csv.js @@ -18,11 +18,16 @@ * 3. etc. 
*/ import bson from 'bson'; +import _ from 'lodash'; -const BOOLEAN_TRUE = ['1', 'true', 'TRUE']; -const BOOLEAN_FALSE = ['0', 'false', 'FALSE', 'null', '', 'NULL']; +import { createLogger } from './logger'; -export default { +const debug = createLogger('apply-import-type-and-projection'); + +const BOOLEAN_TRUE = ['1', 'true', 'TRUE', true]; +const BOOLEAN_FALSE = ['0', 'false', 'FALSE', 'null', '', 'NULL', false]; + +const casters = { String: { fromString: function(s) { return '' + s; @@ -52,25 +57,36 @@ export default { }, Date: { fromString: function(s) { + if (s instanceof Date) { + return s; + } return new Date('' + s); } }, - ObjectId: { + ObjectID: { fromString: function(s) { - if (s instanceof bson.ObjectId) { + if (s instanceof bson.ObjectID) { // EJSON being imported return s; } - return new bson.ObjectId(s); + return new bson.ObjectID(s); } }, Long: { fromString: function(s) { + if (s instanceof bson.Long) { + // EJSON being imported + return s; + } return bson.Long.fromString(s); } }, RegExpr: { fromString: function(s) { + if (s instanceof bson.BSONRegExp) { + // EJSON being imported + return s; + } // TODO: lucas: detect any specified regex options later. 
// // if (s.startsWith('/')) { @@ -83,21 +99,33 @@ export default { }, Binary: { fromString: function(s) { + if (s instanceof bson.Binary) { + return s; + } return new bson.Binary(s, bson.Binary.SUBTYPE_DEFAULT); } }, UUID: { fromString: function(s) { + if (s instanceof bson.Binary) { + return s; + } return new bson.Binary(s, bson.Binary.SUBTYPE_UUID); } }, MD5: { fromString: function(s) { + if (s instanceof bson.Binary) { + return s; + } return new bson.Binary(s, bson.Binary.SUBTYPE_MD5); } }, Timestamp: { fromString: function(s) { + if (s instanceof bson.Timestamp) { + return s; + } return bson.Timestamp.fromString(s); } }, @@ -117,6 +145,9 @@ export default { } } }; +casters.ObjectId = casters.ObjectID; +casters.BSONRegExp = casters.RegExpr; +export default casters; /** * [`Object.prototype.toString.call(value)`, `string type name`] @@ -136,14 +167,17 @@ const TYPE_FOR_TO_STRING = new Map([ ]); export function detectType(value) { - const l = Object.prototype.toString.call(value); - const t = TYPE_FOR_TO_STRING.get(l); - return t; + if (value && value._bsontype) { + return value._bsontype; + } + const o = Object.prototype.toString.call(value); + return TYPE_FOR_TO_STRING.get(o); } export function getTypeDescriptorForValue(value) { const t = detectType(value); - const _bsontype = t === 'Object' && value._bsontype; + const _bsontype = (t === 'Object' && value._bsontype) || (t === 'BSONRegExp' ? 'BSONRegExp' : '') || (t === 'ObjectID' ? 'ObjectID' : ''); + debug('detected type', {t, _bsontype}); return { type: _bsontype ? 
_bsontype : t, isBSON: !!_bsontype @@ -178,6 +212,7 @@ export const serialize = function(doc) { * does instead of hex string/EJSON: https://github.com/mongodb/mongo-tools-common/blob/master/json/csv_format.go */ + debug('serialize', {isBSON, type, value}); // BSON values if (isBSON) { if (type === 'BSONRegExp') { @@ -203,6 +238,16 @@ export const serialize = function(doc) { return; } + if (BOOLEAN_TRUE.includes(value)) { + output[newKey] = 'true'; + return; + } + + if (BOOLEAN_FALSE.includes(value)) { + output[newKey] = 'false'; + return; + } + // Embedded documents if ( type === 'Object' && diff --git a/src/utils/bson-csv.spec.js b/src/utils/bson-csv.spec.js index c42f718..1fd15d0 100644 --- a/src/utils/bson-csv.spec.js +++ b/src/utils/bson-csv.spec.js @@ -1,14 +1,17 @@ -import bsonCSV, { serialize } from './bson-csv'; -import { EJSON, ObjectId, Long, BSONRegExp, Double } from 'bson'; +import bsonCSV, { serialize, detectType } from './bson-csv'; +import { EJSON, ObjectId, Long, BSONRegExp, Double, ObjectID } from 'bson'; // TODO: lucas: probably dumb but think about that later. 
describe('bson-csv', () => { describe('Native', () => { describe('String', () => { - it('should work', () => { + it('should stringify value:', () => { expect(bsonCSV.String.fromString(1)).to.equal('1'); }); + it('should stringify value:', () => { + expect(bsonCSV.String.fromString('1')).to.equal('1'); + }); }); describe('Boolean', () => { it('should deserialize falsy values', () => { @@ -66,6 +69,29 @@ describe('bson-csv', () => { }); }); }); + describe('Date', () => { + it('should detect value: as Date', () => { + expect( + detectType(new Date('2020-03-19T20:02:48.406Z')) + ).to.be.equal('Date'); + }); + it('should not lose percision', () => { + expect(bsonCSV.Date.fromString(new Date('2020-03-19T20:02:48.406Z'))).to.deep.equal( + new Date('2020-03-19T20:02:48.406Z') + ); + }); + it('should serialize as a string', () => { + expect(serialize({ value: new BSONRegExp('^mongodb') })).to.deep.equal({ + value: '/^mongodb/' + }); + + expect( + serialize({ value: new BSONRegExp('^mongodb', 'm') }) + ).to.deep.equal({ + value: '/^mongodb/m' + }); + }); + }); describe('Array', () => { it('should serialize as a string of extended JSON', () => { expect( @@ -112,10 +138,23 @@ describe('bson-csv', () => { value: 'true' }); }); + it('should serialize as normalized string', () => { + expect(serialize({ value: 'FALSE' })).to.deep.equal({ + value: 'false' + }); + expect(serialize({ value: 'TRUE' })).to.deep.equal({ + value: 'true' + }); + }); }); }); describe('bson', () => { describe('ObjectId', () => { + it('should detect value: as ObjectID', () => { + expect( + detectType(new ObjectID('5dd080acc15c0d5ee3ab6ad2')) + ).to.be.equal('ObjectID'); + }); it('should serialize ObjectId as the hex string value', () => { const oid = '5dd080acc15c0d5ee3ab6ad2'; const deserialized = bsonCSV.ObjectId.fromString(oid); @@ -130,17 +169,18 @@ describe('bson-csv', () => { }); }); }); - describe('Double', () => { - it('should not lose percision', () => { - 
expect(bsonCSV.Double.fromString('79.8911483764648')).to.deep.equal(new Double('79.8911483764648')); - }); - }); describe('BSONRegExp', () => { + it('should detect value:', () => { + expect( + detectType(new BSONRegExp('^mongodb')) + ).to.be.equal('BSONRegExp'); + }); it('should serialize as a string', () => { expect(serialize({ value: new BSONRegExp('^mongodb') })).to.deep.equal({ value: '/^mongodb/' }); - + }); + it('should serialize value: as a String with flags', () => { expect( serialize({ value: new BSONRegExp('^mongodb', 'm') }) ).to.deep.equal({ diff --git a/src/utils/dotnotation.spec.js b/src/utils/dotnotation.spec.js index 455bbcb..643829a 100644 --- a/src/utils/dotnotation.spec.js +++ b/src/utils/dotnotation.spec.js @@ -1,6 +1,6 @@ /* eslint-disable no-var */ import dotnotation from './dotnotation'; -import { ObjectId } from 'bson'; +import { ObjectId, ObjectID } from 'bson'; describe('dotnotation', () => { it('should handle simplest case', () => { @@ -28,7 +28,7 @@ describe('dotnotation', () => { }); it('should handle not recurse into bson types', () => { - var oid = new ObjectId('5df51e94e92c7b5b333d6c4f'); + var oid = new ObjectID('5df51e94e92c7b5b333d6c4f'); var doc = { _id: oid From 8ddd27589525a7b638bab796ebf890c7e50fc648 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 19 Mar 2020 17:02:34 -0400 Subject: [PATCH 5/6] chore(export): cleanup --- src/utils/formatters.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/utils/formatters.js b/src/utils/formatters.js index de2c397..d893d53 100644 --- a/src/utils/formatters.js +++ b/src/utils/formatters.js @@ -16,10 +16,6 @@ import { EOL } from 'os'; * @returns {Stream.Transform} */ export const createJSONFormatter = function({ brackets = true } = {}) { - // if (brackets) { - // return JSONStream.stringify(open, sep, close); - // } - return new Transform({ readableObjectMode: false, writableObjectMode: true, @@ -56,6 +52,8 @@ export const createJSONFormatter = 
function({ brackets = true } = {}) { export const createCSVFormatter = function() { return csv.format({ headers: true, - transform: row => flatten(row) + transform: row => { + return flatten(row); + } }); }; From df70ff90db112ec761fb0f4655e3dd06138a5d05 Mon Sep 17 00:00:00 2001 From: Lucas Hrabovsky Date: Thu, 19 Mar 2020 17:03:18 -0400 Subject: [PATCH 6/6] fix(export): Add tests to make sure csv's have correct headers. --- src/utils/formatters.spec.js | 55 +++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/utils/formatters.spec.js b/src/utils/formatters.spec.js index a5d3a8f..2dc2834 100644 --- a/src/utils/formatters.spec.js +++ b/src/utils/formatters.spec.js @@ -1,6 +1,7 @@ -import { createJSONFormatter } from './formatters'; +import { createJSONFormatter, createCSVFormatter } from './formatters'; import stream from 'stream'; import bson, { EJSON } from 'bson'; +import { createCSVParser } from './import-parser'; import fs from 'fs'; import path from 'path'; import { promisify } from 'util'; @@ -22,6 +23,7 @@ const FIXTURES = { JSON_SINGLE_DOC: path.join(BASE_FIXTURE_PATH, 'export-single-doc.json'), JSON_MULTI_SMALL_DOCS: path.join(BASE_FIXTURE_PATH, 'export-multi-small-docs.json'), JSONL: path.join(BASE_FIXTURE_PATH, 'export-two-docs.jsonl'), + CSV_FLAT_HEADERS: path.join(BASE_FIXTURE_PATH, 'export-flat-headers.csv'), }; describe('formatters', () => { @@ -80,4 +82,55 @@ describe('formatters', () => { .then(() => rm(FIXTURES.JSONL)); }); }); + describe('csv', () => { + /** + * TODO: dedupe boilerplate between these tests. 
+ */ + it('should flatten nested documents as dotnotation headers', () => { + const docs = [ + {_id: {foo: 'bar'}} + ]; + const source = stream.Readable.from(docs); + const formatter = createCSVFormatter(); + const dest = fs.createWriteStream(FIXTURES.CSV_FLAT_HEADERS); + + return pipeline(source, formatter, dest) + .then(() => readFile(FIXTURES.CSV_FLAT_HEADERS)) + .then((buf) => { + return pipeline(fs.createReadStream(FIXTURES.CSV_FLAT_HEADERS), createCSVParser(), new stream.Writable({ + objectMode: true, + write: function(chunk, encoding, callback) { + expect(chunk).to.deep.equal({ '_id.foo': 'bar' }); + callback(); + } + })); + }) + .then(() => rm(FIXTURES.CSV_FLAT_HEADERS)); + }); + + /** + * TODO: figure out how to make `flat` in dotnotation BSON-aware to fix this test. + */ + it('should not flatten bson props as nested headers', () => { + const docs = [ + {_id: new bson.ObjectId('5e5ea7558d35931a05eafec0')}, + ]; + const source = stream.Readable.from(docs); + const formatter = createCSVFormatter(); + const dest = fs.createWriteStream(FIXTURES.CSV_FLAT_HEADERS); + + return pipeline(source, formatter, dest) + .then(() => readFile(FIXTURES.CSV_FLAT_HEADERS)) + .then((buf) => { + return pipeline(fs.createReadStream(FIXTURES.CSV_FLAT_HEADERS), createCSVParser(), new stream.Writable({ + objectMode: true, + write: function(chunk, encoding, callback) { + expect(chunk).to.deep.equal({ '_id': '5e5ea7558d35931a05eafec0' }); + callback(); + } + })); + }) + .then(() => rm(FIXTURES.CSV_FLAT_HEADERS)); + }); + }); });