Skip to content

Commit

Permalink
chore: Fix bin/mongodb-schema script (#206)
Browse files Browse the repository at this point in the history
* fix bin/mongodb-schema

* remove mongodb-collection-sample
  • Loading branch information
lerouxb committed Oct 25, 2023
1 parent c15e4b7 commit c7b07ea
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 565 deletions.
72 changes: 32 additions & 40 deletions bin/mongodb-schema
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
/* eslint-disable no-console */
/* eslint-disable @typescript-eslint/no-var-requires */

const { stream: parseSchemaStream, schemaStats } = require('../');
const { SchemaAnalyzer, schemaStats: _schemaStats } = require('../');
const schemaStats = _schemaStats.default;

const { MongoClient } = require('mongodb');
const sample = require('mongodb-collection-sample');
const toNS = require('mongodb-ns');
const yaml = require('js-yaml');
const pkg = require('../package.json');
Expand Down Expand Up @@ -36,12 +36,6 @@ const argv = require('yargs')
describe: 'Print the computed schema to stdout.',
default: true
})
.option('r', {
alias: 'repeat',
type: 'number',
describe: 'Repeat experiment n times.',
default: 1
})
.option('s', {
alias: 'stats',
type: 'boolean',
Expand Down Expand Up @@ -75,9 +69,9 @@ const argv = require('yargs')
.help('h')
.wrap(100)
.example(
'$0 localhost:27017 mongodb.fanclub --number 1000 --repeat 5 --stats ' +
'$0 localhost:27017 mongodb.fanclub --number 1000 --stats ' +
'--no-output', 'analyze 1000 docs from the mongodb.fanclub ' +
'collection, repeat 5 times and only show statistics.'
'collection and only show statistics.'
)
.example(
'$0 localhost:27017 test.foo --format table',
Expand Down Expand Up @@ -129,13 +123,23 @@ function getTable(schema) {
}

const bar = new ProgressBar('analyzing [:bar] :percent :etas ', {
total: argv.number * argv.repeat,
total: argv.number,
width: 60,
complete: '=',
incomplete: ' ',
clear: true
});

/**
 * Start an aggregation on `collection` consisting of a single `$sample`
 * stage, so the server hands back a random subset of documents.
 *
 * @param {object} collection - Object exposing `aggregate(pipeline, options)`;
 *   presumably a MongoDB driver `Collection` (verify against the caller).
 * @param {number} [size=1000] - Number of documents requested from `$sample`.
 * @returns {*} Whatever `collection.aggregate()` returns, passed through
 *   unchanged.
 */
function sample(collection, size = 1000) {
  const pipeline = [{ $sample: { size } }];
  // Always sent with allowDiskUse enabled, regardless of the requested size.
  const aggregateOptions = { allowDiskUse: true };
  return collection.aggregate(pipeline, aggregateOptions);
}

const client = new MongoClient(uri);

(async function main() {
Expand All @@ -158,46 +162,30 @@ const client = new MongoClient(uri);
promoteValues: argv.promote
};

let schema;
const schemaOptions = {
storeValues: argv.values,
semanticTypes: argv.semanticTypes
};

const analyzer = new SchemaAnalyzer(schemaOptions);
try {
for (let i = 0; i < argv.repeat; i++) {
await new Promise((resolve, reject) => {
const source = argv.sampling
? sample(db, ns.collection, options)
: db.collection(ns.collection).find(options.query, {
promoteValues: options.promoteValues
}).limit(options.size).stream();

source
.once('data', function() {
ts = new Date();
})
.pipe(parseSchemaStream(schemaOptions))
.on('progress', function() {
bar.tick();
})
.on('data', function(data) {
schema = data;
})
.on('error', function(err) {
reject(err);
})
.on('end', function() {
const duration = new Date() - ts;
resolve(duration);
});
});
const input = argv.sampling
? sample(db.collection(ns.collection), sampleSize)
: db.collection(ns.collection).find(options.query, {
promoteValues: options.promoteValues
}).limit(options.size);

for await (const doc of input) {
bar.tick();
analyzer.analyzeDoc(doc);
}
} catch (err) {
console.error('error:', err.message);
process.exit(1);
}

const schema = analyzer.getResult();

if (argv.output) {
let output = '';
if (argv.format === 'yaml') {
Expand All @@ -209,6 +197,7 @@ const client = new MongoClient(uri);
}
console.log(output);
}

if (argv.stats) {
let branchOutput = '[';
const branchingFactors = schemaStats.branch(schema);
Expand All @@ -218,11 +207,14 @@ const client = new MongoClient(uri);
branchOutput += branchingFactors.join(',') + ']';
}

console.error('execution count: ' + argv.repeat);
console.error('toplevel fields:', schema.fields.length);
console.error('branching factors:', branchOutput);
console.error('schema width: ' + schemaStats.width(schema));
console.error('schema depth: ' + schemaStats.depth(schema));
}

console.dir(analyzer.getSchemaPaths());
console.dir(analyzer.getSimplifiedSchema(), { depth: null });

client.close();
})();
Loading

0 comments on commit c7b07ea

Please sign in to comment.