Skip to content

Commit

Permalink
redo api;allow geojson join on id vs prop
Browse files Browse the repository at this point in the history
  • Loading branch information
mhkeller committed Jan 2, 2017
1 parent 4d8e043 commit 9255f13
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 22,786 deletions.
46 changes: 29 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ A simple utility for SQL-like joins with Json or GeoJson data. Also creates join
To install as a Node.js module:

````
npm install joiner
npm install --save joiner
````

Or to install as a command-line utility:
Expand Down Expand Up @@ -40,18 +40,30 @@ report: {
}
````

__.left__
### .left(config)

_.left(leftData, leftDataKey, rightData, rightDataKey, [nestedKeyName])_
Perform a left join on two array-of-objects JSON datasets. Optionally, you can pass in a key name as `path` in case the left data's attribute dictionary is nested.

Perform a left join on the two array-of-object json datasets. Optionally, you can pass in a key name in case the left data's attribute dictionary is nested, such as in GeoJson where the attributes are under a `properties` object.
| parameter | type | description |
| :------------|:-------- |:---------------|
| leftData | Array | existing data |
| leftDataKey | String | key to join on |
| rightData | Array | new data |
| rightDataKey | String | key to join on |
| path | [String] | optional, key name of attribute |


__.geoJson__
### .geoJson(config)

_.geoJson(leftData, leftDataKey, rightData, rightDataKey, 'properties')_
Performs a left join on the `properties` object of each feature in a geojson array. By default it will join on the `id` property. You can also join on a value in the `properties` object by setting `leftDataKey` to the desired key name and `path` to the string `'properties'`.

Does the same thing as __.left__ but passes in `properties` as the nested key name.
| parameter | type | description |
| -------------|--------- |----------------|
| leftData | Array | existing data |
| leftDataKey | [String] default='id'| Optional, key to join on |
| rightData | Array | new data |
| rightDataKey | String | key to join on |
| path | [String] | optional, key name of attribute |

## Usage

Expand All @@ -62,22 +74,22 @@ As you can see, it puts a lot of data in memory, so it's probably best to avoid
## Command line interface

````
Usage: joiner
-a FILE_PATH
-k DATASET_A_KEY
-b FILE_PATH
-l DATASET_B_KEY
-m (json|geojson) # defaults to `json`
-n NEST_ID
-o OUT_FILE_PATH
Usage: joiner
-a DATASET_A_PATH
-k DATASET_A_KEY
-b DATASET_B_PATH
-l DATASET_B_KEY
-f (json|geojson) # defaults to `json`
-p NESTED_PATH_ID
-o OUT_FILE_PATH
-r (summary|full) # defaults to `summary`
````

The first four parameters, `-a`, `-k`, `-b` and `-l` are required.
The first four parameters, `-a`, `-k`, `-b` and `-l` are required.

If you specify an output file, it will also write the join report to the same directory. For example, `-o path/to/output.csv` will also write `path/to/output-report.json`.

`-m` defaults to `json`. `-m geojson` acts the same as the `.geoJson` method above.
`-f` defaults to `json`. `-f geojson` acts the same as the `.geoJson` method above.

Supported input and output formats: `json`, `csv`, `tsv`, `psv`. The format will be inferred from the file extension on both input and output file paths. For example, `-a path/to/input/file.csv` will read in a csv. `-o path/to/output/file.csv` will write a csv.

Expand Down
151 changes: 55 additions & 96 deletions bin/index.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
#!/usr/bin/env node

var fs = require('fs'),
dsv = require('dsv'),
optimist = require('optimist'),
_ = require('underscore'),
joiner = require('../src/index.js');
var io = require('indian-ocean')
var optimist = require('optimist')
var joiner = require('../src/index.js')

var argv = optimist
.usage('Usage: joiner -a FILE_PATH -k DATASET_A_KEY -b FILE_PATH -l DATASET_B_KEY -m (json|geojson) -n NEST_ID -o OUT_FILE_PATH -d (summary|full)')
.usage('Usage: joiner -a DATASET_A_PATH -k DATASET_A_KEY -b DATASET_B_PATH -l DATASET_B_KEY -m (json|geojson) -n NEST_ID -o OUT_FILE_PATH -d (summary|full)')
.options('h', {
alias: 'help',
describe: 'Display help',
default: false
})
.options('a', {
alias: 'adata',
describe: 'Dataset A',
alias: 'apath',
describe: 'Dataset A path',
default: null
})
.options('k', {
Expand All @@ -24,123 +22,84 @@ var argv = optimist
default: null
})
.options('b', {
alias: 'bdata',
describe: 'Dataset B',
alias: 'bpath',
describe: 'Dataset B path',
default: null
})
.options('l', {
alias: 'bkey',
describe: 'Dataset B key',
default: null
})
.options('m', {
alias: 'mode',
.options('f', {
alias: 'format',
describe: 'json or geojson',
default: 'json'
})
.options('n', {
alias: 'nester',
describe: 'Nested id',
default: false
.options('p', {
alias: 'path',
describe: 'Nested path id',
default: null
})
.options('o', {
alias: 'out',
describe: 'Out file',
default: false
describe: 'Out path',
default: null
})
.options('r', {
alias: 'report',
describe: 'Report format',
default: 'summary'
})
.check(function(argv) {
if ( (!argv['a'] || !argv['adata']) && (!argv['a'] || !argv['adata']) && (!argv['b'] || !argv['bdata']) && (!argv['k'] || !argv['akey']) && (!argv['l'] || !argv['bkey']) ) throw 'What do you want to do?';
.check(function (argv) {
if ((!argv['a'] || !argv['adata']) && (!argv['a'] || !argv['adata']) && (!argv['b'] || !argv['bdata']) && (!argv['k'] || !argv['akey']) && (!argv['l'] || !argv['bkey'])) {
throw 'What do you want to do?' // eslint-disable-line no-throw-literal
}
})
.argv;

if (argv.h || argv.help) return optimist.showHelp();

var adata = argv.a || argv['adata'],
akey = argv.k || argv['akey'],
bdata = argv.b || argv['bdata'],
bkey = argv.l || argv['bkey'],
format = argv.m || argv['mode'],
nester = argv.n || argv['nester'],
out_file = argv.o || argv['out'],
report_desc = argv.r || argv['report'];

// Given a file name, get its extension
// Given a file name, return its extension: the text after the last `.`.
// A name containing no `.` is returned unchanged, because `split` then
// yields a single-element array.
function discernFormat (file_name) {
  var name_arr = file_name.split('.')
  // Declared locally; assigning it undeclared leaked an implicit global
  // (and throws a ReferenceError in strict mode).
  var format_name = name_arr[name_arr.length - 1]
  return format_name
}
.argv

function discernFileFormatter(file_name){
var format = discernFormat(file_name);
var formatMap = {
json: function(file){
return JSON.stringify(file)
},
csv: function(file){
return dsv.csv.format(file)
},
tsv: function(file){
return dsv.tsv.format(file)
},
psv: function(file){
return dsv.dsv('|').format(file)
}
}
return formatMap[format]
if (argv.h || argv.help) {
optimist.showHelp()
}

// Serialize `data` with the formatter chosen from `file_path`'s extension
// and write the result to `file_path` synchronously.
function writeDataSync (file_path, data) {
  var fileFormatter = discernFileFormatter(file_path)
  // The Node API is `fs.writeFileSync` (lower-case `w`); `fs.WriteFileSync`
  // is undefined and calling it throws a TypeError.
  fs.writeFileSync(file_path, fileFormatter(data))
}

// Write the join report as pretty-printed JSON next to the output file,
// e.g. `path/to/output.csv` -> `path/to/output-report.json`.
function writeReportSync (file_path, report) {
  // Strip only a trailing `.ext`. First-occurrence `replace` calls would
  // mangle paths that contain the extension text or an earlier dot
  // (e.g. `csvs/out.csv`, `dir.v2/out.csv`).
  var report_path = file_path.replace(/\.[^./\\]+$/, '') + '-report.json'
  // Stringify the report itself, not the path, and use the real
  // `fs.writeFileSync` API.
  fs.writeFileSync(report_path, JSON.stringify(report, null, 2))
}
var aPath = argv.a || argv['apath']
var aKey = argv.k || argv['akey']
var bPath = argv.b || argv['bpath']
var bKey = argv.l || argv['bkey']
var format = argv.f || argv['format']
var path = argv.p || argv['path']
var outPath = argv.o || argv['out']
var reportDesc = argv.r || argv['report']

var aData = io.readDataSync(aPath)
var bData = io.readDataSync(bPath)

// Given a file name, return the appropriate data parser
function discernParser(file_name) {
var format = discernFormat(file_name);
var parserMap = {
json: JSON.parse,
csv: dsv.csv.parse,
tsv: dsv.tsv.parse,
psv: dsv.dsv('|').parse
}
return parserMap[format]
var config = {
leftData: aData,
leftDataKey: aKey,
rightData: bData,
rightDataKey: bKey,
path: path
}

// Parse data
var aparser = discernParser(adata),
bparser = discernParser(bdata);
adata = aparser(adata)
bdata = bparser(bdata)

// Join data
var jd;
if (format != 'geojson'){
jd = joiner.left(adata, akey, bdata, bkey, nester)
}else{
jd = joiner.geoJson(adata, akey, bdata, bkey, nester)
if (format !== 'json' && format !== 'geojson') {
throw new Error('Format must be either json or geojson')
}
var jd = joiner[format](config)

if (out_file){
writeDataSync(out_file, jd.data)
writeReportSync(out_file, jd.report)
}else{
if (report_desc == 'summary'){
console.log(jd.report.prose.summary)
}else{
console.log(jd.report.prose.full)
}
// With an out path: write the joined data plus a JSON report beside it.
// Without one: print the requested report prose to stdout.
if (outPath !== null) {
  io.writeDataSync(outPath, jd.data)
  // The README documents the report as `<name>-report.json`. `stripExtension`
  // may leave the dot that preceded the extension, so drop a trailing dot
  // before appending the suffix (avoids `<name>.report.json`).
  io.writeDataSync(stripExtension(outPath).replace(/\.$/, '') + '-report.json', jd.report)
} else {
  if (reportDesc === 'summary') {
    console.log(jd.report.prose.summary)
  } else {
    console.log(jd.report.prose.full)
  }
}


/**
 * Remove the trailing file extension from a path.
 *
 * The extension text comes from `io.discernFormat`; whatever that returns is
 * removed from the END of the path only. Presumably it returns the extension
 * without its leading dot, in which case the dot stays in the result
 * (`out/data.csv` -> `out/data.`) - TODO confirm against indian-ocean.
 *
 * @param {String} fullPath path whose extension should be removed
 * @returns {String} the path without its trailing extension; unchanged when
 *   no extension is reported or the path does not end with it
 */
function stripExtension (fullPath) {
  var ext = io.discernFormat(fullPath)
  // `String#replace` with a string pattern removes the FIRST occurrence, so
  // `csvs/out.csv` used to become `s/out.csv`. Anchor the removal to the end.
  if (!ext || !fullPath.endsWith(ext)) {
    return fullPath
  }
  return fullPath.slice(0, fullPath.length - ext.length)
}

0 comments on commit 9255f13

Please sign in to comment.