OpenRefine client in Node.js
JavaScript
Latest commit f003b7e Dec 5, 2015 @pm5 update documentation
Permalink
Failed to load latest commit information.
lib
test
LICENSE
README.md
index.js
package.json

README.md

node-openrefine

Node.js client library for controlling OpenRefine.

TODO / Features

  • [] upload, apply operations, download results, delete project
  • pipe
  • CLI tool

Usage

var openrefine = require('openrefine')

// another server; same usage
var server = openrefine.server('http://localhost:3333')

// projects metadata
openrefine
  .projects()
  .then(project_metadata => ...)

Project metadata format:

{
  "[project_id]": {
    "name": "[project_name]",
    "created": "[project_creation_time]",
    "modified": "[project_modification_time]",
    "customMetadata": {}
  },
  ...[More projects]...
}

Create a project and clean up some data:

var project = openrefine
  .create('data_cleanup_project')     // calling .create() with no argument auto-generates a project name
  .accept('csv')
  .accept({
    separator: ',',
    ignoreLines: 1
  })
  .expose('csv')
  .keep(true)   // keep data after end() or pipe; by default the data is not kept
  .use([
    {
      "op": "core/column-split",
      "description": "Split column DATE by separator",
      "engineConfig": {
        "facets": [],
        "mode": "row-based"
      },
      "columnName": "DATE",
      "guessCellType": true,
      "removeOriginalColumn": true,
      "mode": "separator",
      "separator": "-",
      "regex": false,
      "maxColumns": 0
    }
  ])
  .use(customCleanupAddress())    // customCleanupAddress() returns an array of operations

project
  .load('input.csv')
  .end(function (data) {
    // ...
  })
  .then(() => project.destroy())

Or use the stream interface:

fs.createReadStream('input.csv')
  .pipe(project)
  .pipe(fs.createWriteStream('output.csv'))

A project may have some internal state (project metadata such as name and ID, previously imported data, etc.). To open an existing project, use its numeric OpenRefine project ID:

server.open(1234567980)

Delete all data in a project:

project.clean()

Destroy a project after use:

project.destroy()

See also