OpenRefine client in Node.js
JavaScript
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
lib
test
LICENSE
README.md
index.js
package.json

README.md

node-openrefine

Node.js client library for controlling OpenRefine.

TODO / Features

  • [ ] upload, apply operations, download results, delete project
  • pipe
  • CLI tool

Usage

var openrefine = require('openrefine')

// another server; same usage
var server = openrefine.server('http://localhost:3333')

// projects metadata
openrefine
  .projects()
  .then(project_metadata => ...)

Project metadata format:

{
  "[project_id]": {
    "name": "[project_name]",
    "created": "[project_creation_time]",
    "modified": "[project_modification_time]",
    "customMetadata": {}
  },
  ...[More projects]...
}

Create a project and clean up some data:

var project = openrefine
  .create('data_cleanup_project')     // calling .create() without a name auto-generates a project name
  .accept('csv')
  .accept({
    separator: ',',
    ignoreLines: 1
  })
  .expose('csv')
  .keep(true)   // keep the data after end() or pipe; by default it is not kept
  .use([
    {
      "op": "core/column-split",
      "description": "Split column DATE by separator",
      "engineConfig": {
        "facets": [],
        "mode": "row-based"
      },
      "columnName": "DATE",
      "guessCellType": true,
      "removeOriginalColumn": true,
      "mode": "separator",
      "separator": "-",
      "regex": false,
      "maxColumns": 0
    }
  ])
  .use(customCleanupAddress())    // customCleanupAddress() returns an array of operations

project
  .load('input.csv')
  .end(function (data) {
    // ...
  })
  .then(() => project.destroy())

Or use the stream interface:

fs.createReadStream('input.csv')
  .pipe(project)
  .pipe(fs.createWriteStream('output.csv'))

A project may have some internal state (project metadata such as name and ID, previously imported data, etc.). To open an existing project, use its numeric OpenRefine project ID:

server.open(1234567980)

Delete all data in a project:

project.clean()

Destroy a project after use:

project.destroy()

See also