Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Initial commit.

  • Loading branch information...
commit fd6535eeaefaa95e686bf33eb260da0e1be04563 0 parents
@mpareja authored
1  .gitignore
@@ -0,0 +1 @@
+node_modules/
30 grunt.js
@@ -0,0 +1,30 @@
+module.exports = function (grunt) {
+ grunt.loadNpmTasks('grunt-jslint'); // load the task
+
+ grunt.initConfig({
+ watch: {
+ files: '<config:jslint.files>',
+ tasks: 'jslint'
+ },
+
+ jslint: { // configure the task
+ files: [ '*.js' ],
+ exclude: [ 'node_modules/*' ],
+ directives: {
+ devel: true,
+ node: true,
+ vars: true,
+ maxerr: 100,
+ indent: 2,
+ sloppy: true, // don't require use strict
+ nomen: true, // don't give warnings for __dirname
+ undef: true,
+ plusplus: true,
+ minusminus: true
+ }
+ }
+ });
+
+ grunt.registerTask('default', 'watch');
+};
+
15 index.js
@@ -0,0 +1,15 @@
+var scanner = require('./scanner');
+var parser = require('./parser');
+var combiner = require('stream-combiner');
+var map = require('../csv_fixer/node_modules/map-stream');
+
+module.exports = function () {
+ var s = scanner(),
+ p = parser(),
+ c = combiner(s, p);
+
+ // open up a seam for debugging
+ c._scanner = s;
+ c._parser = p;
+ return c;
+};
35 package.json
@@ -0,0 +1,35 @@
+{
+ "name": "decsv",
+ "version": "0.0.0",
+ "description": "Streaming CSV scanner and parser",
+ "main": "index.js",
+ "scripts": {
+ "test": "node test.js"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/mpareja/node-decsv"
+ },
+ "keywords": [
+ "csv",
+ "parser",
+ "scanner",
+ "stream"
+ ],
+ "author": {
+ "name": "Mario Pareja",
+ "email": "pareja.mario@gmail.com",
+ "url": "http://www.mariopareja.com/blog"
+ },
+ "license": "MIT",
+ "dependencies": {
+ "stream-combiner": "0.0.1",
+ "through": "~2.1.0"
+ },
+ "devDependencies": {
+ "chai": "~1.5.0",
+ "grunt": "~0.3.17",
+ "grunt-jslint": "~0.2.5",
+ "jslint": "~0.1.9"
+ }
+}
96 parser.js
@@ -0,0 +1,96 @@
+var through = require('through');
+
+function Parser(tokens) {
+ var i = 0;
+ var t = tokens.length > 0 ? tokens[0] : null;
+ var self = this;
+
+ self.parse = function () {
+ if (t === null) { return null; }
+ switch (t.type) {
+ case 'literal':
+ var value = t.value;
+ consume();
+ if (t === null) { return nodify('unterminatedCell', value); }
+ switch (t.type) {
+ case 'separator':
+ consume();
+ return nodify('cell', value);
+ case 'eol':
+ consume();
+ return nodify('lastcell', value);
+ case 'literal':
+ self.emit('error', new Error('Unexpected literal after a literal. (position: ' + t.position + ')'));
+ return self.parse(); // skip first literal
+ default:
+ throw new Error('Unexpected token: ' + t.type);
+ }
+ case 'separator':
+ consume();
+ return nodify('cell', '');
+ case 'eol':
+ consume();
+ return nodify('lastcell', '');
+ default:
+ throw new Error('Unexpected token: ' + t.type);
+ }
+ };
+
+ self.position = function () { return i; };
+
+ function nodify(type, value) {
+ var node = { type: type };
+ if (value !== undefined) {
+ node.value = value;
+ }
+ return node;
+ }
+
+ function consume() {
+ i++;
+ t = i < tokens.length ? tokens[i] : null;
+ }
+}
+
+module.exports = function () {
+ var buffer = [];
+
+ function main() {
+ return through(ondata, onend);
+ }
+
+ function ondata(newTokens) {
+ var tokens = buffer.concat(newTokens);
+ var parser = new Parser(tokens);
+ var values = [], committed = 0, node;
+
+ while ((node = parser.parse()) && node.type !== 'unterminatedCell') {
+ values.push(node.value);
+ if (node.type === 'lastcell') {
+ this.queue(values);
+ values = [];
+ committed = parser.position();
+ }
+ }
+ buffer = tokens.slice(committed);
+ }
+
+ function onend() {
+ var parser = new Parser(buffer);
+ var values = [], node;
+
+ // accept unterminatedCell and include in row values
+ while (node = parser.parse()) {
+ values.push(node.value);
+ }
+
+ buffer = [];
+ if (values.length) {
+ this.queue(values);
+ }
+ this.emit('end');
+ }
+
+ return main();
+};
+
110 scanner.js
@@ -0,0 +1,110 @@
+var through = require('through');
+module.exports = function () {
+ var buffer = '';
+ var position = 0;
+
+ return through(ondata/*, onend */);
+
+ function ondata(data) {
+ data = buffer + data;
+ var i = 0;
+ var c = data.length > 0 ? data[0] : null;
+ var self = this;
+
+ main();
+
+ function main() {
+ var tokens = [], committed = 0, token;
+ for (token = scan(); token; token = scan()) {
+ tokens.push(token);
+ token.position = position + committed + 1;
+ committed = i;
+ }
+ buffer = data.substring(committed);
+ position += i;
+ if (tokens.length > 0) {
+ self.queue(tokens);
+ }
+ }
+
+ function scan() {
+ if (c === null) { return null; }
+ switch (c) {
+ case '"': return quote();
+ case ',': return separator();
+ case '\n': return eol('\r');
+ case '\r': return eol('\n');
+ default: return unquoted();
+ }
+ }
+
+ function quote() {
+ var literal = '';
+ while (consume()) {
+ switch (c) {
+ case '"':
+ consume();
+ return tokenize('literal', literal);
+ case '\\':
+ consume(); // \
+ if (c === null) {
+ return null;
+ } else if (c === '"') {
+ literal += '"';
+ consume(); // "
+ } else {
+ literal += "\\" + c;
+ consume();
+ }
+ break;
+ default:
+ literal += c;
+ break;
+ }
+ }
+ return null;
+ }
+
+ function eol(optionalChar) {
+ consume(); // \r or \n
+ if (c === null) {
+ return null;
+ }
+ if (c === optionalChar) {
+ consume();
+ }
+ return tokenize('eol');
+ }
+
+ function unquoted() {
+ var literal = c;
+ while (consume() && c !== ',' && c !== '\n' && c !== '\r') {
+ literal += c;
+ }
+ return tokenize('literal', literal);
+ }
+
+ function separator() {
+ consume();
+ return tokenize('separator');
+ }
+
+ function consume() {
+ i++;
+ c = i < data.length ? data[i] : null;
+ return c !== null;
+ }
+
+ function tokenize(type, value) {
+ var token = { type: type };
+ if (value !== undefined) {
+ token.value = value;
+ }
+ return token;
+ }
+ }
+
+ function onend() {
+ // TODO: just raise unexpected EOF if buffer remains
+ }
+};
62 test.js
@@ -0,0 +1,62 @@
+var async = require('async');
+var decsv = require('./');
+var expect = require('chai').expect;
+var queue = [];
+
+test('first,second,third', [['first', 'second', 'third']]);
+test('first,second,third\nfourth,fifth', [['first', 'second', 'third'], ['fourth', 'fifth']]);
+test('first,second,third\r\nfourth,fifth', [['first', 'second', 'third'], ['fourth', 'fifth']]);
+test('first,second,third\rfourth,fifth', [['first', 'second', 'third'], ['fourth', 'fifth']]);
+test('first,second,third\rfourth,fifth\n', [['first', 'second', 'third'], ['fourth', 'fifth']]);
+test('first,second,third\rfourth,fifth\nanother,andother', [['first', 'second', 'third'], ['fourth', 'fifth'], ['another', 'andother']]);
+test('"first",second,third', [['first', 'second', 'third']]);
+test('"first",s"econd,third', [['first', 's"econd', 'third']]);
+test('"first","second",third', [['first', 'second', 'third']]);
+test('first,"seco\nnd",third', [['first', 'seco\nnd', 'third']]);
+test('first,"seco,nd",third', [['first', 'seco,nd', 'third']]);
+
+run();
+
+function test(input, expected) {
+ queue.push({input: input, expected: expected});
+}
+
+function run() {
+ var fns = queue.map(function (test) {
+ return function (cb) {
+ var d = decsv();
+ var found = [], errors = [], scanned = [], parsed = [];
+ d.on('data', function (values) {
+ found.push(values);
+ });
+ d.on('end', function () {
+ try {
+ expect(found).to.deep.equal(test.expected);
+ cb(null);
+ } catch (e) {
+ console.log('scanned:'); console.log(scanned);
+ console.log('parsed:'); console.log(parsed);
+ cb(e);
+ }
+ });
+ d.on('error', function (err) {
+ errors.push(err);
+ });
+ d._scanner.on('data', function (data) { scanned.push(data); });
+ d._parser.on('data', function (data) { parsed.push(data); });
+ d.write(test.input);
+ d.end();
+ };
+ });
+ async.series(fns, function (err) {
+ if (err) {
+ console.log('FAIL!');
+ console.log(err.message);
+ process.exit(1);
+ } else {
+ console.log('Pass.');
+ process.exit(0);
+ }
+ });
+}
+
Please sign in to comment.
Something went wrong with that request. Please try again.