From 98be9c8f22d4d22e46817841f24ed1074d5a71d0 Mon Sep 17 00:00:00 2001 From: Michael Elsdoerfer Date: Sun, 17 Apr 2011 00:48:45 +0200 Subject: [PATCH] Make it possible to feed CsvReader pieces of data from memory. In a way, this is a workaround for the lack of a memory-based stream object in Node. --- README.md | 22 +++++++++++++++++++++- lib/ya-csv.js | 45 +++++++++++++++++++++++++++------------------ test/stream.js | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 test/stream.js diff --git a/README.md b/README.md index f0fa9b0..5253123 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ Current version requires at least Node.js v0.1.99 and it's tested with Node.js v - event based, suitable for processing big CSV streams - configurable separator, quote and escape characters (comma, double-quote and double-quote by default) -- ignores lines starting with configurable comment character (off by default) + - ignores lines starting with configurable comment character (off by default) + - supports memory-only streaming ## More examples @@ -70,3 +71,22 @@ Convert the `/etc/passwd` file to comma separated format, drop commented lines a reader.addListener('data', function(data) { writer.writeRecord(data); }); + +Parsing an upload as the data comes in, using node-formidable: + + upload_form.onPart = function(part) { + if (!part.filename) { upload_form.handlePart(part); return } + + var reader = csv.createCsvStreamReader({'comment': '#'}); + reader.addListener('data', function(data) { + saveRecord(data); + }); + + part.on('data', function(buffer) { + // Pipe incoming data into the reader. 
+ reader.parse(buffer); + }); + part.on('end', function() { + reader.end(); + }); + } diff --git a/lib/ya-csv.js b/lib/ya-csv.js index 995fbff..fe894d3 100644 --- a/lib/ya-csv.js +++ b/lib/ya-csv.js @@ -21,23 +21,11 @@ var CsvReader = csv.CsvReader = function(readStream, options) { commentedLine: false }; - readStream.addListener('data', function(data) { self.parse(data) }); - readStream.addListener('error', function() { self.emit('error') }); - readStream.addListener('end', function() { - var ps = self.parsingStatus; - if (ps.quotedField) { - self.emit('error', new Error('Input stream ended but closing quotes expected')); - } else { - // dump open record - if (ps.openField) { - self._addField(); - } - if (ps.openRecord.length > 0) { - self._addRecord(); - } - self.emit('end'); - } - }); + if (readStream) { + readStream.addListener('data', function(data) { self.parse(data) }); + readStream.addListener('error', function() { self.emit('error') }); + readStream.addListener('end', function() { self.end() }); + } }; sys.inherits(CsvReader, events.EventEmitter); @@ -114,6 +102,22 @@ CsvReader.prototype.parse = function(data) { } }; +CsvReader.prototype.end = function() { + var ps = this.parsingStatus; + if (ps.quotedField) { + this.emit('error', new Error('Input stream ended but closing quotes expected')); + } else { + // dump open record + if (ps.openField) { + this._addField(); + } + if (ps.openRecord.length > 0) { + this._addRecord(); + } + this.emit('end'); + } +} + CsvReader.prototype._isEscapable = function(c) { if ((c === this.escapechar) || (c === this.quotechar)) { return true; @@ -169,8 +173,13 @@ csv.createCsvFileReader = function(path, options) { }; csv.createCsvStreamReader = function(readStream, options) { + if (options === undefined && typeof readStream === 'object') { + options = readStream; + readStream = undefined; + } options = options || {}; - readStream.setEncoding(options.encoding ? 
options.encoding : 'utf8'); + if (readStream) + readStream.setEncoding(options.encoding ? options.encoding : 'utf8'); return new CsvReader(readStream, options); }; diff --git a/test/stream.js b/test/stream.js new file mode 100644 index 0000000..3537b52 --- /dev/null +++ b/test/stream.js @@ -0,0 +1,45 @@ +var csv = require('../lib/ya-csv'), + sys = require('sys'), + fs = require('fs'); + +sys.debug('start'); + +if (process.argv.length < 3) { + sys.error("Usage: node " + process.argv[1] + " "); + process.exit(1); +} + +var csvIn = csv.createCsvStreamReader({ + 'separator': ',', + 'quote': '"', + 'comment': '#' +}); + +var lines = 0; +var columns = 0; + +csvIn.addListener('end', function() { + sys.debug('end'); + sys.debug(columns + ' columns, ' + lines + ' lines'); +}); + +csvIn.addListener('error', function(e) { + sys.debug('error'); + sys.debug(e); +}); + +csvIn.addListener('data', function(data) { + lines++; + columns += data.length; +}); + +var file = process.argv[2]; +var fileIn = fs.createReadStream(file, {flags: 'r', bufferSize: 10}); +fileIn.setEncoding('utf8'); +fileIn.on('data', function(data) { + sys.debug(data); + csvIn.parse(data); +}); +fileIn.on('end', function(data) { + csvIn.end(); +});