diff --git a/.eslintrc.js b/.eslintrc.js index c3ed7c56..365ef212 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -1,6 +1,7 @@ module.exports = { "parserOptions": { - "ecmaVersion": 5 + "ecmaVersion": 2020, + "sourceType": "module" }, "env": { "browser": true, diff --git a/Gruntfile.js b/Gruntfile.js deleted file mode 100644 index 93b1256f..00000000 --- a/Gruntfile.js +++ /dev/null @@ -1,27 +0,0 @@ -module.exports = function(grunt) { - grunt.initConfig({ - uglify: { - options: { - compress: { - global_defs: { - 'PAPA_BROWSER_CONTEXT': true - }, - dead_code: true - }, - output: { - comments: 'some', - }, - }, - min: { - files: { - 'papaparse.min.js': ['papaparse.js'] - }, - }, - }, - }); - - grunt.loadNpmTasks('grunt-contrib-uglify'); - - grunt.registerTask('build', ['uglify']); - grunt.registerTask('default', ['uglify']); -}; diff --git a/package.json b/package.json index af0afbfe..d835b96d 100644 --- a/package.json +++ b/package.json @@ -34,21 +34,26 @@ "url": "https://twitter.com/mholt6" }, "license": "MIT", - "main": "papaparse.js", - "browser": "papaparse.min.js", + "main": "./papaparse.js", + "browser": "./papaparse.min.js", + "exports": { + "import": "./papaparse.mjs", + "require": "./papaparse.js" + }, "devDependencies": { "chai": "^4.2.0", - "connect": "^3.3.3", - "eslint": "^4.19.1", - "grunt": "^1.0.2", - "grunt-contrib-uglify": "^3.3.0", - "mocha": "^5.2.0", - "mocha-headless-chrome": "^2.0.1", + "connect": "^3.7.0", + "eslint": "^7.28.0", + "mocha": "^9.0.0", + "mocha-headless-chrome": "^3.1.0", "open": "7.0.0", - "serve-static": "^1.7.1" + "rollup": "^2.51.1", + "rollup-plugin-terser": "^7.0.2", + "serve-static": "^1.14.1" }, "scripts": { - "lint": "eslint --no-ignore papaparse.js Gruntfile.js .eslintrc.js 'tests/**/*.js'", + "build": "rollup -c rollup.config.mjs", + "lint": "eslint --no-ignore papaparse.mjs .eslintrc.js 'tests/**/*.js'", "test-browser": "node tests/test.js", "test-mocha-headless-chrome": "node tests/test.js --mocha-headless-chrome", "test-node": "mocha tests/node-tests.js tests/test-cases.js", diff --git a/papaparse.js b/papaparse.js old mode 100755 new mode 100644 index 090c6634..e7ee20c1 --- a/papaparse.js +++ b/papaparse.js @@ -1,36 +1,15 @@ -/* @license -Papa Parse -v5.3.1 -https://github.com/mholt/PapaParse -License: MIT -*/ - -(function(root, factory) -{ - /* globals define */ - if (typeof define === 'function' && define.amd) - { - // AMD. Register as an anonymous module. - define([], factory); - } - else if (typeof module === 'object' && typeof exports !== 'undefined') - { - // Node. Does not work with strict CommonJS, but - // only CommonJS-like environments that support module.exports, - // like Node. - module.exports = factory(); - } - else - { - // Browser globals (root is window) - root.Papa = factory(); - } - // in strict mode we cannot access arguments.callee, so we need a named reference to - // stringify the factory method for the blob worker - // eslint-disable-next-line func-name -}(this, function moduleFactory() -{ - 'use strict'; +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : + typeof define === 'function' && define.amd ? define(['exports'], factory) : + (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.Papa = {})); +}(this, (function (exports) { 'use strict'; + + /* @license + Papa Parse + v5.3.1 + https://github.com/mholt/PapaParse + License: MIT + */ var global = (function() { // alternative method, similar to `Function('return this')()` @@ -45,44 +24,45 @@ License: MIT return {}; })(); - - function getWorkerBlob() { - var URL = global.URL || global.webkitURL || null; - var code = moduleFactory.toString(); - return Papa.BLOB_URL || (Papa.BLOB_URL = URL.createObjectURL(new Blob(['(', code, ')();'], {type: 'text/javascript'}))); - } - - var IS_WORKER = !global.document && !!global.postMessage, - IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol); var workers = {}, workerIdCounter = 0; + var PAPA_WORKER_NAME = 'papa-worker'; + var WORKER_ID; + var SCRIPT_URL = global && global.document && global.document.currentScript && global.document.currentScript.src; + var IS_WORKER = !global.document && !!global.postMessage, + IS_PAPA_WORKER = IS_WORKER && global.name === PAPA_WORKER_NAME; - var Papa = {}; - - Papa.parse = CsvToJson; - Papa.unparse = JsonToCsv; + var parse = CsvToJson; + var unparse = JsonToCsv; - Papa.RECORD_SEP = String.fromCharCode(30); - Papa.UNIT_SEP = String.fromCharCode(31); - Papa.BYTE_ORDER_MARK = '\ufeff'; - Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK]; - Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker; - Papa.NODE_STREAM_INPUT = 1; + var RECORD_SEP = String.fromCharCode(30); + var UNIT_SEP = String.fromCharCode(31); + var BYTE_ORDER_MARK = '\ufeff'; + var BAD_DELIMITERS = ['\r', '\n', '"', BYTE_ORDER_MARK]; + var WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker && SCRIPT_URL; + var NODE_STREAM_INPUT = 1; // Configurable chunk sizes for local and remote files, respectively - Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB - Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB - Papa.DefaultDelimiter = ','; // Used if not specified and detection fails + var LocalChunkSize = 1024 * 1024 * 10; // 10 MB + var RemoteChunkSize = 1024 * 1024 * 5; // 5 MB + var DefaultDelimiter = ','; // Used if not specified and detection fails // Exposed for testing and development only - Papa.Parser = Parser; - Papa.ParserHandle = ParserHandle; - Papa.NetworkStreamer = NetworkStreamer; - Papa.FileStreamer = FileStreamer; - Papa.StringStreamer = StringStreamer; - Papa.ReadableStreamStreamer = ReadableStreamStreamer; - if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { - Papa.DuplexStreamStreamer = DuplexStreamStreamer; - } + /** @private */ + var Parser = _Parser; + /** @private */ + var ParserHandle = _ParserHandle; + /** @private */ + var NetworkStreamer = _NetworkStreamer; + /** @private */ + var FileStreamer = _FileStreamer; + /** @private */ + var StringStreamer = _StringStreamer; + /** @private */ + var ReadableStreamStreamer = _ReadableStreamStreamer; + /** @private */ + var DuplexStreamStreamer = typeof PAPA_BROWSER_CONTEXT === 'undefined' + ? _DuplexStreamStreamer + : undefined; if (global.jQuery) { @@ -161,7 +141,7 @@ License: MIT fileComplete(); }; - Papa.parse(f.file, f.instanceConfig); + parse(f.file, f.instanceConfig); } function error(name, file, elem, reason) @@ -200,7 +180,7 @@ License: MIT _config.transform = isFunction(_config.transform) ? _config.transform : false; - if (_config.worker && Papa.WORKERS_SUPPORTED) + if (_config.worker && WORKERS_SUPPORTED) { var w = newWorker(); @@ -225,26 +205,26 @@ License: MIT } var streamer = null; - if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') + if (_input === NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') { // create a node Duplex stream for use // with .pipe - streamer = new DuplexStreamStreamer(_config); + streamer = new _DuplexStreamStreamer(_config); return streamer.getStream(); } else if (typeof _input === 'string') { if (_config.download) - streamer = new NetworkStreamer(_config); + streamer = new _NetworkStreamer(_config); else - streamer = new StringStreamer(_config); + streamer = new _StringStreamer(_config); } else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) { - streamer = new ReadableStreamStreamer(_config); + streamer = new _ReadableStreamStreamer(_config); } else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106) - streamer = new FileStreamer(_config); + streamer = new _FileStreamer(_config); return streamer.stream(_input); } @@ -333,7 +313,7 @@ License: MIT return; if (typeof _config.delimiter === 'string' - && !Papa.BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length) + && !BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length) { _delimiter = _config.delimiter; } @@ -453,7 +433,7 @@ License: MIT var needsQuotes = (typeof _quotes === 'boolean' && _quotes) || (typeof _quotes === 'function' && _quotes(str, col)) || (Array.isArray(_quotes) && _quotes[col]) - || hasAny(escapedQuoteStr, Papa.BAD_DELIMITERS) + || hasAny(escapedQuoteStr, BAD_DELIMITERS) || escapedQuoteStr.indexOf(_delimiter) > -1 || escapedQuoteStr.charAt(0) === ' ' || escapedQuoteStr.charAt(escapedQuoteStr.length - 1) === ' '; @@ -531,7 +511,7 @@ License: MIT { global.postMessage({ results: results, - workerId: Papa.WORKER_ID, + workerId: WORKER_ID, finished: finishedIncludingPreview }); } @@ -570,7 +550,7 @@ License: MIT else if (IS_PAPA_WORKER && this._config.error) { global.postMessage({ - workerId: Papa.WORKER_ID, + workerId: WORKER_ID, error: error, finished: false }); @@ -584,18 +564,18 @@ License: MIT configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings! if (!config.step && !config.chunk) configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196 - this._handle = new ParserHandle(configCopy); + this._handle = new _ParserHandle(configCopy); this._handle.streamer = this; this._config = configCopy; // persist the copy to the caller } } - function NetworkStreamer(config) + function _NetworkStreamer(config) { config = config || {}; if (!config.chunkSize) - config.chunkSize = Papa.RemoteChunkSize; + config.chunkSize = RemoteChunkSize; ChunkStreamer.call(this, config); var xhr; @@ -704,15 +684,15 @@ License: MIT return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1)); } } - NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype); - NetworkStreamer.prototype.constructor = NetworkStreamer; + _NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype); + _NetworkStreamer.prototype.constructor = _NetworkStreamer; - function FileStreamer(config) + function _FileStreamer(config) { config = config || {}; if (!config.chunkSize) - config.chunkSize = Papa.LocalChunkSize; + config.chunkSize = LocalChunkSize; ChunkStreamer.call(this, config); var reader, slice; @@ -771,11 +751,11 @@ License: MIT }; } - FileStreamer.prototype = Object.create(ChunkStreamer.prototype); - FileStreamer.prototype.constructor = FileStreamer; + _FileStreamer.prototype = Object.create(ChunkStreamer.prototype); + _FileStreamer.prototype.constructor = _FileStreamer; - function StringStreamer(config) + function _StringStreamer(config) { config = config || {}; ChunkStreamer.call(this, config); @@ -802,11 +782,11 @@ License: MIT return this.parseChunk(chunk); }; } - StringStreamer.prototype = Object.create(StringStreamer.prototype); - StringStreamer.prototype.constructor = StringStreamer; + _StringStreamer.prototype = Object.create(_StringStreamer.prototype); + _StringStreamer.prototype.constructor = _StringStreamer; - function ReadableStreamStreamer(config) + function _ReadableStreamStreamer(config) { config = config || {}; @@ -896,11 +876,11 @@ License: MIT this._input.removeListener('error', this._streamError); }, this); } - ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); - ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer; + _ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); + _ReadableStreamStreamer.prototype.constructor = _ReadableStreamStreamer; - function DuplexStreamStreamer(_config) { + function _DuplexStreamStreamer(_config) { var Duplex = require('stream').Duplex; var config = copy(_config); var parseOnWrite = true; @@ -996,13 +976,13 @@ License: MIT stream.once('finish', bindFunction(this._onWriteComplete, this)); } if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { - DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); - DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer; + _DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); + _DuplexStreamStreamer.prototype.constructor = _DuplexStreamStreamer; } // Use one ParserHandle per entire CSV file or string - function ParserHandle(_config) + function _ParserHandle(_config) { // One goal is to minimize the use of regular expressions... var MAX_FLOAT = Math.pow(2, 53); @@ -1072,7 +1052,7 @@ License: MIT else { _delimiterError = true; // add error after parsing (otherwise it would be overwritten) - _config.delimiter = Papa.DefaultDelimiter; + _config.delimiter = DefaultDelimiter; } _results.meta.delimiter = _config.delimiter; } @@ -1087,7 +1067,7 @@ License: MIT parserConfig.preview++; // to compensate for header row _input = input; - _parser = new Parser(parserConfig); + _parser = new _Parser(parserConfig); _results = _parser.parse(_input, baseIndex, ignoreLastRow); processResults(); return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); @@ -1153,7 +1133,7 @@ License: MIT { if (_results && _delimiterError) { - addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + Papa.DefaultDelimiter + '\''); + addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + DefaultDelimiter + '\''); _delimiterError = false; } @@ -1290,14 +1270,14 @@ License: MIT function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) { var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount; - delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]; + delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', RECORD_SEP, UNIT_SEP]; for (var i = 0; i < delimitersToGuess.length; i++) { var delim = delimitersToGuess[i]; var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; fieldCountPrevRow = undefined; - var preview = new Parser({ + var preview = new _Parser({ comments: comments, delimiter: delim, newline: newline, @@ -1388,7 +1368,7 @@ License: MIT } /** The core parser implements speedy and correct CSV parsing */ - function Parser(config) + function _Parser(config) { // Unpack the config object config = config || {}; @@ -1412,7 +1392,7 @@ License: MIT // Delimiter must be valid if (typeof delim !== 'string' - || Papa.BAD_DELIMITERS.indexOf(delim) > -1) + || BAD_DELIMITERS.indexOf(delim) > -1) delim = ','; // Comment character must be valid @@ -1421,7 +1401,7 @@ License: MIT else if (comments === true) comments = '#'; else if (typeof comments !== 'string' - || Papa.BAD_DELIMITERS.indexOf(comments) > -1) + || BAD_DELIMITERS.indexOf(comments) > -1) comments = false; // Newline must be valid: \r, \n, or \r\n @@ -1665,9 +1645,9 @@ License: MIT } /** - * checks if there are extra spaces after closing quote and given index without any text - * if Yes, returns the number of spaces - */ + * checks if there are extra spaces after closing quote and given index without any text + * if Yes, returns the number of spaces + */ function extraSpaces(index) { var spaceLength = 0; if (index !== -1) { @@ -1752,11 +1732,11 @@ License: MIT function newWorker() { - if (!Papa.WORKERS_SUPPORTED) + if (!WORKERS_SUPPORTED) return false; - var workerUrl = getWorkerBlob(); - var w = new global.Worker(workerUrl); + var workerUrl = SCRIPT_URL; + var w = new global.Worker(workerUrl, { name: PAPA_WORKER_NAME, type: true ? undefined : 'module' }); w.onmessage = mainThreadReceivedMessage; w.id = workerIdCounter++; workers[w.id] = w; @@ -1827,23 +1807,23 @@ License: MIT { var msg = e.data; - if (typeof Papa.WORKER_ID === 'undefined' && msg) - Papa.WORKER_ID = msg.workerId; + if (typeof WORKER_ID === 'undefined' && msg) + WORKER_ID = msg.workerId; if (typeof msg.input === 'string') { global.postMessage({ - workerId: Papa.WORKER_ID, - results: Papa.parse(msg.input, msg.config), + workerId: WORKER_ID, + results: parse(msg.input, msg.config), finished: true }); } else if ((global.File && msg.input instanceof File) || msg.input instanceof Object) // thank you, Safari (see issue #106) { - var results = Papa.parse(msg.input, msg.config); + var results = parse(msg.input, msg.config); if (results) global.postMessage({ - workerId: Papa.WORKER_ID, + workerId: WORKER_ID, results: results, finished: true }); @@ -1871,5 +1851,25 @@ License: MIT return typeof func === 'function'; } - return Papa; -})); + exports.BAD_DELIMITERS = BAD_DELIMITERS; + exports.BYTE_ORDER_MARK = BYTE_ORDER_MARK; + exports.DefaultDelimiter = DefaultDelimiter; + exports.DuplexStreamStreamer = DuplexStreamStreamer; + exports.FileStreamer = FileStreamer; + exports.LocalChunkSize = LocalChunkSize; + exports.NODE_STREAM_INPUT = NODE_STREAM_INPUT; + exports.NetworkStreamer = NetworkStreamer; + exports.Parser = Parser; + exports.ParserHandle = ParserHandle; + exports.RECORD_SEP = RECORD_SEP; + exports.ReadableStreamStreamer = ReadableStreamStreamer; + exports.RemoteChunkSize = RemoteChunkSize; + exports.StringStreamer = StringStreamer; + exports.UNIT_SEP = UNIT_SEP; + exports.WORKERS_SUPPORTED = WORKERS_SUPPORTED; + exports.parse = parse; + exports.unparse = unparse; + + Object.defineProperty(exports, '__esModule', { value: true }); + +}))); diff --git a/papaparse.min.js b/papaparse.min.js index 96aced42..d752b56c 100644 --- a/papaparse.min.js +++ b/papaparse.min.js @@ -1,7 +1,7 @@ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).Papa={})}(this,(function(e){"use strict"; /* @license -Papa Parse -v5.3.1 -https://github.com/mholt/PapaParse -License: MIT -*/ -!function(e,t){"function"==typeof define&&define.amd?define([],t):"object"==typeof module&&"undefined"!=typeof exports?module.exports=t():e.Papa=t()}(this,function s(){"use strict";var f="undefined"!=typeof self?self:"undefined"!=typeof window?window:void 0!==f?f:{};var n=!f.document&&!!f.postMessage,o=n&&/blob:/i.test((f.location||{}).protocol),a={},h=0,b={parse:function(e,t){var i=(t=t||{}).dynamicTyping||!1;M(i)&&(t.dynamicTypingFunction=i,i={});if(t.dynamicTyping=i,t.transform=!!M(t.transform)&&t.transform,t.worker&&b.WORKERS_SUPPORTED){var r=function(){if(!b.WORKERS_SUPPORTED)return!1;var e=(i=f.URL||f.webkitURL||null,r=s.toString(),b.BLOB_URL||(b.BLOB_URL=i.createObjectURL(new Blob(["(",r,")();"],{type:"text/javascript"})))),t=new f.Worker(e);var i,r;return t.onmessage=_,t.id=h++,a[t.id]=t}();return r.userStep=t.step,r.userChunk=t.chunk,r.userComplete=t.complete,r.userError=t.error,t.step=M(t.step),t.chunk=M(t.chunk),t.complete=M(t.complete),t.error=M(t.error),delete t.worker,void r.postMessage({input:e,config:t,workerId:r.id})}var n=null;b.NODE_STREAM_INPUT,"string"==typeof e?n=t.download?new l(t):new p(t):!0===e.readable&&M(e.read)&&M(e.on)?n=new g(t):(f.File&&e instanceof File||e instanceof Object)&&(n=new c(t));return n.stream(e)},unparse:function(e,t){var n=!1,_=!0,m=",",y="\r\n",s='"',a=s+s,i=!1,r=null,o=!1;!function(){if("object"!=typeof t)return;"string"!=typeof t.delimiter||b.BAD_DELIMITERS.filter(function(e){return-1!==t.delimiter.indexOf(e)}).length||(m=t.delimiter);("boolean"==typeof t.quotes||"function"==typeof t.quotes||Array.isArray(t.quotes))&&(n=t.quotes);"boolean"!=typeof t.skipEmptyLines&&"string"!=typeof t.skipEmptyLines||(i=t.skipEmptyLines);"string"==typeof t.newline&&(y=t.newline);"string"==typeof t.quoteChar&&(s=t.quoteChar);"boolean"==typeof t.header&&(_=t.header);if(Array.isArray(t.columns)){if(0===t.columns.length)throw new Error("Option columns is empty");r=t.columns}void 0!==t.escapeChar&&(a=t.escapeChar+s);"boolean"==typeof t.escapeFormulae&&(o=t.escapeFormulae)}();var h=new RegExp(j(s),"g");"string"==typeof e&&(e=JSON.parse(e));if(Array.isArray(e)){if(!e.length||Array.isArray(e[0]))return u(null,e,i);if("object"==typeof e[0])return u(r||Object.keys(e[0]),e,i)}else if("object"==typeof e)return"string"==typeof e.data&&(e.data=JSON.parse(e.data)),Array.isArray(e.data)&&(e.fields||(e.fields=e.meta&&e.meta.fields),e.fields||(e.fields=Array.isArray(e.data[0])?e.fields:"object"==typeof e.data[0]?Object.keys(e.data[0]):[]),Array.isArray(e.data[0])||"object"==typeof e.data[0]||(e.data=[e.data])),u(e.fields||[],e.data||[],i);throw new Error("Unable to serialize unrecognized input");function u(e,t,i){var r="";"string"==typeof e&&(e=JSON.parse(e)),"string"==typeof t&&(t=JSON.parse(t));var n=Array.isArray(e)&&0=this._config.preview;if(o)f.postMessage({results:n,workerId:b.WORKER_ID,finished:a});else if(M(this._config.chunk)&&!t){if(this._config.chunk(n,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);n=void 0,this._completeResults=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(n.data),this._completeResults.errors=this._completeResults.errors.concat(n.errors),this._completeResults.meta=n.meta),this._completed||!a||!M(this._config.complete)||n&&n.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),a||n&&n.meta.paused||this._nextChunk(),n}this._halted=!0},this._sendError=function(e){M(this._config.error)?this._config.error(e):o&&this._config.error&&f.postMessage({workerId:b.WORKER_ID,error:e,finished:!1})}}function l(e){var r;(e=e||{}).chunkSize||(e.chunkSize=b.RemoteChunkSize),u.call(this,e),this._nextChunk=n?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(r=new XMLHttpRequest,this._config.withCredentials&&(r.withCredentials=this._config.withCredentials),n||(r.onload=v(this._chunkLoaded,this),r.onerror=v(this._chunkError,this)),r.open(this._config.downloadRequestBody?"POST":"GET",this._input,!n),this._config.downloadRequestHeaders){var e=this._config.downloadRequestHeaders;for(var t in e)r.setRequestHeader(t,e[t])}if(this._config.chunkSize){var i=this._start+this._config.chunkSize-1;r.setRequestHeader("Range","bytes="+this._start+"-"+i)}try{r.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}n&&0===r.status&&this._chunkError()}},this._chunkLoaded=function(){4===r.readyState&&(r.status<200||400<=r.status?this._chunkError():(this._start+=this._config.chunkSize?this._config.chunkSize:r.responseText.length,this._finished=!this._config.chunkSize||this._start>=function(e){var t=e.getResponseHeader("Content-Range");if(null===t)return-1;return parseInt(t.substring(t.lastIndexOf("/")+1))}(r),this.parseChunk(r.responseText)))},this._chunkError=function(e){var t=r.statusText||e;this._sendError(new Error(t))}}function c(e){var r,n;(e=e||{}).chunkSize||(e.chunkSize=b.LocalChunkSize),u.call(this,e);var s="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,n=e.slice||e.webkitSlice||e.mozSlice,s?((r=new FileReader).onload=v(this._chunkLoaded,this),r.onerror=v(this._chunkError,this)):r=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(r.error)}}function p(e){var i;u.call(this,e=e||{}),this.stream=function(e){return i=e,this._nextChunk()},this._nextChunk=function(){if(!this._finished){var e,t=this._config.chunkSize;return t?(e=i.substring(0,t),i=i.substring(t)):(e=i,i=""),this._finished=!i,this.parseChunk(e)}}}function g(e){u.call(this,e=e||{});var t=[],i=!0,r=!1;this.pause=function(){u.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){u.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){r&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):i=!0},this._streamData=v(function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),i&&(i=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}},this),this._streamError=v(function(e){this._streamCleanUp(),this._sendError(e)},this),this._streamEnd=v(function(){this._streamCleanUp(),r=!0,this._streamData("")},this),this._streamCleanUp=v(function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)},this)}function i(m){var a,o,h,r=Math.pow(2,53),n=-r,s=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/,u=/^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))$/,t=this,i=0,f=0,d=!1,e=!1,l=[],c={data:[],errors:[],meta:{}};if(M(m.step)){var p=m.step;m.step=function(e){if(c=e,_())g();else{if(g(),0===c.data.length)return;i+=e.data.length,m.preview&&i>m.preview?o.abort():(c.data=c.data[0],p(c,t))}}}function y(e){return"greedy"===m.skipEmptyLines?""===e.join("").trim():1===e.length&&0===e[0].length}function g(){if(c&&h&&(k("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to '"+b.DefaultDelimiter+"'"),h=!1),m.skipEmptyLines)for(var e=0;e=l.length?"__parsed_extra":l[i]),m.transform&&(s=m.transform(s,n)),s=v(n,s),"__parsed_extra"===n?(r[n]=r[n]||[],r[n].push(s)):r[n]=s}return m.header&&(i>l.length?k("FieldMismatch","TooManyFields","Too many fields: expected "+l.length+" fields but parsed "+i,f+t):i=r.length/2?"\r\n":"\r"}(e,r)),h=!1,m.delimiter)M(m.delimiter)&&(m.delimiter=m.delimiter(e),c.meta.delimiter=m.delimiter);else{var n=function(e,t,i,r,n){var s,a,o,h;n=n||[",","\t","|",";",b.RECORD_SEP,b.UNIT_SEP];for(var u=0;u=D)return C(!0)}else for(m=F,F++;;){if(-1===(m=r.indexOf(S,m+1)))return i||u.push({type:"Quotes",code:"MissingQuotes",message:"Quoted field unterminated",row:h.length,index:F}),E();if(m===n-1)return E(r.substring(F,m).replace(_,S));if(S!==L||r[m+1]!==L){if(S===L||0===m||r[m-1]!==L){-1!==p&&p=D)return C(!0);break}u.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:h.length,index:F}),m++}}else m++}return E();function k(e){h.push(e),d=F}function b(e){var t=0;if(-1!==e){var i=r.substring(m+1,e);i&&""===i.trim()&&(t=i.length)}return t}function E(e){return i||(void 0===e&&(e=r.substring(F)),f.push(e),F=n,k(f),o&&R()),C()}function w(e){F=e,k(f),f=[],g=r.indexOf(x,F)}function C(e){return{data:h,errors:u,meta:{delimiter:O,linebreak:x,aborted:z,truncated:!!e,cursor:d+(t||0)}}}function R(){T(C()),h=[],u=[]}},this.abort=function(){z=!0},this.getCharIndex=function(){return F}}function _(e){var t=e.data,i=a[t.workerId],r=!1;if(t.error)i.userError(t.error,t.file);else if(t.results&&t.results.data){var n={abort:function(){r=!0,m(t.workerId,{data:[],errors:[],meta:{aborted:!0}})},pause:y,resume:y};if(M(i.userStep)){for(var s=0;s0,h=!Array.isArray(t[0]);if(o&&r){for(var u=0;u0&&(a+=n),a+=p(e[u],u);t.length>0&&(a+=s)}for(var f=0;f0&&!c&&(a+=n);var v=o&&h?e[y]:y;a+=p(t[f][v],y)}f0&&!c)&&(a+=s)}}return a}function p(e,t){if(null==e)return"";if(e.constructor===Date)return JSON.stringify(e).slice(1,25);!0===f&&"string"==typeof e&&null!==e.match(/^[=+\-@].*$/)&&(e="'"+e);var r=e.toString().replace(d,o);return"boolean"==typeof i&&i||"function"==typeof i&&i(e,t)||Array.isArray(i)&&i[t]||function(e,t){for(var i=0;i-1)return!0;return!1}(r,c)||r.indexOf(n)>-1||" "===r.charAt(0)||" "===r.charAt(r.length-1)?a+r+a:r}},d=String.fromCharCode(30),l=String.fromCharCode(31),c=["\r","\n",'"',"\ufeff"],p=!o&&!!i.Worker&&a,g=10485760,m=5242880,_=j,y=F,v=x,k=O,b=R,C=T,w="undefined"==typeof PAPA_BROWSER_CONTEXT?A:void 0;if(i.jQuery){var E=i.jQuery;E.fn.parse=function(e){var t=e.config||{},r=[];return this.each((function(e){if(!("INPUT"===E(this).prop("tagName").toUpperCase()&&"file"===E(this).attr("type").toLowerCase()&&i.FileReader)||!this.files||0===this.files.length)return!0;for(var n=0;n=this._config.preview;if(h)i.postMessage({results:a,workerId:t,finished:u});else if(q(this._config.chunk)&&!r){if(this._config.chunk(a,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);a=void 0,this._completeResults=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(a.data),this._completeResults.errors=this._completeResults.errors.concat(a.errors),this._completeResults.meta=a.meta),this._completed||!u||!q(this._config.complete)||a&&a.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),u||a&&a.meta.paused||this._nextChunk(),a}this._halted=!0},this._sendError=function(e){q(this._config.error)?this._config.error(e):h&&this._config.error&&i.postMessage({workerId:t,error:e,finished:!1})}}function x(e){var t;(e=e||{}).chunkSize||(e.chunkSize=m),S.call(this,e),this._nextChunk=o?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(t=new XMLHttpRequest,this._config.withCredentials&&(t.withCredentials=this._config.withCredentials),o||(t.onload=P(this._chunkLoaded,this),t.onerror=P(this._chunkError,this)),t.open(this._config.downloadRequestBody?"POST":"GET",this._input,!o),this._config.downloadRequestHeaders){var e=this._config.downloadRequestHeaders;for(var i in e)t.setRequestHeader(i,e[i])}if(this._config.chunkSize){var r=this._start+this._config.chunkSize-1;t.setRequestHeader("Range","bytes="+this._start+"-"+r)}try{t.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}o&&0===t.status&&this._chunkError()}},this._chunkLoaded=function(){4===t.readyState&&(t.status<200||t.status>=400?this._chunkError():(this._start+=this._config.chunkSize?this._config.chunkSize:t.responseText.length,this._finished=!this._config.chunkSize||this._start>=function(e){var t=e.getResponseHeader("Content-Range");if(null===t)return-1;return parseInt(t.substring(t.lastIndexOf("/")+1))}(t),this.parseChunk(t.responseText)))},this._chunkError=function(e){var i=t.statusText||e;this._sendError(new Error(i))}}function O(e){var t,i;(e=e||{}).chunkSize||(e.chunkSize=g),S.call(this,e);var r="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,i=e.slice||e.webkitSlice||e.mozSlice,r?((t=new FileReader).onload=P(this._chunkLoaded,this),t.onerror=P(this._chunkError,this)):t=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(t.error)}}function R(e){var t;e=e||{},S.call(this,e),this.stream=function(e){return t=e,this._nextChunk()},this._nextChunk=function(){if(!this._finished){var e,i=this._config.chunkSize;return i?(e=t.substring(0,i),t=t.substring(i)):(e=t,t=""),this._finished=!t,this.parseChunk(e)}}}function T(e){e=e||{},S.call(this,e);var t=[],i=!0,r=!1;this.pause=function(){S.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){S.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){r&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):i=!0},this._streamData=P((function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),i&&(i=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}}),this),this._streamError=P((function(e){this._streamCleanUp(),this._sendError(e)}),this),this._streamEnd=P((function(){this._streamCleanUp(),r=!0,this._streamData("")}),this),this._streamCleanUp=P((function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)}),this)}function A(e){var t=require("stream").Duplex,i=M(e),r=!0,n=!1,s=[],a=null;this._onCsvData=function(e){var t=e.data;a.push(t)||this._handle.paused()||this._handle.pause()},this._onCsvComplete=function(){a.push(null)},i.step=P(this._onCsvData,this),i.complete=P(this._onCsvComplete,this),S.call(this,i),this._nextChunk=function(){n&&1===s.length&&(this._finished=!0),s.length?s.shift()():r=!0},this._addToParseQueue=function(e,t){s.push(P((function(){if(this.parseChunk("string"==typeof e?e:e.toString(i.encoding)),q(t))return t()}),this)),r&&(r=!1,this._nextChunk())},this._onRead=function(){this._handle.paused()&&this._handle.resume()},this._onWrite=function(e,t,i){this._addToParseQueue(e,i)},this._onWriteComplete=function(){n=!0,this._addToParseQueue("")},this.getStream=function(){return a},(a=new t({readableObjectMode:!0,decodeStrings:!1,read:P(this._onRead,this),write:P(this._onWrite,this)})).once("finish",P(this._onWriteComplete,this))}function F(e){var t,i,r,n=Math.pow(2,53),s=-n,a=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/,o=/^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))$/,h=this,u=0,f=0,c=!1,p=!1,g=[],m={data:[],errors:[],meta:{}};if(q(e.step)){var _=e.step;e.step=function(t){if(m=t,k())v();else{if(v(),0===m.data.length)return;u+=t.data.length,e.preview&&u>e.preview?i.abort():(m.data=m.data[0],_(m,h))}}}function y(t){return"greedy"===e.skipEmptyLines?""===t.join("").trim():1===t.length&&0===t[0].length}function v(){if(m&&r&&(C("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to ','"),r=!1),e.skipEmptyLines)for(var t=0;t=g.length?"__parsed_extra":g[r]),e.transform&&(a=e.transform(a,s)),a=b(s,a),"__parsed_extra"===s?(n[s]=n[s]||[],n[s].push(a)):n[s]=a}return e.header&&(r>g.length?C("FieldMismatch","TooManyFields","Too many fields: expected "+g.length+" fields but parsed "+r,f+i):rs&&t1&&n[0].length=r.length/2?"\r\n":"\r"}(n,o)),r=!1,e.delimiter)q(e.delimiter)&&(e.delimiter=e.delimiter(n),m.meta.delimiter=e.delimiter);else{var h=function(t,i,r,n,s){var a,o,h,u;s=s||[",","\t","|",";",d,l];for(var f=0;f0&&(p+=Math.abs(k-h),h=k):h=k}_.data.length>0&&(g/=_.data.length-m),(void 0===o||p<=o)&&(void 0===u||g>u)&&g>1.99&&(o=p,a=c,u=g)}return e.delimiter=a,{successful:!!a,bestDelimiter:a}}(n,e.newline,e.skipEmptyLines,e.comments,e.delimitersToGuess);h.successful?e.delimiter=h.bestDelimiter:(r=!0,e.delimiter=","),m.meta.delimiter=e.delimiter}var u=M(e);return e.preview&&e.header&&u.preview++,t=n,i=new j(u),m=i.parse(t,s,a),v(),c?{meta:{paused:!0}}:m||{meta:{paused:!1}}},this.paused=function(){return c},this.pause=function(){c=!0,i.abort(),t=q(e.chunk)?"":t.substring(i.getCharIndex())},this.resume=function(){h.streamer._halted?(c=!1,h.streamer.parseChunk(t,!0)):setTimeout(h.resume,3)},this.aborted=function(){return p},this.abort=function(){p=!0,i.abort(),m.meta.aborted=!0,q(e.complete)&&e.complete(m),t=""}}function I(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function j(e){var t,i=(e=e||{}).delimiter,r=e.newline,n=e.comments,s=e.step,a=e.preview,o=e.fastMode,h=t=void 0===e.quoteChar?'"':e.quoteChar;if(void 0!==e.escapeChar&&(h=e.escapeChar),("string"!=typeof i||c.indexOf(i)>-1)&&(i=","),n===i)throw new Error("Comment character same as delimiter");!0===n?n="#":("string"!=typeof n||c.indexOf(n)>-1)&&(n=!1),"\n"!==r&&"\r"!==r&&"\r\n"!==r&&(r="\n");var u=0,f=!1;this.parse=function(e,d,l){if("string"!=typeof e)throw new Error("Input must be a string");var c=e.length,p=i.length,g=r.length,m=n.length,_=q(s);u=0;var y=[],v=[],k=[],b=0;if(!e)return z();if(o||!1!==o&&-1===e.indexOf(t)){for(var C=e.split(r),w=0;w=a)return y=y.slice(0,a),z(!0)}}return z()}for(var E=e.indexOf(i,u),S=e.indexOf(r,u),x=new RegExp(I(h)+I(t),"g"),O=e.indexOf(t,u);;)if(e[u]!==t)if(n&&0===k.length&&e.substring(u,u+m)===n){if(-1===S)return z();u=S+g,S=e.indexOf(r,u),E=e.indexOf(i,u)}else if(-1!==E&&(E=a)return z(!0)}else for(O=u,u++;;){if(-1===(O=e.indexOf(t,O+1)))return l||v.push({type:"Quotes",code:"MissingQuotes",message:"Quoted field unterminated",row:y.length,index:u}),j();if(O===c-1)return j(e.substring(u,O).replace(x,t));if(t!==h||e[O+1]!==h){if(t===h||0===O||e[O-1]!==h){-1!==E&&E=a)return z(!0);break}v.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:y.length,index:u}),O++}}else O++}return j();function A(e){y.push(e),b=u}function F(t){var i=0;if(-1!==t){var r=e.substring(O+1,t);r&&""===r.trim()&&(i=r.length)}return i}function j(t){return l||(void 0===t&&(t=e.substring(u)),k.push(t),u=c,A(k),_&&D()),z()}function L(t){u=t,A(k),k=[],S=e.indexOf(r,u)}function z(e){return{data:y,errors:v,meta:{delimiter:i,linebreak:r,aborted:f,truncated:!!e,cursor:b+(d||0)}}}function D(){s(z()),y=[],v=[]}},this.abort=function(){f=!0},this.getCharIndex=function(){return u}}function L(e){var t=e.data,i=r[t.workerId],n=!1;if(t.error)i.userError(t.error,t.file);else if(t.results&&t.results.data){var s={abort:function(){n=!0,z(t.workerId,{data:[],errors:[],meta:{aborted:!0}})},pause:D,resume:D};if(q(i.userStep)){for(var a=0;a" */ + var _escapedQuote = _quoteChar + _quoteChar; + + /** whether to skip empty lines */ + var _skipEmptyLines = false; + + /** the columns (keys) we expect when we unparse objects */ + var _columns = null; + + /** whether to prevent outputting cells that can be parsed as formulae by spreadsheet software (Excel and LibreOffice) */ + var _escapeFormulae = false; + + unpackConfig(); + + var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g'); + + if (typeof _input === 'string') + _input = JSON.parse(_input); + + if (Array.isArray(_input)) + { + if (!_input.length || Array.isArray(_input[0])) + return serialize(null, _input, _skipEmptyLines); + else if (typeof _input[0] === 'object') + return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines); + } + else if (typeof _input === 'object') + { + if (typeof _input.data === 'string') + _input.data = JSON.parse(_input.data); + + if (Array.isArray(_input.data)) + { + if (!_input.fields) + _input.fields = _input.meta && _input.meta.fields; + + if (!_input.fields) + _input.fields = Array.isArray(_input.data[0]) + ? _input.fields + : typeof _input.data[0] === 'object' + ? Object.keys(_input.data[0]) + : []; + + if (!(Array.isArray(_input.data[0])) && typeof _input.data[0] !== 'object') + _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf'] + } + + return serialize(_input.fields || [], _input.data || [], _skipEmptyLines); + } + + // Default (any valid paths should return before this) + throw new Error('Unable to serialize unrecognized input'); + + + function unpackConfig() + { + if (typeof _config !== 'object') + return; + + if (typeof _config.delimiter === 'string' + && !BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length) + { + _delimiter = _config.delimiter; + } + + if (typeof _config.quotes === 'boolean' + || typeof _config.quotes === 'function' + || Array.isArray(_config.quotes)) + _quotes = _config.quotes; + + if (typeof _config.skipEmptyLines === 'boolean' + || typeof _config.skipEmptyLines === 'string') + _skipEmptyLines = _config.skipEmptyLines; + + if (typeof _config.newline === 'string') + _newline = _config.newline; + + if (typeof _config.quoteChar === 'string') + _quoteChar = _config.quoteChar; + + if (typeof _config.header === 'boolean') + _writeHeader = _config.header; + + if (Array.isArray(_config.columns)) { + + if (_config.columns.length === 0) throw new Error('Option columns is empty'); + + _columns = _config.columns; + } + + if (_config.escapeChar !== undefined) { + _escapedQuote = _config.escapeChar + _quoteChar; + } + + if (typeof _config.escapeFormulae === 'boolean') + _escapeFormulae = _config.escapeFormulae; + } + + + /** The double for loop that iterates the data and writes out a CSV string including header row */ + function serialize(fields, data, skipEmptyLines) + { + var csv = ''; + + if (typeof fields === 'string') + fields = JSON.parse(fields); + if (typeof data === 'string') + data = JSON.parse(data); + + var hasHeader = Array.isArray(fields) && fields.length > 0; + var dataKeyedByField = !(Array.isArray(data[0])); + + // If there a header row, write it first + if (hasHeader && _writeHeader) + { + for (var i = 0; i < fields.length; i++) + { + if (i > 0) + csv += _delimiter; + csv += safe(fields[i], i); + } + if (data.length > 0) + csv += _newline; + } + + // Then write out the data + for (var row = 0; row < data.length; row++) + { + var maxCol = hasHeader ? fields.length : data[row].length; + + var emptyLine = false; + var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0; + if (skipEmptyLines && !hasHeader) + { + emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0; + } + if (skipEmptyLines === 'greedy' && hasHeader) { + var line = []; + for (var c = 0; c < maxCol; c++) { + var cx = dataKeyedByField ? fields[c] : c; + line.push(data[row][cx]); + } + emptyLine = line.join('').trim() === ''; + } + if (!emptyLine) + { + for (var col = 0; col < maxCol; col++) + { + if (col > 0 && !nullLine) + csv += _delimiter; + var colIdx = hasHeader && dataKeyedByField ? fields[col] : col; + csv += safe(data[row][colIdx], col); + } + if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) + { + csv += _newline; + } + } + } + return csv; + } + + /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */ + function safe(str, col) + { + if (typeof str === 'undefined' || str === null) + return ''; + + if (str.constructor === Date) + return JSON.stringify(str).slice(1, 25); + + if (_escapeFormulae === true && typeof str === "string" && (str.match(/^[=+\-@].*$/) !== null)) { + str = "'" + str; + } + + var escapedQuoteStr = str.toString().replace(quoteCharRegex, _escapedQuote); + + var needsQuotes = (typeof _quotes === 'boolean' && _quotes) + || (typeof _quotes === 'function' && _quotes(str, col)) + || (Array.isArray(_quotes) && _quotes[col]) + || hasAny(escapedQuoteStr, BAD_DELIMITERS) + || escapedQuoteStr.indexOf(_delimiter) > -1 + || escapedQuoteStr.charAt(0) === ' ' + || escapedQuoteStr.charAt(escapedQuoteStr.length - 1) === ' '; + + return needsQuotes ? _quoteChar + escapedQuoteStr + _quoteChar : escapedQuoteStr; + } + + function hasAny(str, substrings) + { + for (var i = 0; i < substrings.length; i++) + if (str.indexOf(substrings[i]) > -1) + return true; + return false; + } +} + +/** ChunkStreamer is the base prototype for various streamer implementations. */ +function ChunkStreamer(config) +{ + this._handle = null; + this._finished = false; + this._completed = false; + this._halted = false; + this._input = null; + this._baseIndex = 0; + this._partialLine = ''; + this._rowCount = 0; + this._start = 0; + this._nextChunk = null; + this.isFirstChunk = true; + this._completeResults = { + data: [], + errors: [], + meta: {} + }; + replaceConfig.call(this, config); + + this.parseChunk = function(chunk, isFakeChunk) + { + // First chunk pre-processing + if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) + { + var modifiedChunk = this._config.beforeFirstChunk(chunk); + if (modifiedChunk !== undefined) + chunk = modifiedChunk; + } + this.isFirstChunk = false; + this._halted = false; + + // Rejoin the line we likely just split in two by chunking the file + var aggregate = this._partialLine + chunk; + this._partialLine = ''; + + var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); + + if (this._handle.paused() || this._handle.aborted()) { + this._halted = true; + return; + } + + var lastIndex = results.meta.cursor; + + if (!this._finished) + { + this._partialLine = aggregate.substring(lastIndex - this._baseIndex); + this._baseIndex = lastIndex; + } + + if (results && results.data) + this._rowCount += results.data.length; + + var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview); + + if (IS_PAPA_WORKER) + { + global.postMessage({ + results: results, + workerId: WORKER_ID, + finished: finishedIncludingPreview + }); + } + else if (isFunction(this._config.chunk) && !isFakeChunk) + { + this._config.chunk(results, this._handle); + if (this._handle.paused() || this._handle.aborted()) { + this._halted = true; + return; + } + results = undefined; + this._completeResults = undefined; + } + + if (!this._config.step && !this._config.chunk) { + this._completeResults.data = this._completeResults.data.concat(results.data); + this._completeResults.errors = this._completeResults.errors.concat(results.errors); + this._completeResults.meta = results.meta; + } + + if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) { + this._config.complete(this._completeResults, this._input); + this._completed = true; + } + + if (!finishedIncludingPreview && (!results || !results.meta.paused)) + this._nextChunk(); + + return results; + }; + + this._sendError = function(error) + { + if (isFunction(this._config.error)) + this._config.error(error); + else if (IS_PAPA_WORKER && this._config.error) + { + global.postMessage({ + workerId: WORKER_ID, + error: error, + finished: false + }); + } + }; + + function replaceConfig(config) + { + // Deep-copy the config so we can edit it + var configCopy = copy(config); + configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings! + if (!config.step && !config.chunk) + configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196 + this._handle = new _ParserHandle(configCopy); + this._handle.streamer = this; + this._config = configCopy; // persist the copy to the caller + } +} + + +function _NetworkStreamer(config) +{ + config = config || {}; + if (!config.chunkSize) + config.chunkSize = RemoteChunkSize; + ChunkStreamer.call(this, config); + + var xhr; + + if (IS_WORKER) + { + this._nextChunk = function() + { + this._readChunk(); + this._chunkLoaded(); + }; + } + else + { + this._nextChunk = function() + { + this._readChunk(); + }; + } + + this.stream = function(url) + { + this._input = url; + this._nextChunk(); // Starts streaming + }; + + this._readChunk = function() + { + if (this._finished) + { + this._chunkLoaded(); + return; + } + + xhr = new XMLHttpRequest(); + + if (this._config.withCredentials) + { + xhr.withCredentials = this._config.withCredentials; + } + + if (!IS_WORKER) + { + xhr.onload = bindFunction(this._chunkLoaded, this); + xhr.onerror = bindFunction(this._chunkError, this); + } + + xhr.open(this._config.downloadRequestBody ? 'POST' : 'GET', this._input, !IS_WORKER); + // Headers can only be set when once the request state is OPENED + if (this._config.downloadRequestHeaders) + { + var headers = this._config.downloadRequestHeaders; + + for (var headerName in headers) + { + xhr.setRequestHeader(headerName, headers[headerName]); + } + } + + if (this._config.chunkSize) + { + var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive + xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + end); + } + + try { + xhr.send(this._config.downloadRequestBody); + } + catch (err) { + this._chunkError(err.message); + } + + if (IS_WORKER && xhr.status === 0) + this._chunkError(); + }; + + this._chunkLoaded = function() + { + if (xhr.readyState !== 4) + return; + + if (xhr.status < 200 || xhr.status >= 400) + { + this._chunkError(); + return; + } + + // Use chunckSize as it may be a diference on reponse lentgh due to characters with more than 1 byte + this._start += this._config.chunkSize ? this._config.chunkSize : xhr.responseText.length; + this._finished = !this._config.chunkSize || this._start >= getFileSize(xhr); + this.parseChunk(xhr.responseText); + }; + + this._chunkError = function(errorMessage) + { + var errorText = xhr.statusText || errorMessage; + this._sendError(new Error(errorText)); + }; + + function getFileSize(xhr) + { + var contentRange = xhr.getResponseHeader('Content-Range'); + if (contentRange === null) { // no content range, then finish! + return -1; + } + return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1)); + } +} +_NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype); +_NetworkStreamer.prototype.constructor = _NetworkStreamer; + + +function _FileStreamer(config) +{ + config = config || {}; + if (!config.chunkSize) + config.chunkSize = LocalChunkSize; + ChunkStreamer.call(this, config); + + var reader, slice; + + // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862 + // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76 + var usingAsyncReader = typeof FileReader !== 'undefined'; // Safari doesn't consider it a function - see issue #105 + + this.stream = function(file) + { + this._input = file; + slice = file.slice || file.webkitSlice || file.mozSlice; + + if (usingAsyncReader) + { + reader = new FileReader(); // Preferred method of reading files, even in workers + reader.onload = bindFunction(this._chunkLoaded, this); + reader.onerror = bindFunction(this._chunkError, this); + } + else + reader = new FileReaderSync(); // Hack for running in a web worker in Firefox + + this._nextChunk(); // Starts streaming + }; + + this._nextChunk = function() + { + if (!this._finished && (!this._config.preview || this._rowCount < this._config.preview)) + this._readChunk(); + }; + + this._readChunk = function() + { + var input = this._input; + if (this._config.chunkSize) + { + var end = Math.min(this._start + this._config.chunkSize, this._input.size); + input = slice.call(input, this._start, end); + } + var txt = reader.readAsText(input, this._config.encoding); + if (!usingAsyncReader) + this._chunkLoaded({ target: { result: txt } }); // mimic the async signature + }; + + this._chunkLoaded = function(event) + { + // Very important to increment start each time before handling results + this._start += this._config.chunkSize; + this._finished = !this._config.chunkSize || this._start >= this._input.size; + this.parseChunk(event.target.result); + }; + + this._chunkError = function() + { + this._sendError(reader.error); + }; + +} +_FileStreamer.prototype = Object.create(ChunkStreamer.prototype); +_FileStreamer.prototype.constructor = _FileStreamer; + + +function _StringStreamer(config) +{ + config = config || {}; + ChunkStreamer.call(this, config); + + var remaining; + this.stream = function(s) + { + remaining = s; + return this._nextChunk(); + }; + this._nextChunk = function() + { + if (this._finished) return; + var size = this._config.chunkSize; + var chunk; + if(size) { + chunk = remaining.substring(0, size); + remaining = remaining.substring(size); + } else { + chunk = remaining; + remaining = ''; + } + this._finished = !remaining; + return this.parseChunk(chunk); + }; +} +_StringStreamer.prototype = Object.create(_StringStreamer.prototype); +_StringStreamer.prototype.constructor = _StringStreamer; + + +function _ReadableStreamStreamer(config) +{ + config = config || {}; + + ChunkStreamer.call(this, config); + + var queue = []; + var parseOnData = true; + var streamHasEnded = false; + + this.pause = function() + { + ChunkStreamer.prototype.pause.apply(this, arguments); + this._input.pause(); + }; + + this.resume = function() + { + ChunkStreamer.prototype.resume.apply(this, arguments); + this._input.resume(); + }; + + this.stream = function(stream) + { + this._input = stream; + + this._input.on('data', this._streamData); + this._input.on('end', this._streamEnd); + this._input.on('error', this._streamError); + }; + + this._checkIsFinished = function() + { + if (streamHasEnded && queue.length === 1) { + this._finished = true; + } + }; + + this._nextChunk = function() + { + this._checkIsFinished(); + if (queue.length) + { + this.parseChunk(queue.shift()); + } + else + { + parseOnData = true; + } + }; + + this._streamData = bindFunction(function(chunk) + { + try + { + queue.push(typeof chunk === 'string' ? chunk : chunk.toString(this._config.encoding)); + + if (parseOnData) + { + parseOnData = false; + this._checkIsFinished(); + this.parseChunk(queue.shift()); + } + } + catch (error) + { + this._streamError(error); + } + }, this); + + this._streamError = bindFunction(function(error) + { + this._streamCleanUp(); + this._sendError(error); + }, this); + + this._streamEnd = bindFunction(function() + { + this._streamCleanUp(); + streamHasEnded = true; + this._streamData(''); + }, this); + + this._streamCleanUp = bindFunction(function() + { + this._input.removeListener('data', this._streamData); + this._input.removeListener('end', this._streamEnd); + this._input.removeListener('error', this._streamError); + }, this); +} +_ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); +_ReadableStreamStreamer.prototype.constructor = _ReadableStreamStreamer; + + +function _DuplexStreamStreamer(_config) { + var Duplex = require('stream').Duplex; + var config = copy(_config); + var parseOnWrite = true; + var writeStreamHasFinished = false; + var parseCallbackQueue = []; + var stream = null; + + this._onCsvData = function(results) + { + var data = results.data; + if (!stream.push(data) && !this._handle.paused()) { + // the writeable consumer buffer has filled up + // so we need to pause until more items + // can be processed + this._handle.pause(); + } + }; + + this._onCsvComplete = function() + { + // node will finish the read stream when + // null is pushed + stream.push(null); + }; + + config.step = bindFunction(this._onCsvData, this); + config.complete = bindFunction(this._onCsvComplete, this); + ChunkStreamer.call(this, config); + + this._nextChunk = function() + { + if (writeStreamHasFinished && parseCallbackQueue.length === 1) { + this._finished = true; + } + if (parseCallbackQueue.length) { + parseCallbackQueue.shift()(); + } else { + parseOnWrite = true; + } + }; + + this._addToParseQueue = function(chunk, callback) + { + // add to queue so that we can indicate + // completion via callback + // node will automatically pause the incoming stream + // when too many items have been added without their + // callback being invoked + parseCallbackQueue.push(bindFunction(function() { + this.parseChunk(typeof chunk === 'string' ? chunk : chunk.toString(config.encoding)); + if (isFunction(callback)) { + return callback(); + } + }, this)); + if (parseOnWrite) { + parseOnWrite = false; + this._nextChunk(); + } + }; + + this._onRead = function() + { + if (this._handle.paused()) { + // the writeable consumer can handle more data + // so resume the chunk parsing + this._handle.resume(); + } + }; + + this._onWrite = function(chunk, encoding, callback) + { + this._addToParseQueue(chunk, callback); + }; + + this._onWriteComplete = function() + { + writeStreamHasFinished = true; + // have to write empty string + // so parser knows its done + this._addToParseQueue(''); + }; + + this.getStream = function() + { + return stream; + }; + stream = new Duplex({ + readableObjectMode: true, + decodeStrings: false, + read: bindFunction(this._onRead, this), + write: bindFunction(this._onWrite, this) + }); + stream.once('finish', bindFunction(this._onWriteComplete, this)); +} +if (typeof PAPA_BROWSER_CONTEXT === 'undefined') { + _DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype); + _DuplexStreamStreamer.prototype.constructor = _DuplexStreamStreamer; +} + + +// Use one ParserHandle per entire CSV file or string +function _ParserHandle(_config) +{ + // One goal is to minimize the use of regular expressions... + var MAX_FLOAT = Math.pow(2, 53); + var MIN_FLOAT = -MAX_FLOAT; + var FLOAT = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/; + var ISO_DATE = /^(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))$/; + var self = this; + var _stepCounter = 0; // Number of times step was called (number of rows parsed) + var _rowCounter = 0; // Number of rows that have been parsed so far + var _input; // The input being parsed + var _parser; // The core parser being used + var _paused = false; // Whether we are paused or not + var _aborted = false; // Whether the parser has aborted or not + var _delimiterError; // Temporary state between delimiter detection and processing results + var _fields = []; // Fields are from the header row of the input, if there is one + var _results = { // The last results returned from the parser + data: [], + errors: [], + meta: {} + }; + + if (isFunction(_config.step)) + { + var userStep = _config.step; + _config.step = function(results) + { + _results = results; + + if (needsHeaderRow()) + processResults(); + else // only call user's step function after header row + { + processResults(); + + // It's possbile that this line was empty and there's no row here after all + if (_results.data.length === 0) + return; + + _stepCounter += results.data.length; + if (_config.preview && _stepCounter > _config.preview) + _parser.abort(); + else { + _results.data = _results.data[0]; + userStep(_results, self); + } + } + }; + } + + /** + * Parses input. Most users won't need, and shouldn't mess with, the baseIndex + * and ignoreLastRow parameters. They are used by streamers (wrapper functions) + * when an input comes in multiple chunks, like from a file. + */ + this.parse = function(input, baseIndex, ignoreLastRow) + { + var quoteChar = _config.quoteChar || '"'; + if (!_config.newline) + _config.newline = guessLineEndings(input, quoteChar); + + _delimiterError = false; + if (!_config.delimiter) + { + var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess); + if (delimGuess.successful) + _config.delimiter = delimGuess.bestDelimiter; + else + { + _delimiterError = true; // add error after parsing (otherwise it would be overwritten) + _config.delimiter = DefaultDelimiter; + } + _results.meta.delimiter = _config.delimiter; + } + else if(isFunction(_config.delimiter)) + { + _config.delimiter = _config.delimiter(input); + _results.meta.delimiter = _config.delimiter; + } + + var parserConfig = copy(_config); + if (_config.preview && _config.header) + parserConfig.preview++; // to compensate for header row + + _input = input; + _parser = new _Parser(parserConfig); + _results = _parser.parse(_input, baseIndex, ignoreLastRow); + processResults(); + return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } }); + }; + + this.paused = function() + { + return _paused; + }; + + this.pause = function() + { + _paused = true; + _parser.abort(); + + // If it is streaming via "chunking", the reader will start appending correctly already so no need to substring, + // otherwise we can get duplicate content within a row + _input = isFunction(_config.chunk) ? "" : _input.substring(_parser.getCharIndex()); + }; + + this.resume = function() + { + if(self.streamer._halted) { + _paused = false; + self.streamer.parseChunk(_input, true); + } else { + // Bugfix: #636 In case the processing hasn't halted yet + // wait for it to halt in order to resume + setTimeout(self.resume, 3); + } + }; + + this.aborted = function() + { + return _aborted; + }; + + this.abort = function() + { + _aborted = true; + _parser.abort(); + _results.meta.aborted = true; + if (isFunction(_config.complete)) + _config.complete(_results); + _input = ''; + }; + + function testEmptyLine(s) { + return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0; + } + + function testFloat(s) { + if (FLOAT.test(s)) { + var floatValue = parseFloat(s); + if (floatValue > MIN_FLOAT && floatValue < MAX_FLOAT) { + return true; + } + } + return false; + } + + function processResults() + { + if (_results && _delimiterError) + { + addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + DefaultDelimiter + '\''); + _delimiterError = false; + } + + if (_config.skipEmptyLines) + { + for (var i = 0; i < _results.data.length; i++) + if (testEmptyLine(_results.data[i])) + _results.data.splice(i--, 1); + } + + if (needsHeaderRow()) + fillHeaderFields(); + + return applyHeaderAndDynamicTypingAndTransformation(); + } + + function needsHeaderRow() + { + return _config.header && _fields.length === 0; + } + + function fillHeaderFields() + { + if (!_results) + return; + + function addHeader(header, i) + { + if (isFunction(_config.transformHeader)) + header = _config.transformHeader(header, i); + + _fields.push(header); + } + + if (Array.isArray(_results.data[0])) + { + for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) + _results.data[i].forEach(addHeader); + + _results.data.splice(0, 1); + } + // if _results.data[0] is not an array, we are in a step where _results.data is the row. + else + _results.data.forEach(addHeader); + } + + function shouldApplyDynamicTyping(field) { + // Cache function values to avoid calling it for each row + if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) { + _config.dynamicTyping[field] = _config.dynamicTypingFunction(field); + } + return (_config.dynamicTyping[field] || _config.dynamicTyping) === true; + } + + function parseDynamic(field, value) + { + if (shouldApplyDynamicTyping(field)) + { + if (value === 'true' || value === 'TRUE') + return true; + else if (value === 'false' || value === 'FALSE') + return false; + else if (testFloat(value)) + return parseFloat(value); + else if (ISO_DATE.test(value)) + return new Date(value); + else + return (value === '' ? null : value); + } + return value; + } + + function applyHeaderAndDynamicTypingAndTransformation() + { + if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform)) + return _results; + + function processRow(rowSource, i) + { + var row = _config.header ? {} : []; + + var j; + for (j = 0; j < rowSource.length; j++) + { + var field = j; + var value = rowSource[j]; + + if (_config.header) + field = j >= _fields.length ? '__parsed_extra' : _fields[j]; + + if (_config.transform) + value = _config.transform(value,field); + + value = parseDynamic(field, value); + + if (field === '__parsed_extra') + { + row[field] = row[field] || []; + row[field].push(value); + } + else + row[field] = value; + } + + + if (_config.header) + { + if (j > _fields.length) + addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i); + else if (j < _fields.length) + addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i); + } + + return row; + } + + var incrementBy = 1; + if (!_results.data.length || Array.isArray(_results.data[0])) + { + _results.data = _results.data.map(processRow); + incrementBy = _results.data.length; + } + else + _results.data = processRow(_results.data, 0); + + + if (_config.header && _results.meta) + _results.meta.fields = _fields; + + _rowCounter += incrementBy; + return _results; + } + + function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) { + var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount; + + delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', RECORD_SEP, UNIT_SEP]; + + for (var i = 0; i < delimitersToGuess.length; i++) { + var delim = delimitersToGuess[i]; + var delta = 0, avgFieldCount = 0, emptyLinesCount = 0; + fieldCountPrevRow = undefined; + + var preview = new _Parser({ + comments: comments, + delimiter: delim, + newline: newline, + preview: 10 + }).parse(input); + + for (var j = 0; j < preview.data.length; j++) { + if (skipEmptyLines && testEmptyLine(preview.data[j])) { + emptyLinesCount++; + continue; + } + var fieldCount = preview.data[j].length; + avgFieldCount += fieldCount; + + if (typeof fieldCountPrevRow === 'undefined') { + fieldCountPrevRow = fieldCount; + continue; + } + else if (fieldCount > 0) { + delta += Math.abs(fieldCount - fieldCountPrevRow); + fieldCountPrevRow = fieldCount; + } + } + + if (preview.data.length > 0) + avgFieldCount /= (preview.data.length - emptyLinesCount); + + if ((typeof bestDelta === 'undefined' || delta <= bestDelta) + && (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) { + bestDelta = delta; + bestDelim = delim; + maxFieldCount = avgFieldCount; + } + } + + _config.delimiter = bestDelim; + + return { + successful: !!bestDelim, + bestDelimiter: bestDelim + }; + } + + function guessLineEndings(input, quoteChar) + { + input = input.substring(0, 1024 * 1024); // max length 1 MB + // Replace all the text inside quotes + var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm'); + input = input.replace(re, ''); + + var r = input.split('\r'); + + var n = input.split('\n'); + + var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length); + + if (r.length === 1 || nAppearsFirst) + return '\n'; + + var numWithN = 0; + for (var i = 0; i < r.length; i++) + { + if (r[i][0] === '\n') + numWithN++; + } + + return numWithN >= r.length / 2 ? '\r\n' : '\r'; + } + + function addError(type, code, msg, row) + { + var error = { + type: type, + code: code, + message: msg + }; + if(row !== undefined) { + error.row = row; + } + _results.errors.push(error); + } +} + +/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */ +function escapeRegExp(string) +{ + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** The core parser implements speedy and correct CSV parsing */ +function _Parser(config) +{ + // Unpack the config object + config = config || {}; + var delim = config.delimiter; + var newline = config.newline; + var comments = config.comments; + var step = config.step; + var preview = config.preview; + var fastMode = config.fastMode; + var quoteChar; + /** Allows for no quoteChar by setting quoteChar to undefined in config */ + if (config.quoteChar === undefined) { + quoteChar = '"'; + } else { + quoteChar = config.quoteChar; + } + var escapeChar = quoteChar; + if (config.escapeChar !== undefined) { + escapeChar = config.escapeChar; + } + + // Delimiter must be valid + if (typeof delim !== 'string' + || BAD_DELIMITERS.indexOf(delim) > -1) + delim = ','; + + // Comment character must be valid + if (comments === delim) + throw new Error('Comment character same as delimiter'); + else if (comments === true) + comments = '#'; + else if (typeof comments !== 'string' + || BAD_DELIMITERS.indexOf(comments) > -1) + comments = false; + + // Newline must be valid: \r, \n, or \r\n + if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') + newline = '\n'; + + // We're gonna need these at the Parser scope + var cursor = 0; + var aborted = false; + + this.parse = function(input, baseIndex, ignoreLastRow) + { + // For some reason, in Chrome, this speeds things up (!?) + if (typeof input !== 'string') + throw new Error('Input must be a string'); + + // We don't need to compute some of these every time parse() is called, + // but having them in a more local scope seems to perform better + var inputLen = input.length, + delimLen = delim.length, + newlineLen = newline.length, + commentsLen = comments.length; + var stepIsFunction = isFunction(step); + + // Establish starting state + cursor = 0; + var data = [], errors = [], row = [], lastCursor = 0; + + if (!input) + return returnable(); + + if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) + { + var rows = input.split(newline); + for (var i = 0; i < rows.length; i++) + { + row = rows[i]; + cursor += row.length; + if (i !== rows.length - 1) + cursor += newline.length; + else if (ignoreLastRow) + return returnable(); + if (comments && row.substring(0, commentsLen) === comments) + continue; + if (stepIsFunction) + { + data = []; + pushRow(row.split(delim)); + doStep(); + if (aborted) + return returnable(); + } + else + pushRow(row.split(delim)); + if (preview && i >= preview) + { + data = data.slice(0, preview); + return returnable(true); + } + } + return returnable(); + } + + var nextDelim = input.indexOf(delim, cursor); + var nextNewline = input.indexOf(newline, cursor); + var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g'); + var quoteSearch = input.indexOf(quoteChar, cursor); + + // Parser loop + for (;;) + { + // Field has opening quote + if (input[cursor] === quoteChar) + { + // Start our search for the closing quote where the cursor is + quoteSearch = cursor; + + // Skip the opening quote + cursor++; + + for (;;) + { + // Find closing quote + quoteSearch = input.indexOf(quoteChar, quoteSearch + 1); + + //No other quotes are found - no other delimiters + if (quoteSearch === -1) + { + if (!ignoreLastRow) { + // No closing quote... what a pity + errors.push({ + type: 'Quotes', + code: 'MissingQuotes', + message: 'Quoted field unterminated', + row: data.length, // row has yet to be inserted + index: cursor + }); + } + return finish(); + } + + // Closing quote at EOF + if (quoteSearch === inputLen - 1) + { + var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar); + return finish(value); + } + + // If this quote is escaped, it's part of the data; skip it + // If the quote character is the escape character, then check if the next character is the escape character + if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) + { + quoteSearch++; + continue; + } + + // If the quote character is not the escape character, then check if the previous character was the escape character + if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) + { + continue; + } + + if(nextDelim !== -1 && nextDelim < (quoteSearch + 1)) { + nextDelim = input.indexOf(delim, (quoteSearch + 1)); + } + if(nextNewline !== -1 && nextNewline < (quoteSearch + 1)) { + nextNewline = input.indexOf(newline, (quoteSearch + 1)); + } + // Check up to nextDelim or nextNewline, whichever is closest + var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline); + var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo); + + // Closing quote followed by delimiter or 'unnecessary spaces + delimiter' + if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) + { + row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen; + + // If char after following delimiter is not quoteChar, we find next quote char position + if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar) + { + quoteSearch = input.indexOf(quoteChar, cursor); + } + nextDelim = input.indexOf(delim, cursor); + nextNewline = input.indexOf(newline, cursor); + break; + } + + var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline); + + // Closing quote followed by newline or 'unnecessary spaces + newLine' + if (input.substring(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen) === newline) + { + row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar)); + saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen); + nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field + quoteSearch = input.indexOf(quoteChar, cursor); // we search for first quote in next line + + if (stepIsFunction) + { + doStep(); + if (aborted) + return returnable(); + } + + if (preview && data.length >= preview) + return returnable(true); + + break; + } + + + // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string + errors.push({ + type: 'Quotes', + code: 'InvalidQuotes', + message: 'Trailing quote on quoted field is malformed', + row: data.length, // row has yet to be inserted + index: cursor + }); + + quoteSearch++; + continue; + + } + + continue; + } + + // Comment found at start of new line + if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments) + { + if (nextNewline === -1) // Comment ends at EOF + return returnable(); + cursor = nextNewline + newlineLen; + nextNewline = input.indexOf(newline, cursor); + nextDelim = input.indexOf(delim, cursor); + continue; + } + + // Next delimiter comes before next newline, so we've reached end of field + if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) + { + row.push(input.substring(cursor, nextDelim)); + cursor = nextDelim + delimLen; + // we look for next delimiter char + nextDelim = input.indexOf(delim, cursor); + continue; + } + + // End of row + if (nextNewline !== -1) + { + row.push(input.substring(cursor, nextNewline)); + saveRow(nextNewline + newlineLen); + + if (stepIsFunction) + { + doStep(); + if (aborted) + return returnable(); + } + + if (preview && data.length >= preview) + return returnable(true); + + continue; + } + + break; + } + + + return finish(); + + + function pushRow(row) + { + data.push(row); + lastCursor = cursor; + } + + /** + * checks if there are extra spaces after closing quote and given index without any text + * if Yes, returns the number of spaces + */ + function extraSpaces(index) { + var spaceLength = 0; + if (index !== -1) { + var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index); + if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') { + spaceLength = textBetweenClosingQuoteAndIndex.length; + } + } + return spaceLength; + } + + /** + * Appends the remaining input from cursor to the end into + * row, saves the row, calls step, and returns the results. + */ + function finish(value) + { + if (ignoreLastRow) + return returnable(); + if (typeof value === 'undefined') + value = input.substring(cursor); + row.push(value); + cursor = inputLen; // important in case parsing is paused + pushRow(row); + if (stepIsFunction) + doStep(); + return returnable(); + } + + /** + * Appends the current row to the results. It sets the cursor + * to newCursor and finds the nextNewline. The caller should + * take care to execute user's step function and check for + * preview and end parsing if necessary. + */ + function saveRow(newCursor) + { + cursor = newCursor; + pushRow(row); + row = []; + nextNewline = input.indexOf(newline, cursor); + } + + /** Returns an object with the results, errors, and meta. */ + function returnable(stopped) + { + return { + data: data, + errors: errors, + meta: { + delimiter: delim, + linebreak: newline, + aborted: aborted, + truncated: !!stopped, + cursor: lastCursor + (baseIndex || 0) + } + }; + } + + /** Executes the user's step function and resets data & errors. */ + function doStep() + { + step(returnable()); + data = []; + errors = []; + } + }; + + /** Sets the abort flag */ + this.abort = function() + { + aborted = true; + }; + + /** Gets the cursor position */ + this.getCharIndex = function() + { + return cursor; + }; +} + + +function newWorker() +{ + if (!WORKERS_SUPPORTED) + return false; + + var workerUrl = SCRIPT_URL; + var w = new global.Worker(workerUrl, { name: PAPA_WORKER_NAME, type: import.meta.papaIsUMD ? undefined : 'module' }); + w.onmessage = mainThreadReceivedMessage; + w.id = workerIdCounter++; + workers[w.id] = w; + return w; +} + +/** Callback when main thread receives a message */ +function mainThreadReceivedMessage(e) +{ + var msg = e.data; + var worker = workers[msg.workerId]; + var aborted = false; + + if (msg.error) + worker.userError(msg.error, msg.file); + else if (msg.results && msg.results.data) + { + var abort = function() { + aborted = true; + completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } }); + }; + + var handle = { + abort: abort, + pause: notImplemented, + resume: notImplemented + }; + + if (isFunction(worker.userStep)) + { + for (var i = 0; i < msg.results.data.length; i++) + { + worker.userStep({ + data: msg.results.data[i], + errors: msg.results.errors, + meta: msg.results.meta + }, handle); + if (aborted) + break; + } + delete msg.results; // free memory ASAP + } + else if (isFunction(worker.userChunk)) + { + worker.userChunk(msg.results, handle, msg.file); + delete msg.results; + } + } + + if (msg.finished && !aborted) + completeWorker(msg.workerId, msg.results); +} + +function completeWorker(workerId, results) { + var worker = workers[workerId]; + if (isFunction(worker.userComplete)) + worker.userComplete(results); + worker.terminate(); + delete workers[workerId]; +} + +function notImplemented() { + throw new Error('Not implemented.'); +} + +/** Callback when worker thread receives a message */ +function workerThreadReceivedMessage(e) +{ + var msg = e.data; + + if (typeof WORKER_ID === 'undefined' && msg) + WORKER_ID = msg.workerId; + + if (typeof msg.input === 'string') + { + global.postMessage({ + workerId: WORKER_ID, + results: parse(msg.input, msg.config), + finished: true + }); + } + else if ((global.File && msg.input instanceof File) || msg.input instanceof Object) // thank you, Safari (see issue #106) + { + var results = parse(msg.input, msg.config); + if (results) + global.postMessage({ + workerId: WORKER_ID, + results: results, + finished: true + }); + } +} + +/** Makes a deep copy of an array or object (mostly) */ +function copy(obj) +{ + if (typeof obj !== 'object' || obj === null) + return obj; + var cpy = Array.isArray(obj) ? [] : {}; + for (var key in obj) + cpy[key] = copy(obj[key]); + return cpy; +} + +function bindFunction(f, self) +{ + return function() { f.apply(self, arguments); }; +} + +function isFunction(func) +{ + return typeof func === 'function'; +} diff --git a/rollup.config.mjs b/rollup.config.mjs new file mode 100644 index 00000000..c4da7054 --- /dev/null +++ b/rollup.config.mjs @@ -0,0 +1,32 @@ +import { terser } from "rollup-plugin-terser"; + +export default { + input: 'papaparse.mjs', + output: [ + { + name: 'Papa', + file: 'papaparse.js', + format: 'umd' + }, + { + name: 'Papa', + file: 'papaparse.min.js', + format: 'umd', + plugins: [terser()] + } + ], + plugins: [ + { + resolveImportMeta(prop, { format }) { + if (format === 'umd') { + if (prop === 'url') { + return 'global && global.document && global.document.currentScript && global.document.currentScript.src'; + } + if (prop === 'papaIsUMD') { + return 'true'; + } + } + } + } + ] +}; diff --git a/tests/test.js b/tests/test.js index 27418318..c68584e5 100644 --- a/tests/test.js +++ b/tests/test.js @@ -6,15 +6,22 @@ var childProcess = require('child_process'); var server = connect().use(serveStatic(path.join(__dirname, '/..'))).listen(8071, function() { if (process.argv.indexOf('--mocha-headless-chrome') !== -1) { - childProcess.spawn('node_modules/.bin/mocha-headless-chrome', ['-f', 'http://localhost:8071/tests/tests.html'], { + console.log('Running UMD tests...'); + childProcess.spawn('node_modules/.bin/mocha-headless-chrome', ['-f', 'http://localhost:8071/tests/tests.umd.html'], { stdio: 'inherit' - }).on('exit', function(code) { - server.close(); - process.exit(code); // eslint-disable-line no-process-exit + }).on('exit', function() { + console.log('Running ESM tests...'); + childProcess.spawn('node_modules/.bin/mocha-headless-chrome', ['-f', 'http://localhost:8071/tests/tests.esm.html'], { + stdio: 'inherit' + }).on('exit', function(code) { + server.close(); + process.exit(code); // eslint-disable-line no-process-exit + }); }); } else { - open('http://localhost:8071/tests/tests.html'); + open('http://localhost:8071/tests/tests.umd.html'); + open('http://localhost:8071/tests/tests.esm.html'); console.log('Serving tests...'); } }); diff --git a/tests/tests.esm.html b/tests/tests.esm.html new file mode 100644 index 00000000..da901003 --- /dev/null +++ b/tests/tests.esm.html @@ -0,0 +1,21 @@ + + + Papa Parse Tests + + + + +
+ + + + + + + + diff --git a/tests/tests.html b/tests/tests.html deleted file mode 100644 index a3ce51e7..00000000 --- a/tests/tests.html +++ /dev/null @@ -1,22 +0,0 @@ - - - Papa Parse Tests - - - - - - - - - - - -
- - - - diff --git a/tests/tests.umd.html b/tests/tests.umd.html new file mode 100644 index 00000000..409bbd8b --- /dev/null +++ b/tests/tests.umd.html @@ -0,0 +1,22 @@ + + + Papa Parse Tests + + + + +
+ + + + + + +