diff --git a/src/containers/monitor.jsx b/src/containers/monitor.jsx index 18a6bbeab2d..61d3a29195e 100644 --- a/src/containers/monitor.jsx +++ b/src/containers/monitor.jsx @@ -1,5 +1,6 @@ import bindAll from 'lodash.bindall'; import React from 'react'; +import Papa from 'papaparse'; import PropTypes from 'prop-types'; import {injectIntl, intlShape, defineMessages} from 'react-intl'; @@ -163,15 +164,10 @@ class Monitor extends React.Component { this.element = monitorElt; } handleImport () { - importCSV().then(rows => { - const numberOfColumns = rows[0].length; - let columnNumber = 1; - if (numberOfColumns > 1) { - const msg = this.props.intl.formatMessage(messages.columnPrompt, {numberOfColumns}); - columnNumber = parseInt(prompt(msg), 10); // eslint-disable-line no-alert - } - const newListValue = rows.map(row => row[columnNumber - 1]) - .filter(item => typeof item === 'string'); // CSV importer can leave undefineds + importCSV(numberOfColumns => { + const msg = this.props.intl.formatMessage(messages.columnPrompt, {numberOfColumns}); + return prompt(msg); // eslint-disable-line no-alert + }).then(newListValue => { const {vm, targetId, id: variableId} = this.props; setVariableValue(vm, targetId, variableId, newListValue); }); @@ -179,7 +175,8 @@ class Monitor extends React.Component { handleExport () { const {vm, targetId, id: variableId} = this.props; const variable = getVariable(vm, targetId, variableId); - const text = variable.value.join('\r\n'); + const rows = variable.value.map(item => [item]); + const text = Papa.unparse(rows); const blob = new Blob([text], {type: 'text/plain;charset=utf-8'}); downloadBlob(`${variable.name}.txt`, blob); } diff --git a/src/lib/import-csv.js b/src/lib/import-csv.js index 36ec7392bf9..bf133134953 100644 --- a/src/lib/import-csv.js +++ b/src/lib/import-csv.js @@ -1,23 +1,167 @@ import Papa from 'papaparse'; -export default () => new Promise((resolve, reject) => { - const fileInput = document.createElement('input'); - 
fileInput.setAttribute('type', 'file'); - fileInput.setAttribute('accept', '.csv, .tsv, .txt'); // parser auto-detects delimiter - fileInput.onchange = e => { - const file = e.target.files[0]; - Papa.parse(file, { - header: false, - complete: results => { - document.body.removeChild(fileInput); - resolve(results.data); - }, - error: err => { - document.body.removeChild(fileInput); - reject(err); +/** + * Guess the delimiter used to separate the fields in multicolumn data. + * Note: Assume we've found the right delimiter if it splits the first, middle, and last lines into + * the same number (greater than 1) of fields. + * @param {Array.<string>} lines - The lines of data to use. + * @returns {?string} - The delimiter, or null if the data is not multicolumn. + */ +const guessDelimiter = function (lines) { + if (lines.length === 0) { + return null; + } + + for (const d of [',', '\t']) { + const count1 = lines[0].split(d).length; + const count2 = lines[Math.floor(lines.length / 2)].split(d).length; + const count3 = lines[lines.length - 1].split(d).length; + if ((count1 > 1) && (count1 === count2) && (count1 === count3)) { + return d; + } + } + + return null; +}; + +/** + * Split an array of lines into rows (each line into an array of cells) by a given delimiter. + * @param {Array.<string>} lines - The lines of data to use. + * @param {string} delimiter - The character to split lines by. + * @returns {Array.<Array.<string>>} Two-dimensional array of the rows and then columns representing the given data. + */ +const splitLinesIntoRows = function (lines, delimiter) { + return lines.map(line => line.split(delimiter)); +}; + +/** + * Extract a single column of data from a list of rows. If the column number is zero, the original lines read directly + * from the file are returned. + * @param {number} column - Column index, 1-indexed. + * @param {Array.<Array.<string>>} rows - The rows of data to use. + * @param {Array.<string>} lines - The original lines read from a file. + * @returns {Array.<string>} Array of items extracted. 
+ */ +const extractColumnFromRows = function (column, rows, lines) { + if (column === 0) { + return lines; + } + return rows.map(cells => (column <= cells.length ? cells[column - 1] : '')); +}; + +/** + * Show a prompt for choosing the column number to retrieve when parsing multi-column data, then validate the value + * entered. + * @param {number} numberOfColumns - Number of columns, defining the valid range of column numbers (1 -> N inclusive). + * @param {function} onChooseColumn - Function to actually show the prompt for getting the single column index + * (1-indexed), for multicolumn data. + * @returns {Promise.<number>} Promise that resolves with the number chosen, validated. This is zero if the number is invalid. + */ +const promptColumnNumber = function (numberOfColumns, onChooseColumn) { + if (numberOfColumns > 1) { + return Promise.resolve(onChooseColumn(numberOfColumns)).then(columnInput => { + const column = parseInt(columnInput, 10); + if (isNaN(column) || (column < 1) || (column > numberOfColumns)) { + return 0; } + return column; }); + } + return Promise.resolve(numberOfColumns); +}; + +/** + * Remove trailing empty (falsey) items from the given array. Mutates the passed array. + * @param {Array.<string>} lines - The lines of data to use. + * @returns {Array.<string>} The same array, with trailing empty items removed. + */ +const removeTrailingEmptyLines = function (lines) { + while (lines.length && !lines[lines.length - 1]) { + lines.pop(); + } + return lines; +}; + +/** + * Parse "CSV" data from an array of lines in a manner compatible with files exported from Scratch 2.0. + * If passed data is multicolumn, a single column is returned. + * @param {Array.<string>} lines - Array of lines to parse. + * @param {function} onChooseColumn - Function to get the single column index (1-indexed), for multicolumn data. + * @returns {Promise.<Array.<string>>} Resolves to the array of items returned from parsing. 
+ */ +export const parseTxt = function (lines, onChooseColumn) { + const delimiter = guessDelimiter(lines); + + if (delimiter === null) { + return Promise.resolve(lines); + } + + const rows = splitLinesIntoRows(lines, delimiter); + const numberOfColumns = rows[0].length; + + return promptColumnNumber(numberOfColumns, onChooseColumn) + .then(column => extractColumnFromRows(column, rows, lines)); +}; + +/** + * Use Papaparse to parse CSV data from an array of lines. If passed data is multicolumn, a single column is returned. + * @param {Array.<string>} lines - Array of CSV-formatted lines from which to read data. + * @param {function} onChooseColumn - Function to get the single column index (1-indexed), for multicolumn data. + * @returns {Promise.<Array.<string>>} Resolves to the array of items returned from parsing. + */ +export const parseCsv = function (lines, onChooseColumn) { + const text = lines.join('\n'); + const {data} = Papa.parse(text, {header: false}); + const numberOfColumns = data[0].length; + return promptColumnNumber(numberOfColumns, onChooseColumn) + .then(column => extractColumnFromRows(column, data, lines)); +}; + +/** + * Use FileReader to read the text contents of a file. + * @param {File} file - The file to read. + * @returns {Promise.<string>} Resolves to a string - the contents of the file. + */ +const readFile = function (file) { + return new Promise((resolve, reject) => { + const fileReader = new FileReader(); + fileReader.onload = function () { + resolve(fileReader.result); + }; + fileReader.onerror = reject; + fileReader.readAsText(file); + }); +}; + +/** + * Parse CSV (or similar) data from a file selected by the user. + * @param {function} onChooseColumn - Function to get the single column index (1-indexed), for multicolumn data. + * @returns {Promise.<Array.<string>>} Resolves to the array of items returned from parsing. 
+ */ +export default function parseFromFile (onChooseColumn) { + const fileInput = document.createElement('input'); + + const removeInput = data => { + document.body.removeChild(fileInput); + return data; }; - document.body.appendChild(fileInput); - fileInput.click(); -}); + + return new Promise(resolve => { + fileInput.setAttribute('type', 'file'); + fileInput.setAttribute('accept', '.csv, .tsv, .txt'); // parser auto-detects delimiter + fileInput.onchange = e => { + const file = e.target.files[0]; + readFile(file).then(text => { + const lines = removeTrailingEmptyLines(text.split(/\r\n|[\r\n]/)); + if (file.name.split('.').pop() === 'txt') { + resolve(parseTxt(lines, onChooseColumn)); + } else { + resolve(parseCsv(lines, onChooseColumn)); + } + }); + }; + + document.body.appendChild(fileInput); + fileInput.click(); + }).then(removeInput, removeInput); // Always remove the file input before resolving or rejecting. +} diff --git a/test/unit/util/import-csv.test.js b/test/unit/util/import-csv.test.js new file mode 100644 index 00000000000..6cb58ae75a9 --- /dev/null +++ b/test/unit/util/import-csv.test.js @@ -0,0 +1,276 @@ +import {parseTxt} from '../../../src/lib/import-csv'; + +describe('parseTxt', () => { + test('returns single-column data unmodified', () => { + const lines = [ + 'a', + 'b', + 'c' + ]; + + const result = lines; + + const onChooseColumn = jest.fn(); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).not.toHaveBeenCalled(); + }); + }); + + test('calls onChooseColumn once to determine column with multi-column data', () => { + const lines = [ + 'a,x', + 'b,y', + 'c,z' + ]; + + const resultColumn1 = [ + 'a', + 'b', + 'c' + ]; + + const resultColumn2 = [ + 'x', + 'y', + 'z' + ]; + + const onChooseColumn1 = jest.fn().mockReturnValue(1); + const onChooseColumn2 = jest.fn().mockReturnValue(2); + + return Promise.all([ + expect(parseTxt(lines, 
onChooseColumn1)).resolves.toMatchObject(resultColumn1), + expect(parseTxt(lines, onChooseColumn2)).resolves.toMatchObject(resultColumn2) + ]).then(() => { + expect(onChooseColumn1.mock.calls.length).toBe(1); + expect(onChooseColumn2.mock.calls.length).toBe(1); + }); + }); + + test('does not do anything with quotation marks', () => { + const lines = [ + '"', + 'a', + '"', + 'b' + ]; + + const result = lines; + + const onChooseColumn = jest.fn(); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).not.toHaveBeenCalled(); + }); + }); + + test('accepts \\t as delimiter', () => { + const lines = [ + 'a\tx', + 'b\ty', + 'c\tz' + ]; + + const result = [ + 'x', + 'y', + 'z' + ]; + + const onChooseColumn = jest.fn().mockReturnValue(2); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).toHaveBeenCalled(); + }); + }); + + test('does not do anything with commas if determined to be single-column data', () => { + const lines = [ + 'a', + 'b,anana', + 'c', + ',' + ]; + + const result = lines; + + const onChooseColumn = jest.fn(); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).not.toHaveBeenCalled(); + }); + }); + + test('treats data as single-column if a consistent delimiter cannot be found', () => { + const lines = [ + 'a,1', + 'b\t2' + ]; + + const result = lines; + + const onChooseColumn = jest.fn(); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).not.toHaveBeenCalled(); + }); + }); + + test('treats data as single-column if a consistent column count cannot be found', () => { + const lines = [ + 'a,1', + 'b,c,d,e,f,g,h' + ]; + + const result = lines; + + const onChooseColumn = jest.fn(); + + return ( + expect(parseTxt(lines, 
onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).not.toHaveBeenCalled(); + }); + }); + + test('checks first, middle (rounded down), and last line in guessing delimiter', () => { + // Length of the list is 5; 5/2 = 2.5, round down to check index 2 (so the third line). + + const lines = [ + '1,a', + '2\tb', + '3,c', + '4\td', + '5,e' + ]; + + const result = [ + '1', + '2\tb', + '3', + '4\td', + '5' + ]; + + const onChooseColumn = jest.fn().mockReturnValue(1); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).toHaveBeenCalled(); + }); + }); + + test('passes onChooseColumn 1-indexed number of columns', () => { + const lines = [ + 'a,b,c,d', + 'w,x,y,z' + ]; + + // We don't care about parse's returned value in this test. + + const onChooseColumn = jest.fn().mockReturnValue(0); + + return ( + parseTxt(lines, onChooseColumn) + ).then(() => { + expect(onChooseColumn).toHaveBeenCalledWith(4); + }); + }); + + test('calls parseInt on return value of onChooseColumn', () => { + const lines = [ + 'a,1', + 'b,2' + ]; + + const result = [ + 'a', + 'b' + ]; + + const onChooseColumn = jest.fn().mockReturnValue('1.8'); + + return expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result); + }); + + test('returns data unmodified when onChooseColumn returns out-of-bounds or NaN', () => { + const lines = [ + 'a,1', + 'b,2' + ]; + + const result = [ + 'a,1', + 'b,2' + ]; + + const onChooseColumn0 = jest.fn().mockReturnValue(0); + const onChooseColumn3 = jest.fn().mockReturnValue(3); + const onChooseColumnNaN = jest.fn().mockReturnValue('unicorn'); + + return Promise.all([ + expect(parseTxt(lines, onChooseColumn0)).resolves.toMatchObject(result), + expect(parseTxt(lines, onChooseColumn3)).resolves.toMatchObject(result), + expect(parseTxt(lines, onChooseColumnNaN)).resolves.toMatchObject(result) + ]); + }); + + test('reads first/middle/last line to determine 
number of columns', () => { + // Note: The code checks the first line, which is guaranteed to have + // the same number of columns as the middle and last. + + const lines = [ + 'a,b', + '1,2,3,4', + 'c,d', + 'e,f' + ]; + + const result = [ + 'b', + '2', + 'd', + 'f' + ]; + + const onChooseColumn = jest.fn().mockReturnValue(2); + + return ( + expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result) + ).then(() => { + expect(onChooseColumn).toHaveBeenCalledWith(2); + }); + }); + + test('uses empty string in place of missing columns', () => { + const lines = [ + 'a,b', + 'x', + 'c,d', + 'e,f' + ]; + + const result = [ + 'b', + '', + 'd', + 'f' + ]; + + const onChooseColumn = jest.fn().mockReturnValue(2); + + return expect(parseTxt(lines, onChooseColumn)).resolves.toMatchObject(result); + }); +});