diff --git a/README.md b/README.md index 4f9799c..045b320 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,74 @@ Attempts to convert HTML tables into JSON. -Can be passed the markup for a single table as a string, a fragment of HTML or an entire page or just a URL (with an optional callback function; promises also supported). +Can be passed the markup for a single table as a string, a fragment of HTML or an entire page or just +a URL (with an optional callback function; promises also supported). -The response is always an array. Every array entry in the response represents a table found on the page (in same the order they were found in the HTML). +The response is always an array. Every array entry in the response represents a table found on the page +(in the same order they were found in the HTML). -## Options +## Basic usage + +Install via npm + +``` +npm install tabletojson +``` + +### Remote (`convertUrl`) + +```javascript +'use strict'; + +const tabletojson = require('tabletojson'); + +tabletojson.convertUrl( + 'https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes', + function(tablesAsJson) { + console.log(tablesAsJson[1]); + } +); + +``` + +### Local (`convert`) +Have a look in the examples. 
+ +```javascript +// example-6.js +'use strict'; + +const tabletojson = require('../lib/tabletojson'); +const fs = require('fs'); +const path = require('path'); + +const html = fs.readFileSync(path.resolve(__dirname, '../test/tables.html'), {encoding: 'UTF-8'}); +const converted = tabletojson.convert(html); + +console.log(converted); +``` + +### Duplicate column headings + +If there are duplicate column headings, subsequent headings are suffixed with a count: + +``` +// Table +| PLACE | VALUE | PLACE | VALUE | +| abc | 1 | def | 2 | + +// Example output +[{ + PLACE: 'abc', VALUE: '1', + PLACE_2: 'def', VALUE_2: '2', +}] +``` ### Tables with headings in the first column -If a table contains headings in the first column you might get an unexpected result, but you can pass a second argument with options with `{ useFirstRowForHeadings: true }` to have it treat the first column as it would any other cell. +If a table contains headings in the first column you might get an unexpected result, but you can pass a +second argument with options with `{ useFirstRowForHeadings: true }` to have it treat the first column +as it would any other cell. ``` javascript tabletojson.convertUrl( @@ -31,12 +90,14 @@ tabletojson.convertUrl( ### Tables with HTML -The following options are true by default, which converts all values to plain text to give you an easier more readable object to work with: +The following options are true by default, which converts all values to plain text to give you an easier +more readable object to work with: * stripHtmlFromHeadings * stripHtmlFromCells -If your table contains HTML you want to parse (for example for links) you can set `stripHtmlFromCells` to `false` to treat it as raw text. +If your table contains HTML you want to parse (for example for links) you can set `stripHtmlFromCells` +to `false` to treat it as raw text. 
``` javascript tabletojson.convertUrl( @@ -51,24 +112,89 @@ tabletojson.convertUrl( Note: This doesn't work with nested tables, which it will still try to parse. -You probably don't need to set `stripHtmlFromHeadings` to false (and setting it to false can make the results hard to parse), but if you do you can also set both at the same time by setting `stripHtml` to false. +You probably don't need to set `stripHtmlFromHeadings` to false (and setting it to false can make the +results hard to parse), but if you do you can also set both at the same time by setting `stripHtml` to +false. -### Duplicate column headings -If there are duplicate column headings, subsequent headings are suffixed with a count: +## Options + +### request (only `convertUrl`) +If you need to get data from a remote server to pass it to the parser you can call `tabletojson.convertUrl`. +When working behind a proxy you can pass any request-options (proxy, headers,...) by adding a request +object to the options passed to `convertUrl`. +for more information on how to configure request please have a look at: [https://github.com/request/request](https://github.com/request/request) + +``` javascript +tabletojson.convertUrl('https://www.timeanddate.com/holidays/ireland/2017', { + useFirstRowForHeadings: true, + request: { + proxy: 'http://proxy:8080' + } +}); +``` + +### stripHtmlFromHeadings +Strip any HTML from heading cells. Default is true. ``` // Table -| PLACE | VALUE | PLACE | VALUE | -| abc | 1 | def | 2 | +| KEY | VALUE | +| abc | 1 | +| dev | 2 | + +// Example output with stripHtmlFromHeadings:true +[ + { + KEY: 'abc', VALUE: '1' + }, + { + KEY: 'dev', VALUE: '2' + } +] +// Example output with stripHtmlFromHeadings:false +[ + { + KEY: 'abc', 'VALUE': '1' + }, + { + KEY: 'dev', 'VALUE': '2' + } +] +``` + +### stripHtmlFromCells + +Strip any HTML from tableBody cells. Default is true. 
+ -// Example output -[{ - PLACE: 'abc', VALUE: '1', - PLACE_2: 'def', VALUE_2: '2', -}] ``` -### Options forceIndexAsNumber +// Table +| KEY | VALUE | +| abc | 1 | +| dev | 2 | + +// Example output with stripHtmlFromCells:true +[ + { + KEY: 'abc', VALUE: '1' + }, + { + KEY: 'dev', VALUE: '2' + } +] +// Example output with stripHtmlFromCells:false +[ + { + KEY: 'abc', 'VALUE': '1' + }, + { + KEY: 'dev', 'VALUE': '2' + } +] +``` + + +### forceIndexAsNumber Instead of using column text (that sometime re-order the data), force an index as a number (string number). ``` javascript @@ -85,15 +211,196 @@ Instead of using column text (that sometime re-order the data), force an index a // Some JSON (Other rows) ``` -## Options, known issues and limitations +### countDuplicateHeadings +Default is 'true'. If set to 'false' duplicate headings will not get a trailing count suffix (e.g. `_2`). The value of +the field will be the last value found in the table row: -This module only supports parsing basic tables with a simple horizontal set of headings and corresponding cells. +``` +// Table +| PLACE | VALUE | PLACE | VALUE | +| abc | 1 | def | 2 | +| ghi | 3 | jkl | 4 | + +// Example output with countDuplicateHeadings:false +[ + { + PLACE: 'def', VALUE: '2' + }, + { + PLACE: 'jkl', VALUE: '4' + } +] +``` -It can give useless or weird results on tables that have complex structures (such as nested tables) or multiple headers (such as on both X and Y axis). +### ignoreColumns +Array of indexes to be ignored, starting with 0. Default is 'null/undefined'. -You'll need to handle things like work out which tables to parse and (in most cases) clean up the data. You might want to combine it it with modules like json2csv or CsvToMarkdownTable. 
+``` +// Table +| NAME | PLACE | WEIGHT | SEX | AGE | +| Mel | 1 | 58 | W | 23 | +| Tom | 2 | 78 | M | 54 | +| Bill | 3 | 92 | M | 31 | + +// Example output with ignoreColumns: [2, 3] +[ + { + NAME: 'Mel', PLACE: '1', AGE: '23' + }, + { + NAME: 'Tom', PLACE: '2', AGE: '54' + }, + { + NAME: 'Bill', PLACE: '3', AGE: '31' + } +] +``` -You might want to use it with a module like 'cheerio' if you want to parse specific tables identified by id or class (i.e. select them with cheerio and pass the HTML of them as a string). +### onlyColumns +Array of indexes that are taken, starting with 0. Default is 'null/undefined'. +If given, this option overrides ignoreColumns. + +``` +// Table +| NAME | PLACE | WEIGHT | SEX | AGE | +| Mel | 1 | 58 | W | 23 | +| Tom | 2 | 78 | M | 54 | +| Bill | 3 | 92 | M | 31 | + +// Example output with onlyColumns: [0, 4] +[ + { + NAME: 'Mel', AGE: '23' + }, + { + NAME: 'Tom', AGE: '54' + }, + { + NAME: 'Bill', AGE: '31' + } +] +``` + +### ignoreHiddenRows +Indicates if hidden rows (display:none) are ignored. Default is true: + +``` +// Table + | NAME | PLACE | WEIGHT | SEX | AGE | + | Mel | 1 | 58 | W | 23 | + | Tom | 2 | 78 | M | 54 | + | Bill | 3 | 92 | M | 31 | +*| Cat | 4 | 4 | W | 2 |* + +// Example output with ignoreHiddenRows:true +[ + { + NAME: 'Mel', PLACE: '1', WEIGHT: '58', SEX: 'W', AGE: '23' + }, + { + NAME: 'Tom', PLACE: '2', WEIGHT: '78', SEX: 'M', AGE: '54' + }, + { + NAME: 'Bill', PLACE: '3', WEIGHT: '92', SEX: 'M', AGE: '31' + } +] +// Example output with ignoreHiddenRows:false +[ + { + NAME: 'Mel', PLACE: '1', WEIGHT: '58', SEX: 'W', AGE: '23' + }, + { + NAME: 'Tom', PLACE: '2', WEIGHT: '78', SEX: 'M', AGE: '54' + }, + { + NAME: 'Bill', PLACE: '3', WEIGHT: '92', SEX: 'M', AGE: '31' + } + }, + { + NAME: 'Cat', PLACE: '4', WEIGHT: '4', SEX: 'W', AGE: '2' + } +] +``` + +### headings +Array of Strings to be used as headings. Default is 'null/undefined'. 
+ +If more headings are given than columns exist, the surplus headings are ignored. If fewer headings +are given than there are columns, the values of the remaining columns are ignored. + +``` +// Table + | NAME | PLACE | WEIGHT | SEX | AGE | + | Mel | 1 | 58 | W | 23 | + | Tom | 2 | 78 | M | 54 | + | Bill | 3 | 92 | M | 31 | +*| Cat | 4 | 4 | W | 2 |* + + +// Example output with headings: ['A','B','C','D','E'] +[ + { + A: 'Mel', B: '1', C: '58', D: 'W', E: '23' + }, + { + A: 'Tom', B: '2', C: '78', D: 'M', E: '54' + }, + { + A: 'Bill', B: '3', C: '92', D: 'M', E: '31' + } +] +// Example output with headings: ['A','B','C'] +[ + { + A: 'Mel', B: '1', C: '58' + }, + { + A: 'Tom', B: '2', C: '78' + }, + { + A: 'Bill', B: '3', C: '92' + } +] +// Example output with headings: ['A','B','C','D','E','F','G','H'] +[ + { + A: 'Mel', B: '1', C: '58', D: 'W', E: '23' + }, + { + A: 'Tom', B: '2', C: '78', D: 'M', E: '54' + }, + { + A: 'Bill', B: '3', C: '92', D: 'M', E: '31' + } +] +// Example output with headings: ['A','B','C'] && ignoreColumns: [1, 2] +[ + { + A: 'Mel', B: 'W', C: '23' + }, + { + A: 'Tom', B: 'M', C: '54' + }, + { + A: 'Bill', B: 'M', C: '31' + } +] + +``` + +## Known issues and limitations + +This module only supports parsing basic tables with a simple horizontal set of headings and +corresponding cells. + +It can give useless or weird results on tables that have complex structures (such as nested tables) or +multiple headers (such as on both X and Y axis). + +You'll need to handle things like working out which tables to parse and (in most cases) clean up the data. +You might want to combine it with modules like json2csv or CsvToMarkdownTable. + +You might want to use it with a module like 'cheerio' if you want to parse specific tables identified +by id or class (i.e. select them with cheerio and pass the HTML of them as a string). 
## Example usage @@ -157,7 +464,8 @@ tabletojson.convertUrl(url) # Issues -Right now the table needs to be "well formatted" to be convertable. Tables in Html pages with not be processed. +Right now the table needs to be "well formatted" to be convertable. Tables in Html pages with not be +processed. ```html @@ -188,8 +496,5 @@ June 2018 - Very special thanks to the originator of the library, Iain Collins ( grasping and mastering cheerio this lib would have not been where it is right now. Also I would personally like to say "Thank you" for your trust in passing me the ownership. @maugenst -Special thanks to Marius Augenstein (@maugenst) for the latest major update, which includes ES6 syntax, uses native -promises and has much improved code and inline documentation. - Additional thanks to @roryok, Max Thyen (@maxthyen), Thor Jacobsen (@twjacobsen) and Michael Keller (@mhkeller) for improvements and bug fixes. \ No newline at end of file diff --git a/examples/example-6.js b/examples/example-6.js new file mode 100644 index 0000000..7a99db1 --- /dev/null +++ b/examples/example-6.js @@ -0,0 +1,10 @@ +'use strict'; + +const tabletojson = require('../lib/tabletojson'); +const fs = require('fs'); +const path = require('path'); + +const html = fs.readFileSync(path.resolve(__dirname, '../test/tables.html'), {encoding: 'UTF-8'}); +const converted = tabletojson.convert(html); + +console.log(converted); \ No newline at end of file diff --git a/lib/tabletojson.js b/lib/tabletojson.js index b598244..4d0ceb7 100644 --- a/lib/tabletojson.js +++ b/lib/tabletojson.js @@ -13,6 +13,11 @@ class tabletojson { * @param options.stripHtmlFromCells Strip HTML from cells [default=true] * @param options.stripHtml Strip off HTML [default=null] if set true stripHtmlFromHeadings and stripHtmlFromCells will also be true * @param options.forceIndexAsNumber Force the index to be used as number [default=false] + * @param options.countDuplicateHeadings If given a _ will be added to the duplicate key 
[default=true] + * @param options.ignoreColumns Array of column indices to be ignored [default=null] + * @param options.onlyColumns Array of column indices to be used. Overrides ignoreColumns [default=null] + * @param options.ignoreHiddenRows Ignoring hidden rows [default=true] + * @param options.headings Array of Strings to be used as headings [default=null] * @return {Object} Converted Object as an object literal */ static convert(html, options) { @@ -23,7 +28,11 @@ class tabletojson { stripHtmlFromCells: true, stripHtml: null, forceIndexAsNumber: false, - countDuplicateHeadings: true + countDuplicateHeadings: true, + ignoreColumns: null, + onlyColumns: null, + ignoreHiddenRows: true, + headings: null }, options ); @@ -36,40 +45,48 @@ class tabletojson { options.stripHtmlFromCells = false; } - let jsonResponse = [], - alreadySeen = [], - suffix = undefined; + const jsonResponse = []; + let suffix = undefined; - let $ = cheerio.load(html); + const $ = cheerio.load(html); $('table').each(function(i, table) { - let tableAsJson = []; + const tableAsJson = []; const alreadySeen = {}; // Get column headings // @fixme Doesn't support vertical column headings. // @todo Try to support badly formated tables. - let columnHeadings = []; + const columnHeadings = []; let trs = $(table).find('tr'); if (options.useFirstRowForHeadings) trs = $(trs[0]); - + let headingsCounter = 0; + // Use headings for objects key evaluation trs.each(function(i, row) { $(row) .find('th') .each(function(j, cell) { - let value = options.stripHtmlFromHeadings - ? $(cell) - .text() - .trim() - : $(cell) - .html() - .trim(); + if (options.onlyColumns && !options.onlyColumns.includes(j)) return; + if (options.ignoreColumns && !options.onlyColumns && options.ignoreColumns.includes(j)) return; + let value = ''; + + if (options.headings) { + value = options.headings[headingsCounter++]; + } else { + value = options.stripHtmlFromHeadings + ? 
$(cell) + .text() + .trim() + : $(cell) + .html() + .trim(); + } - let seen = alreadySeen[value]; + const seen = alreadySeen[value]; if (seen && options.countDuplicateHeadings) { suffix = ++alreadySeen[value]; - columnHeadings[j] = (value !== '') ? value + '_' + suffix : j; + columnHeadings[j] = value !== '' ? value + '_' + suffix : j; } else { alreadySeen[value] = 1; columnHeadings[j] = value; @@ -81,11 +98,23 @@ class tabletojson { $(table) .find('tr') .each(function(i, row) { - let rowAsJson = {}; + const rowAsJson = {}; + + const cells = options.useFirstRowForHeadings ? $(row).find('td, th') : $(row).find('td'); + cells.each(function(j, cell) { + // ignoreHiddenRows + if (options.ignoreHiddenRows) { + const style = $(row).attr('style'); + if (style) { + const m = style.match(/.*display.*:.*none.*/g); + if (m && m.length > 0) return; + } + } + + if (options.onlyColumns && !options.onlyColumns.includes(j)) return; + if (options.ignoreColumns && !options.onlyColumns && options.ignoreColumns.includes(j)) return; - let rows = options.useFirstRowForHeadings ? $(row).find('td, th') : $(row).find('td'); - rows.each(function(j, cell) { - let content = options.stripHtmlFromCells + const content = options.stripHtmlFromCells ? 
$(cell) .text() .trim() @@ -101,11 +130,11 @@ class tabletojson { }); // Skip blank rows - if (JSON.stringify(rowAsJson) != '{}') tableAsJson.push(rowAsJson); + if (JSON.stringify(rowAsJson) !== '{}') tableAsJson.push(rowAsJson); }); // Add the table to the response - if (tableAsJson.length != 0) jsonResponse.push(tableAsJson); + if (tableAsJson.length !== 0) jsonResponse.push(tableAsJson); }); return jsonResponse; diff --git a/package.json b/package.json index 34ce1a7..b5c9aba 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ }, "name": "tabletojson", "description": "Converts HTML tables to JSON objects", - "version": "0.9.2", + "version": "0.9.3", "main": "./lib/tabletojson.js", "keywords": [ "table2json", diff --git a/test/tables.html b/test/tables.html index 1a5973d..6ccd0fd 100644 --- a/test/tables.html +++ b/test/tables.html @@ -6,7 +6,7 @@ -

With Heading

+

Table #1: With Heading

@@ -53,33 +53,40 @@

With Heading

+ + + + + + +
yes yes
HiddenWolf4nono
-

With heading and probably duplicated headings

+

Table #2: With heading and probably duplicated headings

- - - - - + + + + + - - - - - - - - - - + + + + + + + + + +
DogRaceAge
DogRaceAge
DonaldBobtail2
MaggyTBullterrier3
DonaldBobtail2
MaggyTBullterrier3
-

No explicit heading

+

Table #3: No explicit heading

@@ -113,33 +120,103 @@

No explicit heading

Dog
-

With heading and probably duplicated headings

+

Table #4: With heading and probably duplicated headings

- - - - - - - + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + + +
DogHeight
DogHeight
DonaldBobtail218042
MaggyTBullterrier315041
DonaldBobtail218042
MaggyTBullterrier315041
+ +

Table #5: Table conversion with countDuplicateHeadings:false

+ + + + + + + + + + + + + + + + + + + + + + + +
PLACEVALUEPLACEVALUE
abc1def2
ghi3jkl4
+ +

Table #6: Table for conversion using options 'ignoreColumns' and 'onlyColumns'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NAMEPLACEWEIGHTSEXAGE
Mel158W23
Tom278M54
Bill392M31
Cat44W2
+ \ No newline at end of file diff --git a/test/test-tabletojson-local.js b/test/test-tabletojson-local.js index 524ade7..ae83774 100644 --- a/test/test-tabletojson-local.js +++ b/test/test-tabletojson-local.js @@ -17,8 +17,8 @@ describe('TableToJSON Local', function() { it('Options: Strip HTML from header AND from body', async function() { const converted = await tabletojson.convert(html, { - stripHtmlFromHeadings: true, - stripHtmlFromCells: true + stripHtmlFromHeadings: true, + stripHtmlFromCells: true }); converted.should.be.ok(); @@ -30,7 +30,7 @@ describe('TableToJSON Local', function() { it('Options: Strip HTML from header AND from body using stripHtml-shortcut ', async function() { const converted = await tabletojson.convert(html, { - stripHtml: true + stripHtml: true }); converted.should.be.ok(); @@ -42,8 +42,8 @@ describe('TableToJSON Local', function() { it('Options: Strip HTML from header but not from body', async function() { const converted = await tabletojson.convert(html, { - stripHtmlFromHeadings: true, - stripHtmlFromCells: false + stripHtmlFromHeadings: true, + stripHtmlFromCells: false }); converted.should.be.ok(); @@ -55,8 +55,8 @@ describe('TableToJSON Local', function() { it('Options: Strip HTML from body but not from header', async function() { const converted = await tabletojson.convert(html, { - stripHtmlFromHeadings: false, - stripHtmlFromCells: true + stripHtmlFromHeadings: false, + stripHtmlFromCells: true }); converted.should.be.ok(); @@ -88,18 +88,6 @@ describe('TableToJSON Local', function() { _.has(firstTable[0], 'isDumb_2').should.be.true(); }); - it('Double Header Entry: do not count duplicate headings', async function() { - const converted = await tabletojson.convert(html, { - countDuplicateHeadings: false - }); - converted.should.be.ok(); - - const firstTable = converted[0]; - - _.has(firstTable[0], 'isDumb').should.be.true(); - _.has(firstTable[0], 'isDumb_2').should.be.false(); - }); - it('Directly local html content: Table 
with header', async function() { const converted = await tabletojson.convert(html); converted.should.be.ok(); @@ -117,11 +105,11 @@ describe('TableToJSON Local', function() { }); converted.should.be.ok(); - const firstTable = converted[0]; + const firstTable = converted[0]; - _.has(firstTable[0], 'Dog').should.be.true(); - _.has(firstTable[0], 'Race').should.be.true(); - _.has(firstTable[0], 'Age').should.be.true(); + _.has(firstTable[0], 'Dog').should.be.true(); + _.has(firstTable[0], 'Race').should.be.true(); + _.has(firstTable[0], 'Age').should.be.true(); }); it('Directly passing html content: Table without header', async function() { @@ -149,6 +137,346 @@ describe('TableToJSON Local', function() { _.has(forthTable[0], '2').should.be.true(); _.has(forthTable[0], 'Height').should.be.true(); _.has(forthTable[0], '4').should.be.true(); + }); + + // ADDED TO FIX: https://github.com/maugenst/tabletojson/pull/18 + it('Double Header Entry: countDuplicateHeadings:false', async function() { + const converted = await tabletojson.convert(html, { + countDuplicateHeadings: false + }); + converted.should.be.ok(); + + const table = converted[4]; + + _.has(table[0], 'PLACE').should.be.true(); + _.has(table[0], 'VALUE').should.be.true(); + _.has(table[0], 'PLACE_2').should.be.false(); + _.has(table[0], 'VALUE_2').should.be.false(); + _.has(table[1], 'PLACE').should.be.true(); + _.has(table[1], 'VALUE').should.be.true(); + _.has(table[1], 'PLACE_2').should.be.false(); + _.has(table[1], 'VALUE_2').should.be.false(); + + table[0].PLACE.should.be.equal('def'); + table[0].VALUE.should.be.equal('2'); + table[1].PLACE.should.be.equal('jkl'); + table[1].VALUE.should.be.equal('4'); + }); + + // ADDED TO FIX: https://github.com/maugenst/tabletojson/pull/18 + it('Double Header Entry: countDuplicateHeadings:true', async function() { + const converted = await tabletojson.convert(html, { + countDuplicateHeadings: true + }); + converted.should.be.ok(); + + const table = converted[4]; + + 
_.has(table[0], 'PLACE').should.be.true(); + _.has(table[0], 'VALUE').should.be.true(); + _.has(table[0], 'PLACE_2').should.be.true(); + _.has(table[0], 'VALUE_2').should.be.true(); + _.has(table[1], 'PLACE').should.be.true(); + _.has(table[1], 'VALUE').should.be.true(); + _.has(table[1], 'PLACE_2').should.be.true(); + _.has(table[1], 'VALUE_2').should.be.true(); + + table[0].PLACE.should.be.equal('abc'); + table[0].VALUE.should.be.equal('1'); + table[0].PLACE_2.should.be.equal('def'); + table[0].VALUE_2.should.be.equal('2'); + table[1].PLACE.should.be.equal('ghi'); + table[1].VALUE.should.be.equal('3'); + table[1].PLACE_2.should.be.equal('jkl'); + table[1].VALUE_2.should.be.equal('4'); + }); + + // FEATURE 'ignoreColumns' + it('Option: ignoreColumns: [2, 3]', async function() { + const converted = await tabletojson.convert(html, { + ignoreColumns: [2, 3] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'NAME').should.be.true(); + _.has(table[0], 'PLACE').should.be.true(); + _.has(table[0], 'WEIGHT').should.be.false(); + _.has(table[0], 'SEX').should.be.false(); + _.has(table[0], 'AGE').should.be.true(); + table[0].NAME.should.be.equal('Mel'); + table[0].PLACE.should.be.equal('1'); + table[0].AGE.should.be.equal('23'); + + _.has(table[1], 'NAME').should.be.true(); + _.has(table[1], 'PLACE').should.be.true(); + _.has(table[1], 'WEIGHT').should.be.false(); + _.has(table[1], 'SEX').should.be.false(); + _.has(table[1], 'AGE').should.be.true(); + table[1].NAME.should.be.equal('Tom'); + table[1].PLACE.should.be.equal('2'); + table[1].AGE.should.be.equal('54'); + + _.has(table[2], 'NAME').should.be.true(); + _.has(table[2], 'PLACE').should.be.true(); + _.has(table[2], 'WEIGHT').should.be.false(); + _.has(table[2], 'SEX').should.be.false(); + _.has(table[2], 'AGE').should.be.true(); + table[2].NAME.should.be.equal('Bill'); + table[2].PLACE.should.be.equal('3'); + table[2].AGE.should.be.equal('31'); + }); + + // FEATURE 'onlyColumns' + 
it('Option: onlyColumns: [0, 4]', async function() { + const converted = await tabletojson.convert(html, { + onlyColumns: [0, 4], + ignoreColumns: [2, 4] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'NAME').should.be.true(); + _.has(table[0], 'PLACE').should.be.false(); + _.has(table[0], 'WEIGHT').should.be.false(); + _.has(table[0], 'SEX').should.be.false(); + _.has(table[0], 'AGE').should.be.true(); + table[0].NAME.should.be.equal('Mel'); + table[0].AGE.should.be.equal('23'); + + _.has(table[1], 'NAME').should.be.true(); + _.has(table[1], 'PLACE').should.be.false(); + _.has(table[1], 'WEIGHT').should.be.false(); + _.has(table[1], 'SEX').should.be.false(); + _.has(table[1], 'AGE').should.be.true(); + table[1].NAME.should.be.equal('Tom'); + table[1].AGE.should.be.equal('54'); + + _.has(table[2], 'NAME').should.be.true(); + _.has(table[2], 'PLACE').should.be.false(); + _.has(table[2], 'WEIGHT').should.be.false(); + _.has(table[2], 'SEX').should.be.false(); + _.has(table[2], 'AGE').should.be.true(); + table[2].NAME.should.be.equal('Bill'); + table[2].AGE.should.be.equal('31'); + }); + + // FEATURE 'ignoreHiddenRows:true' + it('Option: ignoreHiddenRows:true', async function() { + const converted = await tabletojson.convert(html, { + ignoreHiddenRows: true + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'NAME').should.be.true(); + _.has(table[0], 'PLACE').should.be.true(); + _.has(table[0], 'WEIGHT').should.be.true(); + _.has(table[0], 'SEX').should.be.true(); + _.has(table[0], 'AGE').should.be.true(); + + table.length.should.be.equal(3); + }); + + // FEATURE 'ignoreHiddenRows:false' + it('Option: ignoreHiddenRows:false', async function() { + const converted = await tabletojson.convert(html, { + ignoreHiddenRows: false + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'NAME').should.be.true(); + _.has(table[0], 'PLACE').should.be.true(); + _.has(table[0], 
'WEIGHT').should.be.true(); + _.has(table[0], 'SEX').should.be.true(); + _.has(table[0], 'AGE').should.be.true(); + + table.length.should.be.equal(4); + }); + + // FEATURE 'headings: ['A', 'B', 'C', 'D', 'E']' + it('Option: headings: ["A","B","C","D","E"]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A', 'B', 'C', 'D', 'E'] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.true(); + _.has(table[0], 'C').should.be.true(); + _.has(table[0], 'D').should.be.true(); + _.has(table[0], 'E').should.be.true(); + + table.length.should.be.equal(3); + }); + + // FEATURE 'headings: ['A', 'B', 'C']' + it('Option: headings: ["A","B","C"]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A', 'B', 'C'] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.true(); + _.has(table[0], 'C').should.be.true(); + _.has(table[0], 'D').should.be.false(); + _.has(table[0], 'E').should.be.false(); + + table.length.should.be.equal(3); + }); + + /** + * | NAME | PLACE | WEIGHT | SEX | AGE | + * | Mel | 1 | 58 | W | 23 | + * | Tom | 2 | 78 | M | 54 | + * | Bill | 3 | 92 | M | 31 | + */ + // FEATURE 'headings: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']' + it('Option: headings: ["A","B","C","E","E","F","G","H","I"]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.true(); + _.has(table[0], 'C').should.be.true(); + _.has(table[0], 'D').should.be.true(); + _.has(table[0], 'E').should.be.true(); + + table.length.should.be.equal(3); + table[0].A.should.equal('Mel'); + table[0].B.should.equal('1'); + 
table[0].C.should.equal('58'); + table[0].D.should.equal('W'); + table[0].E.should.equal('23'); + + table[1].A.should.equal('Tom'); + table[1].B.should.equal('2'); + table[1].C.should.equal('78'); + table[1].D.should.equal('M'); + table[1].E.should.equal('54'); + + table[2].A.should.equal('Bill'); + table[2].B.should.equal('3'); + table[2].C.should.equal('92'); + table[2].D.should.equal('M'); + table[2].E.should.equal('31'); + }); + + /** + * | NAME | PLACE | WEIGHT | SEX | AGE | + * | Mel | 1 | 58 | W | 23 | + * | Tom | 2 | 78 | M | 54 | + * | Bill | 3 | 92 | M | 31 | + */ + // FEATURE 'headings: ['A', 'B', 'C'] && ignoreColumns: [1, 2]' + it('Option: headings: ["A","B","C"] && ignoreColumns: [1, 2]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A', 'B', 'C'], + ignoreColumns: [1, 2] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.true(); + _.has(table[0], 'C').should.be.true(); + _.has(table[0], 'D').should.be.false(); + _.has(table[0], 'E').should.be.false(); + + table.length.should.be.equal(3); + + table[0].A.should.equal('Mel'); + table[0].B.should.equal('W'); + table[0].C.should.equal('23'); + + table[1].A.should.equal('Tom'); + table[1].B.should.equal('M'); + table[1].C.should.equal('54'); + + table[2].A.should.equal('Bill'); + table[2].B.should.equal('M'); + table[2].C.should.equal('31'); + }); + + /** + * | NAME | PLACE | WEIGHT | SEX | AGE | + * | Mel | 1 | 58 | W | 23 | + * | Tom | 2 | 78 | M | 54 | + * | Bill | 3 | 92 | M | 31 | + */ + // FEATURE 'headings: ['A', 'B', 'C'] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]' + it('Option: headings: ["A","B","C"] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A', 'B', 'C'], + ignoreColumns: [1, 2], + onlyColumns: [0, 4] + }); + converted.should.be.ok(); + + const table = converted[5]; + 
+ _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.true(); + _.has(table[0], 'C').should.be.false(); + _.has(table[0], 'D').should.be.false(); + _.has(table[0], 'E').should.be.false(); + + table.length.should.be.equal(3); + + table[0].A.should.equal('Mel'); + table[0].B.should.equal('23'); + + table[1].A.should.equal('Tom'); + table[1].B.should.equal('54'); + + table[2].A.should.equal('Bill'); + table[2].B.should.equal('31'); + }); + + /** + * | NAME | PLACE | WEIGHT | SEX | AGE | + * | Mel | 1 | 58 | W | 23 | + * | Tom | 2 | 78 | M | 54 | + * | Bill | 3 | 92 | M | 31 | + */ + // FEATURE 'headings: ['A'] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]' + it('Option: headings: ["A"] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]', async function() { + const converted = await tabletojson.convert(html, { + headings: ['A'], + ignoreColumns: [1, 2], + onlyColumns: [0, 4] + }); + converted.should.be.ok(); + + const table = converted[5]; + + _.has(table[0], 'A').should.be.true(); + _.has(table[0], 'B').should.be.false(); + _.has(table[0], 'C').should.be.false(); + _.has(table[0], 'D').should.be.false(); + _.has(table[0], 'E').should.be.false(); + + table.length.should.be.equal(3); + + table[0].A.should.equal('Mel'); + + table[1].A.should.equal('Tom'); + table[2].A.should.equal('Bill'); }); });