generated from neoncitylights/typescript
/
walkTable.ts
93 lines (76 loc) · 2.95 KB
/
walkTable.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/**
 * One table row as plain data: maps a property (column) name to the text
 * of the cell found in that column.
 */
export type TableRow = Record<string, string>;
/**
 * All rows of a table grouped by table body: the outer array holds one
 * entry per `<tbody>`, the inner array holds that body's rows.
 */
export type TableBodies = Array<Array<TableRow>>;
/**
 * Error thrown when a table's structure cannot be turned into data.
 *
 * From the **WHATWG HTML Living Standard**, "*a table model error is an error
 * with the data represented by table elements and their descendants.
 * Documents must not have table model errors*".
 *
 * @see [WHATWG HTML Living Standard § 'Table model error'](https://html.spec.whatwg.org/multipage/tables.html#table-model-error)
 */
export class TableModelError extends Error {
	/**
	 * Identifies the error by its class name instead of the inherited
	 * `"Error"`, so logs, `toString()` output and name-based checks can tell
	 * it apart from other errors. Declared as a field so the inherited
	 * constructor signature stays untouched.
	 */
	override name = 'TableModelError';
}
/**
 * Reads an HTML table into plain, machine-readable data.
 *
 * The property names are gathered first with `collectProperties`, then every
 * `<tbody>` of the table is converted with `collectTableBodies`, so tables
 * with several bodies are supported.
 *
 * Note: when the table has no `<thead>`, the first row of the first `<tbody>`
 * supplies the property names and is still part of the returned rows.
 *
 * @param table - The table element to read.
 * @returns One array of rows per `<tbody>`; each row maps a property name to
 * the text of the matching cell.
 * @throws {TableModelError} Raised by `collectProperties` when the table has
 * no `<thead>` and no first-row cells to take the property names from.
 */
export function walkTable(table: HTMLTableElement): TableBodies {
	const properties = collectProperties(table);
	const bodies = collectTableBodies(table, properties);
	return bodies;
}
/**
 * Determines the property (column) names of a table, using one of 2 sources:
 * - If the table has a `<thead>`, the text of every `<th>` element inside it
 *   becomes a property name.
 * - Otherwise, the text of every cell in the first row of the first `<tbody>`
 *   becomes a property name.
 *
 * @param table - The table element to inspect.
 * @returns The property names, in the order their cells were found.
 * @throws {TableModelError} When there is no `<thead>` and the table has no
 * `<tbody>`, the first `<tbody>` has no rows, or its first row has no cells.
 */
export function collectProperties(table: HTMLTableElement): string[] {
	const head = table.tHead;
	if(head) {
		// a header section exists: each <th> inside it names a property,
		// and no fallback is attempted even if it contains no <th> at all
		return Array.from(head.getElementsByTagName('th'), th => th.innerText);
	}

	// no header section, so fall back to the first row of the first body;
	// every level is checked for content before it is indexed into
	const bodies = table.tBodies;
	assertHtmlCollectionHasChildren(bodies, 'No table body found');

	const rows = bodies[0].rows;
	assertHtmlCollectionHasChildren(rows, 'No table cells found');

	const cells = rows[0].cells;
	assertHtmlCollectionHasChildren(cells, 'No table cells found in first row');

	// the first row has cells, so their text becomes the property names
	return Array.from(cells, cell => cell.innerText);
}
/**
 * Guard used while collecting properties: passes silently when the
 * collection holds at least one element and throws otherwise.
 *
 * @param collection - The HTML collection that must not be empty.
 * @param message - Detail appended to the fixed error message prefix.
 * @throws {TableModelError} When the collection's length is 0.
 */
function assertHtmlCollectionHasChildren<T extends HTMLElement>(collection: HTMLCollectionOf<T>, message: string): void {
	if(collection.length !== 0) {
		return;
	}

	const prefix = 'Cannot collect properties: ';
	throw new TableModelError(prefix + message);
}
/**
 * Converts every `<tbody>` of a table into an array of row records.
 *
 * @param table - The table whose bodies are read.
 * @param properties - Property names, handed unchanged to `collectTableRows`
 * for each body.
 * @returns One array of rows per `<tbody>`, in the order of `table.tBodies`.
 */
export function collectTableBodies(table: HTMLTableElement, properties: string[]): TableBodies {
	return Array.from(table.tBodies, body => collectTableRows(body, properties));
}
/**
 * Converts every row of a table section into a record keyed by property name.
 *
 * Cells are matched to property names by position: the cell at index `i` is
 * stored under `properties[i]`. A row with fewer cells than property names
 * simply lacks the trailing keys. A cell with no matching property name is
 * stored under its column index (e.g. `"2"`), so surplus cells are kept
 * instead of overwriting one another under a single `"undefined"` key.
 *
 * @param tableBody - The table section (e.g. a `<tbody>`) whose rows are read.
 * @param properties - Property names, ordered by column.
 * @returns One record per row, in the order of `tableBody.rows`.
 */
export function collectTableRows(
	tableBody: HTMLTableSectionElement,
	properties: string[],
): TableRow[] {
	return Array.from(tableBody.rows, tableRow => {
		const row: TableRow = {};
		Array.from(tableRow.cells).forEach((cell, columnIndex) => {
			// fall back to the column index when the row is wider than the
			// list of property names, so every cell gets a distinct key
			const key = properties[columnIndex] ?? String(columnIndex);
			row[key] = cell.innerText;
		});
		return row;
	});
}