-
-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(csv): add coercions, restructure
- Loading branch information
1 parent
2b07100
commit 93d79ec
Showing
4 changed files
with
163 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import { maybeParseFloat, maybeParseInt } from "@thi.ng/strings"; | ||
import { CoercionFn } from "./api"; | ||
|
||
/**
 * Returns a coercion function which parses a cell value as a floating point
 * number via `maybeParseFloat`, passing `defaultVal` as the fallback value.
 *
 * @param defaultVal - fallback value handed to `maybeParseFloat` (default: 0)
 */
export const float = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (src) => maybeParseFloat(src, defaultVal);
    return coerce;
};
|
||
/**
 * Returns a coercion function which parses a cell value as a base-10 integer
 * via `maybeParseInt`, passing `defaultVal` as the fallback value.
 *
 * @param defaultVal - fallback value handed to `maybeParseInt` (default: 0)
 */
export const int = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (src) => maybeParseInt(src, defaultVal, 10);
    return coerce;
};
|
||
/**
 * Returns a coercion function which parses a cell value as a base-16 integer
 * via `maybeParseInt`, passing `defaultVal` as the fallback value.
 *
 * @param defaultVal - fallback value handed to `maybeParseInt` (default: 0)
 */
export const hex = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (src) => maybeParseInt(src, defaultVal, 16);
    return coerce;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,134 +1,3 @@ | ||
import { isIterable } from "@thi.ng/checks"; | ||
import { ESCAPES, split } from "@thi.ng/strings"; | ||
import { compR, iterator1, Reducer, Transducer } from "@thi.ng/transducers"; | ||
import type { ColumnSpec, CSVOpts, CSVRow } from "./api"; | ||
|
||
/**
 * Line-based CSV parser. Without `src`, returns a transducer consuming lines
 * of text and producing one {@link CSVRow} object per completed record. With
 * an iterable `src`, applies that transducer to it and returns an iterator of
 * rows.
 *
 * @remarks
 * Option defaults applied here: `all: true`, `delim: ","`, `quote: '"'`,
 * `comment: "#"`, `trim: false`. Unless `header` is given, the first
 * non-empty, non-comment line is split by `delim` and used as column names.
 * Empty lines and lines starting with `comment` are skipped unless they occur
 * inside a still-open quoted cell.
 *
 * @param opts - parser options
 * @param src - optional iterable of lines
 */
export function parseCSV(opts?: Partial<CSVOpts>): Transducer<string, CSVRow>;
export function parseCSV(
    opts: Partial<CSVOpts>,
    src: Iterable<string>
): IterableIterator<CSVRow>;
export function parseCSV(opts?: Partial<CSVOpts>, src?: Iterable<string>): any {
    return isIterable(src)
        ? iterator1(parseCSV(opts), src)
        : (rfn: Reducer<any, CSVRow>) => {
              const { all, cols, delim, quote, comment, trim, header } = {
                  all: true,
                  delim: ",",
                  quote: '"',
                  comment: "#",
                  trim: false,
                  ...opts,
              };
              const reduce = rfn[2];
              // column name -> cell position (only for columns listed in `cols`)
              let index: Record<string, number>;
              // cell position -> column name (only used if `all` is enabled)
              let revIndex: Record<number, string>;
              // true until the header line has been seen (or was pre-supplied)
              let first = true;
              // true while a quoted cell spans multiple input lines
              let isQuoted = false;
              let record: string[] = [];
              if (header) {
                  cols && (index = initIndex(header, cols));
                  all && (revIndex = initRevIndex(header));
                  first = false;
              }
              return compR(rfn, (acc, line: string) => {
                  if ((!line.length || line.startsWith(comment)) && !isQuoted)
                      return acc;
                  if (!first) {
                      isQuoted = tokenizeLine(
                          line,
                          record,
                          isQuoted,
                          delim,
                          quote
                      );
                      // record continues on the next line
                      if (isQuoted) return acc;
                      const row: CSVRow = {};
                      all &&
                          record.reduce(
                              (acc, x, i) => (
                                  (acc[revIndex[i]] = trim ? x.trim() : x), acc
                              ),
                              row
                          );
                      cols &&
                          Object.entries(cols).reduce((acc, [id, spec]) => {
                              let val = record[index[id]];
                              // a configured column can be absent from the
                              // header or from a short row; don't call
                              // `.trim()` on a missing cell
                              if (trim && val !== undefined) val = val.trim();
                              acc[spec.alias || id] = spec.coerce
                                  ? spec.coerce(val, acc)
                                  : val;
                              return acc;
                          }, row);
                      record = [];
                      return reduce(acc, row);
                  } else {
                      const names = line.split(delim);
                      cols && (index = initIndex(names, cols));
                      all && (revIndex = initRevIndex(names));
                      first = false;
                      return acc;
                  }
              });
          };
}
|
||
/**
 * Convenience wrapper for {@link parseCSV}: passes `src` through `split` and
 * feeds the result to the parser.
 *
 * @param opts - parser options
 * @param src - CSV source text
 */
export const parseCSVString = (opts: Partial<CSVOpts>, src: string) => {
    const lines = split(src);
    return parseCSV(opts, lines);
};
|
||
/**
 * Splits a single line of CSV text into cells and appends them to `record`.
 * Returns true if the line ends inside a still-open quoted cell, in which
 * case the caller is expected to pass the next line with `isQuoted = true`.
 *
 * @remarks
 * - If called with `isQuoted = true`, the first cell of this line is appended
 *   (prefixed with a newline) to the last cell already in `record`.
 * - The final cell of a line is always collected, even if empty, so `a,b,`
 *   yields 3 cells and blank lines inside a quoted cell are preserved.
 * - A backslash escapes the next character (looked up in `ESCAPES`); a doubled
 *   quote char inside a quoted cell yields a single quote char.
 *
 * @param line - line of text to tokenize
 * @param record - array receiving the cells (mutated)
 * @param isQuoted - true if a quoted cell is still open from a previous line
 * @param delim - cell delimiter
 * @param quote - quote character
 */
export const tokenizeLine = (
    line: string,
    record: string[],
    isQuoted: boolean,
    delim: string,
    quote: string
) => {
    let curr = "";
    let p = "";
    let openQuote = isQuoted;
    // stores the current cell, either as continuation of the previous line's
    // open quoted cell or as a new cell
    const collect = () => {
        if (openQuote && record.length) {
            record[record.length - 1] += "\n" + curr;
        } else {
            record.push(curr);
        }
        openQuote = false;
        curr = "";
    };
    for (let i = 0, n = line.length; i < n; i++) {
        const c = line[i];
        // escaped char
        if (p === "\\") {
            curr += ESCAPES[c] || c;
            // the escape is fully consumed: don't let the escaped char act as
            // escape start or as first half of a `""` pair
            p = "";
            continue;
        }
        // quote open/close & CSV escape pair (aka `""`)
        else if (c === quote) {
            if (!isQuoted) {
                p = "";
                isQuoted = true;
                continue;
            } else if (p === quote) {
                curr += quote;
                p = "";
                continue;
            } else if (line[i + 1] !== quote) isQuoted = false;
        }
        // field delimiter
        else if (!isQuoted && c === delim) {
            collect();
        }
        // record unless escape seq start
        else if (c !== "\\") {
            curr += c;
        }
        p = c;
    }
    // always collect the last cell (even if empty)
    collect();
    return isQuoted;
};
|
||
/**
 * Builds a lookup of column name -> position in `line`, restricted to names
 * which have a truthy entry in `cols`.
 *
 * @param line - column names
 * @param cols - column specs, keyed by column name
 */
const initIndex = (line: string[], cols: Record<string, ColumnSpec>) => {
    const positions: Record<string, number> = {};
    line.forEach((name, pos) => {
        if (cols[name]) positions[name] = pos;
    });
    return positions;
};
|
||
/**
 * Builds a lookup of position -> column name for all items in `line`.
 *
 * @param line - column names
 */
const initRevIndex = (line: string[]) => {
    const names: Record<number, string> = {};
    line.forEach((name, pos) => {
        names[pos] = name;
    });
    return names;
};
export * from "./api"; | ||
export * from "./coerce"; | ||
export * from "./parse"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import { isIterable } from "@thi.ng/checks"; | ||
import { ESCAPES, split } from "@thi.ng/strings"; | ||
import { compR, iterator1, Reducer, Transducer } from "@thi.ng/transducers"; | ||
import type { ColumnSpec, CSVOpts, CSVRow } from "./api"; | ||
|
||
/**
 * Line-based CSV parser. Without `src`, returns a transducer consuming lines
 * of text and producing one {@link CSVRow} object per completed record. With
 * an iterable `src`, applies that transducer to it and returns an iterator of
 * rows.
 *
 * @remarks
 * Option defaults applied here: `all: true`, `delim: ","`, `quote: '"'`,
 * `comment: "#"`, `trim: false`. Unless `header` is given, the first
 * non-empty, non-comment line is split by `delim` and used as column names.
 * Empty lines and lines starting with `comment` are skipped unless they occur
 * inside a still-open quoted cell.
 *
 * @param opts - parser options
 * @param src - optional iterable of lines
 */
export function parseCSV(opts?: Partial<CSVOpts>): Transducer<string, CSVRow>;
export function parseCSV(
    opts: Partial<CSVOpts>,
    src: Iterable<string>
): IterableIterator<CSVRow>;
export function parseCSV(opts?: Partial<CSVOpts>, src?: Iterable<string>): any {
    return isIterable(src)
        ? iterator1(parseCSV(opts), src)
        : (rfn: Reducer<any, CSVRow>) => {
              const { all, cols, delim, quote, comment, trim, header } = {
                  all: true,
                  delim: ",",
                  quote: '"',
                  comment: "#",
                  trim: false,
                  ...opts,
              };
              const reduce = rfn[2];
              // column name -> cell position (only for columns listed in `cols`)
              let index: Record<string, number>;
              // cell position -> column name (only used if `all` is enabled)
              let revIndex: Record<number, string>;
              // true until the header line has been seen (or was pre-supplied)
              let first = true;
              // true while a quoted cell spans multiple input lines
              let isQuoted = false;
              let record: string[] = [];
              if (header) {
                  cols && (index = initIndex(header, cols));
                  all && (revIndex = initRevIndex(header));
                  first = false;
              }
              return compR(rfn, (acc, line: string) => {
                  if ((!line.length || line.startsWith(comment)) && !isQuoted)
                      return acc;
                  if (!first) {
                      isQuoted = tokenizeLine(
                          line,
                          record,
                          isQuoted,
                          delim,
                          quote
                      );
                      // record continues on the next line
                      if (isQuoted) return acc;
                      const row: CSVRow = {};
                      all &&
                          record.reduce(
                              (acc, x, i) => (
                                  (acc[revIndex[i]] = trim ? x.trim() : x), acc
                              ),
                              row
                          );
                      cols &&
                          Object.entries(cols).reduce((acc, [id, spec]) => {
                              let val = record[index[id]];
                              // a configured column can be absent from the
                              // header or from a short row; don't call
                              // `.trim()` on a missing cell
                              if (trim && val !== undefined) val = val.trim();
                              acc[spec.alias || id] = spec.coerce
                                  ? spec.coerce(val, acc)
                                  : val;
                              return acc;
                          }, row);
                      record = [];
                      return reduce(acc, row);
                  } else {
                      const names = line.split(delim);
                      cols && (index = initIndex(names, cols));
                      all && (revIndex = initRevIndex(names));
                      first = false;
                      return acc;
                  }
              });
          };
}
|
||
/**
 * Convenience wrapper for {@link parseCSV}: passes `src` through `split` and
 * feeds the result to the parser.
 *
 * @param opts - parser options
 * @param src - CSV source text
 */
export const parseCSVString = (opts: Partial<CSVOpts>, src: string) => {
    const lines = split(src);
    return parseCSV(opts, lines);
};
|
||
/**
 * Splits a single line of CSV text into cells and appends them to `acc`.
 * Returns true if the line ends inside a still-open quoted cell, in which
 * case the caller is expected to pass the next line with `isQuoted = true`.
 *
 * @remarks
 * - If called with `isQuoted = true`, the first cell of this line is appended
 *   (prefixed with a newline) to the last cell already in `acc`.
 * - The final cell of a line is always collected, even if empty, so `a,b,`
 *   yields 3 cells and blank lines inside a quoted cell are preserved.
 * - A backslash escapes the next character (looked up in `ESCAPES`); a doubled
 *   quote char inside a quoted cell yields a single quote char.
 *
 * @param line - line of text to tokenize
 * @param acc - array receiving the cells (mutated)
 * @param isQuoted - true if a quoted cell is still open from a previous line
 * @param delim - cell delimiter
 * @param quote - quote character
 *
 * @internal
 */
export const tokenizeLine = (
    line: string,
    acc: string[],
    isQuoted: boolean,
    delim: string,
    quote: string
) => {
    let curr = "";
    let p = "";
    let openQuote = isQuoted;
    // stores the current cell, either as continuation of the previous line's
    // open quoted cell or as a new cell
    const collect = () => {
        if (openQuote && acc.length) {
            acc[acc.length - 1] += "\n" + curr;
        } else {
            acc.push(curr);
        }
        openQuote = false;
        curr = "";
    };
    for (let i = 0, n = line.length; i < n; i++) {
        const c = line[i];
        // escaped char
        if (p === "\\") {
            curr += ESCAPES[c] || c;
            // the escape is fully consumed: don't let the escaped char act as
            // escape start or as first half of a `""` pair
            p = "";
            continue;
        }
        // quote open/close & CSV escape pair (aka `""`)
        else if (c === quote) {
            if (!isQuoted) {
                p = "";
                isQuoted = true;
                continue;
            } else if (p === quote) {
                curr += quote;
                p = "";
                continue;
            } else if (line[i + 1] !== quote) isQuoted = false;
        }
        // field delimiter
        else if (!isQuoted && c === delim) {
            collect();
        }
        // record unless escape seq start
        else if (c !== "\\") {
            curr += c;
        }
        p = c;
    }
    // always collect the last cell (even if empty)
    collect();
    return isQuoted;
};
|
||
/**
 * Builds a lookup of column name -> position in `line`, restricted to names
 * which have a truthy entry in `cols`.
 *
 * @param line - column names
 * @param cols - column specs, keyed by column name
 */
const initIndex = (line: string[], cols: Record<string, ColumnSpec>) => {
    const positions: Record<string, number> = {};
    line.forEach((name, pos) => {
        if (cols[name]) positions[name] = pos;
    });
    return positions;
};
|
||
/**
 * Builds a lookup of position -> column name for all items in `line`.
 *
 * @param line - column names
 */
const initRevIndex = (line: string[]) => {
    const names: Record<number, string> = {};
    line.forEach((name, pos) => {
        names[pos] = name;
    });
    return names;
};