Skip to content

Commit

Permalink
feat(csv): add coercions, restructure
Browse files Browse the repository at this point in the history
  • Loading branch information
postspectacular committed Nov 17, 2020
1 parent 2b07100 commit 93d79ec
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 136 deletions.
8 changes: 6 additions & 2 deletions packages/csv/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@ import type { Fn2 } from "@thi.ng/api";

/**
 * Result object produced for a single CSV row. Cell values are stored under
 * their column name (or configured alias).
 */
export type CSVRow = Record<string, any>;

/**
 * Cell value transformer: a 2-arg function receiving the cell's string value
 * and the (possibly incomplete) result object of the current row. Its return
 * value is used as the actual value for the cell.
 */
export type CoercionFn = Fn2<string, CSVRow, any>;

/**
 * Per-column parse options.
 */
export interface ColumnSpec {
    /**
     * Rename column to given name in result objects.
     */
    alias?: string;
    /**
     * Cell value transformer. This is a 2-arg function receiving string value
     * and (incomplete) result object of current row. Return value is used as
     * actual value for the cell.
     */
    coerce?: CoercionFn;
}

export interface CSVOpts {
Expand Down
11 changes: 11 additions & 0 deletions packages/csv/src/coerce.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { maybeParseFloat, maybeParseInt } from "@thi.ng/strings";
import { CoercionFn } from "./api";

/**
 * Returns a {@link CoercionFn} which passes the cell's string value and
 * `defaultVal` to `maybeParseFloat()`.
 *
 * NOTE(review): `defaultVal` is presumably the value used when the cell can't
 * be parsed as float - confirm against `maybeParseFloat` docs.
 *
 * @param defaultVal - fallback value handed to the parser (default: 0)
 */
export const float = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (x) => maybeParseFloat(x, defaultVal);
    return coerce;
};

/**
 * Returns a {@link CoercionFn} which passes the cell's string value,
 * `defaultVal` and radix 10 to `maybeParseInt()`.
 *
 * NOTE(review): `defaultVal` is presumably the value used when the cell can't
 * be parsed as integer - confirm against `maybeParseInt` docs.
 *
 * @param defaultVal - fallback value handed to the parser (default: 0)
 */
export const int = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (x) => maybeParseInt(x, defaultVal, 10);
    return coerce;
};

/**
 * Returns a {@link CoercionFn} which passes the cell's string value,
 * `defaultVal` and radix 16 to `maybeParseInt()`.
 *
 * NOTE(review): `defaultVal` is presumably the value used when the cell can't
 * be parsed as hexadecimal integer - confirm against `maybeParseInt` docs.
 *
 * @param defaultVal - fallback value handed to the parser (default: 0)
 */
export const hex = (defaultVal = 0): CoercionFn => {
    const coerce: CoercionFn = (x) => maybeParseInt(x, defaultVal, 16);
    return coerce;
};
137 changes: 3 additions & 134 deletions packages/csv/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,134 +1,3 @@
import { isIterable } from "@thi.ng/checks";
import { ESCAPES, split } from "@thi.ng/strings";
import { compR, iterator1, Reducer, Transducer } from "@thi.ng/transducers";
import type { ColumnSpec, CSVOpts, CSVRow } from "./api";

/**
 * Transducer converting lines of CSV text into row objects. If `src` is
 * given (and iterable), the transducer is applied to it directly via
 * `iterator1()` and the resulting iterator is returned instead.
 *
 * @remarks
 * Behavior, as configured via `opts` (defaults in parentheses):
 *
 * - `delim` (`,`) / `quote` (`"`): passed to {@link tokenizeLine}
 * - `comment` (`#`): lines starting with this string are skipped
 * - `header`: if given, used as column names and the first line is treated
 *   as data. Otherwise the first non-skipped line is split via
 *   `line.split(delim)` (no quote handling or trimming) and used as header.
 * - `all` (true): every cell of a row is added under its header name
 * - `cols`: per-column specs. Applied after `all`, so these values
 *   (stored under `alias` or the column name) overwrite same-named keys.
 * - `trim` (false): trim cell values before storing/coercing
 *
 * Empty lines and comment lines are only skipped while no quoted cell is
 * open. A row is only emitted once {@link tokenizeLine} reports that all
 * quoted cells have been closed (i.e. rows can span multiple lines).
 *
 * @param opts - parse options
 * @param src - optional source lines
 */
export function parseCSV(opts?: Partial<CSVOpts>): Transducer<string, CSVRow>;
export function parseCSV(
    opts: Partial<CSVOpts>,
    src: Iterable<string>
): IterableIterator<CSVRow>;
export function parseCSV(opts?: Partial<CSVOpts>, src?: Iterable<string>): any {
    return isIterable(src)
        ? iterator1(parseCSV(opts), src)
        : (rfn: Reducer<any, CSVRow>) => {
              const { all, cols, delim, quote, comment, trim, header } = {
                  all: true,
                  delim: ",",
                  quote: '"',
                  comment: "#",
                  trim: false,
                  ...opts,
              };
              const reduce = rfn[2];
              // column name -> cell index (only for columns listed in `cols`)
              let index: Record<string, number>;
              // cell index -> column name (only used if `all` is enabled)
              let revIndex: Record<number, string>;
              // true until the header line has been consumed
              let first = true;
              // true while a quoted cell is still open from a previous line
              let isQuoted = false;
              // cells of the row currently being assembled
              let record: string[] = [];
              if (header) {
                  cols && (index = initIndex(header, cols));
                  all && (revIndex = initRevIndex(header));
                  first = false;
              }
              return compR(rfn, (acc, line: string) => {
                  if ((!line.length || line.startsWith(comment)) && !isQuoted)
                      return acc;
                  if (!first) {
                      isQuoted = tokenizeLine(
                          line,
                          record,
                          isQuoted,
                          delim,
                          quote
                      );
                      // row continues on next line
                      if (isQuoted) return acc;
                      const row: CSVRow = {};
                      all &&
                          record.reduce(
                              (acc, x, i) => (
                                  (acc[revIndex[i]] = trim ? x.trim() : x), acc
                              ),
                              row
                          );
                      cols &&
                          Object.entries(cols).reduce((acc, [id, spec]) => {
                              let val = record[index[id]];
                              // The cell is undefined if the column is not
                              // part of the header or the row has too few
                              // cells. Only trim actual strings (previously
                              // this threw a TypeError).
                              if (trim && val !== undefined) val = val.trim();
                              acc[spec.alias || id] = spec.coerce
                                  ? spec.coerce(val, acc)
                                  : val;
                              return acc;
                          }, row);
                      record = [];
                      return reduce(acc, row);
                  } else {
                      const names = line.split(delim);
                      cols && (index = initIndex(names, cols));
                      all && (revIndex = initRevIndex(names));
                      first = false;
                      return acc;
                  }
              });
          };
}

/**
 * Like {@link parseCSV}, but takes a single string as source, which is first
 * passed through `split()`.
 *
 * NOTE(review): `split()` (from `@thi.ng/strings`) presumably yields the
 * individual lines of `src` - confirm.
 *
 * @param opts - parse options
 * @param src - CSV source string
 */
export const parseCSVString = (opts: Partial<CSVOpts>, src: string) => {
    const lines = split(src);
    return parseCSV(opts, lines);
};

/**
 * Tokenizes a single line of CSV text into cell values, adds them to
 * `record` and returns the quote state at the end of the line.
 *
 * @remarks
 * - If `isQuoted` is true on entry, the line is treated as continuation of a
 *   quoted cell started on an earlier line: its text up to the first
 *   unquoted delimiter (or end of line) is appended to the last item in
 *   `record`, prefixed with a newline.
 * - A doubled quote char inside a quoted cell yields a single quote char.
 * - A backslash starts an escape sequence: the following char is looked up
 *   in `ESCAPES` (falling back to the char itself).
 * - A trailing empty cell (e.g. line ending with a delimiter) is not added.
 *
 * @param line - line to tokenize
 * @param record - receives the cell values (mutated in place)
 * @param isQuoted - true if a quoted cell is still open from a previous line
 * @param delim - field delimiter
 * @param quote - quote char
 * @returns true if the line ends inside a quoted cell
 */
export const tokenizeLine = (
    line: string,
    record: string[],
    isQuoted: boolean,
    delim: string,
    quote: string
) => {
    let curr = "";
    let p = "";
    let openQuote = isQuoted;
    // stores current cell, either as continuation of last cell or as new one
    const collect = () => {
        if (openQuote) record[record.length - 1] += "\n" + curr;
        else record.push(curr);
    };
    for (let i = 0, n = line.length; i < n; i++) {
        const c = line[i];
        // escaped char
        if (p === "\\") {
            curr += ESCAPES[c] || c;
            // Escape sequence is complete. Do NOT remember `c` as previous
            // char, otherwise an escaped backslash would escape the next
            // char too and an escaped quote could be mistaken for the first
            // half of a `""` pair.
            p = "";
            continue;
        }
        // quote open/close & CSV escape pair (aka `""`)
        else if (c === quote) {
            if (!isQuoted) {
                p = "";
                isQuoted = true;
                continue;
            } else if (p === quote) {
                curr += quote;
                p = "";
                continue;
            } else if (line[i + 1] !== quote) isQuoted = false;
        }
        // field delimiter
        else if (!isQuoted && c === delim) {
            collect();
            openQuote = false;
            curr = "";
        }
        // record unless escape seq start
        else if (c !== "\\") {
            curr += c;
        }
        p = c;
    }
    // Always store the cell if it continues a quoted cell from a previous
    // line (keeps blank lines inside quoted cells) or if its quote is still
    // open (so the next line's continuation is appended to the right cell).
    // Otherwise only store non-empty text.
    if (openQuote || isQuoted || curr !== "") collect();
    return isQuoted;
};

/**
 * Builds a lookup table mapping column names to their position in `line`.
 * Only names with a (truthy) entry in `cols` are included. If a name occurs
 * more than once, its last position is used.
 *
 * @param line - column names
 * @param cols - column specs
 */
const initIndex = (line: string[], cols: Record<string, ColumnSpec>) => {
    const index: Record<string, number> = {};
    line.forEach((id, i) => {
        if (cols[id]) index[id] = i;
    });
    return index;
};

/**
 * Builds a lookup table mapping cell positions to the column names given in
 * `line`.
 *
 * @param line - column names
 */
const initRevIndex = (line: string[]) => {
    const names: Record<number, string> = {};
    line.forEach((name, i) => {
        names[i] = name;
    });
    return names;
};
export * from "./api";
export * from "./coerce";
export * from "./parse";
143 changes: 143 additions & 0 deletions packages/csv/src/parse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { isIterable } from "@thi.ng/checks";
import { ESCAPES, split } from "@thi.ng/strings";
import { compR, iterator1, Reducer, Transducer } from "@thi.ng/transducers";
import type { ColumnSpec, CSVOpts, CSVRow } from "./api";

/**
 * Transducer converting lines of CSV text into row objects. If `src` is
 * given (and iterable), the transducer is applied to it directly via
 * `iterator1()` and the resulting iterator is returned instead.
 *
 * @remarks
 * Behavior, as configured via `opts` (defaults in parentheses):
 *
 * - `delim` (`,`) / `quote` (`"`): passed to {@link tokenizeLine}
 * - `comment` (`#`): lines starting with this string are skipped
 * - `header`: if given, used as column names and the first line is treated
 *   as data. Otherwise the first non-skipped line is split via
 *   `line.split(delim)` (no quote handling or trimming) and used as header.
 * - `all` (true): every cell of a row is added under its header name
 * - `cols`: per-column specs. Applied after `all`, so these values
 *   (stored under `alias` or the column name) overwrite same-named keys.
 * - `trim` (false): trim cell values before storing/coercing
 *
 * Empty lines and comment lines are only skipped while no quoted cell is
 * open. A row is only emitted once {@link tokenizeLine} reports that all
 * quoted cells have been closed (i.e. rows can span multiple lines).
 *
 * @param opts - parse options
 * @param src - optional source lines
 */
export function parseCSV(opts?: Partial<CSVOpts>): Transducer<string, CSVRow>;
export function parseCSV(
    opts: Partial<CSVOpts>,
    src: Iterable<string>
): IterableIterator<CSVRow>;
export function parseCSV(opts?: Partial<CSVOpts>, src?: Iterable<string>): any {
    return isIterable(src)
        ? iterator1(parseCSV(opts), src)
        : (rfn: Reducer<any, CSVRow>) => {
              const { all, cols, delim, quote, comment, trim, header } = {
                  all: true,
                  delim: ",",
                  quote: '"',
                  comment: "#",
                  trim: false,
                  ...opts,
              };
              const reduce = rfn[2];
              // column name -> cell index (only for columns listed in `cols`)
              let index: Record<string, number>;
              // cell index -> column name (only used if `all` is enabled)
              let revIndex: Record<number, string>;
              // true until the header line has been consumed
              let first = true;
              // true while a quoted cell is still open from a previous line
              let isQuoted = false;
              // cells of the row currently being assembled
              let record: string[] = [];
              if (header) {
                  cols && (index = initIndex(header, cols));
                  all && (revIndex = initRevIndex(header));
                  first = false;
              }
              return compR(rfn, (acc, line: string) => {
                  if ((!line.length || line.startsWith(comment)) && !isQuoted)
                      return acc;
                  if (!first) {
                      isQuoted = tokenizeLine(
                          line,
                          record,
                          isQuoted,
                          delim,
                          quote
                      );
                      // row continues on next line
                      if (isQuoted) return acc;
                      const row: CSVRow = {};
                      all &&
                          record.reduce(
                              (acc, x, i) => (
                                  (acc[revIndex[i]] = trim ? x.trim() : x), acc
                              ),
                              row
                          );
                      cols &&
                          Object.entries(cols).reduce((acc, [id, spec]) => {
                              let val = record[index[id]];
                              // The cell is undefined if the column is not
                              // part of the header or the row has too few
                              // cells. Only trim actual strings (previously
                              // this threw a TypeError).
                              if (trim && val !== undefined) val = val.trim();
                              acc[spec.alias || id] = spec.coerce
                                  ? spec.coerce(val, acc)
                                  : val;
                              return acc;
                          }, row);
                      record = [];
                      return reduce(acc, row);
                  } else {
                      const names = line.split(delim);
                      cols && (index = initIndex(names, cols));
                      all && (revIndex = initRevIndex(names));
                      first = false;
                      return acc;
                  }
              });
          };
}

/**
 * Like {@link parseCSV}, but takes a single string as source, which is first
 * passed through `split()`.
 *
 * NOTE(review): `split()` (from `@thi.ng/strings`) presumably yields the
 * individual lines of `src` - confirm.
 *
 * @param opts - parse options
 * @param src - CSV source string
 */
export const parseCSVString = (opts: Partial<CSVOpts>, src: string) => {
    const lines = split(src);
    return parseCSV(opts, lines);
};

/**
 * Tokenizes a single line of CSV text into cell values, adds them to `acc`
 * and returns the quote state at the end of the line.
 *
 * @remarks
 * - If `isQuoted` is true on entry, the line is treated as continuation of a
 *   quoted cell started on an earlier line: its text up to the first
 *   unquoted delimiter (or end of line) is appended to the last item in
 *   `acc`, prefixed with a newline.
 * - A doubled quote char inside a quoted cell yields a single quote char.
 * - A backslash starts an escape sequence: the following char is looked up
 *   in `ESCAPES` (falling back to the char itself).
 * - A trailing empty cell (e.g. line ending with a delimiter) is not added.
 *
 * @param line - line to tokenize
 * @param acc - receives the cell values (mutated in place)
 * @param isQuoted - true if a quoted cell is still open from a previous line
 * @param delim - field delimiter
 * @param quote - quote char
 * @returns true if the line ends inside a quoted cell
 *
 * @internal
 */
export const tokenizeLine = (
    line: string,
    acc: string[],
    isQuoted: boolean,
    delim: string,
    quote: string
) => {
    let curr = "";
    let p = "";
    let openQuote = isQuoted;
    // stores current cell, either as continuation of last cell or as new one
    const collect = () => {
        if (openQuote) acc[acc.length - 1] += "\n" + curr;
        else acc.push(curr);
    };
    for (let i = 0, n = line.length; i < n; i++) {
        const c = line[i];
        // escaped char
        if (p === "\\") {
            curr += ESCAPES[c] || c;
            // Escape sequence is complete. Do NOT remember `c` as previous
            // char, otherwise an escaped backslash would escape the next
            // char too and an escaped quote could be mistaken for the first
            // half of a `""` pair.
            p = "";
            continue;
        }
        // quote open/close & CSV escape pair (aka `""`)
        else if (c === quote) {
            if (!isQuoted) {
                p = "";
                isQuoted = true;
                continue;
            } else if (p === quote) {
                curr += quote;
                p = "";
                continue;
            } else if (line[i + 1] !== quote) isQuoted = false;
        }
        // field delimiter
        else if (!isQuoted && c === delim) {
            collect();
            openQuote = false;
            curr = "";
        }
        // record unless escape seq start
        else if (c !== "\\") {
            curr += c;
        }
        p = c;
    }
    // Always store the cell if it continues a quoted cell from a previous
    // line (keeps blank lines inside quoted cells) or if its quote is still
    // open (so the next line's continuation is appended to the right cell).
    // Otherwise only store non-empty text.
    if (openQuote || isQuoted || curr !== "") collect();
    return isQuoted;
};

/**
 * Builds a lookup table mapping column names to their position in `line`.
 * Only names with a (truthy) entry in `cols` are included. If a name occurs
 * more than once, its last position is used.
 *
 * @param line - column names
 * @param cols - column specs
 */
const initIndex = (line: string[], cols: Record<string, ColumnSpec>) => {
    const index: Record<string, number> = {};
    line.forEach((id, i) => {
        if (cols[id]) index[id] = i;
    });
    return index;
};

/**
 * Builds a lookup table mapping cell positions to the column names given in
 * `line`.
 *
 * @param line - column names
 */
const initRevIndex = (line: string[]) => {
    const names: Record<number, string> = {};
    line.forEach((name, i) => {
        names[i] = name;
    });
    return names;
};

0 comments on commit 93d79ec

Please sign in to comment.