From 7fad0d16446181bcc65d59c6f3298e7a106f3203 Mon Sep 17 00:00:00 2001 From: Yuki Yamazaki <35218186+kamiazya@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:07:00 +0900 Subject: [PATCH] Refactor CSV parsing options and assertions (#168) * Refactor CSV parsing options and assertions * Fix quotation and delimiter validation in assertCommonOptions * Add changeset --- .changeset/lazy-geckos-double.md | 5 ++ README.md | 4 +- src/Lexer.spec.ts | 16 ---- src/__tests__/helper.ts | 6 +- src/assertCommonOptions.spec.ts | 127 ++++++++++++------------------- src/assertCommonOptions.ts | 68 ++++++++++++----- src/common/types.ts | 13 ++-- 7 files changed, 116 insertions(+), 123 deletions(-) create mode 100644 .changeset/lazy-geckos-double.md diff --git a/.changeset/lazy-geckos-double.md b/.changeset/lazy-geckos-double.md new file mode 100644 index 00000000..46f70c89 --- /dev/null +++ b/.changeset/lazy-geckos-double.md @@ -0,0 +1,5 @@ +--- +"web-csv-toolbox": minor +--- + +Refactor CSV parsing options and assertions diff --git a/README.md b/README.md index 036817a7..1e050c80 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,6 @@ A CSV Toolbox utilizing Web Standard APIs. - 🧩 Parse CSVs directly from `string`s, `ReadableStream`s, or `Response` objects. - ⚙️ **Advanced Parsing Options**: Customize your experience with various delimiters and quotation marks. - 🔄 Defaults to `,` and `"` respectively. - - 🛠️ Use multi-character/multi-byte delimiters and quotations. - 💾 **Specialized Binary CSV Parsing**: Leverage Stream-based processing for versatility and strength. - 🔄 Flexible BOM handling. - 🗜️ Supports various compression formats. @@ -294,8 +293,7 @@ You can use WebAssembly to parse CSV data for high performance. - Parsing with WebAssembly is faster than parsing with JavaScript, but it takes time to load the WebAssembly module. - Supports only UTF-8 encoding csv data. -- Demiliter characters are limited to single-byte characters. -- Quotation characters is only `"`. 
(Double quotation mark) +- Quotation characters are only `"`. (Double quotation mark) - If you pass a different character, it will throw an error. ```ts diff --git a/src/Lexer.spec.ts b/src/Lexer.spec.ts index 3f0ff373..a6b9c898 100644 --- a/src/Lexer.spec.ts +++ b/src/Lexer.spec.ts @@ -195,22 +195,6 @@ describe("class Lexer", () => { expect(actual).toStrictEqual(expected); }, ), - { - examples: [ - [ - { - csv: "QfQQff0Qf0Qf", - data: [["Q", "0"]], - options: { delimiter: "f0", quotation: "Qf" }, - expected: [ - { type: Field, value: "Q" }, - FieldDelimiter, - { type: Field, value: "0" }, - ], - }, - ], - ], - }, ); }); diff --git a/src/__tests__/helper.ts b/src/__tests__/helper.ts index ca608887..92e96670 100644 --- a/src/__tests__/helper.ts +++ b/src/__tests__/helper.ts @@ -113,8 +113,10 @@ export namespace FC { } const { excludes = [], ...constraints }: DelimiterConstraints = options; return text({ - minLength: 1, ...constraints, + minLength: 1, + maxLength: 1, + kindExcludes: ["string16bits", "unicode"], }) .filter(_excludeFilter([...CRLF])) .filter(_excludeFilter(excludes)); @@ -132,6 +134,8 @@ export namespace FC { return text({ ...constraints, minLength: 1, + maxLength: 1, + kindExcludes: ["string16bits", "unicode"], }) .filter(_excludeFilter([...CRLF])) .filter(_excludeFilter(excludes)); diff --git a/src/assertCommonOptions.spec.ts b/src/assertCommonOptions.spec.ts index 8d3371e7..573d0ddc 100644 --- a/src/assertCommonOptions.spec.ts +++ b/src/assertCommonOptions.spec.ts @@ -2,10 +2,10 @@ import { fc } from "@fast-check/vitest"; import { describe, expect, it } from "vitest"; import { FC } from "./__tests__/helper.ts"; import { assertCommonOptions } from "./assertCommonOptions.ts"; -import { COMMA, CRLF, DOUBLE_QUOTE } from "./constants.ts"; +import { COMMA, CR, CRLF, DOUBLE_QUOTE, LF } from "./constants.ts"; describe("function assertCommonOptions", () => { - it("should be throw error if quotation is a empty character", () => { + it("should throw an error 
if quotation is an empty character", () => { expect(() => assertCommonOptions({ quotation: "", @@ -14,7 +14,7 @@ describe("function assertCommonOptions", () => { ).toThrow("quotation must not be empty"); }); - it("should be throw error if delimiter is a empty character", () => { + it("should throw an error if delimiter is an empty character", () => { expect(() => assertCommonOptions({ quotation: COMMA, @@ -23,88 +23,57 @@ describe("function assertCommonOptions", () => { ).toThrow("delimiter must not be empty"); }); - it("should be throw error if quotation includes CR or LF", () => + it("should throw an error if delimiter is the same as quotation", async () => { fc.assert( fc.property( - fc.gen().map((g) => { - const EOL = g(() => fc.constantFrom("\n", "\r")); - const prefix = g(FC.text); - const sufix = g(FC.text); - return prefix + EOL + sufix; - }), - (invalidQuotation) => { + FC.text({ minLength: 1, maxLength: 1, excludes: [...CRLF] }).filter( + (v) => v.length === 1, + ), + (value) => { expect(() => - assertCommonOptions({ - quotation: invalidQuotation, - delimiter: DOUBLE_QUOTE, - }), - ).toThrow("quotation must not include CR or LF"); + assertCommonOptions({ quotation: value, delimiter: value }), + ).toThrow( + "delimiter must not be the same as quotation, use different characters", + ); }, ), - { - examples: [ - // "\n" is included - ["\n"], - // "\r" is included - ["\r"], - // "\n" and "\r" are included - ["\n\r"], - ], - }, - )); + ); + }); - it("should be throw error if delimiter includes CR or LF", () => - fc.assert( - fc.property( - fc.gen().map((g) => { - const EOL = g(() => fc.constantFrom("\n", "\r")); - const prefix = g(FC.text); - const sufix = g(FC.text); - return prefix + EOL + sufix; + it("should throw an error if quotation is CR or LF", () => { + for (const quotation of [CR, LF]) { + expect(() => + assertCommonOptions({ + quotation: quotation, + delimiter: DOUBLE_QUOTE, }), - (invalidDelimiter) => { - expect(() => - assertCommonOptions({ - 
quotation: COMMA, - delimiter: invalidDelimiter, - }), - ).toThrow("delimiter must not include CR or LF"); - }, - ), - { - examples: [ - // "\n" is included - ["\n"], - // "\r" is included - ["\r"], - // "\n" and "\r" are included - ["\n\r"], - ], - }, - )); - - it("should be throw error if delimiter and quotation include each other as a substring", () => - fc.assert( - fc.property( - fc.gen().map((g) => { - const excludes = [...CRLF]; - const A = g(FC.text, { minLength: 1, excludes }); - // B is a string that includes A as a substring. - const B = g(FC.text, { excludes }) + A + g(FC.text, { excludes }); - return { A, B }; + ).toThrow("quotation must not include CR or LF"); + } + for (const delimiter of [CR, LF]) { + expect(() => + assertCommonOptions({ + quotation: COMMA, + delimiter: delimiter, }), - ({ A, B }) => { - expect(() => - assertCommonOptions({ quotation: A, delimiter: B }), - ).toThrow( - "delimiter and quotation must not include each other as a substring", - ); - expect(() => - assertCommonOptions({ quotation: B, delimiter: A }), - ).toThrow( - "delimiter and quotation must not include each other as a substring", - ); - }, - ), - )); + ).toThrow("delimiter must not include CR or LF"); + } + }); + + it("should throw an error if quotation is not a string", () => { + expect(() => + assertCommonOptions({ + quotation: 1 as unknown as string, + delimiter: DOUBLE_QUOTE, + }), + ).toThrow("quotation must be a string"); + }); + + it("should throw an error if delimiter is not a string", () => { + expect(() => + assertCommonOptions({ + quotation: COMMA, + delimiter: 1 as unknown as string, + }), + ).toThrow("delimiter must be a string"); + }); }); diff --git a/src/assertCommonOptions.ts b/src/assertCommonOptions.ts index e21c4694..1233f5b5 100644 --- a/src/assertCommonOptions.ts +++ b/src/assertCommonOptions.ts @@ -2,29 +2,59 @@ import type { CommonOptions } from "./common/types.ts"; import { CR, LF } from "./constants.ts"; /** - * Assert that the options are 
valid. - * - * @param options The options to assert. + * Asserts that the provided value is a string and satisfies certain conditions. + * @param value - The value to be checked. + * @param name - The name of the option. + * @throws {Error} If the value is not a string or does not satisfy the conditions. */ -export function assertCommonOptions(options: Required): void { - if (typeof options.quotation === "string" && options.quotation.length === 0) { - throw new Error("quotation must not be empty"); - } - if (typeof options.delimiter === "string" && options.delimiter.length === 0) { - throw new Error("delimiter must not be empty"); - } - if (options.quotation.includes(LF) || options.quotation.includes(CR)) { - throw new Error("quotation must not include CR or LF"); +function assertOptionValue( + value: string, + name: string, +): asserts value is string { + if (typeof value === "string") { + switch (true) { + case value.length === 0: + throw new Error(`${name} must not be empty`); + case value.length > 1: + throw new Error(`${name} must be a single character`); + case value === LF: + case value === CR: + throw new Error(`${name} must not include CR or LF`); + default: + break; + } + } else { + throw new Error(`${name} must be a string`); } - if (options.delimiter.includes(LF) || options.delimiter.includes(CR)) { - throw new Error("delimiter must not include CR or LF"); +} + +/** + * Asserts that the provided options object contains all the required properties. + * Throws an error if any required property is missing + * or if the delimiter or quotation is not a single 1-byte character, + * or if the delimiter is the same as the quotation. + * + * @example + * + * ```ts + * assertCommonOptions({ + * quotation: '"', + * delimiter: ',', + * }); + * ``` + * + * @param options - The options object to be validated. + * @throws {Error} If any required property is missing or if the delimiter is the same as the quotation. 
+ */ +export function assertCommonOptions( + options: Required, +): asserts options is Required { + for (const [name, value] of Object.entries(options)) { + assertOptionValue(value, name); } - if ( - options.delimiter.includes(options.quotation) || - options.quotation.includes(options.delimiter) - ) { + if (options.delimiter === options.quotation) { throw new Error( - "delimiter and quotation must not include each other as a substring", + "delimiter must not be the same as quotation, use different characters", ); } } diff --git a/src/common/types.ts b/src/common/types.ts index 52cff854..9af5a412 100644 --- a/src/common/types.ts +++ b/src/common/types.ts @@ -23,20 +23,23 @@ export type Token = FieldToken | typeof FieldDelimiter | typeof RecordDelimiter; export interface CommonOptions { /** * CSV field delimiter. + * If you want to parse TSV, specify `'\t'`. * * @remarks - * If you want to parse TSV, specify `'\t'`. + * Detailed restrictions are as follows: + * + * - Must not be empty + * - Must be a single character + * - Multi-byte characters are not supported + * - Must not include CR or LF + * - Must not be the same as the quotation * - * This library supports multi-character delimiters. * @default ',' */ delimiter?: string; /** * CSV field quotation. * - * @remarks - * This library supports multi-character quotations. - * * @default '"' */ quotation?: string;