diff --git a/package.json b/package.json index 217e3ad6c..1de559297 100644 --- a/package.json +++ b/package.json @@ -44,7 +44,6 @@ "dependencies": { "@nuxt/kit": "^3.0.0-rc.8", "consola": "^2.15.3", - "csvtojson": "^2.0.10", "defu": "^6.1.0", "destr": "^1.1.1", "detab": "^3.0.1", @@ -87,6 +86,7 @@ "@nuxtjs/eslint-config-typescript": "latest", "@types/ws": "^8.5.3", "c8": "^7.12.0", + "csvtojson": "^2.0.10", "eslint": "^8.22.0", "globby": "^13.1.2", "husky": "^8.0.1", diff --git a/src/module.ts b/src/module.ts index 94ec66344..4c4998e24 100644 --- a/src/module.ts +++ b/src/module.ts @@ -145,7 +145,10 @@ export interface ModuleOptions { * * @default {} */ - csv: false | Record + csv: false | { + json?: boolean + delimiter?: string + } /** * Enable/Disable navigation. * * @default {} */ @@ -217,7 +220,10 @@ export default defineNuxtModule({ tags: Object.fromEntries(PROSE_TAGS.map(t => [t, `prose-${t}`])) }, yaml: {}, - csv: {}, + csv: { + delimiter: ',', + json: true + }, navigation: { fields: [] }, diff --git a/src/runtime/transformers/csv.ts b/src/runtime/transformers/csv.ts deleted file mode 100644 index 3ddd3d45c..000000000 --- a/src/runtime/transformers/csv.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { ParsedContent } from '../types' -import { defineTransformer } from './utils' - -export default defineTransformer({ - name: 'csv', - extensions: ['.csv'], - parse: async (_id, content, options = {}) => { - const csvToJson: any = await import('csvtojson').then(m => m.default || m) - - const parsed = await csvToJson({ output: 'json', ...options }) - .fromString(content) - - return { - _id, - _type: 'csv', - body: parsed - } - } -}) diff --git a/src/runtime/transformers/csv/create-tokenizer.ts b/src/runtime/transformers/csv/create-tokenizer.ts new file mode 100644 index 000000000..aa5e5e85e --- /dev/null +++ b/src/runtime/transformers/csv/create-tokenizer.ts @@ -0,0 +1,600 @@ +// Do not edit this file. This is code generated by micromark. 
+// See: https://github.com/micromark/micromark/blob/ed234535990d3e968f3c108d03f3235d733c43ac/packages/micromark/dev/lib/create-tokenizer.js +// @ts-nocheck +/* eslint-disable */ +/** + * @typedef {import('micromark-util-types').Code} Code + * @typedef {import('micromark-util-types').Chunk} Chunk + * @typedef {import('micromark-util-types').Point} Point + * @typedef {import('micromark-util-types').Token} Token + * @typedef {import('micromark-util-types').Effects} Effects + * @typedef {import('micromark-util-types').State} State + * @typedef {import('micromark-util-types').Construct} Construct + * @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct + * @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord + * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext + * @typedef {import('micromark-util-types').ParseContext} ParseContext + */ + +/** + * @typedef Info + * @property {() => void} restore + * @property {number} from + * + * @callback ReturnHandle + * Handle a successful run. + * @param {Construct} construct + * @param {Info} info + * @returns {void} + */ + import {markdownLineEnding} from 'micromark-util-character' + import {push, splice} from 'micromark-util-chunked' + import {resolveAll} from 'micromark-util-resolve-all' + + /** + * Create a tokenizer. + * Tokenizers deal with one type of data (e.g., containers, flow, text). + * The parser is the object dealing with it all. + * `initialize` works like other constructs, except that only its `tokenize` + * function is used, in which case it doesn’t receive an `ok` or `nok`. + * `from` can be given to set the point before the first character, although + * when further lines are indented, they must be set with `defineSkip`. 
+ * + * @param {ParseContext} parser + * @param {InitialConstruct} initialize + * @param {Omit} [from] + * @returns {TokenizeContext} + */ + export function createTokenizer(parser, initialize, from) { + /** @type {Point} */ + let point = Object.assign( + from + ? Object.assign({}, from) + : { + line: 1, + column: 1, + offset: 0 + }, + { + _index: 0, + _bufferIndex: -1 + } + ) + /** @type {Record} */ + + const columnStart = {} + /** @type {Construct[]} */ + + const resolveAllConstructs = [] + /** @type {Chunk[]} */ + + let chunks = [] + /** @type {Token[]} */ + + let stack = [] + /** @type {boolean|undefined} */ + + let consumed = true + /** + * Tools used for tokenizing. + * + * @type {Effects} + */ + + const effects = { + consume, + enter, + exit, + attempt: constructFactory(onsuccessfulconstruct), + check: constructFactory(onsuccessfulcheck), + interrupt: constructFactory(onsuccessfulcheck, { + interrupt: true + }) + } + /** + * State and tools for resolving and serializing. + * + * @type {TokenizeContext} + */ + + const context = { + previous: null, + code: null, + containerState: {}, + events: [], + parser, + sliceStream, + sliceSerialize, + now, + defineSkip, + write + } + /** + * The state function. + * + * @type {State|void} + */ + + let state = initialize.tokenize.call(context, effects) + /** + * Track which character we expect to be consumed, to catch bugs. + * + * @type {Code} + */ + + let expectedCode + + if (initialize.resolveAll) { + resolveAllConstructs.push(initialize) + } + + return context + /** @type {TokenizeContext['write']} */ + + function write(slice) { + chunks = push(chunks, slice) + main() // Exit if we’re not done, resolve might change stuff. + + if (chunks[chunks.length - 1] !== null) { + return [] + } + + addResult(initialize, 0) // Otherwise, resolve, and exit. + + context.events = resolveAll(resolveAllConstructs, context.events, context) + return context.events + } // + // Tools. 
+ // + + /** @type {TokenizeContext['sliceSerialize']} */ + + function sliceSerialize(token, expandTabs) { + return serializeChunks(sliceStream(token), expandTabs) + } + /** @type {TokenizeContext['sliceStream']} */ + + function sliceStream(token) { + return sliceChunks(chunks, token) + } + /** @type {TokenizeContext['now']} */ + + function now() { + return Object.assign({}, point) + } + /** @type {TokenizeContext['defineSkip']} */ + + function defineSkip(value) { + columnStart[value.line] = value.column + accountForPotentialSkip() + } // + // State management. + // + + /** + * Main loop (note that `_index` and `_bufferIndex` in `point` are modified by + * `consume`). + * Here is where we walk through the chunks, which either include strings of + * several characters, or numerical character codes. + * The reason to do this in a loop instead of a call is so the stack can + * drain. + * + * @returns {void} + */ + + function main() { + /** @type {number} */ + let chunkIndex + + while (point._index < chunks.length) { + const chunk = chunks[point._index] // If we’re in a buffer chunk, loop through it. + + if (typeof chunk === 'string') { + chunkIndex = point._index + + if (point._bufferIndex < 0) { + point._bufferIndex = 0 + } + + while ( + point._index === chunkIndex && + point._bufferIndex < chunk.length + ) { + go(chunk.charCodeAt(point._bufferIndex)) + } + } else { + go(chunk) + } + } + } + /** + * Deal with one code. + * + * @param {Code} code + * @returns {void} + */ + + function go(code) { + consumed = undefined + expectedCode = code + state = state(code) + } + /** @type {Effects['consume']} */ + + function consume(code) { + if (markdownLineEnding(code)) { + point.line++ + point.column = 1 + point.offset += code === -3 ? 2 : 1 + accountForPotentialSkip() + } else if (code !== -1) { + point.column++ + point.offset++ + } // Not in a string chunk. + + if (point._bufferIndex < 0) { + point._index++ + } else { + point._bufferIndex++ // At end of string chunk. 
+ // @ts-expect-error Points w/ non-negative `_bufferIndex` reference + // strings. + + if (point._bufferIndex === chunks[point._index].length) { + point._bufferIndex = -1 + point._index++ + } + } // Expose the previous character. + + context.previous = code // Mark as consumed. + + consumed = true + } + /** @type {Effects['enter']} */ + + function enter(type, fields) { + /** @type {Token} */ + // @ts-expect-error Patch instead of assign required fields to help GC. + const token = fields || {} + token.type = type + token.start = now() + context.events.push(['enter', token, context]) + stack.push(token) + return token + } + /** @type {Effects['exit']} */ + + function exit(type) { + const token = stack.pop() + token.end = now() + context.events.push(['exit', token, context]) + return token + } + /** + * Use results. + * + * @type {ReturnHandle} + */ + + function onsuccessfulconstruct(construct, info) { + addResult(construct, info.from) + } + /** + * Discard results. + * + * @type {ReturnHandle} + */ + + function onsuccessfulcheck(_, info) { + info.restore() + } + /** + * Factory to attempt/check/interrupt. + * + * @param {ReturnHandle} onreturn + * @param {Record} [fields] + */ + + function constructFactory(onreturn, fields) { + return hook + /** + * Handle either an object mapping codes to constructs, a list of + * constructs, or a single construct. + * + * @param {Construct|Construct[]|ConstructRecord} constructs + * @param {State} returnState + * @param {State} [bogusState] + * @returns {State} + */ + + function hook(constructs, returnState, bogusState) { + /** @type {Construct[]} */ + let listOfConstructs + /** @type {number} */ + + let constructIndex + /** @type {Construct} */ + + let currentConstruct + /** @type {Info} */ + + let info + return Array.isArray(constructs) + ? /* c8 ignore next 1 */ + handleListOfConstructs(constructs) + : 'tokenize' in constructs // @ts-expect-error Looks like a construct. + ? 
handleListOfConstructs([constructs]) + : handleMapOfConstructs(constructs) + /** + * Handle a list of construct. + * + * @param {ConstructRecord} map + * @returns {State} + */ + + function handleMapOfConstructs(map) { + return start + /** @type {State} */ + + function start(code) { + const def = code !== null && map[code] + const all = code !== null && map.null + const list = [ + // To do: add more extension tests. + + /* c8 ignore next 2 */ + ...(Array.isArray(def) ? def : def ? [def] : []), + ...(Array.isArray(all) ? all : all ? [all] : []) + ] + return handleListOfConstructs(list)(code) + } + } + /** + * Handle a list of construct. + * + * @param {Construct[]} list + * @returns {State} + */ + + function handleListOfConstructs(list) { + listOfConstructs = list + constructIndex = 0 + + if (list.length === 0) { + return bogusState + } + + return handleConstruct(list[constructIndex]) + } + /** + * Handle a single construct. + * + * @param {Construct} construct + * @returns {State} + */ + + function handleConstruct(construct) { + return start + /** @type {State} */ + + function start(code) { + // To do: not needed to store if there is no bogus state, probably? + // Currently doesn’t work because `inspect` in document does a check + // w/o a bogus, which doesn’t make sense. But it does seem to help perf + // by not storing. + info = store() + currentConstruct = construct + + if (!construct.partial) { + context.currentConstruct = construct + } + + if ( + construct.name && + context.parser.constructs.disable.null.includes(construct.name) + ) { + return nok(code) + } + + return construct.tokenize.call( + // If we do have fields, create an object w/ `context` as its + // prototype. + // This allows a “live binding”, which is needed for `interrupt`. + fields ? 
Object.assign(Object.create(context), fields) : context, + effects, + ok, + nok + )(code) + } + } + /** @type {State} */ + + function ok(code) { + consumed = true + onreturn(currentConstruct, info) + return returnState + } + /** @type {State} */ + + function nok(code) { + consumed = true + info.restore() + + if (++constructIndex < listOfConstructs.length) { + return handleConstruct(listOfConstructs[constructIndex]) + } + + return bogusState + } + } + } + /** + * @param {Construct} construct + * @param {number} from + * @returns {void} + */ + + function addResult(construct, from) { + if (construct.resolveAll && !resolveAllConstructs.includes(construct)) { + resolveAllConstructs.push(construct) + } + + if (construct.resolve) { + splice( + context.events, + from, + context.events.length - from, + construct.resolve(context.events.slice(from), context) + ) + } + + if (construct.resolveTo) { + context.events = construct.resolveTo(context.events, context) + } + } + /** + * Store state. + * + * @returns {Info} + */ + + function store() { + const startPoint = now() + const startPrevious = context.previous + const startCurrentConstruct = context.currentConstruct + const startEventsIndex = context.events.length + const startStack = Array.from(stack) + return { + restore, + from: startEventsIndex + } + /** + * Restore state. + * + * @returns {void} + */ + + function restore() { + point = startPoint + context.previous = startPrevious + context.currentConstruct = startCurrentConstruct + context.events.length = startEventsIndex + stack = startStack + accountForPotentialSkip() + } + } + /** + * Move the current point a bit forward in the line when it’s on a column + * skip. + * + * @returns {void} + */ + + function accountForPotentialSkip() { + if (point.line in columnStart && point.column < 2) { + point.column = columnStart[point.line] + point.offset += columnStart[point.line] - 1 + } + } + } + /** + * Get the chunks from a slice of chunks in the range of a token. 
+ * + * @param {Chunk[]} chunks + * @param {Pick} token + * @returns {Chunk[]} + */ + + function sliceChunks(chunks, token) { + const startIndex = token.start._index + const startBufferIndex = token.start._bufferIndex + const endIndex = token.end._index + const endBufferIndex = token.end._bufferIndex + /** @type {Chunk[]} */ + + let view + + if (startIndex === endIndex) { + // @ts-expect-error `_bufferIndex` is used on string chunks. + view = [chunks[startIndex].slice(startBufferIndex, endBufferIndex)] + } else { + view = chunks.slice(startIndex, endIndex) + + if (startBufferIndex > -1) { + // @ts-expect-error `_bufferIndex` is used on string chunks. + view[0] = view[0].slice(startBufferIndex) + } + + if (endBufferIndex > 0) { + // @ts-expect-error `_bufferIndex` is used on string chunks. + view.push(chunks[endIndex].slice(0, endBufferIndex)) + } + } + + return view + } + /** + * Get the string value of a slice of chunks. + * + * @param {Chunk[]} chunks + * @param {boolean} [expandTabs=false] + * @returns {string} + */ + + function serializeChunks(chunks, expandTabs) { + let index = -1 + /** @type {string[]} */ + + const result = [] + /** @type {boolean|undefined} */ + + let atTab + + while (++index < chunks.length) { + const chunk = chunks[index] + /** @type {string} */ + + let value + + if (typeof chunk === 'string') { + value = chunk + } else + switch (chunk) { + case -5: { + value = '\r' + break + } + + case -4: { + value = '\n' + break + } + + case -3: { + value = '\r' + '\n' + break + } + + case -2: { + value = expandTabs ? ' ' : '\t' + break + } + + case -1: { + if (!expandTabs && atTab) continue + value = ' ' + break + } + + default: { + // Currently only replacement character. 
+ value = String.fromCharCode(chunk) + } + } + + atTab = chunk === -2 + result.push(value) + } + + return result.join('') + } + \ No newline at end of file diff --git a/src/runtime/transformers/csv/from-csv.ts b/src/runtime/transformers/csv/from-csv.ts new file mode 100644 index 000000000..efb435c6d --- /dev/null +++ b/src/runtime/transformers/csv/from-csv.ts @@ -0,0 +1,266 @@ +// Based on mdast-util-from-markdown +// See: https://github.com/syntax-tree/mdast-util-from-markdown/blob/05875cde264253f0d6a725791f10f55eb8d8c267/dev/lib/index.js +import { toString } from 'mdast-util-to-string' +import { preprocess } from 'micromark/lib/preprocess.js' +import { postprocess } from 'micromark/lib/postprocess.js' +import { stringifyPosition } from 'unist-util-stringify-position' +import { Token, Event, Point as MPoint } from 'micromark-util-types' +import { parse } from './parser' + +type Point = Omit +type Node = { + type: string + children: Array + position?: { + start?: Point + end?: Point + } + value?: string +} + +const own = {}.hasOwnProperty + +const initialPoint: Point = { + line: 1, + column: 1, + offset: 0 +} + +export const fromCSV = function (value, encoding?, options?) 
{ + if (typeof encoding !== 'string') { + options = encoding + encoding = undefined + } + + return compiler()( + postprocess( + parse(options).write(preprocess()(value, encoding, true)) + ) + ) +} + +function compiler () { + const config = { + enter: { + column: opener(openColumn), + row: opener(openRow), + data: onenterdata, + quotedData: onenterdata + }, + exit: { + row: closer(), + column: closer(), + data: onexitdata, + quotedData: onexitQuotedData + } + } + + return compile + + function compile (events: Array) { + const tree: Node = { + type: 'root', + children: [] + } + + const stack = [tree] + + const tokenStack = [] + + const context = { + stack, + tokenStack, + config, + enter, + exit, + resume + } + + let index = -1 + + while (++index < events.length) { + const handler = config[events[index][0]] + + if (own.call(handler, events[index][1].type)) { + handler[events[index][1].type].call( + Object.assign( + { + sliceSerialize: events[index][2].sliceSerialize + }, + context + ), + events[index][1] + ) + } + } + + if (tokenStack.length > 0) { + const tail: Function = tokenStack[tokenStack.length - 1] + const handler = tail[1] || defaultOnError + handler.call(context, undefined, tail[0]) + } // Figure out `root` position. + + tree.position = { + start: point( + events.length > 0 ? events[0][1].start : initialPoint + ), + end: point( + events.length > 0 ? events[events.length - 2][1].end : initialPoint + ) + } + + return tree + } + + function point (d: Point): Point { + return { + line: d.line, + column: d.column, + offset: d.offset + } + } + + function opener (create, and?) 
{ + return open + + function open (token: Token) { + enter.call(this, create(token), token) + if (and) { and.call(this, token) } + } + } + + function enter (node: Node, token: Token, errorHandler) { + const parent = this.stack[this.stack.length - 1] + parent.children.push(node) + this.stack.push(node) + this.tokenStack.push([token, errorHandler]) + + node.position = { + start: point(token.start) + } + return node + } + + function closer (and?) { + return close + + function close (token: Token) { + if (and) { and.call(this, token) } + exit.call(this, token) + } + } + + function exit (token: Token, onExitError) { + const node = this.stack.pop() + const open = this.tokenStack.pop() + + if (!open) { + throw new Error( + 'Cannot close `' + + token.type + + '` (' + + stringifyPosition({ + start: token.start, + end: token.end + }) + + '): it’s not open' + ) + } else if (open[0].type !== token.type) { + if (onExitError) { + onExitError.call(this, token, open[0]) + } else { + const handler = open[1] || defaultOnError + handler.call(this, token, open[0]) + } + } + node.position.end = point(token.end) + return node + } + + function resume () { + return toString(this.stack.pop()) + } + + function onenterdata (token: Token) { + const parent = this.stack[this.stack.length - 1] + + let tail = parent.children[parent.children.length - 1] + + if (!tail || tail.type !== 'text') { + // Add a new text node. 
+ tail = text() + + tail.position = { + start: point(token.start) + } + + parent.children.push(tail) + } + this.stack.push(tail) + } + + function onexitdata (token: Token) { + const tail = this.stack.pop() + tail.value += this.sliceSerialize(token).trim().replace(/""/g, '"') + tail.position.end = point(token.end) + } + function onexitQuotedData (token: Token) { + const tail = this.stack.pop() + const value = this.sliceSerialize(token) + tail.value += this.sliceSerialize(token).trim().substring(1, value.length - 1).replace(/""/g, '"') + tail.position.end = point(token.end) + } + + function text () { + return { + type: 'text', + value: '' + } + } + + function openColumn () { + return { + type: 'column', + children: [] + } + } + function openRow () { + return { + type: 'row', + children: [] + } + } +} + +function defaultOnError (left, right) { + if (left) { + throw new Error( + 'Cannot close `' + + left.type + + '` (' + + stringifyPosition({ + start: left.start, + end: left.end + }) + + '): a different token (`' + + right.type + + '`, ' + + stringifyPosition({ + start: right.start, + end: right.end + }) + + ') is open' + ) + } else { + throw new Error( + 'Cannot close document, a token (`' + + right.type + + '`, ' + + stringifyPosition({ + start: right.start, + end: right.end + }) + + ') is still open' + ) + } +} diff --git a/src/runtime/transformers/csv/index.ts b/src/runtime/transformers/csv/index.ts new file mode 100644 index 000000000..2c2a2e383 --- /dev/null +++ b/src/runtime/transformers/csv/index.ts @@ -0,0 +1,60 @@ +import { unified } from 'unified' +import { ParsedContent } from '../../types' +import { defineTransformer } from '../utils' +import { fromCSV } from './from-csv' + +function csvParse (options) { + const parser = (doc) => { + return fromCSV(doc, options) + } + + Object.assign(this, { Parser: parser }) + + const toJsonObject = (tree) => { + const [header, ...rows] = tree.children + const columns = header.children.map(col => col.children[0].value) + 
+ const data = rows.map((row) => { + return row.children.reduce((acc, col, i) => { + acc[String(columns[i])] = col.children[0]?.value + return acc + }, {}) + }) + return data + } + + const toJsonArray = (tree) => { + const data = tree.children.map((row) => { + return row.children.map(col => col.children[0]?.value) + }) + return data + } + + const compiler = (doc) => { + if (options.json) { + return toJsonObject(doc) + } + return toJsonArray(doc) + } + + Object.assign(this, { Compiler: compiler }) +} + +export default defineTransformer({ + name: 'csv', + extensions: ['.csv'], + parse: async (_id, content, options = {}) => { + const stream = unified().use(csvParse, { + delimiter: ',', + json: true, + ...options + }) + const { result } = await stream.process(content) + + return { + _id, + _type: 'csv', + body: result + } + } +}) diff --git a/src/runtime/transformers/csv/parser.ts b/src/runtime/transformers/csv/parser.ts new file mode 100644 index 000000000..0ed9f20da --- /dev/null +++ b/src/runtime/transformers/csv/parser.ts @@ -0,0 +1,178 @@ +import { markdownLineEnding, markdownSpace } from 'micromark-util-character' +import { createTokenizer } from './create-tokenizer' + +function initializeDocument (effects) { + const self = this + const delimiter = (this.parser.delimiter || ',').charCodeAt(0) + + return enterRow + + function enterRow (code) { + return effects.attempt( + { tokenize: attemptLastLine }, + (code) => { + effects.consume(code) + return enterRow + }, + (code) => { + effects.enter('row') + return enterColumn(code) + } + )(code) + } + + function enterColumn (code) { + effects.enter('column') + return content(code) + } + + function content (code) { + if (code === null) { + effects.exit('column') + effects.exit('row') + effects.consume(code) + return content + } + if (code === 34 /** " */) { + return quotedData(code) + } + if (code === delimiter) { + // Hanlde: + // - "1,,3,4" + // - ",2,3,4" + if (self.previous === delimiter || 
markdownLineEnding(self.previous) || self.previous === null) { + effects.enter('data') + effects.exit('data') + } + effects.exit('column') + effects.enter('columnSeparator') + effects.consume(code) + effects.exit('columnSeparator') + effects.enter('column') + return content + } + if (markdownLineEnding(code)) { + effects.exit('column') + effects.enter('newline') + effects.consume(code) + effects.exit('newline') + effects.exit('row') + + return enterRow + } + return data(code) + } + + // data + function data (code) { + effects.enter('data') + return dataChunk(code) + } + + function dataChunk (code) { + if (code === null || markdownLineEnding(code) || code === delimiter) { + effects.exit('data') + return content(code) + } + if (code === 92 /** \ */) { + return escapeCharacter(code) + } + effects.consume(code) + return dataChunk + } + + function escapeCharacter (code) { + effects.consume(code) + return function (code) { + effects.consume(code) + return content + } + } + + function quotedData (code) { + effects.enter('quotedData') + effects.enter('quotedDataChunk') + effects.consume(code) + + return quotedDataChunk + } + + function quotedDataChunk (code) { + if (code === 92 /** \ */) { + return escapeCharacter(code) + } + if (code === 34) { + return effects.attempt( + { tokenize: attemptDoubleQuote }, + (code) => { + effects.exit('quotedDataChunk') + effects.enter('quotedDataChunk') + return quotedDataChunk(code) + }, + (code) => { + effects.consume(code) + effects.exit('quotedDataChunk') + effects.exit('quotedData') + + return content + } + )(code) + } + effects.consume(code) + return quotedDataChunk + } +} + +function attemptDoubleQuote (effects, ok, nok) { + return startSequence + + function startSequence (code) { + if (code !== 34) { + return nok(code) + } + effects.enter('quoteFence') + effects.consume(code) + return sequence + } + + function sequence (code) { + if (code !== 34) { + return nok(code) + } + effects.consume(code) + effects.exit('quoteFence') + return 
code => ok(code) + } +} + +function attemptLastLine (effects, ok, nok) { + return enterLine + + function enterLine (code) { + if (!markdownSpace(code) && code !== null) { + return nok(code) + } + effects.enter('emptyLine') + return continueLine(code) + } + + function continueLine (code) { + if (markdownSpace(code)) { + effects.consume(code) + return continueLine + } + if (code === null) { + effects.exit('emptyLine') + return ok(code) + } + return nok(code) + } +} + +export const parse = (options) => { + return createTokenizer( + { ...options }, + { tokenize: initializeDocument }, + undefined + ) +} diff --git a/test/features/parser-csv.ts b/test/features/parser-csv.ts index b94f226c5..eb917c798 100644 --- a/test/features/parser-csv.ts +++ b/test/features/parser-csv.ts @@ -1,29 +1,88 @@ import { describe, test, expect, assert } from 'vitest' import { $fetch } from '@nuxt/test-utils' +import csvToJson from 'csvtojson' -const csv = `a,b,c +const csvs = ` +a,b,c 1,2,3 -4,5,6` +4,5,6 +--- +first,last,address,city,zip +John,Doe,120 any st.,"Anytown, WW",08123 +--- +a,b,c +1,"","" +2,3,4 +--- +a,b,c +1,"","" +2,3,4 +--- +a,b +1,"ha ""ha"" ha" +3,4 +--- +key,val +1,"{""type"": ""Point"", ""coordinates"": [102.0, 0.5]}" +--- +a,b,c +1,2,3 +"Once upon +a time",5,6 +7,8,9 +--- +a,b,c +1,2,3 +"Once upon +a time",5,6 +7,8,9 +--- +a,b +1,"ha +""ha"" +ha" +3,4 +--- +a,b,c +1,2,3 +--- +a,b,c +1,2,3 +--- +a,b,c +1,2,3 +4,5,ʤ +--- +John,Doe,120 jefferson st.,Riverside, NJ, 08075 +Jack,McGinnis,220 hobo Av.,Phila, PA,09119 +"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075 +Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234 +,Blankman,,SomeTown, SD, 00298 +"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123 + +`.trim().split('\n---\n') export const testCSVParser = () => { describe('Parser (.csv)', () => { - test('Basic usage', async () => { - const parsed = await $fetch('/api/parse', { - method: 'POST', - body: { - id: 
'content:index.csv', - content: csv - } - }) + for (const csv of csvs) { + test(`${csv.replace(/\n/g, '-')}`, async () => { + const parsed = await $fetch('/api/parse', { + method: 'POST', + body: { + id: 'content:index.csv', + content: csv + } + }) - expect(parsed).toHaveProperty('_id') - assert(parsed._id === 'content:index.csv') + expect(parsed).toHaveProperty('_id') + assert(parsed._id === 'content:index.csv') - expect(parsed).toHaveProperty('body') - expect(Array.isArray(parsed.body)).toBeTruthy() + expect(parsed).toHaveProperty('body') + expect(Array.isArray(parsed.body)).toBeTruthy() + const truth = await csvToJson({ output: 'json' }).fromString(csv) - expect(parsed.body[0]).toMatchObject({ a: '1', b: '2', c: '3' }) - expect(parsed.body[1]).toMatchObject({ a: '4', b: '5', c: '6' }) - }) + expect(parsed.body).toMatchObject(truth) + }) + } }) }