Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Parser that uses babylon for html nodes #52

Merged
merged 6 commits into from
Feb 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parser/.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock=false
27 changes: 27 additions & 0 deletions parser/fixture.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import {
Foo,
Bar
} from './ui'

# Hello, world!

I'm an awesome paragraph.

<Foo bg='red'>
<Bar />
<markdown>
## Hello
</markdown>
</Foo>

\imports can be escaped too!

### Shopping list

- Apples
- Bananas
- Oranges

***

:heart: :heart: :heart:
38 changes: 38 additions & 0 deletions parser/get-imports.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
const { parse } = require('remark')
const select = require('unist-util-select')
const parseImports = require('parse-es6-imports')

// Selector passed to unist-util-select below: targets a text node that is the
// first child of a paragraph and whose value starts with "import".
const IMPORT_SELECTOR = 'paragraph > text:first-child[value^="import"]'

/**
 * Flattens parsed import statements into the list of identifiers they bind.
 *
 * For every statement the named import values come first, followed by the
 * default import and then the star import (each only when truthy).
 *
 * @param {Array} [imports=[]] - Entries carrying a `parsed` array of statements.
 * @returns {Array} Bound identifiers, in source order.
 */
const importScope = (imports = []) => {
  const names = []

  imports.forEach(entry => {
    entry.parsed.forEach(statement => {
      statement.namedImports.forEach(named => {
        names.push(named.value)
      })

      if (statement.defaultImport) {
        names.push(statement.defaultImport)
      }

      if (statement.starImport) {
        names.push(statement.starImport)
      }
    })
  })

  return names
}

module.exports = mdx => {
const imports = select(parse(mdx), IMPORT_SELECTOR)
.map(i => {
const squeezed = i.value.replace(/\s+/g, ' ')
const parsed = parseImports(squeezed)

return {
raw: i.value,
parsed
}
})

return {
imports,
scope: importScope(imports)
}
}
93 changes: 93 additions & 0 deletions parser/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
const unified = require('unified')
const remark = require('remark-parse')
const rehype = require('remark-rehype')
const html = require('rehype-stringify')
const visit = require('unist-util-visit')
const blocks = require('remark-parse/lib/block-elements.json')

const getImports = require('./get-imports')
const parseImports = require('./parse-imports')
const parseJSX = require('./parse-jsx')

// Tag name that fromBabelAST treats specially: the text gathered from a JSX
// element with this name is handed to `parse` instead of becoming an element.
const MARKDOWN_REOPEN = 'markdown'

/**
 * Concatenates a node's own `value` with the gathered text of all of its
 * descendants, depth first. Missing values contribute an empty string.
 *
 * @param {Object} node - Node with an optional `value` and optional `children`.
 * @returns {string} The combined text.
 */
const gatherText = node => {
  const pieces = [node.value]
  const kids = node.children || []

  kids.forEach(kid => {
    pieces.push(gatherText(kid))
  })

  return pieces.join('')
}

/**
 * Converts a Babel JSX AST node into a tree node.
 *
 * - `JSXText` becomes `{ type: 'text', value }`.
 * - `JSXElement` becomes `{ type: 'element', tagName, properties, children }`,
 *   except for the MARKDOWN_REOPEN tag, whose gathered text is handed to
 *   `parse` together with `options`.
 * - Any other node type yields `undefined` and is filtered out of `children`.
 *
 * @param {Object} node - Babel AST node.
 * @param {Object} options - Parser options, forwarded on recursion and to `parse`.
 * @returns {Object|undefined} The converted node, if any.
 */
const fromBabelAST = (node, options) => {
  if (node.type === 'JSXText') {
    return {
      type: 'text',
      value: node.value
    }
  }

  if (node.type !== 'JSXElement') {
    return
  }

  const tagName = node.openingElement.name.name

  if (tagName === MARKDOWN_REOPEN) {
    // `options` belongs to `parse`; `gatherText` only takes the node.
    return parse(gatherText(node), options)
  }

  const properties = {}

  node.openingElement.attributes.forEach(attr => {
    // Spread attributes (`{...props}`) carry no name and cannot be mapped.
    if (!attr.name) {
      return
    }

    // A valueless attribute (`<Foo disabled />`) has a null value; JSX
    // defines it as `true`.
    properties[attr.name.name] = attr.value ? attr.value.value : true
  })

  return {
    tagName,
    type: 'element',
    properties,
    children: node.children
      .map(child => fromBabelAST(child, options))
      .filter(Boolean)
  }
}

/**
 * Plugin that rewrites `html` nodes as `jsx` nodes.
 *
 * For each `html` node the raw value is parsed with `parseJSX`, the expression
 * of the first program statement is converted with `fromBabelAST`, and the
 * result is stored on `node.children` after the node type is set to 'jsx'.
 *
 * NOTE(review): `children` receives the single converted node rather than an
 * array - confirm downstream consumers expect that shape.
 *
 * @param {Object} options - Forwarded untouched to `fromBabelAST`.
 * @returns {Function} Transformer that visits the given tree.
 * @throws {Error} When parsing or conversion fails. The message contains the
 *   node's start position and its raw source; the underlying error is kept on
 *   the `cause` property so the real failure is not lost.
 */
const jsx = options => tree =>
  visit(tree, 'html', node => {
    try {
      const ast = parseJSX(node.value).program.body[0].expression
      node.type = 'jsx'
      node.children = fromBabelAST(ast, options)
    } catch (e) {
      const position = [
        node.position.start.line,
        node.position.start.column
      ].join(':')

      const error = new Error(
        [
          `[${position}]: Syntax Error - Could not parse JSX block`,
          node.value,
          ''
        ].join('\n\n')
      )

      // Keep the original failure reachable for debugging.
      error.cause = e

      throw error
    }
  })

/**
 * Runs an MDX string through the processing pipeline.
 *
 * The pipeline uses, in order: `remark`, `parseImports`, `jsx`, `rehype` and
 * `html`. The first three receive the derived options described below.
 *
 * The caller's `options` object is never modified; a shallow copy is extended
 * with `components` (defaulting to `{}`) and `blocks`.
 *
 * @param {string} mdx - MDX source.
 * @param {Object} [options={}] - Parser options; `components` is optional.
 * @returns {*} Whatever the processor's `processSync(mdx)` returns.
 */
const parse = (mdx, options = {}) => {
  const components = options.components || {}

  // TODO: Need to figure out a better way to handle imports
  // parsing. As implemented it parses the whole thing :(
  const parserOptions = Object.assign({}, options, {
    components,
    blocks: Object
      .keys(components)
      .concat(getImports(mdx).scope)
      .concat(blocks)
  })

  const fn = unified()
    .use(remark, parserOptions)
    .use(parseImports, parserOptions)
    .use(jsx, parserOptions)
    .use(rehype)
    .use(html)

  return fn.processSync(mdx)
}

// The `parse` function is this module's sole export.
module.exports = parse
40 changes: 40 additions & 0 deletions parser/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"name": "mdx-parser",
"description": "Parser for MDX files, outputs MDXAST",
"author": "John Otander",
"version": "0.0.1",
"main": "index.js",
"files": [
"index.js",
"get-imports.js",
"parse-jsx.js",
"parse-imports.js"
],
"scripts": {
"inspect": "inspect node_modules/ava/profile.js test.js",
"test": "ava -v"
},
"repository": "c8r/markdown",
"keywords": [
"react",
"markdown",
"remark",
"mdxast",
"mdx"
],
"license": "MIT",
"dependencies": {
"babylon": "^6.18.0",
"rehype-stringify": "^3.0.0",
"remark": "^9.0.0",
"remark-parse": "^5.0.0",
"remark-rehype": "^3.0.0",
"unified": "^6.1.6",
"unist-util-select": "^1.5.0",
"unist-util-visit": "^1.3.0"
},
"devDependencies": {
"ava": "^0.25.0",
"inspect-process": "^0.5.0"
}
}
26 changes: 26 additions & 0 deletions parser/parse-imports.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const visit = require('unist-util-visit')

// Matches text that starts with the `import` keyword. The word boundary keeps
// ordinary prose such as "important" or "imports" from being mistaken for an
// import statement.
const IMPORT_REGEX = /^import\b/
// Matches text that starts with a backslash-escaped "import".
const ESCAPED_IMPORT_REGEX = /^\\import/

/** @returns {boolean} Whether `text` starts with the `import` keyword. */
const isImport = text => IMPORT_REGEX.test(text)

/** @returns {boolean} Whether `text` starts with an escaped `\import`. */
const isEscapedImport = text => ESCAPED_IMPORT_REGEX.test(text)

/**
 * Strips the escaping backslash from a node whose value starts with
 * `\import`, rewriting `node.value` in place. Other nodes are left alone.
 *
 * @param {Object} node - Node with a string `value`.
 * @returns {undefined}
 */
const unescape = node => {
  if (!isEscapedImport(node.value)) {
    return
  }

  node.value = node.value.replace(ESCAPED_IMPORT_REGEX, 'import')
}

module.exports = options => (tree, file) =>
visit(tree, 'text', (node, i, parent) => {
if (!isImport(node.value)) {
return unescape(node)
}

const siblings = parent.children
parent.children = siblings
.splice(0, i)
.concat(
siblings.slice(i + 1, siblings.length)
)
})
9 changes: 9 additions & 0 deletions parser/parse-jsx.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const babylon = require('babylon')

module.exports = jsx =>
babylon.parse(jsx, {
sourceType: 'module',
plugins: [
'jsx'
]
})
21 changes: 21 additions & 0 deletions parser/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# [WIP] MDX Parser

Proof of concept of an MDX parser to create an MDXAST.

## Limitations

I was hoping to use `babylon` to handle the majority of the JSX parsing. However, I don't think this will end up working, since it will attempt to parse embedded markdown, which uses the `<markdown>` syntax.

Consider the following scenario of a `{` in markdown:

```jsx
# Hello,

<ul>
<markdown>
* this will break {
* and {this will break}
* <this too>
</markdown>
</ul>
```
25 changes: 25 additions & 0 deletions parser/test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const fs = require('fs')
const test = require('ava')
const parse = require('./')

// Shared MDX fixture, read relative to the working directory of the test run.
const fixture = fs.readFileSync('fixture.md', 'utf8')

// Smoke test: passes as long as parsing the fixture does not throw.
test('it parses a file', t => {
  parse(fixture)

  t.pass()
})

// Pins the full parser output for the fixture against the stored snapshot.
test('it parses imports', t => {
  t.snapshot(parse(fixture))
})

// A lone closing tag must be rejected with an error that mentions the tag.
test('it raises an error on invalid jsx', t => {
  const parseInvalid = () => parse('</Foo>')
  const { message } = t.throws(parseInvalid)

  t.regex(message, /Foo/)
})
34 changes: 34 additions & 0 deletions parser/test.js.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Snapshot report for `test.js`

The actual snapshot is saved in `test.js.snap`.

Generated by [AVA](https://ava.li).

## it parses imports

> Snapshot 1

VFile {
contents: `<p></p>␊
<h1>Hello, world!</h1>␊
<p>I'm an awesome paragraph.</p>␊
&#x3C;Foo bg='red'>␊
&#x3C;Bar />␊
&#x3C;markdown>␊
## Hello␊
&#x3C;/markdown>␊
&#x3C;/Foo>␊
<p>imports can be escaped too!</p>␊
<h3>Shopping list</h3>␊
<ul>␊
<li>Apples</li>␊
<li>Bananas</li>␊
<li>Oranges</li>␊
</ul>␊
<hr>␊
<p>:heart: :heart: :heart:</p>`,
cwd: '/Users/johno/c/c8r/markdown/parser',
data: {},
history: [],
messages: [],
}
Binary file added parser/test.js.snap
Binary file not shown.