Skip to content

Commit

Permalink
parse: add support for emails without protocol
Browse files Browse the repository at this point in the history
Closes GH-475.
Closes GH-479.
  • Loading branch information
wooorm committed Mar 29, 2020
1 parent 512cd7a commit 0cec4d8
Show file tree
Hide file tree
Showing 15 changed files with 2,610 additions and 9 deletions.
51 changes: 51 additions & 0 deletions packages/remark-parse/lib/locate/email.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'use strict'

var decimal = require('is-decimal')
var alphabetical = require('is-alphabetical')

// Character codes of the punctuation that `isGfmAtext` (below) accepts in
// addition to decimal digits and alphabetical characters.
var plusSign = 43 // '+'
var dash = 45 // '-'
var dot = 46 // '.'
var underscore = 95 // '_'

// The module's export is the `locate` function declared below.
module.exports = locate

// See: <https://github.github.com/gfm/#extended-email-autolink>
// Find the index in `value`, at or after `fromIndex`, where a possible email
// address starts.
//
// Must be called with a `this` that exposes `options.gfm`; when that flag is
// falsy the result is always `-1`.
//
// The search looks for the next `@` that is directly preceded by at least one
// atext character (see `isGfmAtext`) and is not at the search start, then
// walks backwards over that run of atext characters (never before the search
// start) and returns the index where the run begins.
// Returns `-1` when no such `@` exists.
function locate(value, fromIndex) {
  var start = fromIndex
  var at
  var position

  if (!this.options.gfm) {
    return -1
  }

  at = value.indexOf('@', start)

  // Iterate rather than recurse: input containing a long run of `@`
  // characters without a local part would otherwise need one stack frame per
  // `@` and could exhaust the call stack.
  while (at !== -1) {
    if (at !== start && isGfmAtext(value.charCodeAt(at - 1))) {
      position = at

      // Rewind to the first character of the local part.
      while (position > start && isGfmAtext(value.charCodeAt(position - 1))) {
        position--
      }

      return position
    }

    // This `@` has no local part in front of it: resume the search after it.
    start = at + 1
    at = value.indexOf('@', start)
  }

  return -1
}

// Check whether the character code `code` counts as an "atext" character for
// the locator: a decimal digit or alphabetical character (as judged by the
// `is-decimal` / `is-alphabetical` helpers), or one of `+`, `-`, `.`, `_`.
function isGfmAtext(code) {
  if (decimal(code) || alphabetical(code)) {
    return true
  }

  switch (code) {
    case plusSign:
    case dash:
    case dot:
    case underscore:
      return true
    default:
      return false
  }
}
1 change: 1 addition & 0 deletions packages/remark-parse/lib/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ proto.inlineTokenizers = {
escape: require('./tokenize/escape'),
autoLink: require('./tokenize/auto-link'),
url: require('./tokenize/url'),
email: require('./tokenize/email'),
html: require('./tokenize/html-inline'),
link: require('./tokenize/link'),
reference: require('./tokenize/reference'),
Expand Down
114 changes: 114 additions & 0 deletions packages/remark-parse/lib/tokenize/email.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
'use strict'

var decode = require('parse-entities')
var decimal = require('is-decimal')
var alphabetical = require('is-alphabetical')
var locate = require('../locate/email')

// Export the tokenizer and attach its metadata as properties on the function.
module.exports = email
// The locator imported from `../locate/email`.
email.locator = locate
// NOTE(review): presumably tells the parser not to run this tokenizer while
// already inside a link — confirm against the parser.
email.notInLink = true

// Character codes the tokenizer compares against while scanning an address.
var plusSign = 43 // '+'
var dash = 45 // '-'
var dot = 46 // '.'
var atSign = 64 // '@'
var underscore = 95 // '_'

// Tokenize a GFM extended email autolink (an address without `mailto:` or
// angle brackets) found at the start of `value`.
//
// Must be called with a `this` that exposes `options.gfm`,
// `inlineTokenizers`, `enterLink()` and `tokenizeInline()`.
//
// - `eat`: called as `eat(content)(node)` to consume the address; `eat.now()`
//   is passed to `tokenizeInline`.
// - `value`: the text to match; the address must begin at index 0.
// - `silent`: when truthy, return `true` on a match without eating anything.
//
// Returns `undefined` when `options.gfm` is falsy or `value` does not start
// with an address; otherwise the result of eating a `link` node whose `url`
// is the address prefixed with `mailto:`.
function email(eat, value, silent) {
  var self = this
  var tokenizers = self.inlineTokenizers
  var index = 0
  var length = value.length
  var firstDot = -1
  var code
  var content
  var children
  var exit

  if (!self.options.gfm) {
    return
  }

  // Local part: one or more of alphanumerics, `+`, `-`, `.`, `_`.
  code = value.charCodeAt(index)

  while (
    decimal(code) ||
    alphabetical(code) ||
    code === plusSign ||
    code === dash ||
    code === dot ||
    code === underscore
  ) {
    code = value.charCodeAt(++index)
  }

  // An empty local part, or a local part not followed by `@`, is no address.
  if (index === 0 || code !== atSign) {
    return
  }

  index++

  // Domain: alphanumerics, `-`, `.`, `_` (no `+`).
  while (index < length) {
    code = value.charCodeAt(index)

    if (
      decimal(code) ||
      alphabetical(code) ||
      code === dash ||
      code === dot ||
      code === underscore
    ) {
      index++

      // Remember the position just after the first dot in the domain.
      if (firstDot === -1 && code === dot) {
        firstDot = index
      }

      continue
    }

    break
  }

  // Trailing dots are never part of the address (`foo@bar.com.` links
  // `foo@bar.com`).
  while (value.charCodeAt(index - 1) === dot) {
    index--
  }

  // Inspect the last character that is actually part of the candidate.  The
  // scan above leaves `code` at whatever character stopped it, which is only
  // the final address character when the address ends the input, so it cannot
  // be used here.
  code = value.charCodeAt(index - 1)

  if (
    // The domain needs a dot…
    firstDot === -1 ||
    // …that is followed by something (not only found among trailing dots).
    firstDot > index ||
    // The address may not end in `-` or `_`.
    code === dash ||
    code === underscore
  ) {
    return
  }

  content = value.slice(0, index)

  /* istanbul ignore if - never used (yet) */
  if (silent) {
    return true
  }

  exit = self.enterLink()

  // Temporarily remove all tokenizers except text in url.
  self.inlineTokenizers = {text: tokenizers.text}
  children = self.tokenizeInline(content, eat.now())
  self.inlineTokenizers = tokenizers

  exit()

  return eat(content)({
    type: 'link',
    title: null,
    url: 'mailto:' + decode(content, {nonTerminated: false}),
    children: children
  })
}
1 change: 1 addition & 0 deletions packages/remark-parse/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ Precedence of default inline methods is as follows:
* `escape`
* `autoLink`
* `url`
* `email`
* `html`
* `link`
* `reference`
Expand Down
2 changes: 2 additions & 0 deletions test/fixtures/input/auto-link-url.text
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ Also, subdomain should be a part of the link (http://foo.example.com/(hello[worl
So should this: mailto:foo@bar.com.

And even with underscore http://domain.org/this_is_good.

All links should work http://a.b, https://c.d, http://e.f, https://g.h.
18 changes: 18 additions & 0 deletions test/fixtures/input/literal-email.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
example1 (1234567@example.com)
example1 (mailto:1234567@example.com)

Lorem foo@bar.baz ipsum.

alpha@bravo+charlie.delta isn’t valid, but echo+foxtrot@golf.hotel is.

Valid: a.b-c_d@a.b

Valid, but the dot is not part of the email: a.b-c_d@a.b.

Not valid: a.b-c_d@a.b-

Not valid: a.b-c_d@a.b_

Not valid: alpha@bravo.

&lt;foo@example.com
Loading

0 comments on commit 0cec4d8

Please sign in to comment.