diff --git a/dist/to-markdown.js b/dist/to-markdown.js index 3a81b5b6..01bbd9d1 100644 --- a/dist/to-markdown.js +++ b/dist/to-markdown.js @@ -13,20 +13,9 @@ var toMarkdown; var converters; var mdConverters = require('./lib/md-converters'); var gfmConverters = require('./lib/gfm-converters'); +var HtmlParser = require('./lib/html-parser'); var collapse = require('collapse-whitespace'); -/* - * Set up window and document for Node.js - */ - -var _window = (typeof window !== 'undefined' ? window : this), _document; -if (typeof document === 'undefined') { - _document = require('jsdom').jsdom(); -} -else { - _document = document; -} - /* * Utilities */ @@ -56,46 +45,9 @@ function isVoid(node) { return voids.indexOf(node.nodeName.toLowerCase()) !== -1; } -/* - * Parsing HTML strings - */ - -function canParseHtml() { - var Parser = _window.DOMParser, canParse = false; - - // Adapted from https://gist.github.com/1129031 - // Firefox/Opera/IE throw errors on unsupported types - try { - // WebKit returns null on unsupported types - if (new Parser().parseFromString('', 'text/html')) { - canParse = true; - } - } catch (e) {} - return canParse; -} - -function createHtmlParser() { - var Parser = function () {}; - - Parser.prototype.parseFromString = function (string) { - var newDoc = _document.implementation.createHTMLDocument(''); - - if (string.toLowerCase().indexOf(' -1) { - newDoc.documentElement.innerHTML = string; - } - else { - newDoc.body.innerHTML = string; - } - return newDoc; - }; - return Parser; -} - -var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser(); - function htmlToDom(string) { var tree = new HtmlParser().parseFromString(string, 'text/html'); - collapse(tree, isBlock); + collapse(tree.documentElement, isBlock); return tree; } @@ -282,7 +234,7 @@ toMarkdown.outer = outer; module.exports = toMarkdown; -},{"./lib/gfm-converters":2,"./lib/md-converters":3,"collapse-whitespace":4,"jsdom":7}],2:[function(require,module,exports){ +},{"./lib/gfm-converters":2,"./lib/html-parser":3,"./lib/md-converters":4,"collapse-whitespace":7}],2:[function(require,module,exports){ 'use strict'; function cell(content, node) { @@ -395,6 +347,84 @@ module.exports = [ ]; },{}],3:[function(require,module,exports){ +/* + * Set up window for Node.js + */ + +var _window = (typeof window !== 'undefined' ? window : this); + +/* + * Parsing HTML strings + */ + +function canParseHtmlNatively () { + var Parser = _window.DOMParser + var canParse = false; + + // Adapted from https://gist.github.com/1129031 + // Firefox/Opera/IE throw errors on unsupported types + try { + // WebKit returns null on unsupported types + if (new Parser().parseFromString('', 'text/html')) { + canParse = true; + } + } catch (e) {} + + return canParse; +} + +function createHtmlParser () { + var Parser = function () {}; + + // For Node.js environments + if (typeof document === 'undefined') { + var jsdom = require('jsdom'); + Parser.prototype.parseFromString = function (string) { + return jsdom.jsdom(string, { + features: { + FetchExternalResources: [], + ProcessExternalResources: false + } + }); + }; + } else { + if (!shouldUseActiveX()) { + Parser.prototype.parseFromString = function (string) { + var doc = document.implementation.createHTMLDocument(''); + doc.open(); + doc.write(string); + doc.close(); + return doc; + }; + } else { + Parser.prototype.parseFromString = function (string) { + var doc = new ActiveXObject('htmlfile'); + doc.designMode = 'on'; // disable on-page scripts + doc.open(); + doc.write(string); + doc.close(); + return doc; + }; + } + } + return Parser; +} + +function shouldUseActiveX () { + var useActiveX = false; + + try { + document.implementation.createHTMLDocument('').open(); + } catch (e) { + if (window.ActiveXObject) useActiveX = true; + } + + return useActiveX; +} + +module.exports = canParseHtmlNatively() ? _window.DOMParser : createHtmlParser() + +},{"jsdom":6}],4:[function(require,module,exports){ 'use strict'; module.exports = [ @@ -546,7 +576,53 @@ module.exports = [ } } ]; -},{}],4:[function(require,module,exports){ +},{}],5:[function(require,module,exports){ +/** + * This file automatically generated from `build.js`. + * Do not manually edit. + */ + +module.exports = [ + "address", + "article", + "aside", + "audio", + "blockquote", + "canvas", + "dd", + "div", + "dl", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "header", + "hgroup", + "hr", + "main", + "nav", + "noscript", + "ol", + "output", + "p", + "pre", + "section", + "table", + "tfoot", + "ul", + "video" +]; + +},{}],6:[function(require,module,exports){ + +},{}],7:[function(require,module,exports){ 'use strict'; var voidElements = require('void-elements'); @@ -684,51 +760,7 @@ function next(prev, current) { module.exports = collapseWhitespace; -},{"block-elements":5,"void-elements":6}],5:[function(require,module,exports){ -/** - * This file automatically generated from `build.js`. - * Do not manually edit. - */ - -module.exports = [ - "address", - "article", - "aside", - "audio", - "blockquote", - "canvas", - "dd", - "div", - "dl", - "fieldset", - "figcaption", - "figure", - "footer", - "form", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "header", - "hgroup", - "hr", - "main", - "nav", - "noscript", - "ol", - "output", - "p", - "pre", - "section", - "table", - "tfoot", - "ul", - "video" -]; - -},{}],6:[function(require,module,exports){ +},{"block-elements":5,"void-elements":8}],8:[function(require,module,exports){ /** * This file automatically generated from `pre-publish.js`. * Do not manually edit. @@ -753,7 +785,5 @@ module.exports = { "wbr": true }; -},{}],7:[function(require,module,exports){ - },{}]},{},[1])(1) }); \ No newline at end of file diff --git a/index.js b/index.js index de67853b..b520a81a 100644 --- a/index.js +++ b/index.js @@ -12,20 +12,9 @@ var toMarkdown; var converters; var mdConverters = require('./lib/md-converters'); var gfmConverters = require('./lib/gfm-converters'); +var HtmlParser = require('./lib/html-parser'); var collapse = require('collapse-whitespace'); -/* - * Set up window and document for Node.js - */ - -var _window = (typeof window !== 'undefined' ? window : this), _document; -if (typeof document === 'undefined') { - _document = require('jsdom').jsdom(); -} -else { - _document = document; -} - /* * Utilities */ @@ -55,46 +44,9 @@ function isVoid(node) { return voids.indexOf(node.nodeName.toLowerCase()) !== -1; } -/* - * Parsing HTML strings - */ - -function canParseHtml() { - var Parser = _window.DOMParser, canParse = false; - - // Adapted from https://gist.github.com/1129031 - // Firefox/Opera/IE throw errors on unsupported types - try { - // WebKit returns null on unsupported types - if (new Parser().parseFromString('', 'text/html')) { - canParse = true; - } - } catch (e) {} - return canParse; -} - -function createHtmlParser() { - var Parser = function () {}; - - Parser.prototype.parseFromString = function (string) { - var newDoc = _document.implementation.createHTMLDocument(''); - - if (string.toLowerCase().indexOf(' -1) { - newDoc.documentElement.innerHTML = string; - } - else { - newDoc.body.innerHTML = string; - } - return newDoc; - }; - return Parser; -} - -var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser(); - function htmlToDom(string) { var tree = new HtmlParser().parseFromString(string, 'text/html'); - collapse(tree, isBlock); + collapse(tree.documentElement, isBlock); return tree; } diff --git a/lib/html-parser.js b/lib/html-parser.js new file mode 100644 index 00000000..778d8ef2 --- /dev/null +++ b/lib/html-parser.js @@ -0,0 +1,76 @@ +/* + * Set up window for Node.js + */ + +var _window = (typeof window !== 'undefined' ? window : this); + +/* + * Parsing HTML strings + */ + +function canParseHtmlNatively () { + var Parser = _window.DOMParser + var canParse = false; + + // Adapted from https://gist.github.com/1129031 + // Firefox/Opera/IE throw errors on unsupported types + try { + // WebKit returns null on unsupported types + if (new Parser().parseFromString('', 'text/html')) { + canParse = true; + } + } catch (e) {} + + return canParse; +} + +function createHtmlParser () { + var Parser = function () {}; + + // For Node.js environments + if (typeof document === 'undefined') { + var jsdom = require('jsdom'); + Parser.prototype.parseFromString = function (string) { + return jsdom.jsdom(string, { + features: { + FetchExternalResources: [], + ProcessExternalResources: false + } + }); + }; + } else { + if (!shouldUseActiveX()) { + Parser.prototype.parseFromString = function (string) { + var doc = document.implementation.createHTMLDocument(''); + doc.open(); + doc.write(string); + doc.close(); + return doc; + }; + } else { + Parser.prototype.parseFromString = function (string) { + var doc = new ActiveXObject('htmlfile'); + doc.designMode = 'on'; // disable on-page scripts + doc.open(); + doc.write(string); + doc.close(); + return doc; + }; + } + } + return Parser; +} + +function shouldUseActiveX () { + var useActiveX = false; + + try { + document.implementation.createHTMLDocument('').open(); + } catch (e) { + if (window.ActiveXObject) useActiveX = true; + } + + return useActiveX; +} + +module.exports = canParseHtmlNatively() ? _window.DOMParser : createHtmlParser() diff --git a/package.json b/package.json index 42762ce0..bddfe8e0 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ }, "dependencies": { "collapse-whitespace": "1.1.2", - "jsdom": "^6.5.1" + "jsdom": "^8.1.0" }, "engines": { "node": "^4" diff --git a/test/to-markdown-test.js b/test/to-markdown-test.js index 13e5fe90..46c2c7cb 100644 --- a/test/to-markdown-test.js +++ b/test/to-markdown-test.js @@ -436,3 +436,9 @@ asyncTest('img[onerror]', 1, function () { start(); equal(toMarkdown('>\'>">'), '>\'>">![](x)', 'We expect img[onerror] functions not to run'); }); + +test('malformed documents', function() { + expect(0); // just make sure to-markdown doesn't crash + var html = ''; + toMarkdown(html); +});