Skip to content
This repository has been archived by the owner on Mar 28, 2019. It is now read-only.

Commit

Permalink
first stab at adding SVG parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Pomax committed Jun 21, 2012
1 parent f0206ed commit fd9e6dd
Showing 1 changed file with 82 additions and 39 deletions.
121 changes: 82 additions & 39 deletions slowparse.js
Expand Up @@ -36,7 +36,7 @@ var Slowparse = (function() {
quot: '"',
amp: "&"
};

// `replaceEntityRefs()` will replace named character entity references
// (e.g. `&lt;`) in the given text string and return the result. If an
// entity name is unrecognized, don't replace it at all. Writing HTML
Expand All @@ -52,11 +52,11 @@ var Slowparse = (function() {
return ref;
});
}


// ### Errors
//
// `ParseError` is an internal error class used to indicate a parsing error.
// `ParseError` is an internal error class used to indicate a parsing error.
// It never gets seen by Slowparse clients, as parse errors are an
// expected occurrence. However, they are used internally to simplify
// flow control.
Expand Down Expand Up @@ -90,7 +90,7 @@ var Slowparse = (function() {
// `ParseErrorBuilders` contains Factory functions for all our types of
// parse errors, indexed by error type.
//
// Each public factory function returns a `parseInfo` object, sans the
// Each public factory function returns a `parseInfo` object, sans the
// `type` property. For more information on each type of error,
// see the [error specification][].
//
Expand Down Expand Up @@ -309,7 +309,7 @@ var Slowparse = (function() {
}
}
};

// ### Streams
//
// `Stream` is an internal class used for tokenization. The interface for
Expand Down Expand Up @@ -442,10 +442,10 @@ var Slowparse = (function() {


// ### CSS Parsing
//
//
// `CSSParser` is our internal CSS token stream parser object. This object
// has references to the stream, as well as the HTML DOM builder that is
// used by the HTML parser.
// used by the HTML parser.
function CSSParser(stream, domBuilder) {
this.stream = stream;
this.domBuilder = domBuilder;
Expand Down Expand Up @@ -544,7 +544,7 @@ var Slowparse = (function() {

// * A list of the CSS rulesets for the CSS block.
this.rules = [];

// * A list of comment blocks inside the CSS.
this.comments = [];

Expand Down Expand Up @@ -657,7 +657,7 @@ var Slowparse = (function() {
this.stream.eatCSSWhile(/[^\{;\}<]/);
var token = this.stream.makeToken(),
peek = this.stream.peek();

// If there was nothing to select, we're either done,
// or an error occurred.
if (token === null) {
Expand Down Expand Up @@ -762,7 +762,7 @@ var Slowparse = (function() {
throw new ParseError("MISSING_CSS_BLOCK_CLOSER", this, selectorStart,
selectorStart+value.length, value);
}

// If we're still in this function at this point, all is well
// and we can move on to property parsing.
else {
Expand Down Expand Up @@ -861,16 +861,16 @@ var Slowparse = (function() {
_parseValue: function(selector, selectorStart, property, propertyStart) {
var rule = this.stream.eatCSSWhile(/[^}<;]/),
token = this.stream.makeToken();

if(token === null) {
throw new ParseError("MISSING_CSS_VALUE", this, propertyStart,
propertyStart+property.length, property);
}

var next = (!this.stream.end() ? this.stream.next() : "end of stream"),
errorMsg = "[_parseValue] Expected }, <, or ;, instead found "+next;


this.filterComments(token);
var value = token.value,
valueStart = token.interval.start,
Expand Down Expand Up @@ -916,7 +916,7 @@ var Slowparse = (function() {
}
},
// This helper function binds the current `property: value` object
// in the current ruleset, and resets it for the next selector block.
// in the current ruleset, and resets it for the next selector block.
_bindCurrentRule: function() {
this.currentRule.declarations.properties.push(this.currentProperty);
this.currentProperty = null;
Expand All @@ -936,12 +936,21 @@ var Slowparse = (function() {
}

HTMLParser.prototype = {
html5Doctype: '<!DOCTYPE html>',
// Void HTML elements are the ones that don't need to have a closing
// tag.
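// Since SVG requires a slightly different code path,
// we need to track whether we're in HTML or SVG mode.
// NOTE(review): the tag-parsing code in this object reads and writes
// `this.parseSVG`, not `parsingSVG`, so this default is never consulted;
// the two names should be reconciled.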
parsingSVG: false,

// For SVG DOM elements, we need to know the SVG namespace.
svgNameSpace: "http://www.w3.org/2000/svg",

// HTML5 documents have a special doctype that we must use
html5Doctype: "<!DOCTYPE html>",

// Void HTML elements are the ones that don't need to have a closing tag.
voidHtmlElements: ["area", "base", "br", "col", "command", "embed", "hr",
"img", "input", "keygen", "link", "meta", "param",
"source", "track", "wbr"],

// We keep a list of all valid HTML5 elements.
htmlElements: ["a", "abbr", "address", "area", "article", "aside",
"audio", "b", "base", "bdi", "bdo", "bgsound", "blink",
Expand All @@ -960,6 +969,26 @@ var Slowparse = (function() {
"strong", "style", "sub", "summary", "sup", "table",
"tbody", "td", "textarea", "tfoot", "th", "thead", "time",
"title", "tr", "track", "u", "ul", "var", "video", "wbr"],

// HTML5 allows SVG elements
svgElements: ["a", "altGlyph", "altGlyphDef", "altGlyphItem", "animate",
"animateColor", "animateMotion", "animateTransform", "circle",
"clipPath", "color-profile", "cursor", "defs", "desc",
"ellipse", "feBlend", "feColorMatrix", "feComponentTransfer",
"feComposite", "feConvolveMatrix", "feDiffuseLighting",
"feDisplacementMap", "feDistantLight", "feFlood", "feFuncA",
"feFuncB", "feFuncG", "feFuncR", "feGaussianBlur", "feImage",
"feMerge", "feMergeNode", "feMorphology", "feOffset",
"fePointLight", "feSpecularLighting", "feSpotLight",
"feTile", "feTurbulence", "filter", "font", "font-face",
"font-face-format", "font-face-name", "font-face-src",
"font-face-uri", "foreignObject", "g", "glyph", "glyphRef",
"hkern", "image", "line", "linearGradient", "marker", "mask",
"metadata", "missing-glyph", "mpath", "path", "pattern",
"polygon", "polyline", "radialGradient", "rect", "script",
"set", "stop", "style", "svg", "switch", "symbol", "text",
"textPath", "title", "tref", "tspan", "use", "view", "vkern"],

// We also keep a list of HTML elements that are now obsolete, but
// may still be encountered in the wild on popular sites.
obsoleteHtmlElements: ["acronym", "applet", "basefont", "big", "center",
Expand All @@ -969,11 +998,17 @@ var Slowparse = (function() {
// This is a helper function to determine whether a given string
// is a legal HTML element tag.
_knownHTMLElement: function(tagName) {
return this.voidHtmlElements.indexOf(tagName) > -1 ||
return this.voidHtmlElements.indexOf(tagName) > -1 ||
this.htmlElements.indexOf(tagName) > -1 ||
this.svgElements.indexOf(tagName) > -1 ||
this.obsoleteHtmlElements.indexOf(tagName) > -1;
},
// This is a helper function to determine whether a given string
// is a legal SVG element tag.
_knownSVGElement: function(tagName) {
return this.svgElements.indexOf(tagName) > -1;
},
// This is a helper function to determine whether a given string
// is a void HTML element tag.
_knownVoidHTMLElement: function(tagName) {
return this.voidHtmlElements.indexOf(tagName) > -1;
Expand All @@ -997,7 +1032,7 @@ var Slowparse = (function() {
end: this.stream.pos
}
};

// Next, we parse "tag soup", creating text nodes and diving into
// tags as we find them.
while (!this.stream.end()) {
Expand Down Expand Up @@ -1039,12 +1074,17 @@ var Slowparse = (function() {
this.stream.eatWhile(/[\w\d]/);
var token = this.stream.makeToken();
var tagName = token.value.slice(1).toLowerCase();


if (tagName === "svg")
this.parseSVG = true;

// If the character after the `<` is a `/`, we're on a closing tag.
// We want to report useful errors about whether the tag is unexpected
// or doesn't match with the most recent opening tag.
if (tagName[0] == '/') {
var closeTagName = tagName.slice(1).toLowerCase();
if (closeTagName === "svg")
this.parseSVG = false;
if (this._knownVoidHTMLElement(closeTagName))
throw new ParseError("CLOSE_TAG_FOR_VOID_ELEMENT", this,
closeTagName, token);
Expand All @@ -1060,17 +1100,18 @@ var Slowparse = (function() {
closeTagName, token);
this._parseEndCloseTag();
}

else {
// We want to make sure that opening tags have valid tag names.
if (!(tagName && this._knownHTMLElement(tagName)))
throw new ParseError("INVALID_TAG_NAME", tagName, token);
if (tagName) {
if ((this.parseSVG && !this._knownSVGElement(tagName)) || !this._knownHTMLElement(tagName))
throw new ParseError("INVALID_TAG_NAME", tagName, token);
}

var parseInfo = { openTag: { start: token.interval.start }};
var nameSpace = (this.parseSVG ? this.svgNameSpace : undefined);
this.domBuilder.pushElement(tagName, parseInfo, nameSpace);

this.domBuilder.pushElement(tagName, {
openTag: {
start: token.interval.start
}
});
if (!this.stream.end())
this._parseEndOpenTag(tagName);
}
Expand Down Expand Up @@ -1146,9 +1187,10 @@ var Slowparse = (function() {
this.stream.makeToken();
}
else if (this.stream.peek() == '>' || this.stream.match("/>")) {
if (this.stream.match("/>", true)) {
if (!this._knownVoidHTMLElement(tagName))
throw new ParseError("SELF_CLOSING_NON_VOID_ELEMENT", this,
var selfClosing = this.stream.match("/>", true);
if (selfClosing) {
if (!this.parseSVG && !this._knownVoidHTMLElement(tagName))
throw new ParseError("SELF_CLOSING_NON_VOID_ELEMENT", this,
tagName);
} else
this.stream.next();
Expand All @@ -1157,16 +1199,16 @@ var Slowparse = (function() {

// If the opening tag represents a void element, there will not be
// a closing element, so we tell our DOM builder that we're done.
if (tagName && this._knownVoidHTMLElement(tagName))
if (tagName && ((selfClosing && this._knownSVGElement(tagName)) || this._knownVoidHTMLElement(tagName)))
this.domBuilder.popElement();

// If the opening tag represents a `<style>` element, we hand
// off parsing to our CSS parser.
if (!this.stream.end() && tagName === "style") {
var cssBlock = this.cssParser.parse();
this.domBuilder.text(cssBlock.value, cssBlock.parseInfo);
}

// If the opening tag represents a `<textarea>` element, we need
// to parse all its contents as CDATA (unparsed character data)
if (tagName && tagName === "textarea") {
Expand Down Expand Up @@ -1239,8 +1281,9 @@ var Slowparse = (function() {
// This method pushes a new element onto the DOM builder's stack.
// The element is appended to the currently active element and is
// then made the new currently active element.
pushElement: function(tagName, parseInfo) {
var node = this.document.createElement(tagName);
pushElement: function(tagName, parseInfo, nameSpace) {
var node = (nameSpace ? this.document.createElementNS(nameSpace,tagName)
: this.document.createElement(tagName));
node.parseInfo = parseInfo;
this.currentNode.appendChild(node);
this.currentNode = node;
Expand Down Expand Up @@ -1274,7 +1317,7 @@ var Slowparse = (function() {
};

// ### Exported Symbols
//
//
// `Slowparse` is the object that holds all exported symbols from
// this library.
var Slowparse = {
Expand Down Expand Up @@ -1331,7 +1374,7 @@ var Slowparse = (function() {
} else
throw e;
}

(errorDetectors || []).forEach(function(detector) {
if (!error)
error = detector(html, domBuilder.fragment) || null;
Expand Down

0 comments on commit fd9e6dd

Please sign in to comment.