Permalink
Browse files

Added backslash as escape character for content parts of a tag litera…

…l (attribute values and non-tag tag body), not to the name because that didn't make much sense
  • Loading branch information...
1 parent 7858a65 commit 86103aecb74a44d6b1b2b4f6b7382b6e25fd5a2f Peter van der Zee committed Apr 4, 2012
Showing with 10 additions and 3 deletions.
  1. +10 −3 Tokenizer.js
View
13 Tokenizer.js
@@ -726,13 +726,20 @@ Tokenizer.Unidocde = window.Unicode;
Tokenizer.regexNumber = /^(?:(0[xX][0-9A-Fa-f]+)|((?:(?:(?:(?:[0-9]+)(?:\.[0-9]*)?))|(?:\.[0-9]+))(?:[eE][-+]?[0-9]{1,})?))/;
Tokenizer.regexNormalizeNewlines = /(\u000D[^\u000A])|[\u2028\u2029]/;
// tag parsing regex
+ // ws name (must start with a letter, not a digit or dash)
Tokenizer.regexTagName = /[^\S]*([a-zA-Z][a-zA-Z0-9-]*)/g;
-Tokenizer.regexTagAttributes = /[^\S]+([a-zA-Z0-9-]+)(?:=(?:(?:"([^"]*?)")|(?:'([^']*?)')))?/g;
+ // ws attrname "..[\"].." '..[\']..'
+Tokenizer.regexTagAttributes = /[^\S]+([a-zA-Z0-9-]+)(?:=(?:(?:"((?:(?:\\.)|(?:[^"]))*?)")|(?:'((?:(?:\\')|(?:[^']))*?)')))?/g;
+ // ws />
Tokenizer.regexTagUnarySuffix = /[^\S]*\/[^\S]*>/g;
+ // ws >
Tokenizer.regexTagBinarySuffix = /[^\S]*?>/g;
-Tokenizer.regexTagBody = /([^<]*)/g;
+ // anything as long as it's not a <, unless preceded by \
+Tokenizer.regexTagBody = /((?:(?:\\.)|(?:[^<]))*)/g;
+ // < ws /> / (?? TOFIX not sure whether this is correct or intentional...)
Tokenizer.regexTagOpenOrClose = /<[^\S]*[\/>]*\//g;
-Tokenizer.regexTagClose = /<[^\S]*\/[^\S]*([a-zA-Z0-9-]+)[^\S]*>/g;
+ // < ws / ws name ws >
+Tokenizer.regexTagClose = /<[^\S]*\/[^\S]*([a-zA-Z][a-zA-Z0-9-]*)[^\S]*>/g;
// 1 ws 2 lt 3 scmt 4 mcmt 5/6 str 7 nr 8 rx 9 dom 10 punc

0 comments on commit 86103ae

Please sign in to comment.