Skip to content
Browse files

use new haxe.xml.Parser for JS : faster, not based on Regexp but state machine

git-svn-id: http://haxe.googlecode.com/svn/trunk@4486 f16182fa-f095-11de-8f43-4547254af6c6
  • Loading branch information...
1 parent 7743b22 commit 8bae7d8a8bea74e77f57fa07c1ec5ae8de179128 ncannasse committed Apr 20, 2012
Showing with 286 additions and 111 deletions.
  1. +1 −0 doc/CHANGES.txt
  2. +284 −0 std/haxe/xml/Parser.hx
  3. +1 −111 std/js/_std/Xml.hx
View
1 doc/CHANGES.txt
@@ -8,6 +8,7 @@
js : forbid static 'length' (issue since object is a Function)
all : does not allow overriding var/prop
flash : removed wrapping for Xml nodes, use instead specific compare when comparing two typed nodes
+ js : use new haxe.xml.Parser (faster, not based on Regexp)
2012-04-14: 2.09
all : optimized const == const and const != const (with different const types)
View
284 std/haxe/xml/Parser.hx
@@ -0,0 +1,284 @@
+package haxe.xml;
+
+using StringTools;
+
+/*
+ Poor man's enum: integer constants naming the states of the Parser state machine.
+ The fields are `inline`, which reduces code size and is a bit faster than a real enum.
+ The trailing comments describe how Parser.doParse handles each state.
+*/
+extern private class S {
+ public static inline var IGNORE_SPACES = 0; // skip '\n', '\r', '\t' and ' ', then switch to the pending `next` state
+ public static inline var BEGIN = 1; // start of content: '<' opens markup, anything else starts PCDATA
+ public static inline var BEGIN_NODE = 2; // just after '<': dispatch on '!', '?', '/' or a tag name
+ public static inline var TAG_NAME = 3; // reading an element name
+ public static inline var BODY = 4; // inside a start tag: '/', '>' or the start of an attribute name
+ public static inline var ATTRIB_NAME = 5; // reading an attribute name
+ public static inline var EQUALS = 6; // expecting '=' after an attribute name
+ public static inline var ATTVAL_BEGIN = 7; // expecting the opening quote (" or ') of an attribute value
+ public static inline var ATTRIB_VAL = 8; // reading an attribute value up to the matching quote
+ public static inline var CHILDS = 9; // after the '>' of a start tag: children are parsed recursively
+ public static inline var CLOSE = 10; // reading the name inside a closing tag
+ public static inline var WAIT_END = 11; // expecting '>' after the '/' seen in a start tag
+ public static inline var WAIT_END_RET = 12; // expecting '>' of a closing tag, then returning to the caller
+ public static inline var PCDATA = 13; // reading text up to the next '<'
+ public static inline var HEADER = 14; // reading "<? ... ?>"
+ public static inline var COMMENT = 15; // reading "<!-- ... -->"
+ public static inline var DOCTYPE = 16; // reading "<!DOCTYPE ... >", tracking '[' / ']' nesting
+ public static inline var CDATA = 17; // reading "<![CDATA[ ... ]]>"
+}
+
+/**
+	State-machine based XML parser producing an `Xml` tree.
+
+	The input is scanned character by character; the current state is one of the
+	`S` constants. Child elements are handled by recursion in `doParse`.
+	All errors are reported by throwing a `String`.
+**/
+class Parser
+{
+	/**
+		Parses `str` and returns a new document node (`Xml.createDocument()`)
+		to which every parsed top-level node has been added.
+
+		Throws a `String` when the input is malformed, including when an element
+		is still open at the end of the input.
+	**/
+	static public function parse(str:String)
+	{
+		var doc = Xml.createDocument();
+		doParse(str, 0, doc);
+		return doc;
+	}
+
+	/**
+		Parses the content of `parent` starting at index `p` of `str`.
+
+		Parsed nodes are added to `parent`. The function returns either:
+		- the index of the '>' that ends the closing tag of `parent`, or
+		- the index at which the input ended, when `parent` is not an element.
+
+		Throws a `String` on malformed input.
+	**/
+	static function doParse(str:String, ?p:Int = 0, ?parent:Xml):Int
+	{
+		var xml:Xml = null;       // element currently being built (start tag / attributes)
+		var state = S.BEGIN;
+		var next = S.BEGIN;       // state entered once IGNORE_SPACES has skipped the whitespace
+		var aname = null;         // name of the attribute whose value is being read
+		var start = 0;            // index where the token being read begins
+		var nsubs = 0;            // number of element/text/CDATA children seen at this level
+		var nbrackets = 0;        // '[' ... ']' nesting depth inside a DOCTYPE
+		var c = str.fastCodeAt(p);
+
+		while (!c.isEOF())
+		{
+			switch(state)
+			{
+				case S.IGNORE_SPACES:
+					switch(c)
+					{
+						case
+							'\n'.code,
+							'\r'.code,
+							'\t'.code,
+							' '.code:
+						default:
+							// re-dispatch the same character in the pending state
+							state = next;
+							continue;
+					}
+				case S.BEGIN:
+					switch(c)
+					{
+						case '<'.code:
+							state = S.IGNORE_SPACES;
+							next = S.BEGIN_NODE;
+						default:
+							start = p;
+							state = S.PCDATA;
+							continue;
+					}
+				case S.PCDATA:
+					if (c == '<'.code)
+					{
+						var child = Xml.createPCData(str.substr(start, p - start));
+						parent.addChild(child);
+						nsubs++;
+						state = S.IGNORE_SPACES;
+						next = S.BEGIN_NODE;
+					}
+				case S.CDATA:
+					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code)
+					{
+						var child = Xml.createCData(str.substr(start, p - start));
+						parent.addChild(child);
+						nsubs++;
+						// skip "]]"; the common increment below steps over '>'
+						p += 2;
+						state = S.BEGIN;
+					}
+				case S.BEGIN_NODE:
+					switch(c)
+					{
+						case '!'.code:
+							if (str.fastCodeAt(p + 1) == '['.code)
+							{
+								p += 2;
+								if (str.substr(p, 6).toUpperCase() != "CDATA[")
+									throw("Expected <![CDATA[");
+								// p now sits on the '[' that ends "CDATA["
+								p += 5;
+								state = S.CDATA;
+								start = p + 1;
+							}
+							else if (str.fastCodeAt(p + 1) == 'D'.code || str.fastCodeAt(p + 1) == 'd'.code)
+							{
+								if(str.substr(p + 2, 6).toUpperCase() != "OCTYPE")
+									throw("Expected <!DOCTYPE");
+								// p now sits on the character following "DOCTYPE"
+								p += 8;
+								state = S.DOCTYPE;
+								start = p + 1;
+							}
+							else if( str.fastCodeAt(p + 1) != '-'.code || str.fastCodeAt(p + 2) != '-'.code )
+								throw("Expected <!--");
+							else
+							{
+								p += 2;
+								state = S.COMMENT;
+								start = p + 1;
+							}
+						case '?'.code:
+							state = S.HEADER;
+							start = p;
+						case '/'.code:
+							if( parent == null )
+								throw("Expected node name");
+							start = p + 1;
+							state = S.IGNORE_SPACES;
+							next = S.CLOSE;
+						default:
+							state = S.TAG_NAME;
+							start = p;
+							continue;
+					}
+				case S.TAG_NAME:
+					if (!isValidChar(c))
+					{
+						if( p == start )
+							throw("Expected node name");
+						xml = Xml.createElement(str.substr(start, p - start));
+						parent.addChild(xml);
+						state = S.IGNORE_SPACES;
+						next = S.BODY;
+						continue;
+					}
+				case S.BODY:
+					switch(c)
+					{
+						case '/'.code:
+							state = S.WAIT_END;
+							nsubs++;
+						case '>'.code:
+							state = S.CHILDS;
+							nsubs++;
+						default:
+							state = S.ATTRIB_NAME;
+							start = p;
+							continue;
+					}
+				case S.ATTRIB_NAME:
+					if (!isValidChar(c))
+					{
+						var tmp;
+						if( start == p )
+							throw("Expected attribute name");
+						tmp = str.substr(start,p-start);
+						aname = tmp;
+						if( xml.exists(aname) )
+							throw("Duplicate attribute");
+						state = S.IGNORE_SPACES;
+						next = S.EQUALS;
+						continue;
+					}
+				case S.EQUALS:
+					switch(c)
+					{
+						case '='.code:
+							state = S.IGNORE_SPACES;
+							next = S.ATTVAL_BEGIN;
+						default:
+							throw("Expected =");
+					}
+				case S.ATTVAL_BEGIN:
+					switch(c)
+					{
+						case '"'.code, '\''.code:
+							state = S.ATTRIB_VAL;
+							// remember the quote position: the value ends at the same quote character
+							start = p;
+						default:
+							throw("Expected \"");
+					}
+				case S.ATTRIB_VAL:
+					if (c == str.fastCodeAt(start))
+					{
+						var val = str.substr(start+1,p-start-1);
+						xml.set(aname, val);
+						state = S.IGNORE_SPACES;
+						next = S.BODY;
+					}
+				case S.CHILDS:
+					// recurse into the element; p comes back on the '>' of its closing tag
+					p = doParse(str, p, xml);
+					start = p;
+					state = S.BEGIN;
+				case S.WAIT_END:
+					switch(c)
+					{
+						case '>'.code:
+							state = S.BEGIN;
+						default :
+							throw("Expected >");
+					}
+				case S.WAIT_END_RET:
+					switch(c)
+					{
+						case '>'.code:
+							// an element without any child gets an empty text node
+							if( nsubs == 0 )
+								parent.addChild(Xml.createPCData(""));
+							return p;
+						default :
+							throw("Expected >");
+					}
+				case S.CLOSE:
+					if (!isValidChar(c))
+					{
+						if( start == p )
+							throw("Expected node name");
+
+						var v = str.substr(start,p - start);
+						if (v != parent.nodeName)
+							throw "Expected </" +parent.nodeName + ">";
+
+						state = S.IGNORE_SPACES;
+						next = S.WAIT_END_RET;
+						continue;
+					}
+				case S.COMMENT:
+					if (c == '-'.code && str.fastCodeAt(p +1) == '-'.code && str.fastCodeAt(p + 2) == '>'.code)
+					{
+						parent.addChild(Xml.createComment(str.substr(start, p - start)));
+						p += 2;
+						state = S.BEGIN;
+					}
+				case S.DOCTYPE:
+					if(c == '['.code)
+						nbrackets++;
+					else if(c == ']'.code)
+						nbrackets--;
+					else if (c == '>'.code && nbrackets == 0)
+					{
+						parent.addChild(Xml.createDocType(str.substr(start, p - start)));
+						state = S.BEGIN;
+					}
+				case S.HEADER:
+					if (c == '?'.code && str.fastCodeAt(p + 1) == '>'.code)
+					{
+						p++;
+						// text between the opening '?' and the closing "?>"
+						var prolog = str.substr(start + 1, p - start - 2);
+						parent.addChild(Xml.createProlog(prolog));
+						state = S.BEGIN;
+					}
+			}
+			c = str.fastCodeAt(++p);
+		}
+
+		// End of input reached.
+		if (state == S.BEGIN)
+		{
+			start = p;
+			state = S.PCDATA;
+		}
+
+		if (state == S.PCDATA)
+		{
+			// An element level only ends through its closing tag (WAIT_END_RET).
+			// Running out of input here means the element was never closed.
+			if (parent != null && parent.nodeType == Xml.Element)
+				throw "Unclosed node <" + parent.nodeName + ">";
+			if (p != start || nsubs == 0)
+				parent.addChild(Xml.createPCData(str.substr(start, p - start)));
+			return p;
+		}
+
+		throw "Unexpected end";
+	}
+
+	/**
+		Tells whether character code `c` may appear in an element or attribute name:
+		ASCII letters, digits, ':', '.', '_' and '-'.
+	**/
+	static inline function isValidChar(c) {
+		return (c >= 'a'.code && c <= 'z'.code) || (c >= 'A'.code && c <= 'Z'.code) || (c >= '0'.code && c <= '9'.code) || c == ':'.code || c == '.'.code || c == '_'.code || c == '-'.code;
+	}
+}
View
112 std/js/_std/Xml.hx
@@ -36,20 +36,6 @@ enum XmlType {
public static var Prolog(default,null) : XmlType;
public static var Document(default,null) : XmlType;
- static var enode = ~/^<([a-zA-Z0-9:._-]+)/;
- static var ecdata = ~/^<!\[CDATA\[/i;
- static var edoctype = ~/^<!DOCTYPE /i;
- static var eend = ~/^<\/([a-zA-Z0-9:._-]+)>/;
- static var epcdata = ~/^[^<]+/;
- static var ecomment = ~/^<!--/;
- static var eprolog = ~/^<\?[^\?]+\?>/;
-
- static var eattribute = ~/^\s*([a-zA-Z0-9:_-]+)\s*=\s*(["'])([^\2]*?)\2/; //"
- static var eclose = ~/^[ \r\n\t]*(>|(\/>))/;
- static var ecdata_end = ~/\]\]>/;
- static var edoctype_elt = ~/[\[|\]>]/;
- static var ecomment_end = ~/-->/;
-
public var nodeType(default,null) : XmlType;
public var nodeName(getNodeName,setNodeName) : String;
public var nodeValue(getNodeValue,setNodeValue) : String;
@@ -62,103 +48,7 @@ enum XmlType {
var _parent : Xml;
public static function parse( str : String ) : Xml {
- var rules = [enode,epcdata,eend,ecdata,edoctype,ecomment,eprolog];
- var nrules = rules.length;
- var current = Xml.createDocument();
-
- var stack = new List();
- while( str.length > 0 ) {
- var i = 0;
- while( i < nrules ) {
- var r = rules[i];
- if( r.match(str) ) {
- switch( i ) {
- case 0: // Node
- var x = Xml.createElement(r.matched(1));
- current.addChild(x);
- str = r.matchedRight();
- while( eattribute.match(str) ) {
- x.set(eattribute.matched(1),eattribute.matched(3));
- str = eattribute.matchedRight();
- }
- if( !eclose.match(str) ) {
- i = nrules;
- break;
- }
- if( eclose.matched(1) == ">" ) {
- stack.push(current);
- current = x;
- }
- str = eclose.matchedRight();
- case 1: // PCData
- var x = Xml.createPCData(r.matched(0));
- current.addChild(x);
- str = r.matchedRight();
- case 2: // End Node
- untyped if( current._children != null && current._children.length == 0 ) {
- var e = Xml.createPCData("");
- current.addChild(e);
- }
- untyped if( r.matched(1) != current._nodeName || stack.isEmpty() ) {
- i = nrules;
- break;
- }
- current = stack.pop();
- str = r.matchedRight();
- case 3: // CData
- str = r.matchedRight();
- if( !ecdata_end.match(str) )
- throw "End of CDATA section not found";
- var x = Xml.createCData(ecdata_end.matchedLeft());
- current.addChild(x);
- str = ecdata_end.matchedRight();
- case 4: // DocType
- var pos = 0;
- var count = 0;
- var old = str;
- while( true ) {
- if( !edoctype_elt.match(str) )
- throw "End of DOCTYPE section not found";
- var p = edoctype_elt.matchedPos();
- pos += p.pos + p.len;
- str = edoctype_elt.matchedRight();
- switch( edoctype_elt.matched(0) ) {
- case "[": count++;
- case "]": count--; if( count < 0 ) throw "Invalid ] found in DOCTYPE declaration";
- default:
- if( count == 0 )
- break;
- }
- }
- var x = Xml.createDocType(old.substr(10,pos-11));
- current.addChild(x);
- case 5: // Comment
- if( !ecomment_end.match(str) )
- throw "Unclosed Comment";
- var p = ecomment_end.matchedPos();
- var x = Xml.createComment(str.substr(4,p.pos+p.len-7));
- current.addChild(x);
- str = ecomment_end.matchedRight();
- case 6: // Prolog
- var prolog = r.matched(0);
- var x = Xml.createProlog(prolog.substr(2,prolog.length - 4));
- current.addChild(x);
- str = r.matchedRight();
- }
- break;
- }
- i += 1;
- }
- if( i == nrules ) {
- if( str.length > 10 )
- throw ("Xml parse error : Unexpected "+str.substr(0,10)+"...");
- else
- throw ("Xml parse error : Unexpected "+str);
- }
- }
- if( !stack.isEmpty() )
- throw "Xml parse error : Unclosed "+stack.last().nodeName;
- untyped return current;
+ return haxe.xml.Parser.parse(str);
}
private function new() : Void {

0 comments on commit 8bae7d8

Please sign in to comment.
Something went wrong with that request. Please try again.