Permalink
Browse files

use new haxe.xml.Parser for JS : faster, not based on Regexp but state machine

git-svn-id: http://haxe.googlecode.com/svn/trunk@4486 f16182fa-f095-11de-8f43-4547254af6c6
  • Loading branch information...
ncannasse
ncannasse committed Apr 20, 2012
1 parent 7743b22 commit 8bae7d8a8bea74e77f57fa07c1ec5ae8de179128
Showing with 286 additions and 111 deletions.
  1. +1 −0 doc/CHANGES.txt
  2. +284 −0 std/haxe/xml/Parser.hx
  3. +1 −111 std/js/_std/Xml.hx
View
@@ -8,6 +8,7 @@
js : forbid static 'length' (issue since object is a Function)
all : does not allow overriding var/prop
flash : removed wrapping for Xml nodes, use instead specific compare when comparing two typed nodes
+ js : use new haxe.xml.Parser (faster, not based on Regexp)
2012-04-14: 2.09
all : optimized const == const and const != const (with different const types)
View
@@ -0,0 +1,284 @@
+package haxe.xml;
+
+using StringTools;
+
+/*
+	Poor man's enum: plain inlined Int constants naming the states of the
+	Parser state machine (reduces code size and is a bit faster since the
+	values are inlined at each use).
+*/
+extern private class S {
+	/* skip '\n', '\r', '\t' and ' ', then resume in the pending `next` state */
+	static public inline var IGNORE_SPACES : Int = 0;
+	/* start of content: '<' opens markup, anything else starts text */
+	static public inline var BEGIN : Int = 1;
+	/* first character after '<': '!', '?', '/' or the start of a tag name */
+	static public inline var BEGIN_NODE : Int = 2;
+	/* reading an element name */
+	static public inline var TAG_NAME : Int = 3;
+	/* inside an opening tag: '/', '>' or the start of an attribute name */
+	static public inline var BODY : Int = 4;
+	/* reading an attribute name */
+	static public inline var ATTRIB_NAME : Int = 5;
+	/* expecting '=' after an attribute name */
+	static public inline var EQUALS : Int = 6;
+	/* expecting the opening quote of an attribute value */
+	static public inline var ATTVAL_BEGIN : Int = 7;
+	/* reading an attribute value up to the matching quote */
+	static public inline var ATTRIB_VAL : Int = 8;
+	/* parsing the children of the element that was just opened */
+	static public inline var CHILDS : Int = 9;
+	/* reading the name of a closing tag */
+	static public inline var CLOSE : Int = 10;
+	/* expecting '>' after the '/' of a self-closing tag */
+	static public inline var WAIT_END : Int = 11;
+	/* expecting '>' at the end of a closing tag, then returning to the caller */
+	static public inline var WAIT_END_RET : Int = 12;
+	/* reading character data up to the next '<' */
+	static public inline var PCDATA : Int = 13;
+	/* reading a "<? ... ?>" prolog */
+	static public inline var HEADER : Int = 14;
+	/* reading a "<!-- ... -->" comment */
+	static public inline var COMMENT : Int = 15;
+	/* reading a "<!DOCTYPE ... >" declaration */
+	static public inline var DOCTYPE : Int = 16;
+	/* reading a "<![CDATA[ ... ]]>" section */
+	static public inline var CDATA : Int = 17;
+}
+
+/**
+	XML parser implemented as a character-level state machine (the states are
+	the `S` constants declared in this module).
+**/
+class Parser
+{
+	/**
+		Parses `str` and returns a new document node (`Xml.createDocument()`)
+		holding every node found in the input.
+
+		Throws a String describing the first syntax error encountered.
+	**/
+	static public function parse(str:String)
+	{
+		var doc = Xml.createDocument();
+		doParse(str, 0, doc);
+		return doc;
+	}
+
+	/**
+		Parses `str` starting at index `p` and appends the resulting nodes to
+		`parent`. Calls itself recursively (state `S.CHILDS`) to parse the
+		children of each opened element.
+
+		Returns the index of the '>' that ends the closing tag of `parent`,
+		or the position reached when the input is exhausted at document level.
+
+		Throws a String on malformed input, including a closing tag that does
+		not match an open element and an element left unclosed at end of input.
+	**/
+	static function doParse(str:String, ?p:Int = 0, ?parent:Xml):Int
+	{
+		var xml:Xml = null;      // element currently being opened
+		var state = S.BEGIN;
+		var next = S.BEGIN;      // state to enter once S.IGNORE_SPACES is done
+		var aname = null;        // name of the attribute currently being read
+		var start = 0;           // start index of the token currently being read
+		var nsubs = 0;           // children added so far (elements, PCDATA and CDATA only)
+		var nbrackets = 0;       // '[' / ']' nesting depth inside a DOCTYPE
+		var c = str.fastCodeAt(p);
+
+		while (!c.isEOF())
+		{
+			switch(state)
+			{
+				case S.IGNORE_SPACES:
+					switch(c)
+					{
+						case
+							'\n'.code,
+							'\r'.code,
+							'\t'.code,
+							' '.code:
+						default:
+							// re-examine the same character in the pending state
+							state = next;
+							continue;
+					}
+				case S.BEGIN:
+					switch(c)
+					{
+						case '<'.code:
+							state = S.IGNORE_SPACES;
+							next = S.BEGIN_NODE;
+						default:
+							start = p;
+							state = S.PCDATA;
+							continue;
+					}
+				case S.PCDATA:
+					if (c == '<'.code)
+					{
+						var child = Xml.createPCData(str.substr(start, p - start));
+						parent.addChild(child);
+						nsubs++;
+						state = S.IGNORE_SPACES;
+						next = S.BEGIN_NODE;
+					}
+				case S.CDATA:
+					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code)
+					{
+						var child = Xml.createCData(str.substr(start, p - start));
+						parent.addChild(child);
+						nsubs++;
+						// skip "]]"; the loop increment then steps over '>'
+						p += 2;
+						state = S.BEGIN;
+					}
+				case S.BEGIN_NODE:
+					switch(c)
+					{
+						case '!'.code:
+							if (str.fastCodeAt(p + 1) == '['.code)
+							{
+								p += 2;
+								if (str.substr(p, 6).toUpperCase() != "CDATA[")
+									throw("Expected <![CDATA[");
+								p += 5;
+								state = S.CDATA;
+								start = p + 1;
+							}
+							else if (str.fastCodeAt(p + 1) == 'D'.code || str.fastCodeAt(p + 1) == 'd'.code)
+							{
+								if(str.substr(p + 2, 6).toUpperCase() != "OCTYPE")
+									throw("Expected <!DOCTYPE");
+								p += 8;
+								state = S.DOCTYPE;
+								start = p + 1;
+							}
+							else if( str.fastCodeAt(p + 1) != '-'.code || str.fastCodeAt(p + 2) != '-'.code )
+								throw("Expected <!--");
+							else
+							{
+								p += 2;
+								state = S.COMMENT;
+								start = p + 1;
+							}
+						case '?'.code:
+							state = S.HEADER;
+							start = p;
+						case '/'.code:
+							if( parent == null )
+								throw("Expected node name");
+							start = p + 1;
+							state = S.IGNORE_SPACES;
+							next = S.CLOSE;
+						default:
+							state = S.TAG_NAME;
+							start = p;
+							continue;
+					}
+				case S.TAG_NAME:
+					if (!isValidChar(c))
+					{
+						if( p == start )
+							throw("Expected node name");
+						xml = Xml.createElement(str.substr(start, p - start));
+						parent.addChild(xml);
+						state = S.IGNORE_SPACES;
+						next = S.BODY;
+						continue;
+					}
+				case S.BODY:
+					switch(c)
+					{
+						case '/'.code:
+							state = S.WAIT_END;
+							nsubs++;
+						case '>'.code:
+							state = S.CHILDS;
+							nsubs++;
+						default:
+							state = S.ATTRIB_NAME;
+							start = p;
+							continue;
+					}
+				case S.ATTRIB_NAME:
+					if (!isValidChar(c))
+					{
+						var tmp;
+						if( start == p )
+							throw("Expected attribute name");
+						tmp = str.substr(start,p-start);
+						aname = tmp;
+						if( xml.exists(aname) )
+							throw("Duplicate attribute");
+						state = S.IGNORE_SPACES;
+						next = S.EQUALS;
+						continue;
+					}
+				case S.EQUALS:
+					switch(c)
+					{
+						case '='.code:
+							state = S.IGNORE_SPACES;
+							next = S.ATTVAL_BEGIN;
+						default:
+							throw("Expected =");
+					}
+				case S.ATTVAL_BEGIN:
+					switch(c)
+					{
+						case '"'.code, '\''.code:
+							state = S.ATTRIB_VAL;
+							// remember the quote position: the value ends at the same quote character
+							start = p;
+						default:
+							throw("Expected \"");
+					}
+				case S.ATTRIB_VAL:
+					if (c == str.fastCodeAt(start))
+					{
+						var val = str.substr(start+1,p-start-1);
+						xml.set(aname, val);
+						state = S.IGNORE_SPACES;
+						next = S.BODY;
+					}
+				case S.CHILDS:
+					// the recursive call returns the index of the '>' closing `xml`
+					p = doParse(str, p, xml);
+					start = p;
+					state = S.BEGIN;
+				case S.WAIT_END:
+					switch(c)
+					{
+						case '>'.code:
+							state = S.BEGIN;
+						default :
+							throw("Expected >");
+					}
+				case S.WAIT_END_RET:
+					switch(c)
+					{
+						case '>'.code:
+							// an element closed without any counted child gets an empty PCDATA child
+							if( nsubs == 0 )
+								parent.addChild(Xml.createPCData(""));
+							return p;
+						default :
+							throw("Expected >");
+					}
+				case S.CLOSE:
+					if (!isValidChar(c))
+					{
+						if( start == p )
+							throw("Expected node name");
+
+						var v = str.substr(start,p - start);
+						// a closing tag is only legal while an element is open; at
+						// document level `parent` is the document node, which has no nodeName
+						if (parent == null || parent.nodeType != Xml.Element)
+							throw "Unexpected </" + v + ">, tag is not open";
+						if (v != parent.nodeName)
+							throw "Expected </" +parent.nodeName + ">";
+
+						state = S.IGNORE_SPACES;
+						next = S.WAIT_END_RET;
+						continue;
+					}
+				case S.COMMENT:
+					// NOTE(review): comments, doctypes and prologs do not increment nsubs,
+					// so an element containing only a comment still receives the empty
+					// PCDATA child added in S.WAIT_END_RET - confirm this is intended.
+					if (c == '-'.code && str.fastCodeAt(p +1) == '-'.code && str.fastCodeAt(p + 2) == '>'.code)
+					{
+						parent.addChild(Xml.createComment(str.substr(start, p - start)));
+						p += 2;
+						state = S.BEGIN;
+					}
+				case S.DOCTYPE:
+					if(c == '['.code)
+						nbrackets++;
+					else if(c == ']'.code)
+						nbrackets--;
+					else if (c == '>'.code && nbrackets == 0)
+					{
+						parent.addChild(Xml.createDocType(str.substr(start, p - start)));
+						state = S.BEGIN;
+					}
+				case S.HEADER:
+					if (c == '?'.code && str.fastCodeAt(p + 1) == '>'.code)
+					{
+						p++;
+						// text between the leading "<?" and the trailing "?>"
+						var prolog = str.substr(start + 1, p - start - 2);
+						parent.addChild(Xml.createProlog(prolog));
+						state = S.BEGIN;
+					}
+			}
+			c = str.fastCodeAt(++p);
+		}
+
+		if (state == S.BEGIN)
+		{
+			start = p;
+			state = S.PCDATA;
+		}
+
+		if (state == S.PCDATA)
+		{
+			// input ended while an element was still open: report it instead of
+			// silently returning a truncated tree
+			if (parent != null && parent.nodeType == Xml.Element)
+				throw "Unclosed node <" + parent.nodeName + ">";
+			if (p != start || nsubs == 0)
+				parent.addChild(Xml.createPCData(str.substr(start, p - start)));
+			return p;
+		}
+
+		throw "Unexpected end";
+	}
+
+	/**
+		Tells whether character code `c` may appear in an element or attribute
+		name: ASCII letters, digits, ':', '.', '_' and '-'.
+	**/
+	static inline function isValidChar(c) {
+		return (c >= 'a'.code && c <= 'z'.code) || (c >= 'A'.code && c <= 'Z'.code) || (c >= '0'.code && c <= '9'.code) || c == ':'.code || c == '.'.code || c == '_'.code || c == '-'.code;
+	}
+}
Oops, something went wrong.

0 comments on commit 8bae7d8

Please sign in to comment.