Skip to content

Commit

Permalink
Added .rl files to generate HpricotScanner.cs using Ragel
Browse files Browse the repository at this point in the history
  • Loading branch information
nrk committed Feb 22, 2009
1 parent 9143b59 commit c6f2802
Show file tree
Hide file tree
Showing 3 changed files with 497 additions and 12 deletions.
19 changes: 7 additions & 12 deletions TODO
@@ -1,13 +1,8 @@
* The current version of ironruby-hpricot is based off of hpricot 0.6.164,
but I'm already starting to work on aligning its code base to new features
from 0.6.207 (see _why's git repository http://github.com/why/hpricot/).

* HpricotScanner.rl is still missing: I think I'll give a higher priority to
this task so that I can easily test the generation of HpricotScanner.cs by
passing the -F1 and -G1 flags to Ragel, just to see which one is the best
speed- and memory-wise.

* README needs to be populated with notes and building instructions. The code
needs the right dose of comments.

* The current version of ironruby-hpricot is based off of hpricot 0.6.164,
but I'm already starting to work on aligning its code base to new features
from 0.6.207 (see _why's git repository http://github.com/why/hpricot/).

* README needs to be populated with notes and building instructions. The code
needs the right dose of comments.

* Add tests
76 changes: 76 additions & 0 deletions src/Ragel/HpricotScanner.common.rl
@@ -0,0 +1,76 @@
%%{

machine hpricot_common;

#
# HTML tokens
# (a blatant rip from HTree)
#
# Lexical building blocks shared by the tag, attribute and declaration
# rules further down. The named actions used here (_tag, tag, _akey, akey,
# _aval, aval, aunq) are not defined in this section -- presumably they are
# supplied by the host .rl file that includes this machine (TODO confirm).
#

# A line feed; the embedded action increments curline on every match.
newline = '\n' @{curline += 1;} ;
# Characters allowed inside a name: hyphen, ASCII letters, digits, dot,
# underscore, colon and question mark.
NameChar = [\-A-Za-z0-9._:?] ;
# A name: one letter, underscore or colon followed by any number of NameChar.
Name = [A-Za-z_:] NameChar* ;
# Literal delimiters of comment and CDATA blocks.
StartComment = "<!--" ;
EndComment = "-->" ;
StartCdata = "<![CDATA[" ;
EndCdata = "]]>" ;

# A tag name; _tag runs on entering the name and tag runs on leaving it.
NameCap = Name >_tag %tag;
# An attribute key: one or more NameChar, bracketed by _akey / akey.
NameAttr = NameChar+ >_akey %akey ;
# Body of a single-quoted value: a backslash-escaped single quote or any
# character other than a single quote.
Q1Char = ( "\\\'" | [^'] ) ;
Q1Attr = Q1Char* >_aval %aval ;
# Body of a double-quoted value: a backslash-escaped double quote or any
# character other than a double quote.
Q2Char = ( "\\\"" | [^"] ) ;
Q2Attr = Q2Char* >_aval %aval ;
# An unquoted value: either a single whitespace character, or a first
# character that is not whitespace, an angle bracket or a quote, followed by
# any run of characters that are not whitespace or angle brackets. Only the
# second alternative carries the aunq leaving action.
UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ;
# A valueless attribute token; same pattern and actions as NameAttr.
Nmtoken = NameChar+ >_akey %akey ;

#
# Attribute and tag rules.
# new_attr and save_attr are actions defined outside this section; by their
# placement they run when an attribute starts and when it has been fully
# matched, respectively.
#

# key = value, where the value is double-quoted, single-quoted, or unquoted
# and followed by at least one whitespace character; trailing whitespace is
# consumed as well.
Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ;
# The final attribute before the closing bracket: key = with an optional
# unquoted value (no trailing whitespace required), or a bare Nmtoken.
AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ;
# A non-final attribute: a full Attr, or a bare Nmtoken followed by whitespace.
AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ;
# Opening tag: name, whitespace, any number of attributes and an optional
# final attribute, closed by the right angle bracket; or just the bare name.
StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">";
# Self-closing tag: same shape as StartTag but closed by slash plus bracket.
EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ;

# Closing tag: name with optional whitespace before the closing bracket.
EndTag = "</" NameCap space* ">" ;
#
# XML declaration.
# Each pseudo-attribute value is bracketed by _aval on entry and by a
# dedicated leaving action (xmlver, xmlenc, xmlsd) defined outside this
# section. Values may be single- or double-quoted.
#

# Version value: letters, digits, underscore, dot, colon and hyphen.
XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ;
XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ;
# Encoding name: a letter followed by letters, digits, dot, underscore or hyphen.
XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ;
XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ;
# Standalone flag: the literal yes or no.
XmlYesNo = ("yes" | "no") >_aval %xmlsd ;
XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ;
# Full declaration: version is mandatory; encoding and standalone are
# optional and must appear in that order. The question mark before the
# closing bracket is optional, so a declaration closed by the bracket alone
# is also accepted.
XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;

#
# DOCTYPE and processing-instruction rules.
# sysid and pubid are leaving actions defined outside this section.
#

# System identifier: any run of characters between matching quotes.
SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ;
# Public identifier: a restricted character set between quotes. Both the
# double-quoted and the single-quoted alternative use the same character
# class, which itself contains the single quote.
PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"' |
"'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ;
# SYSTEM, or PUBLIC plus a public id; either may be followed by a system literal.
ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ;
# Document type declaration: root name, optional external id, and an
# optional bracketed internal subset that may not contain a closing bracket.
DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ;
# Start of a processing instruction: the opener, a Name (TEXT_PASS() is
# invoked on entering the name) and at least one whitespace character.
StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;
# End of a processing instruction; the question mark is optional, so a bare
# closing bracket also matches.
EndXmlProcIns = "?"? ">" ;

# Scanner entered from main after StartComment has been seen.
# On EndComment it calls EBLK(comment, 3) and jumps back to main; every
# other character invokes TEXT_PASS(). newline is listed next to any so that
# its curline-incrementing action is embedded in this scanner too.
# EBLK and TEXT_PASS are defined outside this section; the 3 presumably is
# the length of the closing delimiter -- TODO confirm against EBLK.
html_comment := |*
EndComment @{ EBLK(comment, 3); fgoto main; };
any | newline { TEXT_PASS(); };
*|;

# Scanner entered from main after StartCdata has been seen.
# On EndCdata it calls EBLK(cdata, 3) and jumps back to main; every other
# character invokes TEXT_PASS(). newline is listed next to any so that its
# curline-incrementing action is embedded in this scanner too.
# EBLK and TEXT_PASS are defined outside this section; the 3 presumably is
# the length of the closing delimiter -- TODO confirm against EBLK.
html_cdata := |*
EndCdata @{ EBLK(cdata, 3); fgoto main; };
any | newline { TEXT_PASS(); };
*|;

# Scanner entered from main after StartXmlProcIns has been seen.
# On EndXmlProcIns it calls EBLK(procins, 2) and jumps back to main; every
# other character invokes TEXT_PASS(). newline is listed next to any so that
# its curline-incrementing action is embedded in this scanner too.
# NOTE(review): EndXmlProcIns also matches a bare closing bracket (one
# character), while EBLK is always given 2 here -- confirm how EBLK uses
# that argument before relying on it.
html_procins := |*
EndXmlProcIns @{ EBLK(procins, 2); fgoto main; };
any | newline { TEXT_PASS(); };
*|;

# Top-level scanner.
# Every markup pattern runs the newEle action on entry (defined outside this
# section). Complete constructs (XML declaration, DOCTYPE, start / end /
# empty tags) hand off to ELE(...) with the matching token kind; block
# openers (processing instruction, comment, CDATA) jump to the dedicated
# sub-scanner that consumes the block body. Anything else is passed through
# character by character via TEXT_PASS().
# Ragel scanners pick the longest match and break ties in favour of the
# pattern listed first, so the order of the alternatives below is significant.
main := |*
XmlDecl >newEle { ELE(xmldecl); };
DocType >newEle { ELE(doctype); };
StartXmlProcIns >newEle { fgoto html_procins; };
StartTag >newEle { ELE(stag); };
EndTag >newEle { ELE(etag); };
EmptyTag >newEle { ELE(emptytag); };
StartComment >newEle { fgoto html_comment; };
StartCdata >newEle { fgoto html_cdata; };
any | newline { TEXT_PASS(); };
*|;

}%%;

0 comments on commit c6f2802

Please sign in to comment.