Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added .rl files to generate HpricotScanner.cs using Ragel
- Loading branch information
Showing
3 changed files
with
497 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,8 @@ | ||
* The current version of ironruby-hpricot is based off of hpricot 0.6.164, | ||
but I'm already starting to work on aligning its code base to new features | ||
from 0.6.207 (see _why's git repository http://github.com/why/hpricot/). | ||
|
||
* HpricotScanner.rl is still missing: I think I'll give a higher priority to | ||
this task to be able to easily test the generation of HpricotScanner.cs by | ||
passing -F1 and -G1 flags to Ragel, just to see which one is the best | ||
speed and memory wise. | ||
|
||
* README needs to be populated with notes and building instructions. The code | ||
needs the right dose of comments. | ||
|
||
* The current version of ironruby-hpricot is based off of hpricot 0.6.164, | ||
but I'm already starting to work on aligning its code base to new features | ||
from 0.6.207 (see _why's git repository http://github.com/why/hpricot/). | ||
|
||
* README needs to be populated with notes and building instructions. The code | ||
needs the right dose of comments. | ||
|
||
* Add tests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
%%{ | ||
|
||
# Ragel machine "hpricot_common": grammar rules for an HTML/XML tokenizer. | ||
# The named actions and the macros used in the inline actions below | ||
# (TEXT_PASS, ELE, EBLK) are not defined in the lines shown here. | ||
machine hpricot_common; | ||
|
||
# | ||
# HTML tokens | ||
# (a blatant rip from HTree) | ||
# | ||
# newline: a single '\n'; the inline action increments curline on that character. | ||
newline = '\n' @{curline += 1;} ; | ||
# NameChar: characters allowed inside a name (hyphen, alphanumerics, . _ : ?). | ||
NameChar = [\-A-Za-z0-9._:?] ; | ||
# Name: starts with a letter, underscore or colon, then any run of NameChar. | ||
Name = [A-Za-z_:] NameChar* ; | ||
# Literal opening/closing delimiters of comments and CDATA sections. | ||
StartComment = "<!--" ; | ||
EndComment = "-->" ; | ||
StartCdata = "<![CDATA[" ; | ||
EndCdata = "]]>" ; | ||
|
||
# Tag and attribute rules. In Ragel, ">x" embeds action x on entering the | ||
# expression and "%x" embeds it on leaving it. The named actions used here | ||
# (_tag, tag, _akey, akey, _aval, aval, aunq, new_attr, save_attr) are not | ||
# defined in the lines shown here. | ||
NameCap = Name >_tag %tag; | ||
NameAttr = NameChar+ >_akey %akey ; | ||
# Single-quoted value: a backslash followed by a single quote, or any character other than a single quote. | ||
Q1Char = ( "\\\'" | [^'] ) ; | ||
Q1Attr = Q1Char* >_aval %aval ; | ||
# Double-quoted value: a backslash followed by a double quote, or any character other than a double quote. | ||
Q2Char = ( "\\\"" | [^"] ) ; | ||
Q2Attr = Q2Char* >_aval %aval ; | ||
# Unquoted value: either one whitespace character, or a first character that is | ||
# not whitespace, an angle bracket or a quote, followed by any run of characters | ||
# that are not whitespace or angle brackets. | ||
UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ; | ||
# Nmtoken: same pattern and actions as NameAttr; used below for attributes written without a value. | ||
Nmtoken = NameChar+ >_akey %akey ; | ||
|
||
# Attr: name = value, where the value is double-quoted, single-quoted, or | ||
# unquoted and followed by at least one whitespace character. | ||
Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; | ||
# AttrEnd: last attribute before the closing bracket: name = optional unquoted value, or a bare Nmtoken. | ||
AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ; | ||
# AttrSet: one non-final attribute, either a full Attr or a bare Nmtoken followed by whitespace. | ||
AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ; | ||
# StartTag / EmptyTag: a tag name followed by whitespace and attributes, or the bare | ||
# tag name; they differ only in the closing delimiter (">" versus "/>"). | ||
StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">"; | ||
EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ; | ||
|
||
# EndTag: closing tag; whitespace is allowed between the name and ">". | ||
EndTag = "</" NameCap space* ">" ; | ||
# XML declaration pieces. Each pseudo-attribute value may be single- or | ||
# double-quoted; the xmlver, xmlenc and xmlsd actions run on leaving the value | ||
# (they are not defined in the lines shown here). | ||
XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ; | ||
XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ; | ||
XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ; | ||
XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ; | ||
XmlYesNo = ("yes" | "no") >_aval %xmlsd ; | ||
XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ; | ||
# XmlDecl: version is required, encoding and standalone are optional, and the | ||
# "?" before the final ">" is optional as well. | ||
XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ; | ||
|
||
# DOCTYPE pieces: quoted system and public identifiers, captured through the | ||
# sysid and pubid leaving actions. | ||
SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ; | ||
PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"' | | ||
"'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ; | ||
# ExternalID: "SYSTEM", or "PUBLIC" plus a public literal; either form may be followed by a system literal. | ||
ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ; | ||
# DocType: the optional bracketed section may contain anything except a closing square bracket. | ||
DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ; | ||
# Processing instruction delimiters; TEXT_PASS() runs on entering the target name. | ||
StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ; | ||
EndXmlProcIns = "?"? ">" ; | ||
|
||
# Sub-scanners for block content. Each one calls TEXT_PASS() for every | ||
# character until its end delimiter matches, then calls EBLK(kind, n) and | ||
# jumps back to the main scanner with fgoto. | ||
# NOTE(review): n equals the maximum length of the end delimiter (3, 3, 2); | ||
# presumably the number of trailing characters EBLK excludes - confirm against | ||
# the EBLK definition, which is not shown here. | ||
html_comment := |* | ||
EndComment @{ EBLK(comment, 3); fgoto main; }; | ||
any | newline { TEXT_PASS(); }; | ||
*|; | ||
|
||
# CDATA section body: same shape as html_comment, ended by EndCdata. | ||
html_cdata := |* | ||
EndCdata @{ EBLK(cdata, 3); fgoto main; }; | ||
any | newline { TEXT_PASS(); }; | ||
*|; | ||
|
||
# Processing instruction body: same shape, ended by EndXmlProcIns. | ||
html_procins := |* | ||
EndXmlProcIns @{ EBLK(procins, 2); fgoto main; }; | ||
any | newline { TEXT_PASS(); }; | ||
*|; | ||
|
||
# Top-level scanner. Every markup pattern runs the newEle action on entry (not | ||
# defined in the lines shown here). Complete tokens are reported through | ||
# ELE(kind); processing instructions, comments and CDATA sections instead | ||
# switch to their dedicated sub-scanner with fgoto. Any other character, | ||
# newlines included, is handled by TEXT_PASS(). | ||
main := |* | ||
XmlDecl >newEle { ELE(xmldecl); }; | ||
DocType >newEle { ELE(doctype); }; | ||
StartXmlProcIns >newEle { fgoto html_procins; }; | ||
StartTag >newEle { ELE(stag); }; | ||
EndTag >newEle { ELE(etag); }; | ||
EmptyTag >newEle { ELE(emptytag); }; | ||
StartComment >newEle { fgoto html_comment; }; | ||
StartCdata >newEle { fgoto html_cdata; }; | ||
any | newline { TEXT_PASS(); }; | ||
*|; | ||
|
||
}%%; |
Oops, something went wrong.