Permalink
Browse files

Added email address example

  • Loading branch information...
1 parent ed08be6 commit ff42c5f4e08d42fecb95b1b4b3270010bd86e2b4 @mjackson committed Feb 20, 2011
Showing with 339 additions and 0 deletions.
  1. +161 −0 examples/email.citrus
  2. +178 −0 examples/email_test.rb
View
@@ -0,0 +1,161 @@
+# A grammar for email addresses that closely conforms to RFC 5322, with the
+# notable exception that this grammar does not allow for folding white space
+# or comments: the optional [CFWS] and [FWS] elements of the ABNF quoted above
+# each rule are left out of the rule bodies.
+grammar EmailAddress
+  root addr-spec
+
+  # ALPHA = %x41-5A / %x61-7A  ; A-Z / a-z
+  rule ALPHA
+    [A-Za-z]
+  end
+
+  # DIGIT = %x30-39  ; 0-9
+  rule DIGIT
+    [0-9]
+  end
+
+  # DQUOTE = %x22  ; " (Double Quote)
+  rule DQUOTE
+    '"'
+  end
+
+  # NO-WS-CTL = %d1-8 /    ; US-ASCII control characters
+  #             %d11 /     ; that do not include the
+  #             %d12 /     ; carriage return, line feed,
+  #             %d14-31 /  ; and white space characters
+  #             %d127
+  rule NO-WS-CTL
+    [\x01-\x08\x0B\x0C\x0E-\x1F\x7F]
+  end
+
+  # quoted-pair = ("\" text) / obs-qp
+  rule quoted-pair
+    ("\\" text) | obs-qp
+  end
+
+  # atext = ALPHA / DIGIT /  ; Printable US-ASCII
+  #         "!" / "#" /      ; characters not including
+  #         "$" / "%" /      ; specials. Used for atoms.
+  #         "&" / "'" /
+  #         "*" / "+" /
+  #         "-" / "/" /
+  #         "=" / "?" /
+  #         "^" / "_" /
+  #         "`" / "{" /
+  #         "|" / "}" /
+  #         "~"
+  #
+  # The "-" is escaped inside the character class on purpose: an unescaped
+  # "+-/" is read as the range %d43-47, which would also admit "," and ".",
+  # neither of which is atext.
+  rule atext
+    ALPHA | DIGIT | [!\#$\%&'*+\-/=?^_`{|}~]
+  end
+
+  # atom = [CFWS] 1*atext [CFWS]
+  rule atom
+    atext 1*
+  end
+
+  # dot-atom-text = 1*atext *("." 1*atext)
+  rule dot-atom-text
+    atext 1* ("." atext 1*)*
+  end
+
+  # dot-atom = [CFWS] dot-atom-text [CFWS]
+  rule dot-atom
+    dot-atom-text
+  end
+
+  # qtext = %d33 /      ; Printable US-ASCII
+  #         %d35-91 /   ; characters not including
+  #         %d93-126 /  ; "\" or the quote character
+  #         obs-qtext
+  rule qtext
+    [\x21\x23-\x5B\x5D-\x7E] | obs-qtext
+  end
+
+  # qcontent = qtext / quoted-pair
+  rule qcontent
+    qtext | quoted-pair
+  end
+
+  # quoted-string = [CFWS]
+  #                 DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+  #                 [CFWS]
+  rule quoted-string
+    '"' qcontent* '"'
+  end
+
+  # word = atom / quoted-string
+  rule word
+    atom | quoted-string
+  end
+
+  # addr-spec = local-part "@" domain
+  rule addr-spec
+    local-part "@" domain
+  end
+
+  # local-part = dot-atom / quoted-string / obs-local-part
+  #
+  # NOTE(review): assuming alternatives are tried in order (PEG-style),
+  # obs-local-part is only attempted after dot-atom and quoted-string have
+  # both failed -- confirm whether a mixed form such as "a".b is meant to be
+  # accepted here.
+  rule local-part
+    dot-atom | quoted-string | obs-local-part
+  end
+
+  # domain = dot-atom / domain-literal / obs-domain
+  rule domain
+    dot-atom | domain-literal | obs-domain
+  end
+
+  # domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
+  rule domain-literal
+    "[" dtext* "]"
+  end
+
+  # dtext = %d33-90 /   ; Printable US-ASCII
+  #         %d94-126 /  ; characters not including
+  #         obs-dtext   ; "[", "]", or "\"
+  rule dtext
+    [\x21-\x5A\x5E-\x7E] | obs-dtext
+  end
+
+  # text = %d1-9 /  ; Characters excluding CR
+  #        %d11 /   ; and LF
+  #        %d12 /
+  #        %d14-127
+  rule text
+    [\x01-\x09\x0B\x0C\x0E-\x7F]
+  end
+
+  # obs-NO-WS-CTL = %d1-8 /    ; US-ASCII control
+  #                 %d11 /     ; characters that do not
+  #                 %d12 /     ; include the carriage
+  #                 %d14-31 /  ; return, line feed, and
+  #                 %d127      ; white space characters
+  rule obs-NO-WS-CTL
+    [\x01-\x08\x0B\x0C\x0E-\x1F\x7F]
+  end
+
+  # obs-qtext = obs-NO-WS-CTL
+  rule obs-qtext
+    obs-NO-WS-CTL
+  end
+
+  # obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
+  rule obs-qp
+    "\\" ("\x00" | obs-NO-WS-CTL | "\n" | "\r")
+  end
+
+  # obs-local-part = word *("." word)
+  rule obs-local-part
+    word ("." word)*
+  end
+
+  # obs-domain = atom *("." atom)
+  rule obs-domain
+    atom ("." atom)*
+  end
+
+  # obs-dtext = obs-NO-WS-CTL / quoted-pair
+  rule obs-dtext
+    obs-NO-WS-CTL | quoted-pair
+  end
+end
View
@@ -0,0 +1,178 @@
+# This file contains a suite of tests for the EmailAddress grammar
+# found in email.citrus.
+
+require 'citrus'
+Citrus.require File.expand_path('../email', __FILE__)
+require 'test/unit'
+
+class EmailAddressTest < Test::Unit::TestCase
+ def test_addr_spec_valid
+ addresses = %w[
+ l3tt3rsAndNumb3rs@domain.com
+ has-dash@domain.com
+ hasApostrophe.o'leary@domain.org
+ uncommonTLD@domain.museum
+ uncommonTLD@domain.travel
+ uncommonTLD@domain.mobi
+ countryCodeTLD@domain.uk
+ countryCodeTLD@domain.rw
+ lettersInDomain@911.com
+ underscore_inLocal@domain.net
+ IPInsteadOfDomain@127.0.0.1
+ subdomain@sub.domain.com
+ local@dash-inDomain.com
+ dot.inLocal@foo.com
+ a@singleLetterLocal.org
+ singleLetterDomain@x.org
+ &*=?^+{}'~@validCharsInLocal.net
+ foor@bar.newTLD
+ ]
+
+ addresses.each do |address|
+ match = EmailAddress.parse(address)
+ assert(match)
+ assert_equal(address, match)
+ end
+ end
+
+ # NO-WS-CTL = %d1-8 / ; US-ASCII control characters
+ # %d11 / ; that do not include the
+ # %d12 / ; carriage return, line feed,
+ # %d14-31 / ; and white space characters
+ # %d127
+ def test_no_ws_ctl
+ chars = chars_no_ws_ctl
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :'NO-WS-CTL')
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # quoted-pair = ("\" text) / obs-qp
+ def test_quoted_pair
+ chars = chars_quoted_pair
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :'quoted-pair')
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # atext = ALPHA / DIGIT / ; Printable US-ASCII
+ # "!" / "#" / ; characters not including
+ # "$" / "%" / ; specials. Used for atoms.
+ # "&" / "'" /
+ # "*" / "+" /
+ # "-" / "/" /
+ # "=" / "?" /
+ # "^" / "_" /
+ # "`" / "{" /
+ # "|" / "}" /
+ # "~"
+ def test_atext
+ chars = ('A'..'Z').to_a
+ chars += ('a'..'z').to_a
+ chars += ('0'..'9').to_a
+ chars.push(*%w[! # $ % & ' * + - / = ? ^ _ ` { | } ~])
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :atext)
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # qtext = %d33 / ; Printable US-ASCII
+ # %d35-91 / ; characters not including
+ # %d93-126 / ; "\" or the quote character
+ # obs-qtext
+ def test_qtext
+ chars = ["\x21"]
+ chars += ("\x23".."\x5B").to_a
+ chars += ("\x5D".."\x7E").to_a
+
+ # obs-qtext
+ chars += chars_obs_no_ws_ctl
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :qtext)
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # dtext = %d33-90 / ; Printable US-ASCII
+ # %d94-126 / ; characters not including
+ # obs-dtext ; "[", "]", or "\"
+ def test_dtext
+ chars = ("\x21".."\x5A").to_a
+ chars += ("\x5E".."\x7E").to_a
+
+ # obs-dtext
+ chars += chars_obs_no_ws_ctl
+ chars += chars_quoted_pair
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :dtext)
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # text = %d1-9 / ; Characters excluding CR
+ # %d11 / ; and LF
+ # %d12 /
+ # %d14-127
+ def test_text
+ chars = chars_text
+
+ chars.each do |c|
+ match = EmailAddress.parse(c, :root => :text)
+ assert(match)
+ assert_equal(c, match)
+ end
+ end
+
+ # [\x01-\x08\x0B\x0C\x0E-\x1F\x7F]
+ def chars_no_ws_ctl
+ chars = ("\x01".."\x08").to_a
+ chars << "\x0B"
+ chars << "\x0C"
+ chars += ("\x0E".."\x1F").to_a
+ chars << "\x7F"
+ chars
+ end
+
+ # [\x01-\x09\x0B\x0C\x0E-\x7F]
+ def chars_text
+ chars = ("\x01".."\x09").to_a
+ chars << "\x0B"
+ chars << "\x0C"
+ chars += ("\x0E".."\x7F").to_a
+ chars
+ end
+
+ # [\x01-\x08\x0B\x0C\x0E-\x1F\x7F]
+ def chars_obs_no_ws_ctl
+ chars_no_ws_ctl
+ end
+
+ # ("\\" text) | obs-qp
+ def chars_quoted_pair
+ chars = chars_text.map {|c| "\\" + c }
+ chars += chars_obs_qp
+ chars
+ end
+
+ # "\\" ("\x00" | obs-NO-WS-CTL | "\n" | "\r")
+ def chars_obs_qp
+ chars = ["\x00"]
+ chars += chars_obs_no_ws_ctl
+ chars << "\n"
+ chars << "\r"
+ chars.map {|c| "\\" + c }
+ end
+end

0 comments on commit ff42c5f

Please sign in to comment.