Skip to content

Commit

Permalink
Move unfinished grammar functionality to branch.
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelherold committed Oct 29, 2013
1 parent 596d114 commit 404ac6f
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pyisemail/__init__.py
@@ -1,5 +1,6 @@
__version__ = '0.0.1'
__author__ = "Michael Herold"

from pyisemail.grammar import Grammar
from pyisemail.email_validator import EmailValidator
from pyisemail.reference import Reference
from pyisemail.reference import Reference
31 changes: 31 additions & 0 deletions pyisemail/grammar.py
@@ -0,0 +1,31 @@
from pyparsing import ParserElement, ParseException
from pyisemail.diagnosis import InvalidDiagnosis, ValidDiagnosis


class Grammar(object):
    """Validate e-mail addresses against a pyparsing addr-spec expression.

    Instances hold three expressions: ``local_part``, ``domain`` and
    ``addr_spec``.  Only ``addr_spec`` is used by :meth:`parse`.
    """

    def __init__(self, local_part, domain, addr_spec=None):
        """Store the grammar parts, composing ``addr_spec`` if it is omitted.

        Keyword arguments:
        local_part -- expression for the part before the "@"
        domain     -- expression for the part after the "@"
        addr_spec  -- expression for the whole address; when None it is
                      built as ``local_part + "@" + domain``
        """
        # Make whitespace significant for expressions created from now on
        # (this includes the "@" literal composed just below).
        ParserElement.setDefaultWhitespaceChars("")
        self.local_part = local_part
        self.domain = domain
        if addr_spec is not None:
            self.addr_spec = addr_spec
        else:
            self.addr_spec = local_part + "@" + domain

    def parse(self, address, diagnose=False):
        """Check whether ``address`` matches the grammar.

        Keyword arguments:
        address  -- the address string to check
        diagnose -- when True, return a ``(result, diagnosis)`` tuple
                    instead of a bare boolean

        The diagnosis is a ``ValidDiagnosis`` on success.  On failure it is
        ``InvalidDiagnosis('NODOMAIN')`` when parsing stopped at the "@"
        separator or the address has no "@" at all, and None otherwise.
        """
        try:
            # parseAll=True forces the *whole* address to match.  Without it
            # a valid prefix followed by arbitrary trailing text is accepted.
            parsed = self.addr_spec.parseString(address, parseAll=True)
        except ParseException as err:
            if not diagnose:
                return False
            if err.parserElement == "@" or "@" not in err.pstr:
                diagnosis = InvalidDiagnosis('NODOMAIN')
            else:
                diagnosis = None
            return (False, diagnosis)

        if diagnose:
            return (parsed is not None, ValidDiagnosis())
        return parsed is not None
3 changes: 3 additions & 0 deletions pyisemail/grammars/__init__.py
@@ -0,0 +1,3 @@
from pyisemail.grammars.rfc2822 import RFC2822
from pyisemail.grammars.rfc5322 import RFC5322
from pyisemail.grammars.rfc5322_obsolete import RFC5322Obsolete
78 changes: 78 additions & 0 deletions pyisemail/grammars/rfc2822.py
@@ -0,0 +1,78 @@
from pyparsing import Forward, Literal, OneOrMore, Optional
from pyparsing import Regex, ZeroOrMore
from pyisemail import Grammar


class RFC2822(Grammar):
    """Address grammar assembled from the RFC 2822 addr-spec productions.

    Section numbers in the comments refer to RFC 2822.
    """

    def __init__(self):
        """Build the grammar and expose local_part, domain and addr_spec."""
        # Function-scope import keeps this block self-contained; pyparsing
        # is already a dependency of this module.
        from pyparsing import ParserElement

        # Grammar.__init__ is not invoked here, so pyparsing's implicit
        # whitespace skipping must be disabled explicitly *before* any
        # expression is created; otherwise blanks between tokens are
        # silently skipped instead of being matched by WSP/FWS.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts['local_part']
        self.domain = parts['domain']
        self.addr_spec = parts['addr_spec']

    def __create_parts(self):
        """Return a dict with the 'local_part', 'domain' and 'addr_spec'
        expressions."""
        # All regex patterns are raw strings so that the \xNN escapes are
        # interpreted by the regex engine.  In a normal string \x5b and \x5d
        # become literal "[" and "]" and terminate the character class early.

        CRLF = Literal("\r\n")
        LOWASCII = Regex(r"[\x00-\x7f]")
        WSP = Regex(r"[\x20\x09]")

        # 3.2.1 Primitive Tokens
        NO_WS_CTL = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]")
        text = Regex(r"[\x01-\x09\x0b\x0c\x0e-\x7f]")

        # 3.2.2 Quoted characters
        # "\\" is a single backslash; the former r"\\" literal required two.
        obs_qp = "\\" + LOWASCII
        quoted_pair = ("\\" + text) | obs_qp

        # 3.2.3 Folding white space and comments
        obs_FWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))
        comment = Forward()
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obs_FWS
        ctext = NO_WS_CTL | Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]")
        ccontent = ctext | quoted_pair | comment
        comment << ("(" +
                    ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) +
                    ")")
        # Same language as *([FWS] comment) (([FWS] comment) / FWS), but
        # written so that the greedy repetition cannot swallow the comment
        # that the mandatory tail of the literal transcription would need.
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.4 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        atom = Optional(CFWS) + OneOrMore(atext) + Optional(CFWS)
        dot_atom_text = (OneOrMore(atext) +
                         ZeroOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.5 Quoted strings
        qtext = NO_WS_CTL | Regex(r"[\x21\x23-\x5b\x5d-\x7e]")
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + '"' +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + '"' + Optional(CFWS))

        # 3.2.6 Miscellaneous tokens
        word = atom | quoted_string
        obs_local_part = word + ZeroOrMore("." + word)

        # 3.4.1 Addr-spec specification
        dtext = NO_WS_CTL | Regex(r"[\x21-\x5a\x5e-\x7e]")
        local_part = dot_atom | quoted_string | obs_local_part

        dcontent = dtext | quoted_pair
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dcontent) +
                          Optional(FWS) + "]" + Optional(CFWS))

        obs_domain = atom + ZeroOrMore("." + atom)

        domain = dot_atom | domain_literal | obs_domain

        addr_spec = local_part + "@" + domain

        return {
            'local_part': local_part,
            'domain': domain,
            'addr_spec': addr_spec
        }
63 changes: 63 additions & 0 deletions pyisemail/grammars/rfc5322.py
@@ -0,0 +1,63 @@
from pyparsing import Forward, Literal, OneOrMore, Optional
from pyparsing import Regex, ZeroOrMore
from pyisemail import Grammar


class RFC5322(Grammar):
    """Address grammar assembled from the RFC 5322 addr-spec productions.

    Only the current (non-obsolete) syntax is included.  Section numbers in
    the comments refer to RFC 5322.
    """

    def __init__(self):
        """Build the grammar and expose local_part, domain and addr_spec."""
        # Function-scope import keeps this block self-contained; pyparsing
        # is already a dependency of this module.
        from pyparsing import ParserElement

        # Grammar.__init__ is not invoked here, so pyparsing's implicit
        # whitespace skipping must be disabled explicitly *before* any
        # expression is created; otherwise blanks between tokens are
        # silently skipped instead of being matched by WSP/FWS.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts["local_part"]
        self.domain = parts["domain"]
        self.addr_spec = parts["addr_spec"]

    def __create_parts(self):
        """Return a dict with the 'local_part', 'domain' and 'addr_spec'
        expressions."""
        # All regex patterns are raw strings so that the \xNN escapes are
        # interpreted by the regex engine.  In a normal string \x5b and \x5d
        # become literal "[" and "]" and terminate the character class early.

        # Character classes from core rules
        CRLF = Literal('\r\n')
        DQUOTE = Literal('"')
        HTAB = Literal('\x09')
        SP = Literal(' ')
        WSP = SP | HTAB
        VCHAR = Regex(r"[\x21-\x7e]")

        # 3.2.1 Quoted characters
        quoted_pair = ("\\" + (VCHAR | WSP))

        # 3.2.2 Folding white space and comments
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP))
        ctext = Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]")
        comment = Forward()
        ccontent = ctext | quoted_pair | comment
        comment << ("(" + ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) + ")")
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.3 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        # dot-atom-text = 1*atext *("." 1*atext): the dotted groups are
        # optional, so a dot-less local part such as "user" is accepted.
        dot_atom_text = (OneOrMore(atext) +
                         ZeroOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.4 Quoted Strings
        qtext = Regex(r"[\x21\x23-\x5b\x5d-\x7e]")
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + DQUOTE +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + DQUOTE + Optional(CFWS))

        # 3.4.1 Addr-spec Specification
        local_part = dot_atom | quoted_string

        dtext = Regex(r"[\x21-\x5a\x5e-\x7e]")
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dtext) +
                          Optional(FWS) + "]" + Optional(CFWS))
        domain = dot_atom | domain_literal

        addr_spec = local_part + "@" + domain

        return {
            "local_part": local_part,
            "domain": domain,
            "addr_spec": addr_spec
        }
80 changes: 80 additions & 0 deletions pyisemail/grammars/rfc5322_obsolete.py
@@ -0,0 +1,80 @@
from pyparsing import Forward, Literal, OneOrMore, Optional
from pyparsing import Regex, ZeroOrMore
from pyisemail import Grammar


class RFC5322Obsolete(Grammar):
    """Address grammar from RFC 5322 including the obsolete productions.

    Combines the current addr-spec syntax (section 3) with the obsolete
    forms of section 4.  Section numbers in the comments refer to RFC 5322.
    """

    def __init__(self):
        """Build the grammar and expose local_part, domain and addr_spec."""
        # Function-scope import keeps this block self-contained; pyparsing
        # is already a dependency of this module.
        from pyparsing import ParserElement

        # Grammar.__init__ is not invoked here, so pyparsing's implicit
        # whitespace skipping must be disabled explicitly *before* any
        # expression is created; otherwise blanks between tokens are
        # silently skipped instead of being matched by WSP/FWS.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts["local_part"]
        self.domain = parts["domain"]
        self.addr_spec = parts["addr_spec"]

    def __create_parts(self):
        """Return a dict with the 'local_part', 'domain' and 'addr_spec'
        expressions."""
        # All regex patterns are raw strings so that the \xNN escapes are
        # interpreted by the regex engine.  In a normal string \x5b and \x5d
        # become literal "[" and "]" and terminate the character class early.

        # Character classes from core rules
        CR = Literal('\r')
        LF = Literal('\n')
        CRLF = Literal('\r\n')
        DQUOTE = Literal('"')
        HTAB = Literal('\x09')
        SP = Literal(' ')
        WSP = SP | HTAB
        VCHAR = Regex(r"[\x21-\x7e]")

        # 4.1 Miscellaneous Obsolete Tokens
        obs_NO_WS_CTL = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]")
        obs_ctext = obs_NO_WS_CTL
        obs_qtext = obs_NO_WS_CTL
        obs_qp = "\\" + ("\x00" | obs_NO_WS_CTL | LF | CR)
        obs_FWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))

        # 3.2.1 Quoted characters
        quoted_pair = ("\\" + (VCHAR | WSP)) | obs_qp

        # 3.2.2 Folding white space and comments
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obs_FWS
        ctext = Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]") | obs_ctext
        comment = Forward()
        ccontent = ctext | quoted_pair | comment
        comment << ("(" + ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) + ")")
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.3 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        atom = Optional(CFWS) + OneOrMore(atext) + Optional(CFWS)
        # NOTE(review): at least one "." group is required here, whereas the
        # RFC production is 1*atext *("." 1*atext).  Dot-less atoms are only
        # reachable through obs_local_part / obs_domain below -- confirm
        # this is intended.
        dot_atom_text = (OneOrMore(atext) + OneOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.4 Quoted Strings
        qtext = Regex(r"[\x21\x23-\x5b\x5d-\x7e]") | obs_qtext
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + DQUOTE +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + DQUOTE + Optional(CFWS))

        # 3.2.5 Miscellaneous Tokens
        word = atom | quoted_string

        # 4.4 Obsolete Addressing
        obs_local_part = word + ZeroOrMore("." + word)
        obs_domain = atom + ZeroOrMore("." + atom)
        obs_dtext = obs_NO_WS_CTL | quoted_pair

        # 3.4.1 Addr-spec Specification
        local_part = dot_atom | quoted_string | obs_local_part

        dtext = Regex(r"[\x21-\x5a\x5e-\x7e]") | obs_dtext
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dtext) +
                          Optional(FWS) + "]" + Optional(CFWS))
        domain = dot_atom | domain_literal | obs_domain

        addr_spec = local_part + "@" + domain

        return {
            "local_part": local_part,
            "domain": domain,
            "addr_spec": addr_spec
        }
15 changes: 15 additions & 0 deletions pyisemail/validators/grammar_validator.py
@@ -0,0 +1,15 @@
from pyisemail import EmailValidator
from pyisemail.grammars import RFC2822, RFC5322, RFC5322Obsolete


class GrammarValidator(EmailValidator):
    """E-mail validator backed by the pyparsing RFC grammars."""

    def __init__(self):
        """Instantiate one grammar per supported specification."""
        self.grammars = dict(
            rfc2822=RFC2822(),
            rfc5322=RFC5322(),
            rfc5322_obsolete=RFC5322Obsolete(),
        )

    def is_email(self, address, diagnose=False):
        """Check ``address`` against the RFC 5322 grammar.

        Returns whatever the grammar's ``parse`` returns for the given
        ``address`` and ``diagnose`` flag.
        """
        rfc5322_grammar = self.grammars['rfc5322']
        return rfc5322_grammar.parse(address, diagnose)
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1 +1,2 @@
dnspython>=1.10.0
dnspython>=1.10.0
pyparsing>=2.0.1
4 changes: 3 additions & 1 deletion setup.py
Expand Up @@ -29,14 +29,16 @@
author_email="michael.j.herold@gmail.com",
url="https://github.com/michaelherold/pyIsEmail",
license="MIT",
packages=["pyisemail", "pyisemail.diagnosis", "pyisemail.test"],
packages=["pyisemail", "pyisemail.diagnosis",
"pyisemail.grammars", "pyisemail.test"],
include_package_data=True,
exclude_package_data={
'': ['.gitignore']
},
zip_safe=False,
install_requires=[
"dnspython >= 1.10.0",
"pyparsing >= 2.0.1"
],
tests_require=["testtools >= 0.9.21", "testscenarios >= 0.3"],
test_suite="pyisemail.test",
Expand Down

0 comments on commit 404ac6f

Please sign in to comment.