Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move unfinished grammar functionality to branch.
- Loading branch information
1 parent
596d114
commit 404ac6f
Showing
9 changed files
with
277 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
__version__ = '0.0.1' | ||
__author__ = "Michael Herold" | ||
|
||
from pyisemail.grammar import Grammar | ||
from pyisemail.email_validator import EmailValidator | ||
from pyisemail.reference import Reference | ||
from pyisemail.reference import Reference |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from pyparsing import ParserElement, ParseException | ||
from pyisemail.diagnosis import InvalidDiagnosis, ValidDiagnosis | ||
|
||
|
||
class Grammar(object):
    """Validate email addresses against a pyparsing addr-spec grammar.

    A grammar is described by three parser elements: the local part, the
    domain and the complete addr-spec. Subclasses supply concrete
    elements for a particular RFC.
    """

    def __init__(self, local_part, domain, addr_spec=None):
        """Store the parser elements that make up the grammar.

        Keyword arguments:
        local_part -- parser element matching the part before the "@"
        domain     -- parser element matching the part after the "@"
        addr_spec  -- parser element matching a whole address; when
                      omitted it is built as local_part + "@" + domain
        """
        # Whitespace is significant inside an address, so pyparsing must
        # not skip it implicitly. This is a process-wide pyparsing setting.
        ParserElement.setDefaultWhitespaceChars("")
        self.local_part = local_part
        self.domain = domain
        if addr_spec is not None:
            self.addr_spec = addr_spec
        else:
            self.addr_spec = local_part + "@" + domain

    def parse(self, address, diagnose=False):
        """Check whether address matches the grammar's addr-spec.

        Keyword arguments:
        address  -- the string to validate
        diagnose -- when true, return a (bool, diagnosis) tuple instead
                    of a bare bool

        On failure with diagnose set, the diagnosis is
        InvalidDiagnosis('NODOMAIN') when the parser stopped at the "@"
        or the input has no "@" at all; otherwise it is None.
        """
        try:
            # parseAll=True makes the grammar consume the entire string;
            # without it a valid prefix followed by junk would validate.
            self.addr_spec.parseString(address, parseAll=True)
        except ParseException as err:
            if not diagnose:
                return False
            if err.parserElement == "@" or "@" not in err.pstr:
                diagnosis = InvalidDiagnosis('NODOMAIN')
            else:
                # No finer-grained diagnosis is produced for other failures.
                diagnosis = None
            return (False, diagnosis)

        if diagnose:
            return (True, ValidDiagnosis())
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from pyisemail.grammars.rfc2822 import RFC2822 | ||
from pyisemail.grammars.rfc5322 import RFC5322 | ||
from pyisemail.grammars.rfc5322_obsolete import RFC5322Obsolete |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from pyparsing import Forward, Literal, OneOrMore, Optional | ||
from pyparsing import Regex, ZeroOrMore | ||
from pyisemail import Grammar | ||
|
||
|
||
class RFC2822(Grammar):
    """addr-spec grammar following RFC 2822, obsolete forms included."""

    def __init__(self):
        """Build the RFC 2822 parser elements and store them."""
        # Imported locally because this module's header only imports the
        # pyparsing combinators.
        from pyparsing import ParserElement

        # Must run before any element is created: pyparsing applies the
        # default whitespace setting at element construction time, and
        # the WSP/FWS rules below can only match if whitespace is not
        # skipped implicitly.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts['local_part']
        self.domain = parts['domain']
        self.addr_spec = parts['addr_spec']

    def __create_parts(self):
        """Return a dict with 'local_part', 'domain' and 'addr_spec'.

        The section comments refer to RFC 2822. Regex patterns are raw
        strings so that the regex engine, not the Python string parser,
        interprets the \\xNN escapes; otherwise bytes such as 0x5b ("[")
        and 0x5d ("]") would end up as bare brackets inside a character
        class and close it early.
        """

        CRLF = Literal("\r\n")
        LOWASCII = Regex(r"[\x00-\x7f]")
        WSP = Regex(r"[\x20\x09]")

        # 3.2.1 Primitive Tokens
        NO_WS_CTL = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]")
        text = Regex(r"[\x01-\x09\x0b\x0c\x0e-\x7f]")

        # 3.2.2 Quoted characters
        # "\\" is a single backslash; a quoted-pair is one backslash
        # followed by one character.
        obs_qp = "\\" + LOWASCII
        quoted_pair = ("\\" + text) | obs_qp

        # 3.2.3 Folding white space and comments
        obs_FWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))
        comment = Forward()
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obs_FWS
        ctext = NO_WS_CTL | Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]")
        ccontent = ctext | quoted_pair | comment
        comment << ("(" +
                    ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) +
                    ")")
        # RFC 2822 writes CFWS as *([FWS] comment) (([FWS] comment) / FWS).
        # pyparsing repetition is greedy and does not backtrack, so that
        # literal form fails on a run of comments without trailing FWS.
        # The form below accepts the same language.
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.4 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        atom = Optional(CFWS) + OneOrMore(atext) + Optional(CFWS)
        dot_atom_text = (OneOrMore(atext) +
                         ZeroOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.5 Quoted strings
        qtext = NO_WS_CTL | Regex(r"[\x21\x23-\x5b\x5d-\x7e]")
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + '"' +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + '"' + Optional(CFWS))

        # 3.2.6 Miscellaneous tokens
        word = atom | quoted_string
        obs_local_part = word + ZeroOrMore("." + word)

        # 3.4.1 Addr-spec specification
        dtext = NO_WS_CTL | Regex(r"[\x21-\x5a\x5e-\x7e]")
        local_part = dot_atom | quoted_string | obs_local_part

        dcontent = dtext | quoted_pair
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dcontent) +
                          Optional(FWS) + "]" + Optional(CFWS))

        obs_domain = atom + ZeroOrMore("." + atom)

        domain = dot_atom | domain_literal | obs_domain

        addr_spec = local_part + "@" + domain

        return {
            'local_part': local_part,
            'domain': domain,
            'addr_spec': addr_spec
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from pyparsing import Forward, Literal, OneOrMore, Optional | ||
from pyparsing import Regex, ZeroOrMore | ||
from pyisemail import Grammar | ||
|
||
|
||
class RFC5322(Grammar):
    """addr-spec grammar following RFC 5322, without obsolete forms."""

    def __init__(self):
        """Build the RFC 5322 parser elements and store them."""
        # Imported locally because this module's header only imports the
        # pyparsing combinators.
        from pyparsing import ParserElement

        # Must run before any element is created: pyparsing applies the
        # default whitespace setting at element construction time, and
        # the WSP/FWS rules below can only match if whitespace is not
        # skipped implicitly.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts["local_part"]
        self.domain = parts["domain"]
        self.addr_spec = parts["addr_spec"]

    def __create_parts(self):
        """Return a dict with "local_part", "domain" and "addr_spec".

        The section comments refer to RFC 5322. Regex patterns are raw
        strings so that the regex engine, not the Python string parser,
        interprets the \\xNN escapes; otherwise bytes such as 0x5b ("[")
        and 0x5d ("]") would end up as bare brackets inside a character
        class and close it early.
        """
        # Character classes from core rules
        CRLF = Literal('\r\n')
        DQUOTE = Literal('"')
        HTAB = Literal('\x09')
        SP = Literal(' ')
        WSP = SP | HTAB
        VCHAR = Regex(r"[\x21-\x7e]")

        # 3.2.1 Quoted characters
        quoted_pair = ("\\" + (VCHAR | WSP))

        # 3.2.2 Folding white space and comments
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP))
        ctext = Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]")
        comment = Forward()
        ccontent = ctext | quoted_pair | comment
        comment << ("(" + ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) + ")")
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.3 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        # dot-atom-text = 1*atext *("." 1*atext): the dotted tail is
        # optional, so a plain "user" is a valid dot-atom.
        dot_atom_text = (OneOrMore(atext) +
                         ZeroOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.4 Quoted Strings
        qtext = Regex(r"[\x21\x23-\x5b\x5d-\x7e]")
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + DQUOTE +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + DQUOTE + Optional(CFWS))

        # 3.4.1 Addr-spec Specification
        local_part = dot_atom | quoted_string

        dtext = Regex(r"[\x21-\x5a\x5e-\x7e]")
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dtext) +
                          Optional(FWS) + "]" + Optional(CFWS))
        domain = dot_atom | domain_literal

        addr_spec = local_part + "@" + domain

        return {
            "local_part": local_part,
            "domain": domain,
            "addr_spec": addr_spec
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from pyparsing import Forward, Literal, OneOrMore, Optional | ||
from pyparsing import Regex, ZeroOrMore | ||
from pyisemail import Grammar | ||
|
||
|
||
class RFC5322Obsolete(Grammar):
    """addr-spec grammar following RFC 5322, obsolete forms included."""

    def __init__(self):
        """Build the RFC 5322 (with obsolete syntax) parser elements."""
        # Imported locally because this module's header only imports the
        # pyparsing combinators.
        from pyparsing import ParserElement

        # Must run before any element is created: pyparsing applies the
        # default whitespace setting at element construction time, and
        # the WSP/FWS rules below can only match if whitespace is not
        # skipped implicitly.
        ParserElement.setDefaultWhitespaceChars("")

        parts = self.__create_parts()
        self.local_part = parts["local_part"]
        self.domain = parts["domain"]
        self.addr_spec = parts["addr_spec"]

    def __create_parts(self):
        """Return a dict with "local_part", "domain" and "addr_spec".

        The section comments refer to RFC 5322. Regex patterns are raw
        strings so that the regex engine, not the Python string parser,
        interprets the \\xNN escapes; otherwise bytes such as 0x5b ("[")
        and 0x5d ("]") would end up as bare brackets inside a character
        class and close it early.
        """
        # Character classes from core rules
        CR = Literal('\r')
        LF = Literal('\n')
        CRLF = Literal('\r\n')
        DQUOTE = Literal('"')
        HTAB = Literal('\x09')
        SP = Literal(' ')
        WSP = SP | HTAB
        VCHAR = Regex(r"[\x21-\x7e]")

        # 4.1 Miscellaneous Obsolete Tokens
        obs_NO_WS_CTL = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]")
        obs_ctext = obs_NO_WS_CTL
        obs_qtext = obs_NO_WS_CTL
        # "\x00" is deliberately a normal string: it becomes a Literal
        # matching the NUL character itself, not a regex.
        obs_qp = "\\" + ("\x00" | obs_NO_WS_CTL | LF | CR)
        obs_FWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))

        # 3.2.1 Quoted characters
        quoted_pair = ("\\" + (VCHAR | WSP)) | obs_qp

        # 3.2.2 Folding white space and comments
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obs_FWS
        ctext = Regex(r"[\x21-\x27\x2a-\x5b\x5d-\x7e]") | obs_ctext
        comment = Forward()
        ccontent = ctext | quoted_pair | comment
        comment << ("(" + ZeroOrMore(Optional(FWS) + ccontent) +
                    Optional(FWS) + ")")
        CFWS = (OneOrMore(Optional(FWS) + comment) + Optional(FWS)) | FWS

        # 3.2.3 Atom
        atext = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        atom = Optional(CFWS) + OneOrMore(atext) + Optional(CFWS)
        # dot-atom-text = 1*atext *("." 1*atext): the dotted tail is
        # optional, so a plain "user" is a valid dot-atom.
        dot_atom_text = (OneOrMore(atext) +
                         ZeroOrMore("." + OneOrMore(atext)))
        dot_atom = Optional(CFWS) + dot_atom_text + Optional(CFWS)

        # 3.2.4 Quoted Strings
        qtext = Regex(r"[\x21\x23-\x5b\x5d-\x7e]") | obs_qtext
        qcontent = qtext | quoted_pair
        quoted_string = (Optional(CFWS) + DQUOTE +
                         ZeroOrMore(Optional(FWS) + qcontent) +
                         Optional(FWS) + DQUOTE + Optional(CFWS))

        # 3.2.5 Miscellaneous Tokens
        word = atom | quoted_string

        # 4.4 Obsolete Addressing
        obs_local_part = word + ZeroOrMore("." + word)
        obs_domain = atom + ZeroOrMore("." + atom)
        obs_dtext = obs_NO_WS_CTL | quoted_pair

        # 3.4.1 Addr-spec Specification
        local_part = dot_atom | quoted_string | obs_local_part

        dtext = Regex(r"[\x21-\x5a\x5e-\x7e]") | obs_dtext
        domain_literal = (Optional(CFWS) + "[" +
                          ZeroOrMore(Optional(FWS) + dtext) +
                          Optional(FWS) + "]" + Optional(CFWS))
        domain = dot_atom | domain_literal | obs_domain

        addr_spec = local_part + "@" + domain

        return {
            "local_part": local_part,
            "domain": domain,
            "addr_spec": addr_spec
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from pyisemail import EmailValidator | ||
from pyisemail.grammars import RFC2822, RFC5322, RFC5322Obsolete | ||
|
||
|
||
class GrammarValidator(EmailValidator):
    """Email validator that delegates to the pyparsing RFC grammars."""

    def __init__(self):
        """Instantiate each available grammar once, keyed by name."""
        self.grammars = dict(
            rfc2822=RFC2822(),
            rfc5322=RFC5322(),
            rfc5322_obsolete=RFC5322Obsolete(),
        )

    def is_email(self, address, diagnose=False):
        """Validate address with the 'rfc5322' grammar.

        Returns whatever that grammar's parse() returns for the given
        address and diagnose flag.
        """
        grammar = self.grammars['rfc5322']
        return grammar.parse(address, diagnose)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
dnspython>=1.10.0 | ||
dnspython>=1.10.0 | ||
pyparsing>=2.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters