-
Notifications
You must be signed in to change notification settings - Fork 203
/
yahoo.py
185 lines (135 loc) · 4.96 KB
/
yahoo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# coding:utf-8
'''
Email address validation plugin for yahoo.com email addresses.
Notes for primary e-mail:
4-32 characters
must start with letter
must end with letter or number
must use letters, numbers, underscores (_)
only one dot (.) allowed
no consecutive dot (.) or underscore (_)
no dot-underscore (._) or underscore-dot (_.)
case is ignored
tags not supported
Grammar:
local-part -> alpha { [ dot | underscore ] ( alpha | num ) }
Other limitations:
1. No more than a single dot (.) is allowed in the local-part
2. Length of local-part must be no more than 32 characters, and no
less than 4 characters.
Notes for disposable e-mails using "AddressGuard":
example: base-keyword@yahoo.com
base and keyword may each be up to 32 characters
base may contain letters, numbers, underscores
base must start with a letter
keyword may contain letters and numbers
a single hyphen (-) connects the base and keyword
Grammar:
local-part -> alpha { [ alpha | num | underscore ] } hyphen { [ alpha | num ] }
'''
import re
from flanker.addresslib.plugins._tokenizer import TokenStream
from flanker.addresslib._parser.lexer import _UNICODE_CHAR
ALPHA = re.compile(r'''
( [A-Za-z]
| {unicode_char}
)+
''', re.MULTILINE | re.VERBOSE)
NUMERIC = re.compile(r'''
( [0-9]
)+
''', re.MULTILINE | re.VERBOSE)
ALPHANUM = re.compile(r'''
( [A-Za-z0-9]
| {unicode_char}
)+
''', re.MULTILINE | re.VERBOSE)
DOT = re.compile(r'\.', re.MULTILINE | re.VERBOSE)
UNDERSCORE = re.compile(r'\_', re.MULTILINE | re.VERBOSE)
HYPHEN = re.compile(r'\-', re.MULTILINE | re.VERBOSE)
YAHOO_MANAGED = ['yahoo.com', 'ymail.com', 'rocketmail.com']
def validate(email_addr):
# Setup for handling EmailAddress type instead of literal string
localpart = email_addr.mailbox
managed = managed_email(email_addr.hostname)
# check string exists and not empty
if not localpart:
return False
# must start with letter
if len(localpart) < 1 or (ALPHA.match(localpart[0]) is None and managed):
return False
# must end with letter or digit
if ALPHANUM.match(localpart[-1]) is None:
return False
# only disposable addresses may contain hyphens
if HYPHEN.search(localpart):
return _validate_disposable(email_addr)
# otherwise, normal validation
return _validate_primary(email_addr)
def _validate_primary(email_addr):
# Setup for handling EmailAddress type instead of literal string
localpart = email_addr.mailbox
managed = managed_email(email_addr.hostname)
# length check
l = len(localpart)
if l < 4 or l > 32:
return False
# no more than one dot (.)
if localpart.count('.') > 1:
return False
# Grammar: local-part -> alpha { [ dot | underscore ] ( alpha | num ) }"
stream = TokenStream(localpart)
# local-part must being with alpha
alpa = stream.get_token(ALPHA)
if alpa is None and managed:
return False
while True:
# optional dot or underscore token
stream.get_token(DOT) or stream.get_token(UNDERSCORE)
# alpha or numeric
alpanum = stream.get_token(ALPHA) or stream.get_token(NUMERIC)
if alpanum is None:
break
# alpha or numeric must be end of stream
if not stream.end_of_stream():
return False
return True
def _validate_disposable(email_addr):
# Setup for handling EmailAddress type instead of literal string
localpart = email_addr.mailbox
managed = managed_email(email_addr.hostname)
# length check (base + hyphen + keyword)
l = len(localpart)
if l < 3 or l > 65:
return False
# single hyphen
if localpart.count('-') != 1:
return False
# base and keyword length limit
parts = localpart.split('-')
for part in parts:
l = len(part)
if l < 1 or l > 32:
return False
# Grammar: local-part -> alpha { [ alpha | num | underscore ] } hyphen { [ alpha | num ] }
stream = TokenStream(localpart)
# must being with alpha
begin = stream.get_token(ALPHA)
if begin is None and managed:
return False
while True:
# alpha, num, underscore
base = stream.get_token(ALPHANUM) or stream.get_token(UNDERSCORE)
if base is None:
break
# hyphen
hyphen = stream.get_token(HYPHEN)
if hyphen is None:
return False
# keyword must be alpha, num
stream.get_token(ALPHANUM)
if not stream.end_of_stream():
return False
return True
def managed_email(hostname):
return hostname in YAHOO_MANAGED