Skip to content

Commit

Permalink
Detect Office365 bounces
Browse files Browse the repository at this point in the history
  • Loading branch information
horkhe committed Oct 18, 2017
1 parent d74f584 commit 9ad0e43
Show file tree
Hide file tree
Showing 8 changed files with 1,247 additions and 84 deletions.
17 changes: 9 additions & 8 deletions build.sh
Expand Up @@ -7,12 +7,13 @@ set -u
if [[ ${TRAVIS_PYTHON_VERSION} == 2.7* ]]; then
nosetests --with-coverage --cover-package=flanker
else
nosetests --with-coverage --cover-package=flanker tests/mime/bounce_tests.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/threading_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/tokenizer_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/headers/encodedword_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/headers/headers_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/headers/parametrized_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/headers/parsing_test.py
nosetests --with-coverage --cover-package=flanker tests/mime/message/headers/wrappers_test.py
nosetests --with-coverage --cover-package=flanker \
tests/mime/bounce_tests.py \
tests/mime/message/threading_test.py \
tests/mime/message/tokenizer_test.py \
tests/mime/message/headers/encodedword_test.py \
tests/mime/message/headers/headers_test.py \
tests/mime/message/headers/parametrized_test.py \
tests/mime/message/headers/parsing_test.py \
tests/mime/message/headers/wrappers_test.py
fi
79 changes: 39 additions & 40 deletions flanker/mime/bounce.py
@@ -1,78 +1,77 @@
from collections import deque
from contextlib import closing

import attr
import regex as re
import six
from attr.validators import instance_of
from six.moves import range

from flanker.mime.message.headers import MimeHeaders
from flanker.mime.message.headers.parsing import parse_stream


_HEADERS = ('Action',
'Content-Description',
'Diagnostic-Code',
'Final-Recipient',
'Received',
'Remote-Mta',
'Reporting-Mta',
'Status')

_RE_STATUS = re.compile(r'\d\.\d+\.\d+', re.IGNORECASE)


@attr.s(frozen=True)
class Result(object):
score = attr.ib(validator=instance_of(float))
status = attr.ib(validator=instance_of(six.text_type))
diagnostic_code = attr.ib(validator=instance_of(six.text_type))
notification = attr.ib(validator=instance_of(six.text_type))


def detect(message):
headers = collect(message)
return Result(
score=len(headers) / float(len(HEADERS)),
status=get_status(headers),
notification=get_notification(message),
diagnostic_code=headers.get('Diagnostic-Code'))
headers = _collect_headers(message)
return Result(score=len(headers) / float(len(_HEADERS)),
status=_get_status(headers),
diagnostic_code=headers.get('Diagnostic-Code', u''),
notification=_get_notification(message))


def collect(message):
def _collect_headers(message):
collected = deque()
for p in message.walk(with_self=True):
for h in HEADERS:
for h in _HEADERS:
if h in p.headers:
collected.append((h, p.headers[h]))
if p.content_type.is_delivery_status():
collected += collect_from_status(p.body)
collected += _collect_headers_from_status(p.body)

return MimeHeaders(collected)


def collect_from_status(body):
def _collect_headers_from_status(body):
out = deque()
with closing(six.StringIO(body)) as stream:
for i in range(3):
out += parse_stream(stream)

return out


def get_status(headers):
def _get_status(headers):
for v in headers.getall('Status'):
if RE_STATUS.match(v.strip()):
if _RE_STATUS.match(v.strip()):
return v

return u''

def get_notification(message):

def _get_notification(message):
for part in message.walk():
content_desc = part.headers.get('Content-Description', '').lower()
if content_desc == 'notification':
return part.body

return None


HEADERS = ('Action',
'Content-Description',
'Diagnostic-Code',
'Final-Recipient',
'Received',
'Remote-Mta',
'Reporting-Mta',
'Status')

RE_STATUS = re.compile(r'\d\.\d+\.\d+', re.IGNORECASE)


class Result(object):
def __init__(self, score, status, notification, diagnostic_code):
self.score = score
self.status = status
self.notification = notification
self.diagnostic_code = diagnostic_code

def __repr__(self):
return (u'bounce.Result(status={}, score={}, notification={},'
u' diag_code={})'.format(self.status, self.score,
self.notification,
self.diagnostic_code))
return u''
6 changes: 3 additions & 3 deletions flanker/mime/message/headers/parsing.py
Expand Up @@ -30,15 +30,15 @@ def parse_header(header):
"""
name, val = _split_header(header)
if not is_pure_ascii(name):
raise DecodingError("Non-ascii header name")
raise DecodingError('Non-ascii header name')

return name, parse_header_value(name, encodedword.unfold(val))


def parse_header_value(name, val):
if not is_pure_ascii(val):
if parametrized.is_parametrized(name, val):
raise DecodingError("Unsupported value in content- header")
raise DecodingError('Unsupported value in content- header')

return to_unicode(val)

Expand All @@ -62,7 +62,7 @@ def _read_header_lines(fp):
lines = deque()
for line in fp:
if len(line) > _MAX_LINE_LENGTH:
raise DecodingError("Line is too long: %d" % len(line))
raise DecodingError('Line is too long: %d' % len(line))

if is_empty(line):
break
Expand Down
14 changes: 2 additions & 12 deletions flanker/mime/message/part.py
Expand Up @@ -367,25 +367,15 @@ def remove_headers(self, *header_names):
@property
def bounce(self):
"""
If the message is NOT a bounce, then `None` is returned. Otherwise
it returns a bounce object that provides the values:
* score - a value between 0 and 1, where 0 means that the message is
definitely not a bounce, and 1 means that it is definitely a
bounce;
* status - delivery status;
* notification - human-readable description;
* diagnostic_code - SMTP diagnostic codes;
Can raise MimeError if the MIME structure is malformed.
Deprecated: use bounce.detect(message).
"""
if not self._bounce:
self._bounce = bounce.detect(self)
return self._bounce

def is_bounce(self, probability=0.3):
"""
Determines whether the message is a bounce message by comparing its
bounce score with the given probability threshold. 0.3 is a good
conservative baseline.
Deprecated: use bounce.detect(message).
"""
return self.bounce.score > probability

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup, find_packages

setup(name='flanker',
version='0.7.4',
version='0.8.0',
description='Mailgun Parsing Tools',
long_description=open('README.rst').read(),
classifiers=[],
Expand Down
1 change: 1 addition & 0 deletions tests/__init__.py
Expand Up @@ -28,6 +28,7 @@ def read_fixture_bytes(path):

# mime fixture files
BOUNCE = read_fixture_bytes('messages/bounce/zed.eml')
BOUNCE_OFFICE365 = read_fixture_bytes('messages/bounce/office365.eml')
MAILBOX_FULL = read_fixture_bytes('messages/bounce/mailbox-full.eml')
NDN = read_fixture_bytes('messages/bounce/delayed.eml')
NDN_BROKEN = read_fixture_bytes('messages/bounce/delayed-broken.eml')
Expand Down

0 comments on commit 9ad0e43

Please sign in to comment.