From 88f5ab8c8c192ac2ac25af3352e6fc6d162969bd Mon Sep 17 00:00:00 2001 From: Jacob Rideout Date: Fri, 30 Nov 2018 02:01:03 -0500 Subject: [PATCH] Add RTF support to the command line with decompression --- .travis.yml | 2 +- setup.py | 2 +- tests/test_cmdline.py | 41 ++++++++++++++++++++++++++++++++++++ tests/test_decoding.py | 47 +++++++++++++++++++++++++++++------------- tnefparse/cmdline.py | 23 ++++++++++++++++----- tnefparse/tnef.py | 18 +++++++++++++--- 6 files changed, 109 insertions(+), 24 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1f2fa26..f9afeca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ python: install: - pip install -e . - - pip install pytest pytest-cov pytest-console-scripts codecov + - pip install pytest pytest-cov pytest-console-scripts codecov compressed_rtf script: - pytest --cov diff --git a/setup.py b/setup.py index fbd45e1..a892c6a 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ license='LGPL', packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), setup_requires=["pytest-runner", "pytest-console-scripts"], - tests_require = ['pytest', 'coverage'], + tests_require = ['pytest', 'coverage', 'compressed_rtf'], include_package_data=True, zip_safe=True, entry_points = { diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index e9c53e7..82b8fc9 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -7,10 +7,51 @@ def test_cmdline_overview(script_runner): assert ret.success assert "Overview of tests/examples/body.tnef" in ret.stdout assert ret.stderr == '' + assert ret.success def test_cmdline_attch_extract(script_runner): tmpdir = tempfile.mkdtemp() ret = script_runner.run('tnefparse', '-a', '-p', tmpdir, 'tests/examples/one-file.tnef') assert ret.stderr == 'Successfully wrote 1 files\n' + assert ret.success shutil.rmtree(tmpdir) + + +def test_cmdline_no_body(script_runner): + ret = script_runner.run('tnefparse', '-b', 'tests/examples/one-file.tnef') + assert ret.stderr == 'No body found\n' + assert not ret.success + + +def test_cmdline_body(script_runner): + ret = script_runner.run('tnefparse', '-b', 'tests/examples/unicode-mapi-attr.tnef') + assert len(ret.stdout) == 12 + assert ret.stderr == '' + assert ret.success + + +def test_cmdline_no_htmlbody(script_runner): + ret = script_runner.run('tnefparse', '-hb', 'tests/examples/one-file.tnef') + assert ret.stderr == 'No HTML body found\n' + assert not ret.success + + +def test_cmdline_htmlbody(script_runner): + ret = script_runner.run('tnefparse', '-hb', 'tests/examples/body.tnef') + assert len(ret.stdout) == 5358 + assert ret.stderr == '' + assert ret.success + + +def test_cmdline_no_rtfbody(script_runner): + ret = script_runner.run('tnefparse', '-rb', 'tests/examples/one-file.tnef') + assert ret.stderr == 'No RTF body found\n' + assert not ret.success + + +def test_cmdline_rtfbody(script_runner): + ret = script_runner.run('tnefparse', '-rb', 'tests/examples/rtf.tnef') + assert len(ret.stdout) == 593 + assert ret.stderr == '' + assert ret.success diff --git a/tests/test_decoding.py b/tests/test_decoding.py index 2a8d52e..eb3ccff 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -14,35 +14,48 @@ ("body.tnef", 0x1125, [], 'htmlbody', [0x9006, 0x9007, 0x800d, 0x8005, 0x8020, 0x8009, 0x9004, 0x9003] ), - ("two-files.tnef", 0x237, ["AUTHORS", "README"], None, - [0x9006, 0x9007, 0x8008, 0x8009, 0x06, 0x8020, 0x8005, 0x8004, 0x800d, 0x9003, - 0x9002, 0x8012, 0x8013, 0x8010, 0x800f, 0x9005, 0x9002, 0x8012, 0x8013, 0x8010, - 0x800f, 0x9005] + [0x9006, 0x9007, 0x8008, 0x8009, 0x0006, 0x8020, 0x8005, 0x8004, 0x800d, 0x9003, + 0x9002, 0x8012, 0x8013, 0x8010, 0x800f, 0x9005, 0x9002, 0x8012, 0x8013, 0x8010, + 0x800f, 0x9005] ), ("data-before-name.tnef", 0x0d, ["AUTOEXEC.BAT", "CONFIG.SYS", "boot.ini"], 'rtfbody', [0x9006, 0x9007, 0x8008, 0x800d, 0x8006, 0x9003, 0x9002, 0x8013, 0x800f, 0x8010, - 0x8011, 0x9005, 0x9002, 0x8013, 0x800f, 0x8010, 0x8011, 0x9005, 0x9002, 0x8013, - 0x800f, 0x8010, 0x8011, 0x9005] + 0x8011, 0x9005, 0x9002, 0x8013, 0x800f, 0x8010, 0x8011, 0x9005, 0x9002, 0x8013, + 0x800f, 0x8010, 0x8011, 0x9005] ), ("multi-name-property.tnef", 0xc6c7, [], None, [0x9006, 0x9007, 0x9003] ), ("MAPI_ATTACH_DATA_OBJ.tnef", 0x1af, ['VIA_Nytt_1402.doc', 'VIA_Nytt_1402.pdf', 'VIA_Nytt_14021.htm'], 'rtfbody', - [0x9006, 0x9007, 0x9003, 0x9002, 0x9005, 0x9002, 0x9005, 0x9002, 0x9005]), - ("MAPI_OBJECT.tnef", 0x08, ['Untitled_Attachment'], 'rtfbody', []), + [0x9006, 0x9007, 0x9003, 0x9002, 0x9005, 0x9002, 0x9005, 0x9002, 0x9005] + ), + ("MAPI_OBJECT.tnef", 0x08, ['Untitled_Attachment'], 'rtfbody', + [0x9006, 0x9007, 0x8008, 0x8005, 0x8020, 0x8009, 0x8004, 0x800d, + 0x9003, 0x9002, 0x8010, 0x8012, 0x8013, 0x9005] + ), ("garbage-at-end.tnef", 0x415, [], None, - [0x9006, 0x9007, 0x8008, 0x800d, 0x800a, 0x9003]), - ("long-filename.tnef", 0x1422, ['allproductsmar2000.dat'], 'rtfbody', []), + [0x9006, 0x9007, 0x8008, 0x800d, 0x800a, 0x9003] + ), + ("long-filename.tnef", 0x1422, ['allproductsmar2000.dat'], 'rtfbody', + [0x9006, 0x9007, 0x8008, 0x8005, 0x0006, 0x8020, 0x8009, 0x8004, + 0x800d, 0x9003, 0x9002, 0x8012, 0x8013, 0x8011, 0x8010, 0x800f, + 0x9005] + ), ("missing-filenames.tnef", 0x601, - ['generpts.src', 'TechlibDEC99.doc', 'TechlibDEC99-JAN00.doc', 'TechlibNOV99.doc'], 'rtfbody', []), - ("multi-value-attribute.tnef", 0x1512, ['208225__5_seconds__Voice_Mail.mp3'], None, []), + ['generpts.src', 'TechlibDEC99.doc', 'TechlibDEC99-JAN00.doc', 'TechlibNOV99.doc'], 'rtfbody', + [0x9006, 0x9007, 0x8008, 0x8005, 0x8020, 0x8009, 0x8004, 0x800d, 0x9003, 0x9002, + 0x8012, 0x8013, 0x8010, 0x8011, 0x800f, 0x9005, 0x9002, 0x8012, 0x8013, 0x8010, + 0x800f, 0x9005, 0x9002, 0x8012, 0x8013, 0x8010, 0x800f, 0x9005, 0x9002, 0x8012, + 0x8013, 0x8010, 0x800f, 0x9005] + ), + ("multi-value-attribute.tnef", 0x1512, ['208225__5_seconds__Voice_Mail.mp3'], 'rtfbody', []), ("one-file.tnef", 0x237, ['AUTHORS'], None, []), ("rtf.tnef", 0xc02, [], 'rtfbody', []), ("triples.tnef", 0xea64, [], 'body', []), - ("unicode-mapi-attr-name.tnef", 0x69ec, ['spaconsole2.cfg', 'image001.png', 'image002.png', 'image003.png'], None, []), - ("unicode-mapi-attr.tnef", 0x408f, ['example.dat'], None, []), + ("unicode-mapi-attr-name.tnef", 0x69ec, ['spaconsole2.cfg', 'image001.png', 'image002.png', 'image003.png'], 'htmlbody', []), + ("unicode-mapi-attr.tnef", 0x408f, ['example.dat'], 'body', []), ("umlaut.tnef", 0xa2e, ['TBZ PARIV GmbH.jpg', 'image003.jpg', u'UmlautAnhang-\xe4\xfc\xf6.txt'], 'rtfbody', []), ) @@ -70,6 +83,12 @@ def test_decode(tnefspec): if body: assert getattr(t, body) + assert t.has_body() + else: + assert not t.has_body() + + if t.rtfbody: + assert t.rtfbody[0:5] == b'{\\rtf' if objs: assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects] diff --git a/tnefparse/cmdline.py b/tnefparse/cmdline.py index 6b62874..5c20b56 100644 --- a/tnefparse/cmdline.py +++ b/tnefparse/cmdline.py @@ -32,6 +32,9 @@ argument('-hb', '--htmlbody', action='store_true', help='extract the HTML body to stdout') +argument('-rb', '--rtfbody', action='store_true', + help='extract the RTF body to stdout') + argument('-l', '--logging', choices=["DEBUG", "INFO", "WARN", "ERROR"], help="enable logging by setting a log level") @@ -83,11 +86,21 @@ def tnefparse(): for a in t.attachments: with open(pth + a.name.decode('utf-8'), "wb") as afp: afp.write(a.data) - sys.exit("Successfully wrote %i files" % len(t.attachments)) + sys.stderr.write("Successfully wrote %i files\n" % len(t.attachments)) + sys.exit() + + def print_body(attr, description): + body = getattr(t, attr) + if body is None: + sys.exit("No %s found" % description) + elif isinstance(body, bytes): + sys.stdout.write(body.decode('latin-1')) + else: + sys.stdout.write(body) if args.body: - print(getattr(t, "body", "No body found")) - + print_body("body", "body") if args.htmlbody: - body = getattr(t, "htmlbody", ["No HTML body found"]) - print(body[0]) + print_body("htmlbody", "HTML body") + if args.rtfbody: + print_body("rtfbody", "RTF body") diff --git a/tnefparse/tnef.py b/tnefparse/tnef.py index bc99358..0fcd1fd 100644 --- a/tnefparse/tnef.py +++ b/tnefparse/tnef.py @@ -211,7 +211,7 @@ def __init__(self, data, do_checksum=True): self.msgprops = [] self.body = None self.htmlbody = None - self.rtfbody = None + self._rtfbody = None offset = 6 if not do_checksum: @@ -244,7 +244,7 @@ def __init__(self, data, do_checksum=True): elif p.name == TNEFMAPI_Attribute.MAPI_BODY_HTML: self.htmlbody = p.data elif p.name == TNEFMAPI_Attribute.MAPI_RTF_COMPRESSED: - self.rtfbody = p.data + self._rtfbody = p.data elif obj.name == TNEF.ATTBODY: self.body = obj.data elif obj.name == TNEF.ATTTNEFVERSION: @@ -289,7 +289,19 @@ def __init__(self, data, do_checksum=True): logger.debug("Unhandled TNEF Object: %s" % obj) def has_body(self): - return True if (self.body or self.htmlbody or self.rtfbody) else False + return True if (self.body or self.htmlbody or self._rtfbody) else False + + @property + def rtfbody(self): + if self._rtfbody: + try: + from compressed_rtf import decompress + return decompress(self._rtfbody + b'\x00') + except ImportError: + logger.warning("Returning compressed RTF. Install compressed_rtf to decompress") + return self._rtfbody + else: + return None def __str__(self): atts = (", %i attachments" % len(self.attachments)) if self.attachments else ''