diff --git a/chat_unifier/file_iterators/pidgin.py b/chat_unifier/file_iterators/pidgin.py index ff4d0ee..3dd69d8 100644 --- a/chat_unifier/file_iterators/pidgin.py +++ b/chat_unifier/file_iterators/pidgin.py @@ -1,13 +1,25 @@ import os +_IGNORED_MEDIA = ['irc'] + def iterate_files(directory): for root, dirs, filenames in os.walk(directory): for filename in filenames: if _is_log_file(filename): - yield os.path.join(root, filename) + log_path = os.path.join(root, filename) + if _get_log_medium(log_path) in _IGNORED_MEDIA: + continue + yield log_path def _is_log_file(filename): _, extension = os.path.splitext(filename) return extension == '.html' + + +def _get_log_medium(log_path): + path_parts = log_path.split(os.path.sep) + if len(path_parts) < 4: + return None + return path_parts[-4] diff --git a/chat_unifier/parsers/pidgin/html_reader.py b/chat_unifier/parsers/pidgin/html_reader.py index 26e07cc..e8a824d 100644 --- a/chat_unifier/parsers/pidgin/html_reader.py +++ b/chat_unifier/parsers/pidgin/html_reader.py @@ -60,11 +60,15 @@ def __init__(self): def results(self): return self._results + def feed(self, html): + html_annotated = _annotate_html(html) + HTMLParser.feed(self, html_annotated) + def handle_starttag(self, tag, attrs): attrs_dict = dict(attrs) if tag == 'title': self._update_state(_STATE_PARSING_TITLE) - if ((self._state == _STATE_SEEKING_NEXT_MESSAGE) and (tag == 'font')): + elif ((self._state == _STATE_SEEKING_NEXT_MESSAGE) and (tag == 'font')): if 'color' in attrs_dict: font_color = attrs_dict['color'] if _is_local_user_font_color(font_color): @@ -94,6 +98,9 @@ def handle_endtag(self, tag): def handle_startendtag(self, tag, attrs): if ((self._state == _STATE_PARSING_CONTENTS) and (tag == 'br')): + self._add_message_contents('\n') + elif ((self._state == _STATE_PARSING_CONTENTS) and + (tag == 'message-end')): self._update_state(_STATE_SEEKING_NEXT_MESSAGE) def handle_data(self, data): @@ -106,22 +113,59 @@ def handle_data(self, data): elif self._state == _STATE_PARSING_CONTENTS: if not data.strip(): return - self._add_message_contents(data) + self._add_message_contents(data.decode('utf8')) + + def handle_entityref(self, name): + decoded = _decode_html_entity_ref(name) + if self._state == _STATE_PARSING_CONTENTS: + self._add_message_contents(decoded) + elif self._state == _STATE_PARSING_DISPLAY_NAME: + self._add_display_name(decoded) + + def handle_charref(self, name): + decoded = _decode_html_char_ref(name) + if self._state == _STATE_PARSING_CONTENTS: + self._add_message_contents(decoded) + elif self._state == _STATE_PARSING_DISPLAY_NAME: + self._add_display_name(decoded) def _add_title(self, title): - self.results.append((RESULT_TYPE_TITLE, title)) + self._results.append((RESULT_TYPE_TITLE, title)) def _add_message_start(self, message_type): - self.results.append((RESULT_TYPE_MESSAGE_START, message_type)) + self._results.append((RESULT_TYPE_MESSAGE_START, message_type)) def _add_timestamp(self, timestamp): - self.results.append((RESULT_TYPE_TIMESTAMP, timestamp)) + self._results.append((RESULT_TYPE_TIMESTAMP, timestamp)) def _add_display_name(self, display_name): - self.results.append((RESULT_TYPE_DISPLAY_NAME, display_name)) + self._append_or_coalesce_result(RESULT_TYPE_DISPLAY_NAME, display_name) def _add_message_contents(self, message_contents): - self.results.append((RESULT_TYPE_MESSAGE_CONTENTS, message_contents)) + self._append_or_coalesce_result(RESULT_TYPE_MESSAGE_CONTENTS, + message_contents) + + def _append_or_coalesce_result(self, result_type, result_value): + if self._results: + last_result_type, last_result_value = self._results[-1] + if last_result_type == result_type: + self._results.pop() + result_value = last_result_value + result_value + self._results.append((result_type, result_value)) def _update_state(self, new_state): self._state = new_state + + +def _annotate_html(html): + # We need to specially mark line-terminating
tags otherwise there's + # ambiguity in where the message ends (
can appear within messages). + return html.replace('\r\n', '\n').replace('
\n', '\n') + + +def _decode_html_entity_ref(entity_ref): + return HTMLParser().unescape('&' + entity_ref + ';') + + +def _decode_html_char_ref(entity_ref): + return HTMLParser().unescape('&#' + entity_ref + ';') diff --git a/chat_unifier/parsers/pidgin/parser.py b/chat_unifier/parsers/pidgin/parser.py new file mode 100644 index 0000000..964aa8b --- /dev/null +++ b/chat_unifier/parsers/pidgin/parser.py @@ -0,0 +1,154 @@ +from __future__ import absolute_import + +import datetime +import re + +from chat_unifier import models +from chat_unifier.parsers.pidgin import html_reader + +_TITLE_PATTERN = re.compile( + r'^Conversation with (?P.+) at (?P\d{1,2}/\d{1,2}/\d{4}) (?P\d{1,2}:\d{1,2}:\d{1,2}) (?P[AP]M) on (?P.+) \((?P.+)\)$' +) + + +class Error(Exception): + pass + + +class UnexpectedResultType(Error): + pass + + +class UnexpectedMessageDirection(Error): + pass + + +class InvalidMetadata(Error): + pass + + +class Parser(object): + + def parse(self, log_contents): + reader = html_reader.Reader() + reader.feed(log_contents) + converter = _ResultsToHistoryConverter(reader.results) + return converter.convert() + + +class _ResultsToHistoryConverter(object): + + def __init__(self, results): + self._results = results + self._metadata = None + self._last_timestamp = None + + def convert(self): + self._process_metadata() + return models.History( + local_username=self._metadata['local_username'], + remote_username=self._metadata['remote_username'], + messages=self._process_messages()) + + def _process_metadata(self): + title = self._pop_result_with_type(html_reader.RESULT_TYPE_TITLE) + self._metadata = _metadata_from_title(title) + self._last_timestamp = self._metadata['start_timestamp'] + + def _process_messages(self): + messages = [] + while self._results: + messages.append(self._process_next_message()) + return messages + + def _process_next_message(self): + message_direction = self._pop_result_with_type( + html_reader.RESULT_TYPE_MESSAGE_START) + timestamp_raw = self._pop_result_with_type( + html_reader.RESULT_TYPE_TIMESTAMP) + # TODO(mtlynch): Save the display name. + self._pop_result_with_type(html_reader.RESULT_TYPE_DISPLAY_NAME) + contents = self._pop_result_with_type( + html_reader.RESULT_TYPE_MESSAGE_CONTENTS) + + return models.Message( + sender=self._sender_from_message_direction(message_direction), + timestamp=self._parse_message_timestamp(timestamp_raw), + contents=contents) + + def _sender_from_message_direction(self, message_direction): + if message_direction == html_reader.MESSAGE_DIRECTION_OUTGOING: + return self._metadata['local_username'] + elif message_direction == html_reader.MESSAGE_DIRECTION_INCOMING: + return self._metadata['remote_username'] + else: + raise UnexpectedMessageDirection( + 'Unrecognized message direction: %s' % message_direction) + + def _parse_message_timestamp(self, time_string): + # Strip parens from timestamp. + time_string = time_string[1:-1] + if _timestamp_includes_date(time_string): + timestamp = datetime.datetime.strptime(time_string, + '%m/%d/%Y %I:%M:%S %p') + else: + datetime_string = ( + self._last_timestamp.strftime('%m/%d/%Y') + ' ' + time_string) + timestamp = datetime.datetime.strptime(datetime_string, + '%m/%d/%Y %I:%M:%S %p') + if self._timestamp_rolled_over_to_next_day(timestamp): + timestamp += datetime.timedelta(days=1) + + self._last_timestamp = timestamp + return timestamp + + def _timestamp_rolled_over_to_next_day(self, timestamp): + return timestamp < self._last_timestamp + + def _peek_next_result_type(self): + if not self._results: + return None + result_type, _ = self._results[0] + return result_type + + def _pop_result_with_type(self, result_type_expected): + result_type, result_value = self._results.pop(0) + if result_type != result_type_expected: + raise UnexpectedResultType( + 'Expected result type %s, but got %s:%s' % + (result_type_expected, result_type, result_value)) + return result_value + + +def _metadata_from_title(title): + match = _TITLE_PATTERN.match(title) + if not match: + raise InvalidMetadata('Unexpected metadata format: %s' % title) + local_username = _strip_username_suffix(match.group('local_username')) + return { + 'local_username': + local_username, + 'remote_username': + match.group('remote_username'), + 'medium': + match.group('medium'), + 'start_timestamp': + _parse_timestamp_parts( + match.group('start_date'), match.group('start_time'), + match.group('am_pm')), + } + + +def _strip_username_suffix(username): + if '/' in username: + return username.split('/')[0] + return username + + +def _timestamp_includes_date(timestamp): + return '/' in timestamp + + +def _parse_timestamp_parts(date_string, time_string, am_pm): + timestamp_string = '%s %s %s' % (date_string, time_string, am_pm) + return datetime.datetime.strptime(timestamp_string, '%m/%d/%Y %I:%M:%S %p') diff --git a/main.py b/main.py index d2b4967..0277ef2 100755 --- a/main.py +++ b/main.py @@ -7,6 +7,7 @@ from chat_unifier import json_serializer from chat_unifier import history_merger +from chat_unifier.parsers.pidgin import parser as pidgin_parser from chat_unifier.parsers.trillian_xml import parser as trillian_parser from chat_unifier.file_iterators import pidgin as pidgin_iterator from chat_unifier.file_iterators import trillian_xml as trillian_xml_iterator @@ -31,7 +32,7 @@ def main(args): merger = history_merger.Merger() processors = [ (args.trillian, trillian_xml_iterator, trillian_parser.Parser()), - (args.pidgin, pidgin_iterator, None), + (args.pidgin, pidgin_iterator, pidgin_parser.Parser()), ] for dir_roots, file_iterator, log_parser in processors: if dir_roots: @@ -50,11 +51,12 @@ def _process_log_dirs(dir_roots, file_iterator, log_parser, merger): def _process_log_dir(dir_root, file_iterator, log_parser, merger): logger.info('Searching for logs in %s', dir_root) for log_path in file_iterator.iterate_files(dir_root): - if not log_parser: - logger.info('Skipping %s', log_path) - continue + logger.info('Parsing %s', log_path) with open(log_path) as log_handle: - merger.add(parser.parse(log_handle.read())) + try: + merger.add(log_parser.parse(log_handle.read())) + except Exception as ex: + logger.error('Failed to parse: %s', ex.message) logger.info('Parsed %s', os.path.basename(log_path)) diff --git a/tests/file_iterators/test_pidgin.py b/tests/file_iterators/test_pidgin.py index 9af8b2f..7389b6c 100644 --- a/tests/file_iterators/test_pidgin.py +++ b/tests/file_iterators/test_pidgin.py @@ -31,3 +31,23 @@ def test_picks_correct_log_files(self): '/log/aim/LocalUser123/RemoteUser456/2006-11-19.195755-0500EST.html', '/log/aim/LocalUser123/RemoteUser456/2006-11-22.112333-0500EST.html', ], [f for f in pidgin.iterate_files('/logs')]) + + def test_ignores_irc_log_files(self): + with mock.patch.object(os, 'walk') as mock_walk: + mock_walk.return_value = [ + ('/logs', ('aim', 'irc'), ('README.txt',)), + ('/logs/aim', ('LocalUser123',), ()), + ('/log/aim/LocalUser123', ('RemoteUser345',), ()), + ('/log/aim/LocalUser123/RemoteUser345', (), + ('2007-02-24.020826-0500EST.html', + '2007-02-25.154550-0500EST.html')), + ('/log/irc', ('localuser123@irc.freenode.net',), ()), + ('/log/irc/localuser123@irc.freenode.net', ('#dummy.chat',), + ()), + ('/log/irc/localuser123@irc.freenode.net/#dummy.chat', (), + ('2006-06-21.200806-0400EST.html',)), + ] + self.assertEqual([ + '/log/aim/LocalUser123/RemoteUser345/2007-02-24.020826-0500EST.html', + '/log/aim/LocalUser123/RemoteUser345/2007-02-25.154550-0500EST.html', + ], [f for f in pidgin.iterate_files('/logs')]) diff --git a/tests/parsers/pidgin/test_html_reader.py b/tests/parsers/pidgin/test_html_reader.py index a3d7431..94b8a3b 100644 --- a/tests/parsers/pidgin/test_html_reader.py +++ b/tests/parsers/pidgin/test_html_reader.py @@ -6,6 +6,7 @@ class HtmlReaderTest(unittest.TestCase): def setUp(self): + self.maxDiff = None self.reader = html_reader.Reader() def test_parse_log_with_simple_conversation(self): @@ -31,6 +32,29 @@ def test_parse_log_with_simple_conversation(self): (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'), ], self.reader.results) + def test_parse_log_with_simple_conversation_with_cr_lf_line_endings(self): + self.reader.feed(""" +Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

+(12:18:08 PM) Alice: how are you
+(12:18:37 PM) Bob:good good
+ +""".lstrip().replace('\n', '\r\n')) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_INCOMING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(12:18:08 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'how are you'), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_OUTGOING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(12:18:37 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'), + ], self.reader.results) + def test_parse_log_with_no_closing_body_and_html_tags(self): self.reader.feed(""" Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

@@ -53,6 +77,50 @@ def test_parse_log_with_no_closing_body_and_html_tags(self): (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'), ], self.reader.results) + def test_allows_br_in_message_body(self): + self.reader.feed(""" +Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)

+(3:33:44 PM) Bob:
PARKER
I saw you last week and you weren't showing at all. Tell me Steve, what trimester are you in?
+(3:34:13 PM) Bob:
STEVE
Well this baby's different Parker, because it's on a semester system
+ +""".lstrip()) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_OUTGOING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(3:33:44 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, + '\nPARKER\nI saw you last week and you weren\'t showing at all. Tell me Steve, what trimester are you in?' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_OUTGOING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(3:34:13 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, + '\nSTEVE\nWell this baby\'s different Parker, because it\'s on a semester system' + ), + ], self.reader.results) + + def test_allows_nbsp_in_message_body(self): + self.reader.feed(""" +Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)

+(12:47:11 PM) Alice:  yeah?
+ +""".lstrip()) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_INCOMING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(12:47:11 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, u'\xa0yeah?'), + ], self.reader.results) + def test_parse_log_ignores_special_message_types(self): self.reader.feed(""" Conversation with RemoteUser345 at 12/20/2006 6:05:37 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 6:05:37 PM on LocalUser123 (aim)

@@ -79,3 +147,57 @@ def test_parse_log_ignores_special_message_types(self): (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'), (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'wassup'), ], self.reader.results) + + def test_decodes_html_encoded_entities_in_message_contents(self): + self.reader.feed(""" +Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+ +""".lstrip()) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_INCOMING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(2:45:36 AM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Gabe:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, + u'we need a \'bigger fish to fry\' poster'), + ], self.reader.results) + + def test_decodes_html_encoded_entities_in_display_name(self): + self.reader.feed(""" +Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)

Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)

+(3:36:09 PM) Alice <AUTO-REPLY>: I am away from my computer right now.
+ +""".lstrip()) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_INCOMING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(3:36:09 PM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice :'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, + ' I am away from my computer right now.'), + ], self.reader.results) + + def test_decodes_html_encoded_char_refs_in_message_contents(self): + self.reader.feed(""" +Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+ +""".lstrip()) + self.assertEqual([ + (html_reader.RESULT_TYPE_TITLE, + 'Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)' + ), + (html_reader.RESULT_TYPE_MESSAGE_START, + html_reader.MESSAGE_DIRECTION_INCOMING), + (html_reader.RESULT_TYPE_TIMESTAMP, '(2:45:36 AM)'), + (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Gabe:'), + (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, + u'we need a \'bigger fish to fry\' poster'), + ], self.reader.results) diff --git a/tests/parsers/pidgin/test_parser.py b/tests/parsers/pidgin/test_parser.py new file mode 100644 index 0000000..5b033e7 --- /dev/null +++ b/tests/parsers/pidgin/test_parser.py @@ -0,0 +1,115 @@ +import datetime +import unittest + +from chat_unifier import models +from chat_unifier.parsers.pidgin import parser + + +class PidginParserTest(unittest.TestCase): + + def test_parse_log_with_one_simple_conversation(self): + self.assertEqual( + parser.Parser().parse(""" +Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

+(12:18:08 PM) Alice: how are you
+(12:18:37 PM) Bob:good good
+ +""".lstrip()), + models.History( + local_username='LocalUser123', + remote_username='RemoteUser345', + messages=[ + models.Message( + sender='RemoteUser345', + timestamp=datetime.datetime(2006, 12, 20, 12, 18, 8), + contents='how are you'), + models.Message( + sender='LocalUser123', + timestamp=datetime.datetime(2006, 12, 20, 12, 18, 37), + contents='good good') + ])) + + def test_parse_log_with_one_simple_conversation_starting_in_am(self): + self.assertEqual( + parser.Parser().parse(""" +Conversation with RemoteUser345 at 12/20/2006 12:18:08 AM on LocalUser123 (aim)

Conversation with RemoteUser345 at 12/20/2006 12:18:08 AM on LocalUser123 (aim)

+(12:18:08 AM) Alice: how are you
+(12:18:37 AM) Bob:good good
+ +""".lstrip()), + models.History( + local_username='LocalUser123', + remote_username='RemoteUser345', + messages=[ + models.Message( + sender='RemoteUser345', + timestamp=datetime.datetime(2006, 12, 20, 0, 18, 8), + contents='how are you'), + models.Message( + sender='LocalUser123', + timestamp=datetime.datetime(2006, 12, 20, 0, 18, 37), + contents='good good') + ])) + + def test_detects_when_timestamps_roll_over_to_next_day(self): + self.assertEqual( + parser.Parser().parse(""" +Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)

+(11:59:59 PM) Alice: get ready for midnight
+(12:00:02 AM) Alice: that was everything I dreamed
+ +""".lstrip()), + models.History( + local_username='LocalUser123', + remote_username='RemoteUser345', + messages=[ + models.Message( + sender='RemoteUser345', + timestamp=datetime.datetime(2006, 5, 15, 23, 59, 59), + contents='get ready for midnight'), + models.Message( + sender='RemoteUser345', + timestamp=datetime.datetime(2006, 5, 16, 0, 0, 2), + contents='that was everything I dreamed') + ])) + + def test_handles_message_timestamps_with_date(self): + self.assertEqual( + parser.Parser().parse(""" +Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)

Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)

+(5/15/2006 2:35:18 PM) Alice: hello there
+ +""".lstrip()), + models.History( + local_username='LocalUser123', + remote_username='RemoteUser345', + messages=[ + models.Message( + sender='RemoteUser345', + timestamp=datetime.datetime(2006, 5, 15, 14, 35, 18), + contents='hello there'), + ])) + + def test_handles_messages_with_many_contents_results(self): + self.assertEqual( + parser.Parser().parse(""" +Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)

+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+ +""".lstrip()), + models.History( + local_username='localuser567@example.com', + remote_username='remoteuser123@example.com', + messages=[ + models.Message( + sender='remoteuser123@example.com', + timestamp=datetime.datetime(2007, 4, 9, 2, 45, 36), + contents='we need a \'bigger fish to fry\' poster'), + ])) + + def test_raises_exception_when_title_has_unexpected_format(self): + with self.assertRaises(parser.InvalidMetadata): + parser.Parser().parse(""" +BADTITLE BADTITLE BADTITLE BADTITLE

Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)

+ +""".lstrip())