diff --git a/chat_unifier/file_iterators/pidgin.py b/chat_unifier/file_iterators/pidgin.py
index ff4d0ee..3dd69d8 100644
--- a/chat_unifier/file_iterators/pidgin.py
+++ b/chat_unifier/file_iterators/pidgin.py
@@ -1,13 +1,25 @@
import os
+_IGNORED_MEDIA = ['irc']
+
def iterate_files(directory):
for root, dirs, filenames in os.walk(directory):
for filename in filenames:
if _is_log_file(filename):
- yield os.path.join(root, filename)
+ log_path = os.path.join(root, filename)
+ if _get_log_medium(log_path) in _IGNORED_MEDIA:
+ continue
+ yield log_path
def _is_log_file(filename):
_, extension = os.path.splitext(filename)
return extension == '.html'
+
+
+def _get_log_medium(log_path):
+ path_parts = log_path.split(os.path.sep)
+ if len(path_parts) < 4:
+ return None
+ return path_parts[-4]
diff --git a/chat_unifier/parsers/pidgin/html_reader.py b/chat_unifier/parsers/pidgin/html_reader.py
index 26e07cc..e8a824d 100644
--- a/chat_unifier/parsers/pidgin/html_reader.py
+++ b/chat_unifier/parsers/pidgin/html_reader.py
@@ -60,11 +60,15 @@ def __init__(self):
def results(self):
return self._results
+ def feed(self, html):
+ html_annotated = _annotate_html(html)
+ HTMLParser.feed(self, html_annotated)
+
def handle_starttag(self, tag, attrs):
attrs_dict = dict(attrs)
if tag == 'title':
self._update_state(_STATE_PARSING_TITLE)
- if ((self._state == _STATE_SEEKING_NEXT_MESSAGE) and (tag == 'font')):
+ elif ((self._state == _STATE_SEEKING_NEXT_MESSAGE) and (tag == 'font')):
if 'color' in attrs_dict:
font_color = attrs_dict['color']
if _is_local_user_font_color(font_color):
@@ -94,6 +98,9 @@ def handle_endtag(self, tag):
def handle_startendtag(self, tag, attrs):
if ((self._state == _STATE_PARSING_CONTENTS) and (tag == 'br')):
+ self._add_message_contents('\n')
+ elif ((self._state == _STATE_PARSING_CONTENTS) and
+ (tag == 'message-end')):
self._update_state(_STATE_SEEKING_NEXT_MESSAGE)
def handle_data(self, data):
@@ -106,22 +113,59 @@ def handle_data(self, data):
elif self._state == _STATE_PARSING_CONTENTS:
if not data.strip():
return
- self._add_message_contents(data)
+ self._add_message_contents(data.decode('utf8'))
+
+ def handle_entityref(self, name):
+ decoded = _decode_html_entity_ref(name)
+ if self._state == _STATE_PARSING_CONTENTS:
+ self._add_message_contents(decoded)
+ elif self._state == _STATE_PARSING_DISPLAY_NAME:
+ self._add_display_name(decoded)
+
+ def handle_charref(self, name):
+ decoded = _decode_html_char_ref(name)
+ if self._state == _STATE_PARSING_CONTENTS:
+ self._add_message_contents(decoded)
+ elif self._state == _STATE_PARSING_DISPLAY_NAME:
+ self._add_display_name(decoded)
def _add_title(self, title):
- self.results.append((RESULT_TYPE_TITLE, title))
+ self._results.append((RESULT_TYPE_TITLE, title))
def _add_message_start(self, message_type):
- self.results.append((RESULT_TYPE_MESSAGE_START, message_type))
+ self._results.append((RESULT_TYPE_MESSAGE_START, message_type))
def _add_timestamp(self, timestamp):
- self.results.append((RESULT_TYPE_TIMESTAMP, timestamp))
+ self._results.append((RESULT_TYPE_TIMESTAMP, timestamp))
def _add_display_name(self, display_name):
- self.results.append((RESULT_TYPE_DISPLAY_NAME, display_name))
+ self._append_or_coalesce_result(RESULT_TYPE_DISPLAY_NAME, display_name)
def _add_message_contents(self, message_contents):
- self.results.append((RESULT_TYPE_MESSAGE_CONTENTS, message_contents))
+ self._append_or_coalesce_result(RESULT_TYPE_MESSAGE_CONTENTS,
+ message_contents)
+
+ def _append_or_coalesce_result(self, result_type, result_value):
+ if self._results:
+ last_result_type, last_result_value = self._results[-1]
+ if last_result_type == result_type:
+ self._results.pop()
+ result_value = last_result_value + result_value
+ self._results.append((result_type, result_value))
def _update_state(self, new_state):
self._state = new_state
+
+
+def _annotate_html(html):
+ # We need to specially mark line-terminating <br/> tags otherwise there's
+ # ambiguity in where the message ends (<br/> can appear within messages).
+ return html.replace('\r\n', '\n').replace('<br/>\n', '<message-end/>\n')
+
+
+def _decode_html_entity_ref(entity_ref):
+ return HTMLParser().unescape('&' + entity_ref + ';')
+
+
+def _decode_html_char_ref(entity_ref):
+ return HTMLParser().unescape('&#' + entity_ref + ';')
diff --git a/chat_unifier/parsers/pidgin/parser.py b/chat_unifier/parsers/pidgin/parser.py
new file mode 100644
index 0000000..964aa8b
--- /dev/null
+++ b/chat_unifier/parsers/pidgin/parser.py
@@ -0,0 +1,154 @@
+from __future__ import absolute_import
+
+import datetime
+import re
+
+from chat_unifier import models
+from chat_unifier.parsers.pidgin import html_reader
+
+_TITLE_PATTERN = re.compile(
+ r'^Conversation with (?P<remote_username>.+) at (?P<start_date>\d{1,2}/\d{1,2}/\d{4}) (?P<start_time>\d{1,2}:\d{1,2}:\d{1,2}) (?P<am_pm>[AP]M) on (?P<local_username>.+) \((?P<medium>.+)\)$'
+)
+
+
+class Error(Exception):
+ pass
+
+
+class UnexpectedResultType(Error):
+ pass
+
+
+class UnexpectedMessageDirection(Error):
+ pass
+
+
+class InvalidMetadata(Error):
+ pass
+
+
+class Parser(object):
+
+ def parse(self, log_contents):
+ reader = html_reader.Reader()
+ reader.feed(log_contents)
+ converter = _ResultsToHistoryConverter(reader.results)
+ return converter.convert()
+
+
+class _ResultsToHistoryConverter(object):
+
+ def __init__(self, results):
+ self._results = results
+ self._metadata = None
+ self._last_timestamp = None
+
+ def convert(self):
+ self._process_metadata()
+ return models.History(
+ local_username=self._metadata['local_username'],
+ remote_username=self._metadata['remote_username'],
+ messages=self._process_messages())
+
+ def _process_metadata(self):
+ title = self._pop_result_with_type(html_reader.RESULT_TYPE_TITLE)
+ self._metadata = _metadata_from_title(title)
+ self._last_timestamp = self._metadata['start_timestamp']
+
+ def _process_messages(self):
+ messages = []
+ while self._results:
+ messages.append(self._process_next_message())
+ return messages
+
+ def _process_next_message(self):
+ message_direction = self._pop_result_with_type(
+ html_reader.RESULT_TYPE_MESSAGE_START)
+ timestamp_raw = self._pop_result_with_type(
+ html_reader.RESULT_TYPE_TIMESTAMP)
+ # TODO(mtlynch): Save the display name.
+ self._pop_result_with_type(html_reader.RESULT_TYPE_DISPLAY_NAME)
+ contents = self._pop_result_with_type(
+ html_reader.RESULT_TYPE_MESSAGE_CONTENTS)
+
+ return models.Message(
+ sender=self._sender_from_message_direction(message_direction),
+ timestamp=self._parse_message_timestamp(timestamp_raw),
+ contents=contents)
+
+ def _sender_from_message_direction(self, message_direction):
+ if message_direction == html_reader.MESSAGE_DIRECTION_OUTGOING:
+ return self._metadata['local_username']
+ elif message_direction == html_reader.MESSAGE_DIRECTION_INCOMING:
+ return self._metadata['remote_username']
+ else:
+ raise UnexpectedMessageDirection(
+ 'Unrecognized message direction: %s' % message_direction)
+
+ def _parse_message_timestamp(self, time_string):
+ # Strip parens from timestamp.
+ time_string = time_string[1:-1]
+ if _timestamp_includes_date(time_string):
+ timestamp = datetime.datetime.strptime(time_string,
+ '%m/%d/%Y %I:%M:%S %p')
+ else:
+ datetime_string = (
+ self._last_timestamp.strftime('%m/%d/%Y') + ' ' + time_string)
+ timestamp = datetime.datetime.strptime(datetime_string,
+ '%m/%d/%Y %I:%M:%S %p')
+ if self._timestamp_rolled_over_to_next_day(timestamp):
+ timestamp += datetime.timedelta(days=1)
+
+ self._last_timestamp = timestamp
+ return timestamp
+
+ def _timestamp_rolled_over_to_next_day(self, timestamp):
+ return timestamp < self._last_timestamp
+
+ def _peek_next_result_type(self):
+ if not self._results:
+ return None
+ result_type, _ = self._results[0]
+ return result_type
+
+ def _pop_result_with_type(self, result_type_expected):
+ result_type, result_value = self._results.pop(0)
+ if result_type != result_type_expected:
+ raise UnexpectedResultType(
+ 'Expected result type %s, but got %s:%s' %
+ (result_type_expected, result_type, result_value))
+ return result_value
+
+
+def _metadata_from_title(title):
+ match = _TITLE_PATTERN.match(title)
+ if not match:
+ raise InvalidMetadata('Unexpected metadata format: %s' % title)
+ local_username = _strip_username_suffix(match.group('local_username'))
+ return {
+ 'local_username':
+ local_username,
+ 'remote_username':
+ match.group('remote_username'),
+ 'medium':
+ match.group('medium'),
+ 'start_timestamp':
+ _parse_timestamp_parts(
+ match.group('start_date'), match.group('start_time'),
+ match.group('am_pm')),
+ }
+
+
+def _strip_username_suffix(username):
+ if '/' in username:
+ return username.split('/')[0]
+ return username
+
+
+def _timestamp_includes_date(timestamp):
+ return '/' in timestamp
+
+
+def _parse_timestamp_parts(date_string, time_string, am_pm):
+ timestamp_string = '%s %s %s' % (date_string, time_string, am_pm)
+ return datetime.datetime.strptime(timestamp_string, '%m/%d/%Y %I:%M:%S %p')
diff --git a/main.py b/main.py
index d2b4967..0277ef2 100755
--- a/main.py
+++ b/main.py
@@ -7,6 +7,7 @@
from chat_unifier import json_serializer
from chat_unifier import history_merger
+from chat_unifier.parsers.pidgin import parser as pidgin_parser
from chat_unifier.parsers.trillian_xml import parser as trillian_parser
from chat_unifier.file_iterators import pidgin as pidgin_iterator
from chat_unifier.file_iterators import trillian_xml as trillian_xml_iterator
@@ -31,7 +32,7 @@ def main(args):
merger = history_merger.Merger()
processors = [
(args.trillian, trillian_xml_iterator, trillian_parser.Parser()),
- (args.pidgin, pidgin_iterator, None),
+ (args.pidgin, pidgin_iterator, pidgin_parser.Parser()),
]
for dir_roots, file_iterator, log_parser in processors:
if dir_roots:
@@ -50,11 +51,12 @@ def _process_log_dirs(dir_roots, file_iterator, log_parser, merger):
def _process_log_dir(dir_root, file_iterator, log_parser, merger):
logger.info('Searching for logs in %s', dir_root)
for log_path in file_iterator.iterate_files(dir_root):
- if not log_parser:
- logger.info('Skipping %s', log_path)
- continue
+ logger.info('Parsing %s', log_path)
with open(log_path) as log_handle:
- merger.add(parser.parse(log_handle.read()))
+ try:
+ merger.add(log_parser.parse(log_handle.read()))
+ except Exception as ex:
+ logger.error('Failed to parse: %s', ex.message)
logger.info('Parsed %s', os.path.basename(log_path))
diff --git a/tests/file_iterators/test_pidgin.py b/tests/file_iterators/test_pidgin.py
index 9af8b2f..7389b6c 100644
--- a/tests/file_iterators/test_pidgin.py
+++ b/tests/file_iterators/test_pidgin.py
@@ -31,3 +31,23 @@ def test_picks_correct_log_files(self):
'/log/aim/LocalUser123/RemoteUser456/2006-11-19.195755-0500EST.html',
'/log/aim/LocalUser123/RemoteUser456/2006-11-22.112333-0500EST.html',
], [f for f in pidgin.iterate_files('/logs')])
+
+ def test_ignores_irc_log_files(self):
+ with mock.patch.object(os, 'walk') as mock_walk:
+ mock_walk.return_value = [
+ ('/logs', ('aim', 'irc'), ('README.txt',)),
+ ('/logs/aim', ('LocalUser123',), ()),
+ ('/log/aim/LocalUser123', ('RemoteUser345',), ()),
+ ('/log/aim/LocalUser123/RemoteUser345', (),
+ ('2007-02-24.020826-0500EST.html',
+ '2007-02-25.154550-0500EST.html')),
+ ('/log/irc', ('localuser123@irc.freenode.net',), ()),
+ ('/log/irc/localuser123@irc.freenode.net', ('#dummy.chat',),
+ ()),
+ ('/log/irc/localuser123@irc.freenode.net/#dummy.chat', (),
+ ('2006-06-21.200806-0400EST.html',)),
+ ]
+ self.assertEqual([
+ '/log/aim/LocalUser123/RemoteUser345/2007-02-24.020826-0500EST.html',
+ '/log/aim/LocalUser123/RemoteUser345/2007-02-25.154550-0500EST.html',
+ ], [f for f in pidgin.iterate_files('/logs')])
diff --git a/tests/parsers/pidgin/test_html_reader.py b/tests/parsers/pidgin/test_html_reader.py
index a3d7431..94b8a3b 100644
--- a/tests/parsers/pidgin/test_html_reader.py
+++ b/tests/parsers/pidgin/test_html_reader.py
@@ -6,6 +6,7 @@
class HtmlReaderTest(unittest.TestCase):
def setUp(self):
+ self.maxDiff = None
self.reader = html_reader.Reader()
def test_parse_log_with_simple_conversation(self):
@@ -31,6 +32,29 @@ def test_parse_log_with_simple_conversation(self):
(html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'),
], self.reader.results)
+ def test_parse_log_with_simple_conversation_with_cr_lf_line_endings(self):
+ self.reader.feed("""
+Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)
+(12:18:08 PM) Alice: how are you
+(12:18:37 PM) Bob:good good
+
+""".lstrip().replace('\n', '\r\n'))
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_INCOMING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(12:18:08 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'how are you'),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_OUTGOING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(12:18:37 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'),
+ ], self.reader.results)
+
def test_parse_log_with_no_closing_body_and_html_tags(self):
self.reader.feed("""
Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)
@@ -53,6 +77,50 @@ def test_parse_log_with_no_closing_body_and_html_tags(self):
(html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'good good'),
], self.reader.results)
+ def test_allows_br_in_message_body(self):
+ self.reader.feed("""
+Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)
+(3:33:44 PM) Bob:
PARKER
I saw you last week and you weren't showing at all. Tell me Steve, what trimester are you in?
+(3:34:13 PM) Bob:
STEVE
Well this baby's different Parker, because it's on a semester system
+
+""".lstrip())
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_OUTGOING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(3:33:44 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS,
+ '\nPARKER\nI saw you last week and you weren\'t showing at all. Tell me Steve, what trimester are you in?'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_OUTGOING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(3:34:13 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Bob:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS,
+ '\nSTEVE\nWell this baby\'s different Parker, because it\'s on a semester system'
+ ),
+ ], self.reader.results)
+
+ def test_allows_nbsp_in_message_body(self):
+ self.reader.feed("""
+Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)
+(12:47:11 PM) Alice: yeah?
+
+""".lstrip())
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with RemoteUser345 at 12/20/2006 3:33:44 PM on LocalUser123 (aim)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_INCOMING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(12:47:11 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS, u'\xa0yeah?'),
+ ], self.reader.results)
+
def test_parse_log_ignores_special_message_types(self):
self.reader.feed("""
Conversation with RemoteUser345 at 12/20/2006 6:05:37 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 6:05:37 PM on LocalUser123 (aim)
@@ -79,3 +147,57 @@ def test_parse_log_ignores_special_message_types(self):
(html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice:'),
(html_reader.RESULT_TYPE_MESSAGE_CONTENTS, 'wassup'),
], self.reader.results)
+
+ def test_decodes_html_encoded_entities_in_message_contents(self):
+ self.reader.feed("""
+Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)
+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+
+""".lstrip())
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_INCOMING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(2:45:36 AM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Gabe:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS,
+ u'we need a \'bigger fish to fry\' poster'),
+ ], self.reader.results)
+
+ def test_decodes_html_encoded_entities_in_display_name(self):
+ self.reader.feed("""
+Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)
+(3:36:09 PM) Alice <AUTO-REPLY>: I am away from my computer right now.
+
+""".lstrip())
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with RemoteUser987 at 7/4/2006 3:36:09 PM on LocalUser123 (aim)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_INCOMING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(3:36:09 PM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Alice <AUTO-REPLY>:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS,
+ ' I am away from my computer right now.'),
+ ], self.reader.results)
+
+ def test_decodes_html_encoded_char_refs_in_message_contents(self):
+ self.reader.feed("""
+Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)
+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+
+""".lstrip())
+ self.assertEqual([
+ (html_reader.RESULT_TYPE_TITLE,
+ 'Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)'
+ ),
+ (html_reader.RESULT_TYPE_MESSAGE_START,
+ html_reader.MESSAGE_DIRECTION_INCOMING),
+ (html_reader.RESULT_TYPE_TIMESTAMP, '(2:45:36 AM)'),
+ (html_reader.RESULT_TYPE_DISPLAY_NAME, 'Gabe:'),
+ (html_reader.RESULT_TYPE_MESSAGE_CONTENTS,
+ u'we need a \'bigger fish to fry\' poster'),
+ ], self.reader.results)
diff --git a/tests/parsers/pidgin/test_parser.py b/tests/parsers/pidgin/test_parser.py
new file mode 100644
index 0000000..5b033e7
--- /dev/null
+++ b/tests/parsers/pidgin/test_parser.py
@@ -0,0 +1,115 @@
+import datetime
+import unittest
+
+from chat_unifier import models
+from chat_unifier.parsers.pidgin import parser
+
+
+class PidginParserTest(unittest.TestCase):
+
+ def test_parse_log_with_one_simple_conversation(self):
+ self.assertEqual(
+ parser.Parser().parse("""
+Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)
+(12:18:08 PM) Alice: how are you
+(12:18:37 PM) Bob:good good
+
+""".lstrip()),
+ models.History(
+ local_username='LocalUser123',
+ remote_username='RemoteUser345',
+ messages=[
+ models.Message(
+ sender='RemoteUser345',
+ timestamp=datetime.datetime(2006, 12, 20, 12, 18, 8),
+ contents='how are you'),
+ models.Message(
+ sender='LocalUser123',
+ timestamp=datetime.datetime(2006, 12, 20, 12, 18, 37),
+ contents='good good')
+ ]))
+
+ def test_parse_log_with_one_simple_conversation_starting_in_am(self):
+ self.assertEqual(
+ parser.Parser().parse("""
+Conversation with RemoteUser345 at 12/20/2006 12:18:08 AM on LocalUser123 (aim)Conversation with RemoteUser345 at 12/20/2006 12:18:08 AM on LocalUser123 (aim)
+(12:18:08 AM) Alice: how are you
+(12:18:37 AM) Bob:good good
+
+""".lstrip()),
+ models.History(
+ local_username='LocalUser123',
+ remote_username='RemoteUser345',
+ messages=[
+ models.Message(
+ sender='RemoteUser345',
+ timestamp=datetime.datetime(2006, 12, 20, 0, 18, 8),
+ contents='how are you'),
+ models.Message(
+ sender='LocalUser123',
+ timestamp=datetime.datetime(2006, 12, 20, 0, 18, 37),
+ contents='good good')
+ ]))
+
+ def test_detects_when_timestamps_roll_over_to_next_day(self):
+ self.assertEqual(
+ parser.Parser().parse("""
+Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)
+(11:59:59 PM) Alice: get ready for midnight
+(12:00:02 AM) Alice: that was everything I dreamed
+
+""".lstrip()),
+ models.History(
+ local_username='LocalUser123',
+ remote_username='RemoteUser345',
+ messages=[
+ models.Message(
+ sender='RemoteUser345',
+ timestamp=datetime.datetime(2006, 5, 15, 23, 59, 59),
+ contents='get ready for midnight'),
+ models.Message(
+ sender='RemoteUser345',
+ timestamp=datetime.datetime(2006, 5, 16, 0, 0, 2),
+ contents='that was everything I dreamed')
+ ]))
+
+ def test_handles_message_timestamps_with_date(self):
+ self.assertEqual(
+ parser.Parser().parse("""
+Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)Conversation with RemoteUser345 at 5/15/2006 11:59:59 PM on LocalUser123 (aim)
+(5/15/2006 2:35:18 PM) Alice: hello there
+
+""".lstrip()),
+ models.History(
+ local_username='LocalUser123',
+ remote_username='RemoteUser345',
+ messages=[
+ models.Message(
+ sender='RemoteUser345',
+ timestamp=datetime.datetime(2006, 5, 15, 14, 35, 18),
+ contents='hello there'),
+ ]))
+
+ def test_handles_messages_with_many_contents_results(self):
+ self.assertEqual(
+ parser.Parser().parse("""
+Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)Conversation with remoteuser123@example.com at 4/9/2007 2:45:36 AM on localuser567@example.com/Home (jabber)
+(2:45:36 AM) Gabe: we need a 'bigger fish to fry' poster
+
+""".lstrip()),
+ models.History(
+ local_username='localuser567@example.com',
+ remote_username='remoteuser123@example.com',
+ messages=[
+ models.Message(
+ sender='remoteuser123@example.com',
+ timestamp=datetime.datetime(2007, 4, 9, 2, 45, 36),
+ contents='we need a \'bigger fish to fry\' poster'),
+ ]))
+
+ def test_raises_exception_when_title_has_unexpected_format(self):
+ with self.assertRaises(parser.InvalidMetadata):
+ parser.Parser().parse("""
+BADTITLE BADTITLE BADTITLE BADTITLEConversation with RemoteUser345 at 12/20/2006 12:18:08 PM on LocalUser123 (aim)
+
+""".lstrip())