Adding a file iterator for Pidgin/Gaim
mtlynch committed Oct 29, 2018
1 parent 358fd10 commit 8bef34d
Showing 3 changed files with 71 additions and 7 deletions.
13 changes: 13 additions & 0 deletions chat_unifier/file_iterators/pidgin.py
@@ -0,0 +1,13 @@
import os


def iterate_files(directory):
    for root, dirs, filenames in os.walk(directory):
        for filename in filenames:
            if _is_log_file(filename):
                yield os.path.join(root, filename)


def _is_log_file(filename):
    _, extension = os.path.splitext(filename)
    return extension == '.html'
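
For context, a minimal sketch of how the new iterator can be used on its own (the log directory below is only an illustrative assumption and is not part of this change):

import os

from chat_unifier.file_iterators import pidgin

# Print every .html log file found under an assumed Pidgin log root.
for log_path in pidgin.iterate_files(os.path.expanduser('~/.purple/logs')):
    print log_path
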
32 changes: 25 additions & 7 deletions main.py
@@ -8,6 +8,7 @@
from chat_unifier import json_serializer
from chat_unifier import history_merger
from chat_unifier.parsers.trillian_xml import parser as trillian_parser
from chat_unifier.file_iterators import pidgin as pidgin_iterator
from chat_unifier.file_iterators import trillian_xml as trillian_xml_iterator

logger = logging.getLogger(__name__)
@@ -28,23 +29,40 @@ def main(args):
    configure_logging()
    logger.info('Started running')
    merger = history_merger.Merger()
    for log_dir in args.trillian:
        logger.info('Searching for logs in %s', log_dir)
        for log_path in trillian_xml_iterator.iterate_files(log_dir):
            with open(log_path) as log_handle:
                parser = trillian_parser.Parser()
                merger.add(parser.parse(log_handle.read()))
                logger.info('Parsed %s', os.path.basename(log_path))
    processors = [
        (args.trillian, trillian_xml_iterator, trillian_parser.Parser()),
        (args.pidgin, pidgin_iterator, None),
    ]
    for dir_roots, file_iterator, log_parser in processors:
        if dir_roots:
            _process_log_dirs(dir_roots, file_iterator, log_parser, merger)
    print json.dumps([h for h in merger],
                     indent=2,
                     sort_keys=True,
                     cls=json_serializer.Serializer)


def _process_log_dirs(dir_roots, file_iterator, log_parser, merger):
    for dir_root in dir_roots:
        _process_log_dir(dir_root, file_iterator, log_parser, merger)


def _process_log_dir(dir_root, file_iterator, log_parser, merger):
    logger.info('Searching for logs in %s', dir_root)
    for log_path in file_iterator.iterate_files(dir_root):
        if not log_parser:
            logger.info('Skipping %s', log_path)
            continue
        with open(log_path) as log_handle:
            merger.add(log_parser.parse(log_handle.read()))
            logger.info('Parsed %s', os.path.basename(log_path))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        prog='Chat Unifier',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--trillian', action='append', help='Trillian XML log root')
    parser.add_argument('--pidgin', action='append', help='Pidgin log root')
    main(parser.parse_args())
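
With the new flag in place, a Pidgin log root can be passed alongside one or more Trillian roots; a hypothetical invocation (paths are illustrative only) might look like:

python main.py --trillian ~/Documents/trillian-logs --pidgin ~/.purple/logs
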
33 changes: 33 additions & 0 deletions tests/file_iterators/test_pidgin.py
@@ -0,0 +1,33 @@
import os
import unittest

import mock

from chat_unifier.file_iterators import pidgin


class PidginFileIteratorTest(unittest.TestCase):

    def setUp(self):
        self.maxDiff = None

    def test_picks_correct_log_files(self):
        with mock.patch.object(os, 'walk') as mock_walk:
            mock_walk.return_value = [
                ('/logs', ('aim',), ('README.txt',)),
                ('/logs/aim', ('LocalUser123',), ()),
                ('/logs/aim/LocalUser123', ('RemoteUser345', 'RemoteUser456'),
                 ()),
                ('/logs/aim/LocalUser123/RemoteUser345', (),
                 ('2007-02-24.020826-0500EST.html',
                  '2007-02-25.154550-0500EST.html')),
                ('/logs/aim/LocalUser123/RemoteUser456', (),
                 ('2006-11-19.195755-0500EST.html',
                  '2006-11-22.112333-0500EST.html')),
            ]
            self.assertEqual([
                '/logs/aim/LocalUser123/RemoteUser345/2007-02-24.020826-0500EST.html',
                '/logs/aim/LocalUser123/RemoteUser345/2007-02-25.154550-0500EST.html',
                '/logs/aim/LocalUser123/RemoteUser456/2006-11-19.195755-0500EST.html',
                '/logs/aim/LocalUser123/RemoteUser456/2006-11-22.112333-0500EST.html',
            ], [f for f in pidgin.iterate_files('/logs')])
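
A note on running the new test: it uses the third-party mock package, since unittest.mock is not part of the Python 2 standard library this project targets. Assuming mock is installed and the tests directories are importable packages, something like the following should work:

python -m unittest tests.file_iterators.test_pidgin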
