From e27c8dca90d0ce73de3d5c037afd74379801be8e Mon Sep 17 00:00:00 2001 From: Janneke van der Zwaan Date: Sat, 24 Sep 2016 13:52:30 +0200 Subject: [PATCH 1/3] For writing file diffs, try to read files with multiple encodings When calculating file diffs, we try to read files with both utf-8, and latin-1 encoding. These are the default encodings of python 2 and python 3. If the file can't be opened using one of those, a warning is issued. Refs #121 --- recipy/log.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/recipy/log.py b/recipy/log.py index 8177c59..e862eb7 100644 --- a/recipy/log.py +++ b/recipy/log.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import os import datetime import sys @@ -11,6 +12,7 @@ from tinydb import Query import difflib import warnings +import codecs from recipyCommon.version_control import add_git_info, add_svn_info, hash_file from recipyCommon.config import option_set, get_db_path @@ -293,17 +295,41 @@ def output_file_diffs(): if not option_set('data', 'file_diff_outputs'): return - db = open_or_create_db() - diffs_table = db.table('filediffs') - diffs = diffs_table.search(Query().run_id == RUN_ID) + encodings = ['utf-8', 'latin-1'] + + with open_or_create_db() as db: + diffs_table = db.table('filediffs') + diffs = diffs_table.search(Query().run_id == RUN_ID) + for item in diffs: - diff = difflib.unified_diff(open(item['tempfilename']).readlines(), - open(item['filename']).readlines(), - fromfile='before this run', - tofile='after this run') - diffs_table.update({'diff': ''.join([l for l in diff])}, - eids=[item.eid]) + lines1 = None + lines2 = None + for enc in encodings: + try: + with codecs.open(item['tempfilename'], encoding=enc) as f: + lines1 = f.readlines() + except UnicodeDecodeError: + pass + + try: + with codecs.open(item['filename'], encoding=enc) as f: + lines2 = f.readlines() + except UnicodeDecodeError: + pass + + if lines1 is not None and lines2 
is not None: + diff = difflib.unified_diff(lines1, + lines2, + fromfile='before this run', + tofile='after this run') + with open_or_create_db() as db: + diffs_table.update({'diff': ''.join([l for l in diff])}, + eids=[item.eid]) + else: + msg = ('Unable to read file "{}" using supported encodings ({}). ' + 'To be able to store file diffs, use one of the supported ' + 'encodings to write the output file.') + warnings.warn(msg.format(item['filename'], ', '.join(encodings))) # delete temporary file os.remove(item['tempfilename']) - db.close() From c21cf76d580ae46b77d91ed99064645873ebf5b9 Mon Sep 17 00:00:00 2001 From: Janneke van der Zwaan Date: Sat, 24 Sep 2016 14:18:21 +0200 Subject: [PATCH 2/3] Only calculate file diffs for non-binary files --- recipy/log.py | 4 +++- requirements.txt | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/recipy/log.py b/recipy/log.py index e862eb7..6a186b7 100644 --- a/recipy/log.py +++ b/recipy/log.py @@ -13,6 +13,7 @@ import difflib import warnings import codecs +from binaryornot.check import is_binary from recipyCommon.version_control import add_git_info, add_svn_info, hash_file from recipyCommon.config import option_set, get_db_path @@ -171,7 +172,8 @@ def log_output(filename, source): db = open_or_create_db() - if option_set('data', 'file_diff_outputs') and os.path.isfile(filename): + if option_set('data', 'file_diff_outputs') and os.path.isfile(filename) \ + and not is_binary(filename): tf = tempfile.NamedTemporaryFile(delete=False) shutil.copy2(filename, tf.name) add_file_diff_to_db(filename, tf.name, db) diff --git a/requirements.txt b/requirements.txt index 48c292a..55bb26b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ flask-testing blinker six colorama +binaryornot From 45e430e936a927e5f7a14824fdd17afbc383b872 Mon Sep 17 00:00:00 2001 From: Janneke van der Zwaan Date: Sat, 24 Sep 2016 14:22:16 +0200 Subject: [PATCH 3/3] Add debug message for storing file diffs --- recipy/log.py | 3 
+++ 1 file changed, 3 insertions(+) diff --git a/recipy/log.py b/recipy/log.py index 6a186b7..80ba092 100644 --- a/recipy/log.py +++ b/recipy/log.py @@ -304,6 +304,9 @@ def output_file_diffs(): diffs = diffs_table.search(Query().run_id == RUN_ID) for item in diffs: + if option_set('general', 'debug'): + print('Storing file diff for "%s"' % item['filename']) + lines1 = None lines2 = None for enc in encodings: