Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly handle latin-1 encoding in file diffs #48934

Merged
merged 12 commits into from
Aug 7, 2018
24 changes: 7 additions & 17 deletions salt/modules/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

# Import python libs
import datetime
import difflib
import errno
import fnmatch
import io
Expand Down Expand Up @@ -1567,7 +1566,7 @@ def comment_line(path,
check_perms(path, None, pre_user, pre_group, pre_mode)

# Return a diff using the two dictionaries
return ''.join(difflib.unified_diff(orig_file, new_file))
return __utils__['stringutils.get_diff'](orig_file, new_file)


def _get_flags(flags):
Expand Down Expand Up @@ -2038,9 +2037,7 @@ def line(path, content=None, match=None, mode=None, location=None,
if show_changes:
with salt.utils.files.fopen(path, 'r') as fp_:
path_content = salt.utils.data.decode_list(fp_.read().splitlines(True))
changes_diff = ''.join(difflib.unified_diff(
path_content, body
))
changes_diff = __utils__['stringutils.get_diff'](path_content, body)
if __opts__['test'] is False:
fh_ = None
try:
Expand Down Expand Up @@ -2426,18 +2423,15 @@ def replace(path,
if not dry_run and not salt.utils.platform.is_windows():
check_perms(path, None, pre_user, pre_group, pre_mode)

def get_changes():
orig_file_as_str = [salt.utils.stringutils.to_unicode(x) for x in orig_file]
new_file_as_str = [salt.utils.stringutils.to_unicode(x) for x in new_file]
return ''.join(difflib.unified_diff(orig_file_as_str, new_file_as_str))
differences = __utils__['stringutils.get_diff'](orig_file, new_file)

if show_changes:
return get_changes()
return differences

# We may have found a regex line match but don't need to change the line
# (for situations where the pattern also matches the repl). Revert the
# has_changes flag to False if the final result is unchanged.
if not get_changes():
if not differences:
has_changes = False

return has_changes
Expand Down Expand Up @@ -2688,7 +2682,7 @@ def _add_content(linesep, lines=None, include_marker_start=True,
)

if block_found:
diff = ''.join(difflib.unified_diff(orig_file, new_file))
diff = __utils__['stringutils.get_diff'](orig_file, new_file)
has_changes = diff is not ''
if has_changes and not dry_run:
# changes detected
Expand Down Expand Up @@ -5018,11 +5012,7 @@ def get_diff(file1,
else:
if show_filenames:
args.extend(files)
ret = ''.join(
difflib.unified_diff(
*salt.utils.data.decode(args)
)
)
ret = __utils__['stringutils.get_diff'](*args)
return ret
return ''

Expand Down
118 changes: 86 additions & 32 deletions salt/utils/stringutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Import Python libs
from __future__ import absolute_import, print_function, unicode_literals
import base64
import difflib
import errno
import fnmatch
import logging
Expand All @@ -31,21 +32,32 @@ def to_bytes(s, encoding=None, errors='strict'):
Given bytes, bytearray, str, or unicode (python 2), return bytes (str for
python 2)
'''
if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)

if not encoding:
raise ValueError('encoding cannot be empty')

exc = None
if six.PY3:
if isinstance(s, bytes):
return s
if isinstance(s, bytearray):
return bytes(s)
if isinstance(s, six.string_types):
if encoding:
return s.encode(encoding, errors)
else:
for enc in encoding:
try:
# Try UTF-8 first
return s.encode('utf-8', errors)
except UnicodeEncodeError:
# Fall back to detected encoding
return s.encode(__salt_system_encoding__, errors)
return s.encode(enc, errors)
except UnicodeEncodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeEncodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected bytes, bytearray, or str')
else:
return to_str(s, encoding, errors)
Expand All @@ -61,35 +73,48 @@ def _normalize(s):
except TypeError:
return s

if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)

if not encoding:
raise ValueError('encoding cannot be empty')

# This shouldn't be six.string_types because if we're on PY2 and we already
# have a string, we should just return it.
if isinstance(s, str):
return _normalize(s)

exc = None
if six.PY3:
if isinstance(s, (bytes, bytearray)):
if encoding:
return _normalize(s.decode(encoding, errors))
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s.decode('utf-8', errors))
except UnicodeDecodeError:
# Fall back to detected encoding
return _normalize(s.decode(__salt_system_encoding__, errors))
return _normalize(s.decode(enc, errors))
except UnicodeDecodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeDecodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str, bytes, or bytearray not {}'.format(type(s)))
else:
if isinstance(s, bytearray):
return str(s) # future lint: disable=blacklisted-function
if isinstance(s, unicode): # pylint: disable=incompatible-py3-code,undefined-variable
if encoding:
return _normalize(s).encode(encoding, errors)
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s).encode('utf-8', errors)
except UnicodeEncodeError:
# Fall back to detected encoding
return _normalize(s).encode(__salt_system_encoding__, errors)
return _normalize(s).encode(enc, errors)
except UnicodeEncodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeEncodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str, bytearray, or unicode')


Expand All @@ -100,6 +125,16 @@ def to_unicode(s, encoding=None, errors='strict', normalize=False):
def _normalize(s):
return unicodedata.normalize('NFC', s) if normalize else s

if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)

if not encoding:
raise ValueError('encoding cannot be empty')

exc = None
if six.PY3:
if isinstance(s, str):
return _normalize(s)
Expand All @@ -113,15 +148,16 @@ def _normalize(s):
if isinstance(s, unicode): # pylint: disable=incompatible-py3-code
return _normalize(s)
elif isinstance(s, (str, bytearray)):
if encoding:
return _normalize(s.decode(encoding, errors))
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s.decode('utf-8', errors))
except UnicodeDecodeError:
# Fall back to detected encoding
return _normalize(s.decode(__salt_system_encoding__, errors))
return _normalize(s.decode(enc, errors))
except UnicodeDecodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeDecodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str or bytearray')


Expand Down Expand Up @@ -513,3 +549,21 @@ def get_context(template, line, num_lines=5, marker=None):
buf[error_line_in_context] += marker

return '---\n{0}\n---'.format('\n'.join(buf))


def get_diff(a, b, *args, **kwargs):
    '''
    Build a unified diff of two iterables of lines and return it as a single
    string.

    Both ``a`` and ``b`` are first passed through
    ``salt.utils.data.decode_list`` with the candidate encodings ``utf-8``,
    ``latin-1`` and the Salt system encoding, so that the lines handed to
    ``difflib`` are normalized to str types (avoids issues with unicode on
    PY2).

    Any additional positional and keyword arguments are forwarded unchanged
    to ``difflib.unified_diff``.
    '''
    # NOTE(review): presumably decode_list tries these encodings in the order
    # given -- confirm against salt.utils.data.
    candidate_encodings = ('utf-8', 'latin-1', __salt_system_encoding__)

    # Late import to avoid circular import
    import salt.utils.data

    decoded_a = salt.utils.data.decode_list(a, encoding=candidate_encodings)
    decoded_b = salt.utils.data.decode_list(b, encoding=candidate_encodings)

    # unified_diff yields the diff line by line; glue the pieces together.
    diff_lines = difflib.unified_diff(decoded_a, decoded_b, *args, **kwargs)
    return ''.join(diff_lines)
5 changes: 5 additions & 0 deletions tests/integration/files/file/base/issue-48777/new.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<html>
<body>
räksmörgås
</body>
</html>
4 changes: 4 additions & 0 deletions tests/integration/files/file/base/issue-48777/old.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<html>
<body>
</body>
</html>
Loading