Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unspecified-encoding checker #3826 #4753

Merged
merged 6 commits into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -521,3 +521,5 @@ contributors:
* Yilei Yang: contributor

* Marcin Kurczewski (rr-): contributor

* Daniel van Noord (DanielNoord): contributor
4 changes: 4 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Release date: TBA
..
Put new features here and also in 'doc/whatsnew/2.10.rst'

* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding
DanielNoord marked this conversation as resolved.
Show resolved Hide resolved

Closes #3826


What's New in Pylint 2.9.6?
===========================
Expand Down
3 changes: 3 additions & 0 deletions doc/whatsnew/2.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ Summary -- Release highlights
New checkers
============

* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding

Closes #3826
cdce8p marked this conversation as resolved.
Show resolved Hide resolved


Other Changes
Expand Down
2 changes: 1 addition & 1 deletion pylint/checkers/similar.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def Run(argv=None):
min_lines, ignore_comments, ignore_docstrings, ignore_imports, ignore_signatures
)
for filename in args:
with open(filename) as stream:
with open(filename, encoding="utf-8") as stream:
sim.append_stream(filename, stream)
sim.run()
sys.exit(0)
Expand Down
2 changes: 1 addition & 1 deletion pylint/checkers/spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def open(self):
dict_name, self.config.spelling_private_dict_file
)
self.private_dict_file = open( # pylint: disable=consider-using-with
self.config.spelling_private_dict_file, "a"
self.config.spelling_private_dict_file, "a", encoding="utf-8"
)
else:
self.spelling_dict = enchant.Dict(dict_name)
Expand Down
55 changes: 52 additions & 3 deletions pylint/checkers/stdlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com>
# Copyright (c) 2021 Matus Valo <matusvalo@users.noreply.github.com>
# Copyright (c) 2021 victor <16359131+jiajunsu@users.noreply.github.com>
# Copyright (c) 2021 Daniel van Noord <13665637+DanielNoord@users.noreply.github.com>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
Expand All @@ -44,12 +45,13 @@
from pylint.checkers import BaseChecker, DeprecatedMixin, utils
from pylint.interfaces import IAstroidChecker

OPEN_FILES = {"open", "file"}
OPEN_FILES_MODE = ("open", "file")
OPEN_FILES_ENCODING = ("open",)
UNITTEST_CASE = "unittest.case"
THREADING_THREAD = "threading.Thread"
COPY_COPY = "copy.copy"
OS_ENVIRON = "os._Environ"
ENV_GETTERS = {"os.getenv"}
ENV_GETTERS = ("os.getenv",)
SUBPROCESS_POPEN = "subprocess.Popen"
SUBPROCESS_RUN = "subprocess.run"
OPEN_MODULE = "_io"
Expand Down Expand Up @@ -425,6 +427,13 @@ class StdlibChecker(DeprecatedMixin, BaseChecker):
"deprecated-decorator",
"The decorator is marked as deprecated and will be removed in the future.",
),
"W1514": (
"Using open without explicitly specifying an encoding",
"unspecified-encoding",
"It is better to specify an encoding when opening documents. "
"Using the system default implicitly can create problems on other operating systems. "
"See https://www.python.org/dev/peps/pep-0597/",
),
}

def __init__(self, linter=None):
Expand Down Expand Up @@ -485,6 +494,7 @@ def _check_shallow_copy_environ(self, node):
"subprocess-popen-preexec-fn",
"subprocess-run-check",
"deprecated-class",
"unspecified-encoding",
)
def visit_call(self, node):
"""Visit a Call node."""
Expand All @@ -494,8 +504,19 @@ def visit_call(self, node):
if inferred is astroid.Uninferable:
continue
if inferred.root().name == OPEN_MODULE:
if getattr(node.func, "name", None) in OPEN_FILES:
if (
isinstance(node.func, astroid.Name)
and node.func.name in OPEN_FILES_MODE
):
self._check_open_mode(node)
if (
isinstance(node.func, astroid.Name)
and node.func.name in OPEN_FILES_ENCODING
) or (
isinstance(node.func, astroid.Attribute)
Pierre-Sassoulas marked this conversation as resolved.
Show resolved Hide resolved
and node.func.attrname in OPEN_FILES_ENCODING
):
self._check_open_encoded(node)
elif inferred.root().name == UNITTEST_CASE:
self._check_redundant_assert(node, inferred)
elif isinstance(inferred, astroid.ClassDef):
Expand Down Expand Up @@ -573,6 +594,34 @@ def _check_open_mode(self, node):
):
self.add_message("bad-open-mode", node=node, args=mode_arg.value)

def _check_open_encoded(self, node: astroid.Call) -> None:
"""Check that the encoded argument of an open call is valid."""
mode_arg = None
try:
mode_arg = utils.get_argument_from_call(node, position=1, keyword="mode")
except utils.NoSuchArgumentError:
pass

if mode_arg:
mode_arg = utils.safe_infer(mode_arg)
if not mode_arg or "b" not in mode_arg.value:
encoding_arg = None
try:
encoding_arg = utils.get_argument_from_call(
node, position=None, keyword="encoding"
)
except utils.NoSuchArgumentError:
self.add_message("unspecified-encoding", node=node)

if encoding_arg:
encoding_arg = utils.safe_infer(encoding_arg)

if (
isinstance(encoding_arg, astroid.Const)
and encoding_arg.value is None
):
self.add_message("unspecified-encoding", node=node)

def _check_env_function(self, node, infer):
env_name_kwarg = "key"
env_value_kwarg = "default"
Expand Down
2 changes: 1 addition & 1 deletion pylint/config/find_default_config_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def _toml_has_config(path):
with open(path) as toml_handle:
with open(path, encoding="utf-8") as toml_handle:
try:
content = toml.load(toml_handle)
except TomlDecodeError as error:
Expand Down
2 changes: 1 addition & 1 deletion pylint/config/option_manager_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def read_config_file(self, config_file=None, verbose=None):
parser = self.cfgfile_parser

if config_file.endswith(".toml"):
with open(config_file) as fp:
with open(config_file, encoding="utf-8") as fp:
content = toml.load(fp)

try:
Expand Down
9 changes: 6 additions & 3 deletions pylint/lint/pylinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,9 @@ def _load_reporters(self) -> None:
(reporter_output,) = reporter_output

# pylint: disable=consider-using-with
output_file = stack.enter_context(open(reporter_output, "w"))
output_file = stack.enter_context(
open(reporter_output, "w", encoding="utf-8")
)

reporter.set_output(output_file)
output_files.append(output_file)
Expand Down Expand Up @@ -617,8 +619,9 @@ def set_option(self, optname, value, action=None, optdict=None):
except KeyError:
meth = self._bw_options_methods[optname]
warnings.warn(
"%s is deprecated, replace it by %s"
% (optname, optname.split("-")[0]),
"{} is deprecated, replace it by {}".format(
optname, optname.split("-")[0]
),
Pierre-Sassoulas marked this conversation as resolved.
Show resolved Hide resolved
DeprecationWarning,
)
value = utils._check_csv(value)
Expand Down
2 changes: 1 addition & 1 deletion pylint/lint/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def __init__(

if self._output:
try:
with open(self._output, "w") as output:
with open(self._output, "w", encoding="utf-8") as output:
linter.reporter.set_output(output)
linter.check(args)
score_value = linter.generate_reports()
Expand Down
2 changes: 1 addition & 1 deletion pylint/pyreverse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_default_options():
if home:
rcfile = os.path.join(home, RCFILE)
try:
with open(rcfile) as file_handle:
with open(rcfile, encoding="utf-8") as file_handle:
options = file_handle.read().split()
except OSError:
pass # ignore if no config file found
Expand Down
4 changes: 3 additions & 1 deletion pylint/pyreverse/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ def __init__(self, config):

def set_printer(self, file_name, basename):
"""initialize VCGWriter for a UML graph"""
self.graph_file = open(file_name, "w+") # pylint: disable=consider-using-with
self.graph_file = open( # pylint: disable=consider-using-with
file_name, "w+", encoding="utf-8"
)
self.printer = VCGPrinter(self.graph_file)
self.printer.open_graph(
title=basename,
Expand Down
4 changes: 2 additions & 2 deletions pylint/testutils/lint_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ def multiset_difference(
# pylint: disable=consider-using-with
def _open_expected_file(self):
try:
return open(self._test_file.expected_output)
return open(self._test_file.expected_output, encoding="utf-8")
except FileNotFoundError:
return StringIO("")

# pylint: disable=consider-using-with
def _open_source_file(self):
if self._test_file.base == "invalid_encoded_data":
return open(self._test_file.source)
return open(self._test_file.source, encoding="utf-8")
if "latin1" in self._test_file.base:
return open(self._test_file.source, encoding="latin1")
return open(self._test_file.source, encoding="utf8")
Expand Down
4 changes: 2 additions & 2 deletions script/bump_changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def main() -> None:
logging.debug(f"Launching bump_changelog with args: {args}")
if "dev" in args.version:
return
with open(DEFAULT_CHANGELOG_PATH) as f:
with open(DEFAULT_CHANGELOG_PATH, encoding="utf-8") as f:
content = f.read()
content = transform_content(content, args.version)
with open(DEFAULT_CHANGELOG_PATH, "w") as f:
with open(DEFAULT_CHANGELOG_PATH, "w", encoding="utf-8") as f:
f.write(content)


Expand Down
4 changes: 2 additions & 2 deletions script/fix_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def main(argv: Union[List[str], None] = None) -> int:

return_value: int = 0
for file_name in args.filenames:
with open(file_name) as fp:
with open(file_name, encoding="utf-8") as fp:
orignal_content = fp.read()
content = orignal_content
# Modify files
Expand All @@ -91,7 +91,7 @@ def main(argv: Union[List[str], None] = None) -> int:
content = changelog_insert_empty_lines(content, args.subtitle_prefix)
# If modified, write changes and eventually return 1
if orignal_content != content:
with open(file_name, "w") as fp:
with open(file_name, "w", encoding="utf-8") as fp:
fp.write(content)
return_value |= 1
return return_value
Expand Down
2 changes: 1 addition & 1 deletion tests/checkers/unittest_similar.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def test_get_map_data():
# Manually perform a 'map' type function
for source_fname in source_streams:
sim = similar.SimilarChecker(linter)
with open(source_fname) as stream:
with open(source_fname, encoding="utf-8") as stream:
sim.append_stream(source_fname, stream)
# The map bit, can you tell? ;)
data.extend(sim.get_map_data())
Expand Down
20 changes: 10 additions & 10 deletions tests/functional/b/bad_open_mode_py3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@

NAME = "foo.bar"
open(NAME, "wb")
open(NAME, "w")
open(NAME, "w", encoding="utf-8")
open(NAME, "rb")
open(NAME, "x")
open(NAME, "x", encoding="utf-8")
open(NAME, "br")
open(NAME, "+r")
open(NAME, "+r", encoding="utf-8")
open(NAME, "xb")
open(NAME, "rwx") # [bad-open-mode]
open(NAME, "rr") # [bad-open-mode]
open(NAME, "+") # [bad-open-mode]
open(NAME, "xw") # [bad-open-mode]
open(NAME, "rwx", encoding="utf-8") # [bad-open-mode]
open(NAME, "rr", encoding="utf-8") # [bad-open-mode]
open(NAME, "+", encoding="utf-8") # [bad-open-mode]
open(NAME, "xw", encoding="utf-8") # [bad-open-mode]
open(NAME, "ab+")
open(NAME, "a+b")
open(NAME, "+ab")
open(NAME, "+rUb")
open(NAME, "x+b")
open(NAME, "Ua") # [bad-open-mode]
open(NAME, "Ur++") # [bad-open-mode]
open(NAME, "Ut")
open(NAME, "Ua", encoding="utf-8") # [bad-open-mode]
open(NAME, "Ur++", encoding="utf-8") # [bad-open-mode]
open(NAME, "Ut", encoding="utf-8")
open(NAME, "Ubr")
2 changes: 1 addition & 1 deletion tests/functional/c/consider/consider_using_with.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def test_suppress_in_exit_stack():
"""Regression test for issue #4654 (false positive)"""
with contextlib.ExitStack() as stack:
_ = stack.enter_context(
open("/sys/firmware/devicetree/base/hwid,location", "r")
open("/sys/firmware/devicetree/base/hwid,location", "r", encoding="utf-8")
) # must not trigger


Expand Down
30 changes: 15 additions & 15 deletions tests/functional/c/consider/consider_using_with_open.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# pylint: disable=missing-function-docstring, missing-module-docstring, invalid-name, import-outside-toplevel
# pylint: disable=missing-class-docstring, too-few-public-methods, unused-variable, multiple-statements
# pylint: disable=missing-class-docstring, too-few-public-methods, unused-variable, multiple-statements, line-too-long
"""
The functional test for the standard ``open()`` function has to be moved in a separate file,
because PyPy has to be excluded for the tests as the ``open()`` function is uninferable in PyPy.
Expand All @@ -8,26 +8,26 @@
"""
from contextlib import contextmanager


myfile = open("test.txt") # [consider-using-with]
myfile = open("test.txt", encoding="utf-8") # [consider-using-with]


def test_open():
fh = open("test.txt") # [consider-using-with]
fh = open("test.txt", encoding="utf-8") # [consider-using-with]
fh.close()

with open("test.txt") as fh: # must not trigger
with open("test.txt", encoding="utf-8") as fh: # must not trigger
fh.read()


def test_open_in_enter():
"""Message must not trigger if the resource is allocated in a context manager."""

class MyContextManager:
def __init__(self):
self.file_handle = None

def __enter__(self):
self.file_handle = open("foo.txt", "w") # must not trigger
self.file_handle = open("foo.txt", "w", encoding="utf-8") # must not trigger

def __exit__(self, exc_type, exc_value, traceback):
self.file_handle.close()
Expand All @@ -36,31 +36,31 @@ def __exit__(self, exc_type, exc_value, traceback):
@contextmanager
def test_open_in_with_contextlib():
"""Message must not trigger if the resource is allocated in a context manager."""
file_handle = open("foo.txt", "w") # must not trigger
file_handle = open("foo.txt", "w", encoding="utf-8") # must not trigger
yield file_handle
file_handle.close()


def test_open_outside_assignment():
open("foo").read() # [consider-using-with]
content = open("foo").read() # [consider-using-with]
open("foo", encoding="utf-8").read() # [consider-using-with]
content = open("foo", encoding="utf-8").read() # [consider-using-with]


def test_open_inside_with_block():
with open("foo") as fh:
open("bar") # [consider-using-with]
with open("foo", encoding="utf-8") as fh:
open("bar", encoding="utf-8") # [consider-using-with]


def test_ternary_if_in_with_block(file1, file2, which):
"""Regression test for issue #4676 (false positive)"""
with (open(file1) if which else open(file2)) as input_file: # must not trigger
with (open(file1, encoding="utf-8") if which else open(file2, encoding="utf-8")) as input_file: # must not trigger
return input_file.read()


def test_single_line_with(file1):
with open(file1): return file1.read() # must not trigger
with open(file1, encoding="utf-8"): return file1.read() # must not trigger


def test_multiline_with_items(file1, file2, which):
with (open(file1) if which
else open(file2)) as input_file: return input_file.read()
with (open(file1, encoding="utf-8") if which
else open(file2, encoding="utf-8")) as input_file: return input_file.read()