diff --git a/dvc/analytics.py b/dvc/analytics.py index bb62483ff9..92033e8bcf 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -68,7 +68,7 @@ def send(path): url = "https://analytics.dvc.org" headers = {"content-type": "application/json"} - with open(path) as fobj: + with open(path, encoding="utf-8") as fobj: report = json.load(fobj) report.update(_runtime_info()) @@ -173,13 +173,13 @@ def _find_or_create_user_id(): try: with Lock(lockfile): try: - with open(fname) as fobj: + with open(fname, encoding="utf-8") as fobj: user_id = json.load(fobj)["user_id"] except (FileNotFoundError, ValueError, KeyError): user_id = str(uuid.uuid4()) - with open(fname, "w") as fobj: + with open(fname, "w", encoding="utf-8") as fobj: json.dump({"user_id": user_id}, fobj) return user_id diff --git a/dvc/api.py b/dvc/api.py index 56ec03b709..2441622e9c 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -113,7 +113,7 @@ def make_checkpoint(): root_dir, Repo.DVC_DIR, "tmp", CheckpointTask.SIGNAL_FILE ) - with builtins.open(signal_file, "w") as fobj: + with builtins.open(signal_file, "w", encoding="utf-8") as fobj: # NOTE: force flushing/writing empty file to disk, otherwise when # run in certain contexts (pytest) file may not actually be written fobj.write("") diff --git a/dvc/config.py b/dvc/config.py index 064b839ae4..3b3eac7ff3 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -141,7 +141,7 @@ def init(dvc_dir): dvc.config.Config: config object. """ config_file = os.path.join(dvc_dir, Config.CONFIG) - open(config_file, "w+").close() + open(config_file, "w+", encoding="utf-8").close() return Config(dvc_dir) def load(self, validate=True, config=None): diff --git a/dvc/fs/gdrive.py b/dvc/fs/gdrive.py index 599fd47178..5bd9e6404c 100644 --- a/dvc/fs/gdrive.py +++ b/dvc/fs/gdrive.py @@ -189,7 +189,7 @@ def fs(self): temporary_save_path = self._gdrive_service_credentials_path if is_credentials_temp: - with open(temporary_save_path, "w") as cred_file: + with open(temporary_save_path, "w", encoding="utf-8") as cred_file: cred_file.write( os.getenv(GDriveFileSystem.GDRIVE_CREDENTIALS_DATA) ) diff --git a/dvc/ignore.py b/dvc/ignore.py index 30d86e6e55..3e0582130f 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -393,7 +393,7 @@ def init(path): if os.path.exists(dvcignore): return dvcignore - with open(dvcignore, "w") as fobj: + with open(dvcignore, "w", encoding="utf-8") as fobj: fobj.write( "# Add patterns of files dvc should ignore, which could improve\n" "# the performance. Learn more at\n" diff --git a/dvc/info.py b/dvc/info.py index 577934b95c..c605cdfbca 100644 --- a/dvc/info.py +++ b/dvc/info.py @@ -93,7 +93,7 @@ def _get_linktype_support_info(repo): fname = "." + str(uuid.uuid4()) src = os.path.join(repo.odb.local.cache_dir, fname) - open(src, "w").close() + open(src, "w", encoding="utf-8").close() dst = os.path.join(repo.root_dir, fname) cache = [] diff --git a/dvc/machine/__init__.py b/dvc/machine/__init__.py index acc3b84b63..4bb8d4cce6 100644 --- a/dvc/machine/__init__.py +++ b/dvc/machine/__init__.py @@ -183,7 +183,7 @@ def create(self, name: Optional[str]): """Create and start the specified machine instance.""" config, backend = self.get_config_and_backend(name) if "startup_script" in config: - with open(config["startup_script"]) as fobj: + with open(config["startup_script"], encoding="utf-8") as fobj: startup_script = fobj.read() else: startup_script = DEFAULT_STARTUP_SCRIPT diff --git a/dvc/render/html.py b/dvc/render/html.py index e5b650bef4..d66b2cd1d9 100644 --- a/dvc/render/html.py +++ b/dvc/render/html.py @@ -89,7 +89,7 @@ def write( page_html = None if template_path: - with open(template_path) as fobj: + with open(template_path, encoding="utf-8") as fobj: page_html = fobj.read() document = HTML(page_html, refresh_seconds=refresh_seconds) @@ -102,6 +102,6 @@ def write( index = Path(os.path.join(path, "index.html")) - with open(index, "w") as fd: + with open(index, "w", encoding="utf-8") as fd: fd.write(document.embed()) return index diff --git a/dvc/repo/experiments/executor/local.py b/dvc/repo/experiments/executor/local.py index f5658e128e..a90831db25 100644 --- a/dvc/repo/experiments/executor/local.py +++ b/dvc/repo/experiments/executor/local.py @@ -64,7 +64,7 @@ def __init__( def _config(self, cache_dir): local_config = os.path.join(self.dvc_dir, "config.local") logger.debug("Writing experiments local config '%s'", local_config) - with open(local_config, "w") as fobj: + with open(local_config, "w", encoding="utf-8") as fobj: fobj.write(f"[cache]\n dir = {cache_dir}") def cleanup(self): diff --git a/dvc/repo/plots/template.py b/dvc/repo/plots/template.py index 159c62d2a9..1e1384dd9f 100644 --- a/dvc/repo/plots/template.py +++ b/dvc/repo/plots/template.py @@ -167,7 +167,7 @@ def load(self, name: str = None) -> Template: if name is not None: template_path = self._find_in_project(name) if template_path: - with open(template_path, "r") as fd: + with open(template_path, "r", encoding="utf-8") as fd: content = fd.read() return Template(content, name) else: @@ -195,5 +195,9 @@ def init(self): .joinpath(template) .read_text() ) - with open(os.path.join(self.templates_dir, template), "w") as fd: + with open( + os.path.join(self.templates_dir, template), + "w", + encoding="utf-8", + ) as fd: fd.write(content) diff --git a/dvc/rwlock.py b/dvc/rwlock.py index 29d7d90ca0..8b2e5f5a48 100644 --- a/dvc/rwlock.py +++ b/dvc/rwlock.py @@ -35,7 +35,7 @@ def __init__(self, path): def _edit_rwlock(lock_dir): path = os.path.join(lock_dir, "rwlock") try: - with open(path) as fobj: + with open(path, encoding="utf-8") as fobj: lock = SCHEMA(json.load(fobj)) except FileNotFoundError: lock = SCHEMA({}) @@ -46,7 +46,7 @@ def _edit_rwlock(lock_dir): lock["read"] = defaultdict(list, lock["read"]) lock["write"] = defaultdict(dict, lock["write"]) yield lock - with open(path, "w+") as fobj: + with open(path, "w+", encoding="utf-8") as fobj: json.dump(lock, fobj) diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index f852d0713e..0566efae1d 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -195,7 +195,7 @@ def ignore_remove(self, path): if not os.path.exists(gitignore): return - with open(gitignore) as fobj: + with open(gitignore, encoding="utf-8") as fobj: lines = fobj.readlines() filtered = list(filter(lambda x: x.strip() != entry.strip(), lines)) @@ -204,14 +204,14 @@ def ignore_remove(self, path): os.unlink(gitignore) return - with open(gitignore, "w") as fobj: + with open(gitignore, "w", encoding="utf-8") as fobj: fobj.writelines(filtered) self.track_file(relpath(gitignore)) def _install_hook(self, name): hook = self._hook_path(name) - with open(hook, "w+") as fobj: + with open(hook, "w+", encoding="utf-8") as fobj: fobj.write(f"#!/bin/sh\nexec dvc git-hook {name} $@\n") os.chmod(hook, 0o777) diff --git a/dvc/system.py b/dvc/system.py index 0387bc8b59..b8a74fdabc 100644 --- a/dvc/system.py +++ b/dvc/system.py @@ -66,7 +66,7 @@ def _reflink_linux(src, dst): try: ret = 255 - with open(src) as s, open(dst, "w+") as d: + with open(src, "rb") as s, open(dst, "wb+") as d: ret = fcntl.ioctl(d.fileno(), FICLONE, s.fileno()) finally: if ret != 0: diff --git a/dvc/updater.py b/dvc/updater.py index 43c9c18a0d..c97a27f32b 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -68,7 +68,7 @@ def _check(self): self.fetch() return - with open(self.updater_file) as fobj: + with open(self.updater_file, encoding="utf-8") as fobj: import json try: @@ -105,7 +105,7 @@ def _get_latest_version(self): logger.debug(msg.format(exc)) return - with open(self.updater_file, "w+") as fobj: + with open(self.updater_file, "w+", encoding="utf-8") as fobj: json.dump(info, fobj) def _notify(self, latest: str, pkg: Optional[str] = PKG) -> None: diff --git a/setup.cfg b/setup.cfg index 45e8188cc5..f95ea1532d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -145,7 +145,7 @@ tests = pydocstyle==6.1.1 jaraco.windows==5.7.0 # pylint requirements - pylint==2.9.6 + pylint==2.11.1 # we use this to suppress pytest-related false positives in our tests. pylint-pytest==1.0.3 # we use this to suppress some messages in tests, eg: foo/bar naming, diff --git a/tests/dir_helpers.py b/tests/dir_helpers.py index e10cd737e4..5137fafe5b 100644 --- a/tests/dir_helpers.py +++ b/tests/dir_helpers.py @@ -247,7 +247,7 @@ def read_text(self, *args, **kwargs): # pylint: disable=signature-differs path.name: path.read_text(*args, **kwargs) for path in self.iterdir() } - return super().read_text(*args, **kwargs) + return super().read_text(*args, encoding="utf-8", **kwargs) def hash_to_path_info(self, hash_): return self / hash_[0:2] / hash_[2:] diff --git a/tests/func/experiments/test_remote.py b/tests/func/experiments/test_remote.py index 2e8a6cae25..096be54dc0 100644 --- a/tests/func/experiments/test_remote.py +++ b/tests/func/experiments/test_remote.py @@ -242,7 +242,7 @@ def test_push_pull_cache( hash_ = digest(str(x)) path = os.path.join(local_remote.url, hash_[:2], hash_[2:]) assert os.path.exists(path) - assert open(path).read() == str(x) + assert open(path, encoding="utf-8").read() == str(x) remove(dvc.odb.local.cache_dir) @@ -251,7 +251,7 @@ def test_push_pull_cache( hash_ = digest(str(x)) path = os.path.join(dvc.odb.local.cache_dir, hash_[:2], hash_[2:]) assert os.path.exists(path) - assert open(path).read() == str(x) + assert open(path, encoding="utf-8").read() == str(x) def test_auth_error_list(tmp_dir, scm, dvc, http_auth_patch): diff --git a/tests/func/experiments/test_show.py b/tests/func/experiments/test_show.py index 2620e37f47..2c36eb864b 100644 --- a/tests/func/experiments/test_show.py +++ b/tests/func/experiments/test_show.py @@ -477,7 +477,7 @@ def test_show_with_broken_repo(tmp_dir, scm, dvc, exp_stage, caplog): exp1 = dvc.experiments.run(exp_stage.addressing, params=["foo=2"]) exp2 = dvc.experiments.run(exp_stage.addressing, params=["foo=3"]) - with open("dvc.yaml", "a") as fd: + with open("dvc.yaml", "a", encoding="utf-8") as fd: fd.write("breaking the yaml!") result = dvc.experiments.show() diff --git a/tests/func/metrics/test_show.py b/tests/func/metrics/test_show.py index 7cdbb3cc72..bda1db3ea8 100644 --- a/tests/func/metrics/test_show.py +++ b/tests/func/metrics/test_show.py @@ -277,7 +277,7 @@ def test_log_errors( ) scm.tag("v1") - with open(file, "a") as fd: + with open(file, "a", encoding="utf-8") as fd: fd.write("\nMALFORMED!") result = dvc.metrics.show(revs=["v1"]) diff --git a/tests/func/params/test_show.py b/tests/func/params/test_show.py index 47bd7d9759..d07cda0b41 100644 --- a/tests/func/params/test_show.py +++ b/tests/func/params/test_show.py @@ -158,7 +158,7 @@ def test_log_errors(tmp_dir, scm, dvc, capsys, file, error_path): ) rename = (tmp_dir / file).read_text() - with open(tmp_dir / file, "a") as fd: + with open(tmp_dir / file, "a", encoding="utf-8") as fd: fd.write("\nmalformed!") scm.add([PIPELINE_FILE, "params_other.yaml"]) diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index ede5ce3cf8..b5141ae518 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -184,7 +184,7 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): def test_ignore_parsing_error(tmp_dir, dvc, run_copy_metrics): - with open("file", "wb") as fobj: + with open("file", "wb", encoding=None) as fobj: fobj.write(b"\xc1") run_copy_metrics("file", "plot_file.json", plots=["plot_file.json"]) @@ -216,7 +216,7 @@ def test_log_errors( ) scm.tag("v1") - with open(file, "a") as fd: + with open(file, "a", encoding="utf-8") as fd: fd.write("\nMALFORMED!") result = dvc.plots.show(onerror=onerror_collect) diff --git a/tests/func/test_add.py b/tests/func/test_add.py index d1970f413c..3261794c28 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -87,7 +87,7 @@ def test_add_executable(tmp_dir, dvc): def test_add_unicode(tmp_dir, dvc): - with open("\xe1", "wb") as fd: + with open("\xe1", "wb", encoding=None) as fd: fd.write(b"something") (stage,) = dvc.add("\xe1") @@ -143,7 +143,7 @@ def test_warn_about_large_directories(self): # Create a lot of files for iteration in range(LARGE_DIR_SIZE + 1): path = os.path.join("large-dir", str(iteration)) - with open(path, "w") as fobj: + with open(path, "w", encoding="utf-8") as fobj: fobj.write(path) assert main(["add", "--recursive", "large-dir"]) == 0 @@ -581,7 +581,7 @@ def test(self): foo_stage = relpath(self.FOO + DVC_FILE_SUFFIX) # corrupt stage file - with open(foo_stage, "a+") as file: + with open(foo_stage, "a+", encoding="utf-8") as file: file.write("this will break yaml file structure") self._caplog.clear() diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index b9caff4eac..9c67b8223b 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -73,7 +73,7 @@ def test(self): cache = self.foo_stage.outs[0].cache_path os.chmod(cache, 0o644) - with open(cache, "a") as fd: + with open(cache, "a", encoding="utf-8") as fd: fd.write("1") with pytest.raises(CheckoutError): @@ -107,7 +107,7 @@ def test(self): ) os.chmod(cache, 0o644) - with open(cache, "w+") as fobj: + with open(cache, "w+", encoding="utf-8") as fobj: fobj.write("1") with pytest.raises(CheckoutError): @@ -127,7 +127,7 @@ class CheckoutBase(TestDvcGit): GIT_IGNORE = ".gitignore" def commit_data_file(self, fname, content="random text"): - with open(fname, "w") as fd: + with open(fname, "w", encoding="utf-8") as fd: fd.write(content) stages = self.dvc.add(fname) self.assertEqual(len(stages), 1) @@ -136,7 +136,7 @@ def commit_data_file(self, fname, content="random text"): self.dvc.scm.commit("adding " + fname) def read_ignored(self): - with open(self.GIT_IGNORE) as f: + with open(self.GIT_IGNORE, encoding="utf-8") as f: return [s.strip("\n") for s in f.readlines()] def outs_info(self, stage): @@ -188,7 +188,7 @@ def test(self, mock_prompt): stage = stages[0] working_dir_change = os.path.join(self.DATA_DIR, "not_cached.txt") - with open(working_dir_change, "w") as f: + with open(working_dir_change, "w", encoding="utf-8") as f: f.write("not_cached") ret = main(["checkout", stage.relpath]) @@ -204,7 +204,7 @@ def test_force(self, mock_prompt): stage = stages[0] working_dir_change = os.path.join(self.DATA_DIR, "not_cached.txt") - with open(working_dir_change, "w") as f: + with open(working_dir_change, "w", encoding="utf-8") as f: f.write("not_cached") ret = main(["checkout", stage.relpath]) @@ -610,7 +610,7 @@ def test_checkout_stats_on_failure(tmp_dir, dvc, scm): # corrupt cache cache = stage.outs[0].cache_path os.chmod(cache, 0o644) - with open(cache, "a") as fd: + with open(cache, "a", encoding="utf-8") as fd: fd.write("destroy cache") scm.checkout("HEAD~") diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index a62663c841..94b0e17fae 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -112,7 +112,7 @@ def _check_status(status, **kwargs): dvc.cloud.pull(foo_hashes) assert os.path.exists(cache) assert os.path.isfile(cache) - with open(cache) as fd: + with open(cache, encoding="utf-8") as fd: assert fd.read() == "foo" dvc.cloud.pull(dir_hashes) @@ -166,7 +166,7 @@ def test_cloud_cli(tmp_dir, dvc, remote): assert os.path.isfile("foo") assert os.path.isdir("data_dir") - with open(cache) as fd: + with open(cache, encoding="utf-8") as fd: assert fd.read() == "foo" assert os.path.isfile(cache_dir) diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 9a25bb7d35..efea4a7c3e 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -60,7 +60,7 @@ def _check_cache(self, num): def test(self): fname = "file" - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("v1.0") stages = self.dvc.add(fname) @@ -71,7 +71,7 @@ def test(self): self.dvc.scm.checkout("test", create_new=True) self.dvc.remove(stages[0].relpath) - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("test") stages = self.dvc.add(fname) self.assertEqual(len(stages), 1) @@ -80,7 +80,7 @@ def test(self): self.dvc.scm.checkout("master") self.dvc.remove(stages[0].relpath) - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("trash") stages = self.dvc.add(fname) self.assertEqual(len(stages), 1) @@ -88,7 +88,7 @@ def test(self): self.dvc.scm.commit("trash") self.dvc.remove(stages[0].relpath) - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("master") stages = self.dvc.add(fname) self.assertEqual(len(stages), 1) @@ -138,7 +138,7 @@ def test(self): # ADD FILE ONLY IN MAIN PROJECT fname = "only_in_first" - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("only in main repo") stages = self.dvc.add(fname) @@ -146,7 +146,7 @@ def test(self): # ADD FILE IN MAIN PROJECT THAT IS ALSO IN SECOND PROJECT fname = "in_both" - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("in both repos") stages = self.dvc.add(fname) @@ -156,7 +156,7 @@ def test(self): os.chdir(self.additional_path) # ADD FILE ONLY IN SECOND PROJECT fname = "only_in_second" - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("only in additional repo") stages = self.additional_dvc.add(fname) @@ -164,7 +164,7 @@ def test(self): # ADD FILE IN SECOND PROJECT THAT IS ALSO IN MAIN PROJECT fname = "in_both" - with open(fname, "w+") as fobj: + with open(fname, "w+", encoding="utf-8") as fobj: fobj.write("in both repos") stages = self.additional_dvc.add(fname) diff --git a/tests/func/test_import_url.py b/tests/func/test_import_url.py index b61e4529fb..6367247ac8 100644 --- a/tests/func/test_import_url.py +++ b/tests/func/test_import_url.py @@ -34,13 +34,13 @@ def test(self): filename = str(uuid4()) tmpfile = os.path.join(tmpdir, filename) - with open(tmpfile, "w") as fd: + with open(tmpfile, "w", encoding="utf-8") as fd: fd.write("content") ret = main(["import-url", tmpfile]) self.assertEqual(ret, 0) self.assertTrue(os.path.exists(filename)) - with open(filename) as fd: + with open(filename, encoding="utf-8") as fd: self.assertEqual(fd.read(), "content") @@ -59,7 +59,7 @@ def setUp(self): super().setUp() tmp_dir = self.mkdtemp() self.external_source = os.path.join(tmp_dir, "file") - with open(self.external_source, "w") as fobj: + with open(self.external_source, "w", encoding="utf-8") as fobj: fobj.write("content") def test(self): @@ -334,7 +334,9 @@ def test_import_url_to_remote_directory(tmp_dir, dvc, workspace, local_remote): assert len(stage.outs) == 1 hash_info = stage.outs[0].hash_info - with open(local_remote.hash_to_path_info(hash_info.value)) as stream: + with open( + local_remote.hash_to_path_info(hash_info.value), encoding="utf-8" + ) as stream: file_parts = json.load(stream) assert len(file_parts) == 3 diff --git a/tests/func/test_lockfile.py b/tests/func/test_lockfile.py index dfd7d4e15b..eefeb210f7 100644 --- a/tests/func/test_lockfile.py +++ b/tests/func/test_lockfile.py @@ -23,7 +23,7 @@ def read_lock_file(file=PIPELINE_LOCK): - with open(file) as f: + with open(file, encoding="utf-8") as f: data = parse_yaml_for_update(f.read(), file) assert isinstance(data, OrderedDict) return data diff --git a/tests/func/test_move.py b/tests/func/test_move.py index 6973c8577a..1c7fc966bc 100644 --- a/tests/func/test_move.py +++ b/tests/func/test_move.py @@ -66,7 +66,9 @@ def test(self): class TestMoveFileWithExtension(TestDvc): def test(self): - with open(os.path.join(self.dvc.root_dir, "file.csv"), "w") as fd: + with open( + os.path.join(self.dvc.root_dir, "file.csv"), "w", encoding="utf-8" + ) as fd: fd.write("1,2,3\n") self.dvc.add("file.csv") diff --git a/tests/func/test_odb.py b/tests/func/test_odb.py index a04c1d4cd3..4b09b5c30d 100644 --- a/tests/func/test_odb.py +++ b/tests/func/test_odb.py @@ -106,10 +106,10 @@ def test(self): self.assertFalse(os.path.exists(os.path.join(".dvc", "cache"))) - with open("common", "w+") as fd: + with open("common", "w+", encoding="utf-8") as fd: fd.write("common") - with open("unique", "w+") as fd: + with open("unique", "w+", encoding="utf-8") as fd: fd.write(d) ret = main(["add", "common", "unique"]) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 4481ca00a3..b9b90efa6d 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -343,7 +343,7 @@ def test(self): os.mkdir(idir) f = os.path.join(idir, "file") - with open(f, "w+") as fobj: + with open(f, "w+", encoding="utf-8") as fobj: fobj.write(str(d)) ret = main( @@ -443,7 +443,7 @@ def test(self): ) self.assertTrue(file3_stage is not None) - with open(code2, "a") as fobj: + with open(code2, "a", encoding="utf-8") as fobj: fobj.write("\n\n") stages = self.dvc.reproduce(file3_stage.path, force_downstream=True) @@ -670,7 +670,7 @@ def test(self): 'shutil.copyfile("{}", os.path.join("{}", "{}"))' ) - with open(dir_code, "w+") as fd: + with open(dir_code, "w+", encoding="utf-8") as fd: fd.write(code.format(dir_name, file_name, dir_name, file_name)) stage = self._run( @@ -696,7 +696,7 @@ def test(self): dir_name = "dir" dir_code = "dir_code.py" - with open(dir_code, "w+") as fd: + with open(dir_code, "w+", encoding="utf-8") as fd: fd.write( "import os; import sys; import shutil; " "shutil.copytree(sys.argv[1], sys.argv[2])" @@ -715,7 +715,7 @@ def test(self): stages = self.dvc.reproduce(target) self.assertEqual(len(stages), 0) - with open(self.DATA_SUB, "a") as fd: + with open(self.DATA_SUB, "a", encoding="utf-8") as fd: fd.write("add") stages = self.dvc.reproduce(target) @@ -775,14 +775,14 @@ def test(self): single_stage=True, ) - with open(fname) as fd: + with open(fname, encoding="utf-8") as fd: self.assertEqual(os.getenv("SHELL"), fd.read().strip()) os.unlink(fname) self.dvc.reproduce(stage) - with open(fname) as fd: + with open(fname, encoding="utf-8") as fd: self.assertEqual(os.getenv("SHELL"), fd.read().strip()) @@ -1013,11 +1013,11 @@ def _rewrite_file(path_elements, new_content): path_elements = [path_elements] file = Path(os.sep.join(path_elements)) file.unlink() - file.write_text(new_content) + file.write_text(new_content, encoding="utf-8") def _read_out(stage): - return Path(stage.outs[0].fspath).read_text() + return Path(stage.outs[0].fspath).read_text(encoding="utf-8") def test_recursive_repro_default(dvc, repro_dir): diff --git a/tests/func/test_run_single_stage.py b/tests/func/test_run_single_stage.py index a443dc349b..9c3f771736 100644 --- a/tests/func/test_run_single_stage.py +++ b/tests/func/test_run_single_stage.py @@ -112,7 +112,7 @@ def test(self): single_stage=True, ) self.assertFalse(os.path.exists("out")) - with open(".gitignore") as fobj: + with open(".gitignore", encoding="utf-8") as fobj: self.assertEqual(fobj.read(), "/out\n") @@ -269,7 +269,7 @@ def test_not_dir(self): path = os.path.join(self._root_dir, str(uuid.uuid4())) os.mkdir(path) path = os.path.join(path, str(uuid.uuid4())) - open(path, "a").close() + open(path, "a", encoding="utf-8").close() self.dvc.run(cmd="command", wdir=path, single_stage=True) @@ -304,7 +304,7 @@ def test_not_found(self): class TestRunRemoveOuts(TestDvc): def test(self): - with open(self.CODE, "w+") as fobj: + with open(self.CODE, "w+", encoding="utf-8") as fobj: fobj.write("import sys\n") fobj.write("import os\n") fobj.write("if os.path.exists(sys.argv[1]):\n") @@ -321,7 +321,7 @@ def test(self): class TestRunUnprotectOutsCopy(TestDvc): def test(self): - with open(self.CODE, "w+") as fobj: + with open(self.CODE, "w+", encoding="utf-8") as fobj: fobj.write("import sys\n") fobj.write("with open(sys.argv[1], 'a+') as fobj:\n") fobj.write(" fobj.write('foo')\n") @@ -344,7 +344,7 @@ def test(self): ) self.assertEqual(ret, 0) self.assertTrue(os.access(self.FOO, os.W_OK)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") ret = main( @@ -364,13 +364,13 @@ def test(self): ) self.assertEqual(ret, 0) self.assertTrue(os.access(self.FOO, os.W_OK)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") class TestRunUnprotectOutsSymlink(TestDvc): def test(self): - with open(self.CODE, "w+") as fobj: + with open(self.CODE, "w+", encoding="utf-8") as fobj: fobj.write("import sys\n") fobj.write("import os\n") fobj.write("with open(sys.argv[1], 'a+') as fobj:\n") @@ -402,7 +402,7 @@ def test(self): self.assertFalse(os.access(self.FOO, os.W_OK)) self.assertTrue(System.is_symlink(self.FOO)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") ret = main( @@ -429,13 +429,13 @@ def test(self): self.assertFalse(os.access(self.FOO, os.W_OK)) self.assertTrue(System.is_symlink(self.FOO)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") class TestRunUnprotectOutsHardlink(TestDvc): def test(self): - with open(self.CODE, "w+") as fobj: + with open(self.CODE, "w+", encoding="utf-8") as fobj: fobj.write("import sys\n") fobj.write("import os\n") fobj.write("with open(sys.argv[1], 'a+') as fobj:\n") @@ -461,7 +461,7 @@ def test(self): self.assertEqual(ret, 0) self.assertFalse(os.access(self.FOO, os.W_OK)) self.assertTrue(System.is_hardlink(self.FOO)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") ret = main( @@ -482,7 +482,7 @@ def test(self): self.assertEqual(ret, 0) self.assertFalse(os.access(self.FOO, os.W_OK)) self.assertTrue(System.is_hardlink(self.FOO)) - with open(self.FOO) as fd: + with open(self.FOO, encoding="utf-8") as fd: self.assertEqual(fd.read(), "foo") @@ -577,7 +577,7 @@ def test_cached(self): ] ) self.assertEqual(ret, 0) - with open("metrics.txt") as fd: + with open("metrics.txt", encoding="utf-8") as fd: self.assertEqual(fd.read().rstrip(), "test") def test_not_cached(self): @@ -591,7 +591,7 @@ def test_not_cached(self): ] ) self.assertEqual(ret, 0) - with open("metrics.txt") as fd: + with open("metrics.txt", encoding="utf-8") as fd: self.assertEqual(fd.read().rstrip(), "test") @@ -698,7 +698,7 @@ def test_rerun_changed_out(tmp_dir, run_copy): tmp_dir.gen("foo", "foo content") assert run_copy("foo", "out", single_stage=True) is not None - Path("out").write_text("modification") + Path("out").write_text("modification", encoding="utf-8") with pytest.raises(StageFileAlreadyExistsError): run_copy("foo", "out", force=False, single_stage=True) @@ -769,7 +769,7 @@ def should_append_upon_repro(self, file, stage_file): ret = main(["repro", stage_file]) self.assertEqual(0, ret) - with open(file) as fobj: + with open(file, encoding="utf-8") as fobj: lines = fobj.readlines() self.assertEqual(2, len(lines)) @@ -829,7 +829,7 @@ def test(self): class TestRerunWithSameOutputs(TestDvc): def _read_content_only(self, path): - with open(path) as fobj: + with open(path, encoding="utf-8") as fobj: return [line.rstrip() for line in fobj] @property @@ -909,7 +909,7 @@ def test(self): clean_staging() os.chmod(self.BAR, 0o644) - with open(self.BAR, "w") as fd: + with open(self.BAR, "w", encoding="utf-8") as fd: fd.write("corrupting the output cache") patch_checkout = mock.patch.object( diff --git a/tests/func/test_scm.py b/tests/func/test_scm.py index 30dca2f4dd..85d65fe09b 100644 --- a/tests/func/test_scm.py +++ b/tests/func/test_scm.py @@ -327,4 +327,7 @@ def test_git_stash_clear(tmp_dir, scm, ref): # NOTE: some backends will completely remove reflog file on clear, some # will only truncate it, either case means an empty stash log_path = os.path.join(os.fspath(tmp_dir), ".git", "logs", *parts) - assert not os.path.exists(log_path) or not open(log_path).read() + assert ( + not os.path.exists(log_path) + or not open(log_path, encoding="utf-8").read() + ) diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index a7c5d5126a..c53cf7a80e 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -144,7 +144,7 @@ def test_remote_dependency(self): os.makedirs(storage) - with open(file_path, "w") as fobj: + with open(file_path, "w", encoding="utf-8") as fobj: fobj.write("Isle of Dogs") assert main(["remote", "add", "tmp", tmp_path]) == 0 @@ -157,7 +157,7 @@ def test_remote_dependency(self): def test_md5_ignores_comments(tmp_dir, dvc): (stage,) = tmp_dir.dvc_gen("foo", "foo content") - with open(stage.path, "a") as f: + with open(stage.path, "a", encoding="utf-8") as f: f.write("# End comment\n") new_stage = SingleStageFile(dvc, stage.path).stage diff --git a/tests/func/test_update.py b/tests/func/test_update.py index d1e9b6d189..84405cd32d 100644 --- a/tests/func/test_update.py +++ b/tests/func/test_update.py @@ -216,7 +216,7 @@ def test_update_rev(tmp_dir, dvc, scm, git_dir): "rev": "branch1", "rev_lock": branch1_head, } - with open(tmp_dir / "foo") as f: + with open(tmp_dir / "foo", encoding="utf-8") as f: assert "foobar" == f.read() stage = dvc.update(["foo.dvc"], rev="branch2")[0] @@ -225,7 +225,7 @@ def test_update_rev(tmp_dir, dvc, scm, git_dir): "rev": "branch2", "rev_lock": branch2_head, } - with open(tmp_dir / "foo") as f: + with open(tmp_dir / "foo", encoding="utf-8") as f: assert "foobar foo" == f.read() diff --git a/tests/remotes/gdrive.py b/tests/remotes/gdrive.py index 1357085d91..38bfc3a17b 100644 --- a/tests/remotes/gdrive.py +++ b/tests/remotes/gdrive.py @@ -51,7 +51,7 @@ def should_test(): @cached_property def config(self): tmp_path = tmp_fname() - with open(tmp_path, "w") as stream: + with open(tmp_path, "w", encoding="utf-8") as stream: raw_credentials = os.getenv( GDriveFileSystem.GDRIVE_CREDENTIALS_DATA ) diff --git a/tests/unit/scm/test_git.py b/tests/unit/scm/test_git.py index 80fd9322f6..3b86bd527c 100644 --- a/tests/unit/scm/test_git.py +++ b/tests/unit/scm/test_git.py @@ -308,7 +308,7 @@ def test_ignore_remove_empty(tmp_dir, scm, git): path_to_gitignore = tmp_dir / ".gitignore" - with open(path_to_gitignore, "a") as f: + with open(path_to_gitignore, "a", encoding="utf-8") as f: for entry in test_entries: f.write(entry["entry"] + "\n") diff --git a/tests/unit/test_dvcfile.py b/tests/unit/test_dvcfile.py index c862db3235..8291aaaca1 100644 --- a/tests/unit/test_dvcfile.py +++ b/tests/unit/test_dvcfile.py @@ -112,7 +112,7 @@ def test_stage_load_file_exists_but_dvcignored(tmp_dir, dvc, scm, file): @pytest.mark.parametrize("file", ["foo.dvc", "dvc.yaml"]) def test_try_loading_dvcfile_that_is_gitignored(tmp_dir, dvc, scm, file): - with open(tmp_dir / ".gitignore", "a+") as fd: + with open(tmp_dir / ".gitignore", "a+", encoding="utf-8") as fd: fd.write(file) # create a file just to avoid other checks diff --git a/tests/unit/test_updater.py b/tests/unit/test_updater.py index df1c7bc0b4..8a5319fbbf 100644 --- a/tests/unit/test_updater.py +++ b/tests/unit/test_updater.py @@ -46,7 +46,7 @@ def test_fetch(mocker, updater): mock_get.assert_called_once_with(Updater.URL, timeout=Updater.TIMEOUT_GET) assert os.path.isfile(updater.updater_file) - with open(updater.updater_file) as fobj: + with open(updater.updater_file, encoding="utf-8") as fobj: info = json.load(fobj) assert info["version"] == __version__ @@ -88,7 +88,7 @@ def test_check_updates(mocker, capsys, updater, current, latest, notify): mocker.patch("sys.stdout.isatty", return_value=True) updater.current = current - with open(updater.updater_file, "w+") as f: + with open(updater.updater_file, "w+", encoding="utf-8") as f: json.dump({"version": latest}, f) updater.check() @@ -108,7 +108,7 @@ def test_check_updates(mocker, capsys, updater, current, latest, notify): def test_check_refetches_each_day(mock_tty, updater, caplog, mocker): updater.current = "0.0.8" - with open(updater.updater_file, "w+") as f: + with open(updater.updater_file, "w+", encoding="utf-8") as f: json.dump({"version": "0.0.9"}, f) fetch = mocker.patch.object(updater, "fetch") @@ -127,7 +127,7 @@ def test_check_fetches_on_invalid_data_format( mock_tty, updater, caplog, mocker ): updater.current = "0.0.5" - with open(updater.updater_file, "w+") as f: + with open(updater.updater_file, "w+", encoding="utf-8") as f: f.write('"{"version: "0.0.6"') fetch = mocker.patch.object(updater, "fetch") caplog.clear() diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index c14cc7ae31..fd534c768c 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -13,7 +13,7 @@ def get_gitignore_content(): - with open(Git.GITIGNORE) as gitignore: + with open(Git.GITIGNORE, encoding="utf-8") as gitignore: return gitignore.read().splitlines() diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py index f3ef6a22c4..8ca526df66 100644 --- a/tests/utils/httpd.py +++ b/tests/utils/httpd.py @@ -22,7 +22,7 @@ def end_headers(self): file = self.translate_path(self.path) if not os.path.isdir(file) and os.path.exists(file): - with open(file) as fd: + with open(file, encoding="utf-8") as fd: encoded_text = fd.read().encode("utf8") checksum = hashlib.md5(encoded_text).hexdigest() self.send_header("Content-MD5", checksum)