diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 38a374e5473a6..688fedb55ca5a 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -41,16 +41,13 @@ tables = pytest.importorskip("tables") -def test_context(tmp_path): - path1 = tmp_path / "test1.h5" +def test_context(setup_path, tmp_path): try: - with HDFStore(path1) as tbl: + with HDFStore(tmp_path / setup_path) as tbl: raise ValueError("blah") except ValueError: pass - - path2 = tmp_path / "test2.h5" - with HDFStore(path2) as tbl: + with HDFStore(tmp_path / setup_path) as tbl: tbl["a"] = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD"), dtype=object), @@ -979,11 +976,10 @@ def test_copy(propindexes, temp_file): index=Index([f"i-{i}" for i in range(30)]), ) - path = temp_file - with HDFStore(path) as st: + with HDFStore(temp_file) as st: st.append("df", df, data_columns=["A"]) with tempfile.NamedTemporaryFile() as new_f: - with HDFStore(path) as store: + with HDFStore(temp_file) as store: with contextlib.closing( store.copy(new_f.name, keys=None, propindexes=propindexes) ) as tstore: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ab27fda8dcdf5..5ff7ad5a4e785 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -86,12 +86,11 @@ def test_stringify_path_fspath(self): result = icom.stringify_path(p) assert result == "foo/bar.csv" - def test_stringify_file_and_path_like(self): + def test_stringify_file_and_path_like(self, temp_file): # GH 38125: do not stringify file objects that are also path-like fsspec = pytest.importorskip("fsspec") - with tm.ensure_clean() as path: - with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj: - assert fsspec_obj == icom.stringify_path(fsspec_obj) + with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj: + assert fsspec_obj == icom.stringify_path(fsspec_obj) @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path]) def test_infer_compression_from_path(self, compression_format, path_type): @@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], ) - def test_write_fspath_all(self, writer_name, writer_kwargs, module): + def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path): if writer_name in ["to_latex"]: # uses Styler implementation pytest.importorskip("jinja2") - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + string = str(tmp_path / "string") + fspath = str(tmp_path / "fspath") df = pd.DataFrame({"A": [1, 2]}) - with p1 as string, p2 as fspath: - pytest.importorskip(module) - mypath = CustomFSPath(fspath) - writer = getattr(df, writer_name) - - writer(string, **writer_kwargs) - writer(mypath, **writer_kwargs) - with open(string, "rb") as f_str, open(fspath, "rb") as f_path: - if writer_name == "to_excel": - # binary representation of excel contains time creation - # data that causes flaky CI failures - result = pd.read_excel(f_str, **writer_kwargs) - expected = pd.read_excel(f_path, **writer_kwargs) - tm.assert_frame_equal(result, expected) - else: - result = f_str.read() - expected = f_path.read() - assert result == expected - - def test_write_fspath_hdf5(self): + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + writer(mypath, **writer_kwargs) + with open(string, "rb") as f_str, open(fspath, "rb") as f_path: + if writer_name == "to_excel": + # binary representation of excel contains time creation + # data that causes flaky CI failures + result = pd.read_excel(f_str, **writer_kwargs) + expected = pd.read_excel(f_path, **writer_kwargs) + tm.assert_frame_equal(result, expected) + else: + result = f_str.read() + expected = f_path.read() + assert result == expected + + def test_write_fspath_hdf5(self, tmp_path): # Same test as write_fspath_all, except HDF5 files aren't # necessarily byte-for-byte identical for a given dataframe, so we'll # have to read and compare equality pytest.importorskip("tables") df = pd.DataFrame({"A": [1, 2]}) - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + string = str(tmp_path / "string") + fspath = str(tmp_path / "fspath") - with p1 as string, p2 as fspath: - mypath = CustomFSPath(fspath) - df.to_hdf(mypath, key="bar") - df.to_hdf(string, key="bar") + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key="bar") + df.to_hdf(string, key="bar") - result = pd.read_hdf(fspath, key="bar") - expected = pd.read_hdf(string, key="bar") + result = pd.read_hdf(fspath, key="bar") + expected = pd.read_hdf(string, key="bar") tm.assert_frame_equal(result, expected) @@ -432,35 +429,33 @@ def test_next(self, mmap_file): with pytest.raises(StopIteration, match=r"^$"): next(wrapper) - def test_unknown_engine(self): - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path) - with pytest.raises(ValueError, match="Unknown engine"): - pd.read_csv(path, engine="pyt") - - def test_binary_mode(self): + def test_unknown_engine(self, temp_file): + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(temp_file) + with pytest.raises(ValueError, match="Unknown engine"): + pd.read_csv(temp_file, engine="pyt") + + def test_binary_mode(self, temp_file): """ 'encoding' shouldn't be passed to 'open' in binary mode. GH 35058 """ - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path, mode="w+b") - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(temp_file, mode="w+b") + tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0)) @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) @pytest.mark.parametrize("compression_", ["bz2", "xz"]) - def test_warning_missing_utf_bom(self, encoding, compression_): + def test_warning_missing_utf_bom(self, encoding, compression_, temp_file): """ bz2 and xz do not write the byte order mark (BOM) for utf-16/32. @@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_): columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): - df.to_csv(path, compression=compression_, encoding=encoding) + with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): + df.to_csv(temp_file, compression=compression_, encoding=encoding) - # reading should fail (otherwise we wouldn't need the warning) - msg = ( - r"UTF-\d+ stream does not start with BOM|" - r"'utf-\d+' codec can't decode byte" - ) - with pytest.raises(UnicodeError, match=msg): - pd.read_csv(path, compression=compression_, encoding=encoding) + # reading should fail (otherwise we wouldn't need the warning) + msg = ( + r"UTF-\d+ stream does not start with BOM|" + r"'utf-\d+' codec can't decode byte" + ) + with pytest.raises(UnicodeError, match=msg): + pd.read_csv(temp_file, compression=compression_, encoding=encoding) def test_is_fsspec_url(): @@ -514,38 +508,36 @@ def test_is_fsspec_url_chained(): @pytest.mark.parametrize("format", ["csv", "json"]) -def test_codecs_encoding(format): +def test_codecs_encoding(format, temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, mode="w", encoding="utf-8") as handle: - getattr(expected, f"to_{format}")(handle) - with open(path, encoding="utf-8") as handle: - if format == "csv": - df = pd.read_csv(handle, index_col=0) - else: - df = pd.read_json(handle) + with open(temp_file, mode="w", encoding="utf-8") as handle: + getattr(expected, f"to_{format}")(handle) + with open(temp_file, encoding="utf-8") as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) tm.assert_frame_equal(expected, df) -def test_codecs_get_writer_reader(): +def test_codecs_get_writer_reader(temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, "wb") as handle: - with codecs.getwriter("utf-8")(handle) as encoded: - expected.to_csv(encoded) - with open(path, "rb") as handle: - with codecs.getreader("utf-8")(handle) as encoded: - df = pd.read_csv(encoded, index_col=0) + with open(temp_file, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(temp_file, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) tm.assert_frame_equal(expected, df) @@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg): @pytest.mark.parametrize("encoding_errors", ["strict", "replace"]) @pytest.mark.parametrize("format", ["csv", "json"]) -def test_encoding_errors(encoding_errors, format): +def test_encoding_errors(encoding_errors, format, temp_file): # GH39450 msg = "'utf-8' codec can't decode byte" bad_encoding = b"\xe4" @@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format): + b'"}}' ) reader = partial(pd.read_json, orient="index") - with tm.ensure_clean() as path: - file = Path(path) - file.write_bytes(content) + file = temp_file + file.write_bytes(content) - if encoding_errors != "replace": - with pytest.raises(UnicodeDecodeError, match=msg): - reader(path, encoding_errors=encoding_errors) - else: - df = reader(path, encoding_errors=encoding_errors) - decoded = bad_encoding.decode(errors=encoding_errors) - expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) - tm.assert_frame_equal(df, expected) + if encoding_errors != "replace": + with pytest.raises(UnicodeDecodeError, match=msg): + reader(temp_file, encoding_errors=encoding_errors) + else: + df = reader(temp_file, encoding_errors=encoding_errors) + decoded = bad_encoding.decode(errors=encoding_errors) + expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) + tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("encoding_errors", [0, None]) @@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors): reader(content) -def test_bad_encdoing_errors(): +def test_bad_encdoing_errors(temp_file): # GH 39777 - with tm.ensure_clean() as path: - with pytest.raises(LookupError, match="unknown error handler name"): - icom.get_handle(path, "w", errors="bad") + with pytest.raises(LookupError, match="unknown error handler name"): + icom.get_handle(temp_file, "w", errors="bad") @pytest.mark.skipif(WASM, reason="limited file system access on WASM")