diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index fb6c91c4aacf9..24ba9be4383eb 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
 - Fixed regression in :meth:`read_csv` and other read functions were the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
 - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
+- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)
 - Fixed regression in ``DataFrame.__setitem__`` raising ``ValueError`` when expanding :class:`DataFrame` and new column is from type ``"0 - name"`` (:issue:`39010`)
 - Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`)
 - Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`)
diff --git a/pandas/_testing.py b/pandas/_testing.py
index dc24bc3368e7d..224c8d540c6bb 100644
--- a/pandas/_testing.py
+++ b/pandas/_testing.py
@@ -6,11 +6,13 @@
 import gzip
 import operator
 import os
+from pathlib import Path
+import random
 import re
 from shutil import rmtree
 import string
 import tempfile
-from typing import Any, Callable, ContextManager, List, Optional, Type, Union, cast
+from typing import IO, Any, Callable, ContextManager, List, Optional, Type, Union, cast
 import warnings
 import zipfile
 
@@ -578,66 +580,48 @@ def close(fignum=None):
 
 
 @contextmanager
-def ensure_clean(filename=None, return_filelike=False, **kwargs):
+def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
     """
     Gets a temporary path and agrees to remove on close.
 
+    This implementation does not use tempfile.mkstemp to avoid having a file handle.
+    If the code using the returned path wants to delete the file itself, Windows
+    requires that no program has a file handle to it.
+
     Parameters
     ----------
     filename : str (optional)
-        if None, creates a temporary file which is then removed when out of
-        scope. if passed, creates temporary file with filename as ending.
+        suffix of the created file.
     return_filelike : bool (default False)
         if True, returns a file-like which is *always* cleaned. Necessary for
         savefig and other functions which want to append extensions.
     **kwargs
-        Additional keywords passed in for creating a temporary file.
-        :meth:`tempFile.TemporaryFile` is used when `return_filelike` is ``True``.
-        :meth:`tempfile.mkstemp` is used when `return_filelike` is ``False``.
-        Note that the `filename` parameter will be passed in as the `suffix`
-        argument to either function.
+        Additional keywords are passed to open().
 
-    See Also
-    --------
-    tempfile.TemporaryFile
-    tempfile.mkstemp
     """
-    filename = filename or ""
-    fd = None
-
-    kwargs["suffix"] = filename
+    folder = Path(tempfile.gettempdir())
 
-    if return_filelike:
-        f = tempfile.TemporaryFile(**kwargs)
-
-        try:
-            yield f
-        finally:
-            f.close()
-    else:
-        # Don't generate tempfile if using a path with directory specified.
-        if len(os.path.dirname(filename)):
-            raise ValueError("Can't pass a qualified name to ensure_clean()")
+    if filename is None:
+        filename = ""
+    filename = (
+        "".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
+    )
+    path = folder / filename
 
-        try:
-            fd, filename = tempfile.mkstemp(**kwargs)
-        except UnicodeEncodeError:
-            import pytest
+    path.touch()
 
-            pytest.skip("no unicode file names on this system")
+    handle_or_str: Union[str, IO] = str(path)
+    if return_filelike:
+        kwargs.setdefault("mode", "w+b")
+        handle_or_str = open(path, **kwargs)
 
-        try:
-            yield filename
-        finally:
-            try:
-                os.close(fd)
-            except OSError:
-                print(f"Couldn't close file descriptor: {fd} (file: {filename})")
-            try:
-                if os.path.exists(filename):
-                    os.remove(filename)
-            except OSError as e:
-                print(f"Exception on removing file: {e}")
+    try:
+        yield handle_or_str
+    finally:
+        if not isinstance(handle_or_str, str):
+            handle_or_str.close()
+        if path.is_file():
+            path.unlink()
 
 
 @contextmanager
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 6f296d3c8d92f..b7fe630af90ef 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -13,7 +13,6 @@
 import datetime
 from io import BytesIO
 import os
-from pathlib import Path
 import struct
 import sys
 from typing import Any, AnyStr, Dict, List, Optional, Sequence, Tuple, Union, cast
@@ -2462,8 +2461,8 @@ def write_file(self) -> None:
         if self.handles.compression["method"] is not None:
             # ZipFile creates a file (with the same name) for each write call.
             # Write it first into a buffer and then write the buffer to the ZipFile.
-            self._output_file = self.handles.handle
-            self.handles.handle = BytesIO()
+            self._output_file, self.handles.handle = self.handles.handle, BytesIO()
+            self.handles.created_handles.append(self.handles.handle)
 
         try:
             self._write_header(
@@ -2484,20 +2483,21 @@ def write_file(self) -> None:
             self._write_value_labels()
             self._write_file_close_tag()
             self._write_map()
-        except Exception as exc:
             self._close()
-            if isinstance(self._fname, (str, Path)):
+        except Exception as exc:
+            self.handles.close()
+            if isinstance(self._fname, (str, os.PathLike)) and os.path.isfile(
+                self._fname
+            ):
                 try:
                     os.unlink(self._fname)
                 except OSError:
                     warnings.warn(
                         f"This save was not successful but {self._fname} could not "
-                        "be deleted. This file is not valid.",
+                        "be deleted. This file is not valid.",
                         ResourceWarning,
                     )
             raise exc
-        else:
-            self._close()
 
     def _close(self) -> None:
         """
@@ -2509,11 +2509,8 @@ def _close(self) -> None:
         # write compression
         if self._output_file is not None:
             assert isinstance(self.handles.handle, BytesIO)
-            bio = self.handles.handle
-            bio.seek(0)
-            self.handles.handle = self._output_file
-            self.handles.handle.write(bio.read())  # type: ignore[arg-type]
-            bio.close()
+            bio, self.handles.handle = self.handles.handle, self._output_file
+            self.handles.handle.write(bio.getvalue())  # type: ignore[arg-type]
 
     def _write_map(self) -> None:
         """No-op, future compatibility"""
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index e67769bc774b0..6a2ac2f6003d7 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -657,30 +657,27 @@ def test_excel_date_datetime_format(self, engine, ext, path):
         )
 
         with tm.ensure_clean(ext) as filename2:
-            writer1 = ExcelWriter(path)
-            writer2 = ExcelWriter(
+            with ExcelWriter(path) as writer1:
+                df.to_excel(writer1, "test1")
+
+            with ExcelWriter(
                 filename2,
                 date_format="DD.MM.YYYY",
                 datetime_format="DD.MM.YYYY HH-MM-SS",
-            )
-
-            df.to_excel(writer1, "test1")
-            df.to_excel(writer2, "test1")
-
-            writer1.close()
-            writer2.close()
+            ) as writer2:
+                df.to_excel(writer2, "test1")
 
-            reader1 = ExcelFile(path)
-            reader2 = ExcelFile(filename2)
+            with ExcelFile(path) as reader1:
+                rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
 
-            rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
-            rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
+            with ExcelFile(filename2) as reader2:
+                rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
 
-            tm.assert_frame_equal(rs1, rs2)
+                tm.assert_frame_equal(rs1, rs2)
 
-            # Since the reader returns a datetime object for dates,
-            # we need to use df_expected to check the result.
-            tm.assert_frame_equal(rs2, df_expected)
+                # Since the reader returns a datetime object for dates,
+                # we need to use df_expected to check the result.
+                tm.assert_frame_equal(rs2, df_expected)
 
     def test_to_excel_interval_no_labels(self, path):
         # see gh-19242
@@ -862,7 +859,7 @@ def test_to_excel_unicode_filename(self, ext, path):
                 f = open(filename, "wb")
             except UnicodeEncodeError:
                 pytest.skip("No unicode file names on this system")
-            else:
+            finally:
                 f.close()
 
             df = DataFrame(
@@ -872,15 +869,15 @@ def test_to_excel_unicode_filename(self, ext, path):
             )
             df.to_excel(filename, "test1", float_format="%.2f")
 
-            reader = ExcelFile(filename)
-            result = pd.read_excel(reader, sheet_name="test1", index_col=0)
+            with ExcelFile(filename) as reader:
+                result = pd.read_excel(reader, sheet_name="test1", index_col=0)
 
-            expected = DataFrame(
-                [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
-                index=["A", "B"],
-                columns=["X", "Y", "Z"],
-            )
-            tm.assert_frame_equal(result, expected)
+                expected = DataFrame(
+                    [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
+                    index=["A", "B"],
+                    columns=["X", "Y", "Z"],
+                )
+                tm.assert_frame_equal(result, expected)
 
     # FIXME: dont leave commented-out
     # def test_to_excel_header_styling_xls(self, engine, ext):
@@ -1374,8 +1371,8 @@ def test_excelfile_fspath(self):
         with tm.ensure_clean("foo.xlsx") as path:
             df = DataFrame({"A": [1, 2]})
             df.to_excel(path)
-            xl = ExcelFile(path)
-            result = os.fspath(xl)
+            with ExcelFile(path) as xl:
+                result = os.fspath(xl)
         assert result == path
 
     def test_excelwriter_fspath(self):
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 6416cb93c7ff5..ef4de5961a696 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -545,12 +545,12 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
-            zp = ZipFile(path)
-            expected_arcname = path if archive_name is None else archive_name
-            expected_arcname = os.path.basename(expected_arcname)
-            assert len(zp.filelist) == 1
-            archived_file = os.path.basename(zp.filelist[0].filename)
-            assert archived_file == expected_arcname
+            with ZipFile(path) as zp:
+                expected_arcname = path if archive_name is None else archive_name
+                expected_arcname = os.path.basename(expected_arcname)
+                assert len(zp.filelist) == 1
+                archived_file = os.path.basename(zp.filelist[0].filename)
+                assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])
     def test_to_csv_na_rep_long_string(self, df_new_type):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 24944281419c3..0f9321fd4f96a 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -550,6 +550,7 @@ def test_invalid_timestamp(self, version):
             msg = "time_stamp should be datetime type"
             with pytest.raises(ValueError, match=msg):
                 original.to_stata(path, time_stamp=time_stamp, version=version)
+            assert not os.path.isfile(path)
 
     def test_numeric_column_names(self):
         original = DataFrame(np.reshape(np.arange(25.0), (5, 5)))
@@ -1916,10 +1917,10 @@ def test_compression_dict(method, file_ext):
         compression = {"method": method, "archive_name": archive_name}
         df.to_stata(path, compression=compression)
         if method == "zip" or file_ext == "zip":
-            zp = zipfile.ZipFile(path, "r")
-            assert len(zp.filelist) == 1
-            assert zp.filelist[0].filename == archive_name
-            fp = io.BytesIO(zp.read(zp.filelist[0]))
+            with zipfile.ZipFile(path, "r") as zp:
+                assert len(zp.filelist) == 1
+                assert zp.filelist[0].filename == archive_name
+                fp = io.BytesIO(zp.read(zp.filelist[0]))
         else:
            fp = path
         reread = read_stata(fp, index_col="index")
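
A minimal usage sketch, not part of the patch: it shows how the rewritten tm.ensure_clean above is intended to be used, assuming only what the new docstring states (filename is a suffix appended to a random name, return_filelike switches to an open handle, extra keywords are forwarded to open()). The sample frame and the suffixes are invented for illustration.

# Usage sketch for the rewritten tm.ensure_clean (hypothetical example, not part of the patch).
import pandas as pd
import pandas._testing as tm

df = pd.DataFrame({"a": [1, 2, 3]})

# Path mode: yields a str path inside the temp directory; the file is removed
# on exit even if the body raises.
with tm.ensure_clean("example.csv") as path:
    df.to_csv(path, index=False)
    assert pd.read_csv(path)["a"].tolist() == [1, 2, 3]

# File-like mode: extra keywords go to open(); the default mode is "w+b".
with tm.ensure_clean("example.txt", return_filelike=True, mode="w") as handle:
    handle.write("written through the yielded handle")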
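
Also a sketch rather than part of the patch: the behaviour pinned down by the new assertion in test_invalid_timestamp, namely that a failing DataFrame.to_stata call no longer leaves a partially written file on disk. The error message and the unlink-on-error behaviour come from the hunks above; the frame contents and the "bad.dta" suffix are invented.

# Hypothetical check mirroring the new test_invalid_timestamp assertion.
import os

import pandas as pd
import pandas._testing as tm

df = pd.DataFrame({"a": [1.0, 2.0]})
with tm.ensure_clean("bad.dta") as path:
    try:
        # time_stamp must be a datetime object; a plain string raises
        # ValueError("time_stamp should be datetime type"), as in the test above.
        df.to_stata(path, time_stamp="not a datetime")
    except ValueError:
        pass
    # write_file() now unlinks the partially written file in its except branch.
    assert not os.path.isfile(path)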