Skip to content

Commit

Permalink
Backport PR #39202 on branch 1.2.x (#39255)
Browse files Browse the repository at this point in the history
  • Loading branch information
twoertwein committed Jan 18, 2021
1 parent 3cad03f commit b341ca5
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 96 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Fixed regressions
- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)
- Fixed regression in ``DataFrame.__setitem__`` raising ``ValueError`` when expanding :class:`DataFrame` and new column is from type ``"0 - name"`` (:issue:`39010`)
- Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`)
- Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`)
Expand Down
74 changes: 29 additions & 45 deletions pandas/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
import gzip
import operator
import os
from pathlib import Path
import random
import re
from shutil import rmtree
import string
import tempfile
from typing import Any, Callable, ContextManager, List, Optional, Type, Union, cast
from typing import IO, Any, Callable, ContextManager, List, Optional, Type, Union, cast
import warnings
import zipfile

Expand Down Expand Up @@ -578,66 +580,48 @@ def close(fignum=None):


@contextmanager
def ensure_clean(filename=None, return_filelike=False, **kwargs):
def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
"""
Gets a temporary path and agrees to remove on close.
This implementation does not use tempfile.mkstemp to avoid having a file handle.
If the code using the returned path wants to delete the file itself, windows
requires that no program has a file handle to it.
Parameters
----------
filename : str (optional)
if None, creates a temporary file which is then removed when out of
scope. if passed, creates temporary file with filename as ending.
suffix of the created file.
return_filelike : bool (default False)
if True, returns a file-like which is *always* cleaned. Necessary for
savefig and other functions which want to append extensions.
**kwargs
Additional keywords passed in for creating a temporary file.
:meth:`tempFile.TemporaryFile` is used when `return_filelike` is ``True``.
:meth:`tempfile.mkstemp` is used when `return_filelike` is ``False``.
Note that the `filename` parameter will be passed in as the `suffix`
argument to either function.
Additional keywords are passed to open().
See Also
--------
tempfile.TemporaryFile
tempfile.mkstemp
"""
filename = filename or ""
fd = None

kwargs["suffix"] = filename
folder = Path(tempfile.gettempdir())

if return_filelike:
f = tempfile.TemporaryFile(**kwargs)

try:
yield f
finally:
f.close()
else:
# Don't generate tempfile if using a path with directory specified.
if len(os.path.dirname(filename)):
raise ValueError("Can't pass a qualified name to ensure_clean()")
if filename is None:
filename = ""
filename = (
"".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
)
path = folder / filename

try:
fd, filename = tempfile.mkstemp(**kwargs)
except UnicodeEncodeError:
import pytest
path.touch()

pytest.skip("no unicode file names on this system")
handle_or_str: Union[str, IO] = str(path)
if return_filelike:
kwargs.setdefault("mode", "w+b")
handle_or_str = open(path, **kwargs)

try:
yield filename
finally:
try:
os.close(fd)
except OSError:
print(f"Couldn't close file descriptor: {fd} (file: {filename})")
try:
if os.path.exists(filename):
os.remove(filename)
except OSError as e:
print(f"Exception on removing file: {e}")
try:
yield handle_or_str
finally:
if not isinstance(handle_or_str, str):
handle_or_str.close()
if path.is_file():
path.unlink()


@contextmanager
Expand Down
23 changes: 10 additions & 13 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import datetime
from io import BytesIO
import os
from pathlib import Path
import struct
import sys
from typing import Any, AnyStr, Dict, List, Optional, Sequence, Tuple, Union, cast
Expand Down Expand Up @@ -2462,8 +2461,8 @@ def write_file(self) -> None:
if self.handles.compression["method"] is not None:
# ZipFile creates a file (with the same name) for each write call.
# Write it first into a buffer and then write the buffer to the ZipFile.
self._output_file = self.handles.handle
self.handles.handle = BytesIO()
self._output_file, self.handles.handle = self.handles.handle, BytesIO()
self.handles.created_handles.append(self.handles.handle)

try:
self._write_header(
Expand All @@ -2484,20 +2483,21 @@ def write_file(self) -> None:
self._write_value_labels()
self._write_file_close_tag()
self._write_map()
except Exception as exc:
self._close()
if isinstance(self._fname, (str, Path)):
except Exception as exc:
self.handles.close()
if isinstance(self._fname, (str, os.PathLike)) and os.path.isfile(
self._fname
):
try:
os.unlink(self._fname)
except OSError:
warnings.warn(
f"This save was not successful but {self._fname} could not "
"be deleted. This file is not valid.",
"be deleted. This file is not valid.",
ResourceWarning,
)
raise exc
else:
self._close()

def _close(self) -> None:
"""
Expand All @@ -2509,11 +2509,8 @@ def _close(self) -> None:
# write compression
if self._output_file is not None:
assert isinstance(self.handles.handle, BytesIO)
bio = self.handles.handle
bio.seek(0)
self.handles.handle = self._output_file
self.handles.handle.write(bio.read()) # type: ignore[arg-type]
bio.close()
bio, self.handles.handle = self.handles.handle, self._output_file
self.handles.handle.write(bio.getvalue()) # type: ignore[arg-type]

def _write_map(self) -> None:
"""No-op, future compatibility"""
Expand Down
53 changes: 25 additions & 28 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,30 +657,27 @@ def test_excel_date_datetime_format(self, engine, ext, path):
)

with tm.ensure_clean(ext) as filename2:
writer1 = ExcelWriter(path)
writer2 = ExcelWriter(
with ExcelWriter(path) as writer1:
df.to_excel(writer1, "test1")

with ExcelWriter(
filename2,
date_format="DD.MM.YYYY",
datetime_format="DD.MM.YYYY HH-MM-SS",
)

df.to_excel(writer1, "test1")
df.to_excel(writer2, "test1")

writer1.close()
writer2.close()
) as writer2:
df.to_excel(writer2, "test1")

reader1 = ExcelFile(path)
reader2 = ExcelFile(filename2)
with ExcelFile(path) as reader1:
rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)

rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
with ExcelFile(filename2) as reader2:
rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)

tm.assert_frame_equal(rs1, rs2)
tm.assert_frame_equal(rs1, rs2)

# Since the reader returns a datetime object for dates,
# we need to use df_expected to check the result.
tm.assert_frame_equal(rs2, df_expected)
# Since the reader returns a datetime object for dates,
# we need to use df_expected to check the result.
tm.assert_frame_equal(rs2, df_expected)

def test_to_excel_interval_no_labels(self, path):
# see gh-19242
Expand Down Expand Up @@ -862,7 +859,7 @@ def test_to_excel_unicode_filename(self, ext, path):
f = open(filename, "wb")
except UnicodeEncodeError:
pytest.skip("No unicode file names on this system")
else:
finally:
f.close()

df = DataFrame(
Expand All @@ -872,15 +869,15 @@ def test_to_excel_unicode_filename(self, ext, path):
)
df.to_excel(filename, "test1", float_format="%.2f")

reader = ExcelFile(filename)
result = pd.read_excel(reader, sheet_name="test1", index_col=0)
with ExcelFile(filename) as reader:
result = pd.read_excel(reader, sheet_name="test1", index_col=0)

expected = DataFrame(
[[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
index=["A", "B"],
columns=["X", "Y", "Z"],
)
tm.assert_frame_equal(result, expected)
expected = DataFrame(
[[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
index=["A", "B"],
columns=["X", "Y", "Z"],
)
tm.assert_frame_equal(result, expected)

# FIXME: dont leave commented-out
# def test_to_excel_header_styling_xls(self, engine, ext):
Expand Down Expand Up @@ -1374,8 +1371,8 @@ def test_excelfile_fspath(self):
with tm.ensure_clean("foo.xlsx") as path:
df = DataFrame({"A": [1, 2]})
df.to_excel(path)
xl = ExcelFile(path)
result = os.fspath(xl)
with ExcelFile(path) as xl:
result = os.fspath(xl)
assert result == path

def test_excelwriter_fspath(self):
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,12 +545,12 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
df.to_csv(
path, compression={"method": compression, "archive_name": archive_name}
)
zp = ZipFile(path)
expected_arcname = path if archive_name is None else archive_name
expected_arcname = os.path.basename(expected_arcname)
assert len(zp.filelist) == 1
archived_file = os.path.basename(zp.filelist[0].filename)
assert archived_file == expected_arcname
with ZipFile(path) as zp:
expected_arcname = path if archive_name is None else archive_name
expected_arcname = os.path.basename(expected_arcname)
assert len(zp.filelist) == 1
archived_file = os.path.basename(zp.filelist[0].filename)
assert archived_file == expected_arcname

@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ def test_invalid_timestamp(self, version):
msg = "time_stamp should be datetime type"
with pytest.raises(ValueError, match=msg):
original.to_stata(path, time_stamp=time_stamp, version=version)
assert not os.path.isfile(path)

def test_numeric_column_names(self):
original = DataFrame(np.reshape(np.arange(25.0), (5, 5)))
Expand Down Expand Up @@ -1916,10 +1917,10 @@ def test_compression_dict(method, file_ext):
compression = {"method": method, "archive_name": archive_name}
df.to_stata(path, compression=compression)
if method == "zip" or file_ext == "zip":
zp = zipfile.ZipFile(path, "r")
assert len(zp.filelist) == 1
assert zp.filelist[0].filename == archive_name
fp = io.BytesIO(zp.read(zp.filelist[0]))
with zipfile.ZipFile(path, "r") as zp:
assert len(zp.filelist) == 1
assert zp.filelist[0].filename == archive_name
fp = io.BytesIO(zp.read(zp.filelist[0]))
else:
fp = path
reread = read_stata(fp, index_col="index")
Expand Down

0 comments on commit b341ca5

Please sign in to comment.