Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/reshape/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def get_empty_frame(data) -> DataFrame:

if drop_first:
# remove first GH12042
dummy_mat = dummy_mat[:, 1:] # type: ignore[assignment]
dummy_mat = dummy_mat[:, 1:]
dummy_cols = dummy_cols[1:]
return DataFrame(dummy_mat, index=index, columns=dummy_cols, dtype=_dtype)

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ def test_no_empty_apply(mi_styler):


@pytest.mark.parametrize("format", ["html", "latex", "string"])
def test_output_buffer(mi_styler, format):
def test_output_buffer(mi_styler, format, temp_file):
# gh 47053
with tm.ensure_clean(f"delete_me.{format}") as f:
getattr(mi_styler, f"to_{format}")(f)
f = str(temp_file)
getattr(mi_styler, f"to_{format}")(f)
31 changes: 15 additions & 16 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
DataFrame,
Series,
)
import pandas._testing as tm

pytest.importorskip("jinja2")

Expand All @@ -30,27 +29,27 @@ def df_short():


class TestToLatex:
def test_to_latex_to_file(self, float_frame):
with tm.ensure_clean("test.tex") as path:
float_frame.to_latex(path)
with open(path, encoding="utf-8") as f:
assert float_frame.to_latex() == f.read()
def test_to_latex_to_file(self, float_frame, temp_file):
path = str(temp_file)
float_frame.to_latex(path)
with open(path, encoding="utf-8") as f:
assert float_frame.to_latex() == f.read()

def test_to_latex_to_file_utf8_with_encoding(self):
def test_to_latex_to_file_utf8_with_encoding(self, temp_file):
# test with utf-8 and encoding option (GH 7061)
df = DataFrame([["au\xdfgangen"]])
with tm.ensure_clean("test.tex") as path:
df.to_latex(path, encoding="utf-8")
with open(path, encoding="utf-8") as f:
assert df.to_latex() == f.read()
path = str(temp_file)
df.to_latex(path, encoding="utf-8")
with open(path, encoding="utf-8") as f:
assert df.to_latex() == f.read()

def test_to_latex_to_file_utf8_without_encoding(self):
def test_to_latex_to_file_utf8_without_encoding(self, temp_file):
# test with utf-8 without encoding option
df = DataFrame([["au\xdfgangen"]])
with tm.ensure_clean("test.tex") as path:
df.to_latex(path)
with open(path, encoding="utf-8") as f:
assert df.to_latex() == f.read()
path = str(temp_file)
df.to_latex(path)
with open(path, encoding="utf-8") as f:
assert df.to_latex() == f.read()

def test_to_latex_tabular_with_index(self):
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
Expand Down
158 changes: 79 additions & 79 deletions pandas/tests/io/json/test_readlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def test_readjson_each_chunk(request, lines_json_df, engine):
assert chunks[1].shape == (1, 2)


def test_readjson_chunks_from_file(request, engine):
def test_readjson_chunks_from_file(request, engine, temp_file):
if engine == "pyarrow":
# GH 48893
reason = (
Expand All @@ -204,41 +204,41 @@ def test_readjson_chunks_from_file(request, engine):
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

with tm.ensure_clean("test.json") as path:
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
with read_json(path, lines=True, chunksize=1, engine=engine) as reader:
chunked = pd.concat(reader)
unchunked = read_json(path, lines=True, engine=engine)
tm.assert_frame_equal(unchunked, chunked)
path = str(temp_file)
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
with read_json(path, lines=True, chunksize=1, engine=engine) as reader:
chunked = pd.concat(reader)
unchunked = read_json(path, lines=True, engine=engine)
tm.assert_frame_equal(unchunked, chunked)


@pytest.mark.parametrize("chunksize", [None, 1])
def test_readjson_chunks_closes(chunksize):
with tm.ensure_clean("test.json") as path:
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
reader = JsonReader(
path,
orient=None,
typ="frame",
dtype=True,
convert_axes=True,
convert_dates=True,
keep_default_dates=True,
precise_float=False,
date_unit=None,
encoding=None,
lines=True,
chunksize=chunksize,
compression=None,
nrows=None,
)
with reader:
reader.read()
assert reader.handles.handle.closed, (
f"didn't close stream with chunksize = {chunksize}"
)
def test_readjson_chunks_closes(chunksize, temp_file):
path = str(temp_file)
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
reader = JsonReader(
path,
orient=None,
typ="frame",
dtype=True,
convert_axes=True,
convert_dates=True,
keep_default_dates=True,
precise_float=False,
date_unit=None,
encoding=None,
lines=True,
chunksize=chunksize,
compression=None,
nrows=None,
)
with reader:
reader.read()
assert reader.handles.handle.closed, (
f"didn't close stream with chunksize = {chunksize}"
)


@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
Expand Down Expand Up @@ -278,7 +278,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")


def test_readjson_unicode(request, monkeypatch, engine):
def test_readjson_unicode(request, monkeypatch, engine, temp_file):
if engine == "pyarrow":
# GH 48893
reason = (
Expand All @@ -287,14 +287,14 @@ def test_readjson_unicode(request, monkeypatch, engine):
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

with tm.ensure_clean("test.json") as path:
monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")
with open(path, "w", encoding="utf-8") as f:
f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')
path = str(temp_file)
monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")
with open(path, "w", encoding="utf-8") as f:
f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')

result = read_json(path, engine=engine)
expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
tm.assert_frame_equal(result, expected)
result = read_json(path, engine=engine)
expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("nrows", [1, 2])
Expand Down Expand Up @@ -441,25 +441,25 @@ def test_to_json_append_mode(mode_):
df.to_json(mode=mode_, lines=False, orient="records")


def test_to_json_append_output_consistent_columns():
def test_to_json_append_output_consistent_columns(temp_file):
# GH 35849
# Testing that resulting output reads in as expected.
# Testing same columns, new rows
df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})

expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
path = str(temp_file)
# Save dataframes to the same file
df1.to_json(path, lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)


def test_to_json_append_output_inconsistent_columns():
def test_to_json_append_output_inconsistent_columns(temp_file):
# GH 35849
# Testing that resulting output reads in as expected.
# Testing one new column, one old column, new rows
Expand All @@ -473,17 +473,17 @@ def test_to_json_append_output_inconsistent_columns():
"col3": [np.nan, np.nan, "!", "#"],
}
)
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
path = str(temp_file)
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)


def test_to_json_append_output_different_columns():
def test_to_json_append_output_different_columns(temp_file):
# GH 35849
# Testing that resulting output reads in as expected.
# Testing same, differing and new columns
Expand All @@ -500,19 +500,19 @@ def test_to_json_append_output_different_columns():
"col4": [None, None, None, None, None, None, True, False],
}
).astype({"col4": "float"})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df4.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
path = str(temp_file)
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df4.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)


def test_to_json_append_output_different_columns_reordered():
def test_to_json_append_output_different_columns_reordered(temp_file):
# GH 35849
# Testing that resulting output reads in as expected.
# Testing specific result column order.
Expand All @@ -530,13 +530,13 @@ def test_to_json_append_output_different_columns_reordered():
"col1": [None, None, None, None, 3, 4, 1, 2],
}
).astype({"col4": "float"})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df4.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df1.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
path = str(temp_file)
# Save dataframes to the same file
df4.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df1.to_json(path, mode="a", lines=True, orient="records")

# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
Loading
Loading