Merged
89 changes: 43 additions & 46 deletions pandas/tests/io/json/test_compression.py
@@ -12,22 +12,21 @@
 import pandas._testing as tm


-def test_compression_roundtrip(compression):
+def test_compression_roundtrip(compression, temp_file):
     df = pd.DataFrame(
         [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
         index=["A", "B"],
         columns=["X", "Y", "Z"],
     )

-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
+    df.to_json(temp_file, compression=compression)
+    tm.assert_frame_equal(df, pd.read_json(temp_file, compression=compression))

-        # explicitly ensure file was compressed.
-        with tm.decompress_file(path, compression) as fh:
-            result = fh.read().decode("utf8")
-        data = StringIO(result)
-        tm.assert_frame_equal(df, pd.read_json(data))
+    # explicitly ensure file was compressed.
+    with tm.decompress_file(temp_file, compression) as fh:
+        result = fh.read().decode("utf8")
+    data = StringIO(result)
+    tm.assert_frame_equal(df, pd.read_json(data))


 def test_read_zipped_json(datapath):
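
Throughout this diff, pandas' internal tm.ensure_clean() context manager (which yields a temporary path and deletes it on exit) is replaced by a temp_file fixture, dropping one with-block and one indent level per test. The fixture itself is defined in a conftest that is not part of this diff; below is a minimal sketch of what it plausibly looks like, assuming it builds on pytest's built-in tmp_path fixture (the names and body here are assumptions, not the project's actual code):

# Hypothetical sketch of a temp_file fixture; the real definition lives
# in the project's conftest.py and may differ.
import uuid

import pytest


@pytest.fixture
def temp_file(tmp_path):
    # tmp_path is a per-test pathlib.Path directory that pytest creates
    # and removes automatically, so no manual cleanup (the job
    # tm.ensure_clean used to do) is required here.
    path = tmp_path / f"{uuid.uuid4()}.tmp"
    path.touch()
    return path
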
@@ -43,15 +42,14 @@ def test_read_zipped_json(datapath):
 @td.skip_if_not_us_locale
 @pytest.mark.single_cpu
 @pytest.mark.network
-def test_with_s3_url(compression, s3_bucket_public, s3so):
+def test_with_s3_url(compression, s3_bucket_public, s3so, temp_file):
     # Bucket created in tests/io/conftest.py
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

     key = f"{uuid.uuid4()}.json"
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        with open(path, "rb") as f:
-            s3_bucket_public.put_object(Key=key, Body=f)
+    df.to_json(temp_file, compression=compression)
+    with open(temp_file, "rb") as f:
+        s3_bucket_public.put_object(Key=key, Body=f)

     roundtripped_df = pd.read_json(
         f"s3://{s3_bucket_public.name}/{key}",
@@ -61,39 +59,35 @@ def test_with_s3_url(compression, s3_bucket_public, s3so):
     tm.assert_frame_equal(df, roundtripped_df)


-def test_lines_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
-        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
-        tm.assert_frame_equal(df, roundtripped_df)
+def test_lines_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)
+    roundtripped_df = pd.read_json(temp_file, lines=True, compression=compression)
+    tm.assert_frame_equal(df, roundtripped_df)


-def test_chunksize_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
+def test_chunksize_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)

-        with pd.read_json(
-            path, lines=True, chunksize=1, compression=compression
-        ) as res:
-            roundtripped_df = pd.concat(res)
-        tm.assert_frame_equal(df, roundtripped_df)
+    with pd.read_json(
+        temp_file, lines=True, chunksize=1, compression=compression
+    ) as res:
+        roundtripped_df = pd.concat(res)
+    tm.assert_frame_equal(df, roundtripped_df)


-def test_write_unsupported_compression_type():
+def test_write_unsupported_compression_type(temp_file):
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            df.to_json(path, compression="unsupported")
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(temp_file, compression="unsupported")


-def test_read_unsupported_compression_type():
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            pd.read_json(path, compression="unsupported")
+def test_read_unsupported_compression_type(temp_file):
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        pd.read_json(temp_file, compression="unsupported")


 @pytest.mark.parametrize(
@@ -102,25 +96,28 @@ def test_read_unsupported_compression_type():
 @pytest.mark.parametrize("to_infer", [True, False])
 @pytest.mark.parametrize("read_infer", [True, False])
 def test_to_json_compression(
-    compression_only, read_infer, to_infer, compression_to_extension, infer_string
+    compression_only,
+    read_infer,
+    to_infer,
+    compression_to_extension,
+    infer_string,
+    tmp_path,
 ):
     with pd.option_context("future.infer_string", infer_string):
         # see gh-15008
         compression = compression_only

-        # We'll complete file extension subsequently.
-        filename = "test."
-        filename += compression_to_extension[compression]
+        filename = tmp_path / f"test.{compression_to_extension[compression]}"

         df = pd.DataFrame({"A": [1]})

         to_compression = "infer" if to_infer else compression
         read_compression = "infer" if read_infer else compression

-        with tm.ensure_clean(filename) as path:
-            df.to_json(path, compression=to_compression)
-            result = pd.read_json(path, compression=read_compression)
-            tm.assert_frame_equal(result, df)
+        df.to_json(filename, compression=to_compression)
+        result = pd.read_json(filename, compression=read_compression)
+        tm.assert_frame_equal(result, df)


 def test_to_json_compression_mode(compression):
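
Note that the last test above takes pytest's built-in tmp_path fixture directly rather than temp_file: with compression="infer", to_json/read_json infer the codec from the file extension, so the test must control the filename. A standalone sketch of that inference (the test name here is hypothetical):

# Hypothetical standalone example: when the extension matters
# (compression="infer"), build the filename under tmp_path instead of
# using temp_file, whose generated name has no meaningful suffix.
import pandas as pd
import pandas._testing as tm


def test_gzip_inferred_from_extension(tmp_path):
    path = tmp_path / "frame.json.gz"  # ".gz" drives the inference
    df = pd.DataFrame({"A": [1]})
    df.to_json(path, compression="infer")  # written as gzip
    result = pd.read_json(path, compression="infer")  # read back as gzip
    tm.assert_frame_equal(result, df)
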
31 changes: 14 additions & 17 deletions pandas/tests/io/json/test_pandas.py
@@ -806,11 +806,10 @@ def test_reconstruction_index(self):
         result = read_json(StringIO(df.to_json()))
         tm.assert_frame_equal(result, df)

-    def test_path(self, float_frame, int_frame, datetime_frame):
-        with tm.ensure_clean("test.json") as path:
-            for df in [float_frame, int_frame, datetime_frame]:
-                df.to_json(path)
-                read_json(path)
+    def test_path(self, float_frame, int_frame, datetime_frame, temp_file):
+        for df in [float_frame, int_frame, datetime_frame]:
+            df.to_json(temp_file)
+            read_json(temp_file)

     def test_axis_dates(self, datetime_series, datetime_frame):
         # frame
@@ -1423,14 +1422,13 @@ def test_read_s3_jsonl(self, s3_bucket_public_with_data, s3so):
         expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
         tm.assert_frame_equal(result, expected)

-    def test_read_local_jsonl(self):
+    def test_read_local_jsonl(self, temp_file):
         # GH17200
-        with tm.ensure_clean("tmp_items.json") as path:
-            with open(path, "w", encoding="utf-8") as infile:
-                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
-            result = read_json(path, lines=True)
-            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
-            tm.assert_frame_equal(result, expected)
+        with open(temp_file, "w", encoding="utf-8") as infile:
+            infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
+        result = read_json(temp_file, lines=True)
+        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)

     def test_read_jsonl_unicode_chars(self):
         # GH15132: non-ascii unicode characters
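
For reference, test_read_local_jsonl covers pandas' JSON Lines support: one JSON object per line, parsed with lines=True, with per-line key order irrelevant. A self-contained illustration:

# Standalone illustration of the lines=True path the test exercises.
from io import StringIO

import pandas as pd

data = '{"a": 1, "b": 2}\n{"b": 2, "a": 1}\n'  # keys in different order
df = pd.read_json(StringIO(data), lines=True)
# df has two identical rows with columns "a" and "b".
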
@@ -1526,17 +1524,16 @@ def test_to_jsonl(self):
         ],
     )
     @pytest.mark.parametrize("dtype", ["category", object])
-    def test_latin_encoding(self, dtype, val):
+    def test_latin_encoding(self, dtype, val, temp_file):
         # GH 13774
         ser = Series(
             [x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
             dtype=dtype,
         )
         encoding = "latin-1"
-        with tm.ensure_clean("test.json") as path:
-            ser.to_json(path, encoding=encoding)
-            retr = read_json(StringIO(path), encoding=encoding)
-            tm.assert_series_equal(ser, retr, check_categorical=False)
+        ser.to_json(temp_file, encoding=encoding)
+        retr = read_json(temp_file, encoding=encoding)
+        tm.assert_series_equal(ser, retr, check_categorical=False)

     def test_data_frame_size_after_to_json(self):
         # GH15344