Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions tests/nested_pandas/nestedframe/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,18 @@
import pyarrow.fs
import pyarrow.parquet as pq
import pytest
from pandas.testing import assert_frame_equal
from upath import UPath

from nested_pandas import NestedFrame, read_parquet
from nested_pandas.datasets import generate_data
from nested_pandas.nestedframe.io import (
FSSPEC_BLOCK_SIZE,
_get_storage_options_and_path,
_is_directory,
_transform_read_parquet_data_arg,
from_pyarrow,
)
from pandas.testing import assert_frame_equal
from upath import UPath


def test_read_parquet():
Expand Down Expand Up @@ -497,3 +499,31 @@ def test__get_storage_options_and_path():
# Test with invalid type
with pytest.raises(TypeError):
_get_storage_options_and_path(123)


def test__is_directory():
    """Check _is_directory against directories, files, missing paths and non-path values."""
    data_dir = "tests/test_data"
    parquet_file = "tests/test_data/nested.parquet"

    # Each entry pairs a candidate input with the exact boolean expected back.
    cases = [
        # Path objects: an existing directory, then an existing file
        (Path(data_dir), True),
        (Path(parquet_file), False),
        # The same two locations given as plain strings
        (data_dir, True),
        (parquet_file, False),
        # A path that does not exist is not a directory
        (Path("tests/non_existent_directory"), False),
        # Values that are not paths at all
        (123, False),
        (None, False),
        ([], False),
    ]

    for candidate, expected in cases:
        # Identity check: the result must be the bool singleton itself,
        # not merely a truthy or falsy value.
        assert _is_directory(candidate) is expected
23 changes: 23 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,29 @@ def make_id(row, prefix_str):
get_max, columns=["packed.c", "packed.d"], output_names=["only_one_name"], row_container="args"
)

# Test output_names as a string (single output)
def get_single_max(row):
return row["packed.c"].max()

result = nf.map_rows(get_single_max, columns=["packed.c"], output_names="max_c")
assert len(result) == len(nf)
assert list(result.columns) == ["max_c"]
for i in range(len(result)):
assert result["max_c"].values[i] == expected_max_c[i]

# Test output_names as a list (multiple outputs)
def get_max_pair(row):
return pd.Series([row["packed.c"].max(), row["packed.d"].max()], index=["max_col1", "max_col2"])

result = nf.map_rows(
get_max_pair, columns=["packed.c", "packed.d"], output_names=["custom_max1", "custom_max2"]
)
assert len(result) == len(nf)
assert list(result.columns) == ["custom_max1", "custom_max2"]
for i in range(len(result)):
assert result["custom_max1"].values[i] == expected_max_c[i]
assert result["custom_max2"].values[i] == expected_max_d[i]

# Verify that append_columns=True works as expected.
# Ensure that even with non-unique indexes, the final result retains
# the original index (nested-pandas#301)
Expand Down