def test__is_directory():
    """Exercise _is_directory with directory, file, missing and non-path inputs.

    The helper is expected to answer ``True`` only for the test-data
    directory, and ``False`` for a file inside it, for a path that does not
    exist, and for values that are not paths at all.
    """
    # NOTE(review): these paths are relative, so they resolve against the
    # current working directory - presumably the repository root; confirm.
    data_dir = "tests/test_data"
    parquet_file = "tests/test_data/nested.parquet"

    # Same two locations, first wrapped in Path and then as plain strings.
    for as_type in (Path, str):
        assert _is_directory(as_type(data_dir)) is True
        assert _is_directory(as_type(parquet_file)) is False

    # A path with nothing behind it should not count as a directory.
    assert _is_directory(Path("tests/non_existent_directory")) is False

    # Inputs that are neither Path nor str are expected to give False.
    for not_a_path in (123, None, []):
        assert _is_directory(not_a_path) is False
--- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -1177,6 +1177,29 @@ def make_id(row, prefix_str): get_max, columns=["packed.c", "packed.d"], output_names=["only_one_name"], row_container="args" ) + # Test output_names as a string (single output) + def get_single_max(row): + return row["packed.c"].max() + + result = nf.map_rows(get_single_max, columns=["packed.c"], output_names="max_c") + assert len(result) == len(nf) + assert list(result.columns) == ["max_c"] + for i in range(len(result)): + assert result["max_c"].values[i] == expected_max_c[i] + + # Test output_names as a list (multiple outputs) + def get_max_pair(row): + return pd.Series([row["packed.c"].max(), row["packed.d"].max()], index=["max_col1", "max_col2"]) + + result = nf.map_rows( + get_max_pair, columns=["packed.c", "packed.d"], output_names=["custom_max1", "custom_max2"] + ) + assert len(result) == len(nf) + assert list(result.columns) == ["custom_max1", "custom_max2"] + for i in range(len(result)): + assert result["custom_max1"].values[i] == expected_max_c[i] + assert result["custom_max2"].values[i] == expected_max_d[i] + # Verify that append_columns=True works as expected. # Ensure that even with non-unique indexes, the final result retains # the original index (nested-pandas#301)