Skip to content

Commit

Permalink
FIX-modin-project#4652: Support categorical data in from_dataframe
Browse files Browse the repository at this point in the history
Signed-off-by: Karthik Velayutham <vkarthik@ponder.io>
  • Loading branch information
Karthik Velayutham committed Jul 29, 2022
1 parent 05bf659 commit cac3572
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
15 changes: 10 additions & 5 deletions modin/core/dataframe/pandas/exchange/dataframe_protocol/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@
from .exception import NoValidityBuffer, NoOffsetsBuffer


# Error messages for null representations that have no separate validity mask,
# keyed by ``ColumnNullType`` member. ``_get_validity_buffer`` looks the
# column's null kind up here and raises ``NoValidityBuffer`` with the message;
# a kind missing from this mapping is reported as ``NotImplementedError``.
_NO_VALIDITY_BUFFER = {
ColumnNullType.NON_NULLABLE: "This column is non-nullable so does not have a mask",
ColumnNullType.USE_NAN: "This column uses NaN as null so does not have a separate mask",
ColumnNullType.USE_SENTINEL: "This column uses a sentinel value so does not have a mask",
}


@_inherit_docstrings(ProtocolColumn)
class PandasProtocolColumn(ProtocolColumn):
"""
Expand Down Expand Up @@ -414,11 +421,9 @@ def _get_validity_buffer(self) -> Tuple[PandasProtocolBuffer, Any]:
self._validity_buffer_cache = (buffer, dtype)
return self._validity_buffer_cache

if null == ColumnNullType.NON_NULLABLE:
msg = "This column is non-nullable so does not have a mask"
elif null == ColumnNullType.USE_NAN:
msg = "This column uses NaN as null so does not have a separate mask"
else:
try:
msg = _NO_VALIDITY_BUFFER[null]
except KeyError:
raise NotImplementedError("See self.describe_null")

raise NoValidityBuffer(msg)
Expand Down
13 changes: 11 additions & 2 deletions modin/test/exchange/dataframe_protocol/pandas/test_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
from modin.test.test_utils import warns_that_defaulting_to_pandas


def test_simple_import():
modin_df_producer = pd.DataFrame(test_data["int_data"])
def eval_df_protocol(modin_df_producer):
internal_modin_df_producer = modin_df_producer.__dataframe__()
# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, this one raises a warning on `.from_dataframe`
Expand All @@ -43,3 +42,13 @@ def test_simple_import():

df_equals(modin_df_producer, modin_df_consumer)
df_equals(modin_df_producer, internal_modin_df_consumer)


def test_simple_import():
    """Run the ``eval_df_protocol`` checks on a frame built from ``test_data["int_data"]``."""
    eval_df_protocol(pd.DataFrame(test_data["int_data"]))


def test_categorical_from_dataframe():
    """Run the ``eval_df_protocol`` checks on a frame whose only column is categorical."""
    # Build the category-typed column first, then wrap it in a single-column frame.
    categorical_column = pd.Series([0, 1, 2, 3], dtype="category")
    eval_df_protocol(pd.DataFrame({"foo": categorical_column}))

0 comments on commit cac3572

Please sign in to comment.