Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT-#3044: Create Extensions Module in Modin #6961

Merged
merged 15 commits into from
Mar 1, 2024
Merged
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ jobs:
- run: MODIN_BENCHMARK_MODE=True ${{ matrix.execution.shell-ex }} modin/pandas/test/internals/test_benchmark_mode.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/pandas/test/internals/test_repartition.py
- run: ${{ matrix.execution.shell-ex }} $PARALLEL modin/test/test_partition_api.py
- run: ${{ matrix.execution.shell-ex }} modin/pandas/api/extensions/test
- name: xgboost tests
run: |
# TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
Expand Down
26 changes: 26 additions & 0 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
+ f" Modin ({__pandas_version__}.X). This may cause undesired side effects!"
)

# Registry of the extensions assigned to this module: maps the name an extension
# was registered under to the object that was registered.
_PD_EXTENSIONS_ = {}

# to not pollute namespace
del version

Expand Down Expand Up @@ -225,7 +228,30 @@ def _update_engine(publisher: Parameter):
from .plotting import Plotting as plotting
from .series import Series


def __getattr__(name: str):
    """
    Override getattr on the module to enable extensions.

    Parameters
    ----------
    name : str
        The name of the attribute being retrieved.

    Returns
    -------
    Attribute
        Returns the extension attribute, if it exists, otherwise returns the attribute
        imported in this file.

    Raises
    ------
    AttributeError
        If `name` is neither a registered extension nor a name defined in this module.
    """
    # Check the extensions registry on its own first. Writing this as
    # ``_PD_EXTENSIONS_.get(name, globals()[name])`` evaluates the fallback eagerly,
    # so a registered extension that is missing from the module globals would raise
    # ``KeyError`` and be reported as a missing attribute.
    try:
        return _PD_EXTENSIONS_[name]
    except KeyError:
        pass
    try:
        return globals()[name]
    except KeyError:
        # The ``KeyError`` is an implementation detail; hide it from the traceback.
        raise AttributeError(
            f"module 'modin.pandas' has no attribute '{name}'"
        ) from None


__all__ = [ # noqa: F405
"_PD_EXTENSIONS_",
"DataFrame",
"Series",
"read_csv",
Expand Down
16 changes: 16 additions & 0 deletions modin/pandas/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from modin.pandas.api import extensions

# Public names of this package: only the ``extensions`` subpackage is exported.
__all__ = ["extensions"]
24 changes: 24 additions & 0 deletions modin/pandas/api/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .extensions import (
register_dataframe_accessor,
register_pd_accessor,
register_series_accessor,
)

# Public names of this package: the decorators re-exported from ``.extensions``.
__all__ = [
    "register_dataframe_accessor",
    "register_series_accessor",
    "register_pd_accessor",
]
163 changes: 163 additions & 0 deletions modin/pandas/api/extensions/extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from types import ModuleType
from typing import Any, Union

import modin.pandas as pd


def _set_attribute_on_obj(
name: str, extensions_dict: dict, obj: Union[pd.DataFrame, pd.Series, ModuleType]
):
"""
Create a new or override existing attribute on obj.

Parameters
----------
name : str
The name of the attribute to assign to `obj`.
extensions_dict : dict
The dictionary mapping extension name to `new_attr` (assigned below).
obj : DataFrame, Series, or modin.pandas
The object we are assigning the new attribute to.

Returns
-------
decorator
Returns the decorator function.
"""

def decorator(new_attr: Any):
"""
The decorator for a function or class to be assigned to name

Parameters
----------
new_attr : Any
The new attribute to assign to name.

Returns
-------
new_attr
Unmodified new_attr is return from the decorator.
"""
extensions_dict[name] = new_attr
setattr(obj, name, new_attr)
return new_attr

return decorator


def register_dataframe_accessor(name: str):
    """
    Build a decorator that attaches an attribute to DataFrame under `name`.

    The decorated object is stored in the DataFrame extensions dictionary and set
    as an attribute on DataFrame. Typical usage:

    ```
    @register_dataframe_accessor("new_method")
    def my_new_dataframe_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Once registered, the attribute is reachable through the provided name:

    ```
    df.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to DataFrame.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    dataframe_registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    return _set_attribute_on_obj(
        name=name, extensions_dict=dataframe_registry, obj=pd.DataFrame
    )


def register_series_accessor(name: str):
    """
    Build a decorator that attaches an attribute to Series under `name`.

    The decorated object is stored in the Series extensions dictionary and set
    as an attribute on Series. Typical usage:

    ```
    @register_series_accessor("new_method")
    def my_new_series_method(*args, **kwargs):
        # logic goes here
        return
    ```

    Once registered, the attribute is reachable through the provided name:

    ```
    s.new_method(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to Series.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    series_registry = pd.series._SERIES_EXTENSIONS_
    return _set_attribute_on_obj(
        name=name, extensions_dict=series_registry, obj=pd.Series
    )


def register_pd_accessor(name: str):
    """
    Build a decorator that attaches an attribute to modin.pandas under `name`.

    The decorated object is stored in the modin.pandas extensions dictionary and
    set as an attribute on the modin.pandas module. Typical usage:

    ```
    @register_pd_accessor("new_function")
    def my_new_pd_function(*args, **kwargs):
        # logic goes here
        return
    ```

    Once registered, the attribute is reachable through the provided name:

    ```
    import modin.pandas as pd

    pd.new_function(*my_args, **my_kwargs)
    ```

    Parameters
    ----------
    name : str
        The name of the attribute to assign to modin.pandas.

    Returns
    -------
    decorator
        Returns the decorator function.
    """
    module_registry = pd._PD_EXTENSIONS_
    return _set_attribute_on_obj(name=name, extensions_dict=module_registry, obj=pd)
12 changes: 12 additions & 0 deletions modin/pandas/api/extensions/test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
54 changes: 54 additions & 0 deletions modin/pandas/api/extensions/test/test_dataframe_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import modin.pandas as pd
from modin.pandas.api.extensions import register_dataframe_accessor


def test_dataframe_extension_simple_method():
    """Check that a function registered on DataFrame can be called as a method."""
    method_name = "new_method"
    expected_string_val = "Some string value"
    # The frame is created before registration on purpose, as in the original test.
    df = pd.DataFrame([1, 2, 3])

    @register_dataframe_accessor(method_name)
    def my_method_implementation(self):
        return expected_string_val

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert method_name in registry
    assert registry[method_name] is my_method_implementation
    assert df.new_method() == expected_string_val


def test_dataframe_extension_non_method():
    """Check that a plain (non-callable) value can be registered on DataFrame."""
    attribute_name = "four"
    expected_val = 4
    # The registration helper is used as a plain function, since a bare value
    # cannot be decorated with the ``@`` syntax.
    make_extension = register_dataframe_accessor(attribute_name)
    make_extension(expected_val)
    df = pd.DataFrame([1, 2, 3])

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert attribute_name in registry
    assert registry[attribute_name] == 4
    assert df.four == expected_val


def test_dataframe_extension_accessing_existing_methods():
    """Check that a registered method can call existing DataFrame methods via self."""
    method_name = "self_accessor"
    df = pd.DataFrame([1, 2, 3])
    # Reference result computed with the existing DataFrame API.
    column_sums = df.sum()
    column_counts = df.count()
    expected_result = column_sums / column_counts

    @register_dataframe_accessor(method_name)
    def my_average(self):
        return self.sum() / self.count()

    registry = pd.dataframe._DATAFRAME_EXTENSIONS_
    assert method_name in registry
    assert registry[method_name] is my_average
    assert df.self_accessor().equals(expected_result)
37 changes: 37 additions & 0 deletions modin/pandas/api/extensions/test/test_pd_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import modin.pandas as pd
from modin.pandas.api.extensions import register_pd_accessor


def test_dataframe_extension_simple_method():
    """Check that a function registered on the modin.pandas namespace is callable."""
    method_name = "new_method"
    expected_string_val = "Some string value"

    @register_pd_accessor(method_name)
    def my_method_implementation():
        return expected_string_val

    registry = pd._PD_EXTENSIONS_
    assert method_name in registry
    assert registry[method_name] is my_method_implementation
    assert pd.new_method() == expected_string_val


def test_dataframe_extension_non_method():
    """Check that a plain (non-callable) value can be registered on modin.pandas."""
    expected_val = 4
    attribute_name = "four"
    register_pd_accessor(attribute_name)(expected_val)
    # The value must be recorded in the modin.pandas registry. Checking the
    # DataFrame registry here would only pass if another test had already
    # registered the same name on DataFrame in this session.
    assert attribute_name in pd._PD_EXTENSIONS_
    assert pd._PD_EXTENSIONS_[attribute_name] == expected_val
    assert pd.four == expected_val