Skip to content

Commit

Permalink
FEAT-#6983: Add Pluggable Documentation Module Support (#6986)
Browse files Browse the repository at this point in the history
Signed-off-by: Devin Petersohn <devin.petersohn@snowflake.com>
Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru>
  • Loading branch information
sfc-gh-dpetersohn and YarShev committed Mar 5, 2024
1 parent 5769a1d commit 338c501
Show file tree
Hide file tree
Showing 9 changed files with 197 additions and 2 deletions.
3 changes: 3 additions & 0 deletions modin/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
CIAWSSecretAccessKey,
CpuCount,
DaskThreadsPerWorker,
DocModule,
DoUseCalcite,
Engine,
EnvironmentVariable,
Expand Down Expand Up @@ -109,4 +110,6 @@
"LogMode",
"LogMemoryInterval",
"LogFileSize",
# Plugin settings
"DocModule",
]
43 changes: 43 additions & 0 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

"""Module houses Modin configs originated from environment variables."""

import importlib
import os
import secrets
import sys
Expand Down Expand Up @@ -843,6 +844,48 @@ class LazyExecution(EnvironmentVariable, type=str):
default = "Auto"


class DocModule(EnvironmentVariable, type=ExactStr):
    """
    The module that Modin takes docstrings from.

    The value set here must be a valid, importable module. It should have
    a `DataFrame`, `Series`, and/or several APIs directly (e.g. `read_csv`).
    """

    varname = "MODIN_DOC_MODULE"
    default = "pandas"

    @classmethod
    def put(cls, value: str) -> None:
        """
        Assign a value to the DocModule config and re-apply the docstrings.

        Parameters
        ----------
        value : str
            Config value to set. Must be the name of an importable module.

        Raises
        ------
        ImportError
            If `value` cannot be imported. The check runs before the value
            is stored, so a failed call leaves the previous value in place.
        """
        # Fail fast on an unimportable module: once the value is stored, the
        # reloads below would hit the same import error part-way through and
        # leave both the config and ``modin.pandas`` in an inconsistent state.
        importlib.import_module(value)
        super().put(value)
        # Reload everything to apply the documentation. This is required since the
        # docs might already have been created and the implementation will assume
        # that the new docs are applied when the config is set.
        import modin.pandas as pd

        # Order matters: submodules are reloaded first, the package itself last.
        for submodule_name in (
            "accessor",
            "base",
            "dataframe",
            "general",
            "groupby",
            "io",
            "iterator",
            "series",
            "series_utils",
            "utils",
            "window",
        ):
            importlib.reload(getattr(pd, submodule_name))
        importlib.reload(pd)


class DaskThreadsPerWorker(EnvironmentVariable, type=int):
"""Number of threads per Dask worker."""

Expand Down
17 changes: 17 additions & 0 deletions modin/config/test/docs_module/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .classes import DataFrame, Series
from .functions import read_csv

__all__ = ["DataFrame", "Series", "read_csv"]
24 changes: 24 additions & 0 deletions modin/config/test/docs_module/classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


class DataFrame:
    # Documentation-only stand-in used by the DocModule tests. No class
    # docstring is defined on purpose: only ``apply`` carries replacement docs.
    def apply(self):
        """This is a test of the documentation module for DataFrame."""
        return None


class Series:
    # Documentation-only stand-in used by the DocModule tests. No class
    # docstring is defined on purpose: only ``isna`` carries replacement docs.
    def isna(self):
        """This is a test of the documentation module for Series."""
        return None
17 changes: 17 additions & 0 deletions modin/config/test/docs_module/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.


# Documentation-only stand-in: the DocModule tests only read its docstring.
def read_csv():
    """Test override for functions on the module."""
    return None
30 changes: 30 additions & 0 deletions modin/config/test/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,36 @@ def test_custom_help(make_custom_envvar):
assert "custom var" in make_custom_envvar.get_help()


def test_doc_module():
    """Check that DocModule overrides docstrings and falls back to pandas docs."""
    import pandas

    import modin.pandas as pd
    from modin.config import DocModule

    # Remember the current setting: ``DocModule.put`` changes global config and
    # reloads ``modin.pandas``, so it must be undone for the tests that follow.
    previous_doc_module = DocModule.get()
    try:
        DocModule.put("modin.config.test.docs_module")

        # Test for override
        assert (
            pd.DataFrame.apply.__doc__
            == "This is a test of the documentation module for DataFrame."
        )
        # Test for pandas doc when method is not defined on the plugin module
        assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
        assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
        # Test for override
        assert (
            pd.Series.isna.__doc__
            == "This is a test of the documentation module for Series."
        )
        # Test for pandas doc when method is not defined on the plugin module
        assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
        assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
        # Test for override
        assert pd.read_csv.__doc__ == "Test override for functions on the module."
        # Test for pandas doc when function is not defined on module.
        assert pandas.read_table.__doc__ in pd.read_table.__doc__
    finally:
        # Restore the previous docs module (this reloads ``modin.pandas`` again)
        # so the override does not leak into other tests.
        DocModule.put(previous_doc_module)


def test_hdk_envvar():
try:
import pyhdk
Expand Down
20 changes: 20 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,26 @@ def isin(self, values): # noqa: PR01, RT01, D200
"""
return super(DataFrame, self).isin(values)

def isna(self):
    """
    Detect missing values.

    Returns
    -------
    The result of detecting missing values.
    """
    # NOTE(review): presumably defined here, rather than only inherited, so that
    # docstring inheritance can patch ``DataFrame``'s own attribute — confirm.
    # The explicit two-argument ``super`` matches the neighbouring ``isin``.
    missing_mask = super(DataFrame, self).isna()
    return missing_mask

def isnull(self):
    """
    Detect missing values.

    Returns
    -------
    The result of detecting missing values.
    """
    # NOTE(review): presumably defined here, rather than only inherited, so that
    # docstring inheritance can patch ``DataFrame``'s own attribute — confirm.
    # The explicit two-argument ``super`` matches the neighbouring ``isin``.
    missing_mask = super(DataFrame, self).isnull()
    return missing_mask

def iterrows(self): # noqa: D200
"""
Iterate over ``DataFrame`` rows as (index, ``Series``) pairs.
Expand Down
20 changes: 20 additions & 0 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,26 @@ def isin(self, values): # noqa: PR01, RT01, D200
"""
return super(Series, self).isin(values, shape_hint="column")

def isna(self):
    """
    Detect missing values.

    Returns
    -------
    The result of detecting missing values.
    """
    # NOTE(review): presumably defined here, rather than only inherited, so that
    # docstring inheritance can patch ``Series``'s own attribute — confirm.
    # The explicit two-argument ``super`` matches the neighbouring ``isin``.
    missing_mask = super(Series, self).isna()
    return missing_mask

def isnull(self):
    """
    Detect missing values.

    Returns
    -------
    The result of detecting missing values.
    """
    # NOTE(review): presumably defined here, rather than only inherited, so that
    # docstring inheritance can patch ``Series``'s own attribute — confirm.
    # The explicit two-argument ``super`` matches the neighbouring ``isin``.
    missing_mask = super(Series, self).isnull()
    return missing_mask

def item(self): # noqa: RT01, D200
"""
Return the first element of the underlying data as a Python scalar.
Expand Down
25 changes: 23 additions & 2 deletions modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
)

from modin._version import get_versions
from modin.config import Engine, StorageFormat
from modin.config import DocModule, Engine, StorageFormat

T = TypeVar("T")
"""Generic type parameter"""
Expand Down Expand Up @@ -399,6 +399,22 @@ def _inherit_docstrings(
are not defined in target class (but are defined in the ancestor class),
which means that ancestor class attribute docstrings could also change.
"""
# Import the docs module and get the class (e.g. `DataFrame`).
imported_doc_module = importlib.import_module(DocModule.get())
# Set the default parent so we can use it in case some docs are missing from
# parent module.
default_parent = parent
# Try to get the parent object from the doc module, and if it isn't there,
# get it from parent instead. We only do this if we are overriding pandas
# documentation. We don't touch other docs.
if DocModule.get() != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
overwrite_existing = True

def _documentable_obj(obj: object) -> bool:
"""Check if `obj` docstring could be patched."""
Expand All @@ -421,7 +437,12 @@ def decorator(cls_or_func: Fn) -> Fn:
if attr in seen:
continue
seen.add(attr)
parent_obj = getattr(parent, attr, None)
# Try to get the attribute from the docs class first, then
# from the default parent (pandas), and if it's not in either,
# set `parent_obj` to `None`.
parent_obj = getattr(
parent, attr, getattr(default_parent, attr, None)
)
if (
parent_obj in excluded
or not _documentable_obj(parent_obj)
Expand Down

0 comments on commit 338c501

Please sign in to comment.