In [60]:
import re, sys, inspect, subprocess

In [19]:
import pdpipe

In [20]:
from numpydoc.validate import ERROR_MSGS, validate

In [14]:
# Member names that validate_module() skips when walking a package's modules.
module_blacklist = [
    "_version",
    "cfg",
]

In [94]:
# numpydoc error codes whose report lines are never printed.
#   ES01 - no extended summary found
#   SA01 - "See Also" section not found
#   EX01 - no examples section found
HIDE_ERROR_CODES_LIST = ["ES01", "SA01", "EX01"]

In [98]:
# Alternation of ":CODE:" fragments, one per hidden error code.
HIDE_ERROR_REGEX = "|".join(f":{code}:" for code in HIDE_ERROR_CODES_LIST)

In [99]:
# Compiled once so report lines can be matched against the hidden codes.
HIDE_ERROR_PATTERN = re.compile(pattern=HIDE_ERROR_REGEX)

In [147]:
# Soft error codes: every hidden code, plus codes that are shown in the
# report but do not count as hard errors.
SOFT_ERROR_CODES_LIST = [
    *HIDE_ERROR_CODES_LIST,
    # GL01: Docstring text (summary) should start in the line immediately
    # after the opening quotes (not in the same line, or leaving a blank
    # line in between).
    "GL01",
]

In [148]:
# Alternation of ":CODE:" fragments, one per soft error code.
SOFT_ERROR_REGEX = "|".join(map(":{}:".format, SOFT_ERROR_CODES_LIST))

In [149]:
# Compiled once so report lines can be matched against the soft codes.
SOFT_ERROR_PATTERN = re.compile(pattern=SOFT_ERROR_REGEX)

In [157]:
def get_npdoc_val_report(object_name) -> bool:
    """Print a numpydoc validation report for the Python object of the given name.

    The validation is run in a child process as
    ``<current interpreter> -m numpydoc <object_name> --validate`` and every
    non-empty line of its standard output is treated as one reported error.
    Lines matching HIDE_ERROR_PATTERN are counted but not printed; lines
    matching SOFT_ERROR_PATTERN do not count as hard errors.

    Parameters
    ----------
    object_name : str
        The name of the object for which to run numpydoc docstring validation.

    Returns
    -------
    bool
        True if any hard errors were found (errors not defined as soft errors
        in the SOFT_ERROR_CODES_LIST), or if the numpydoc process exited with
        a non-zero status without producing any report lines.
    """
    print(f"\nnumpydoc validation results for {object_name}:")
    # Use the interpreter running this code rather than whatever "python" is
    # first on PATH, so the same environment (and numpydoc install) is used.
    # A non-zero exit status is not raised on here (check=False); it is
    # inspected below together with the captured report.
    completed = subprocess.run(
        [sys.executable, "-m", "numpydoc", object_name, "--validate"],
        stdout=subprocess.PIPE,
        check=False,
    )
    decoded_output = completed.stdout.decode("utf-8", errors="replace")
    # One report line per error; blank lines are ignored so that neither a
    # missing trailing newline nor stray empty lines skew the counts.
    report_lines = [
        line for line in decoded_output.splitlines() if line.strip()
    ]
    if completed.returncode != 0 and not report_lines:
        # The process failed without reporting anything on stdout, so the
        # validation did not actually run to completion. Do not claim success.
        print(
            f"numpydoc exited with status {completed.returncode} "
            "without producing a report; see stderr for details."
        )
        return True
    nlines = len(report_lines)
    nsoft = sum(1 for line in report_lines if SOFT_ERROR_PATTERN.search(line))
    nhide = sum(1 for line in report_lines if HIDE_ERROR_PATTERN.search(line))
    nerrors = nlines - nhide
    # Any reported line that is not a soft error is a hard error.
    any_hard_errors = nlines > nsoft
    if nerrors > 0:
        print(f"A total of {nerrors} errors were found")
        print(
            f"Out of which {nsoft-nhide} are soft errors"
            f", (an additional {nhide} errors were hidden)."
        )
        for line in report_lines:
            if not HIDE_ERROR_PATTERN.search(line):
                print(line)
    else:
        print("No errors found.")
    return any_hard_errors

In [156]:
# Validate the top-level package docstring; the cell displays the returned flag.
get_npdoc_val_report(object_name="pdpipe")


numpydoc validation results for pdpipe:
No errors found.


False

In [158]:
def recursively_validate_object(val_obj, val_full_name, _ancestors=()) -> list:
    """Validate an object's docstring and those of its public members.

    A numpydoc report is printed for `val_full_name`, and then, recursively,
    for every member returned by ``inspect.getmembers(val_obj)`` whose name
    does not start with an underscore and which is a class or a function.

    Parameters
    ----------
    val_obj : object
        The object whose members are inspected.
    val_full_name : str
        The dotted name of `val_obj`, passed to the validator and used as the
        prefix for member names.
    _ancestors : tuple of int, optional
        Internal. The ids of the objects currently being walked on the way
        down to `val_obj`; callers should not pass it.

    Returns
    -------
    list of str
        Sorted, de-duplicated dotted names of all validated objects for which
        hard errors were reported.
    """
    obj_w_hard_errors = set()
    if get_npdoc_val_report(val_full_name):
        obj_w_hard_errors.add(val_full_name)
    # Objects on the current descent path stay alive while we recurse, so
    # their ids are stable and can be used to detect attribute cycles.
    lineage = _ancestors + (id(val_obj),)
    for name, obj in inspect.getmembers(val_obj):
        if name.startswith("_"):
            continue
        if not (inspect.isclass(obj) or inspect.isfunction(obj)):
            continue
        if id(obj) in lineage:
            # A member that points back to an object we are already inside of
            # (e.g. a class exposed as an attribute of itself) would otherwise
            # recurse without end.
            continue
        obj_w_hard_errors.update(
            recursively_validate_object(
                obj, f"{val_full_name}.{name}", lineage
            )
        )
    return sorted(obj_w_hard_errors)

In [159]:
def validate_module(module_name):
    """Validate numpydoc docstrings across a package and its sub-modules.

    A report is printed for the module itself and then, recursively, for each
    of its sub-modules that is not listed in `module_blacklist`. Finally a
    summary is printed, naming every object for which hard errors were found.

    Parameters
    ----------
    module_name : str
        The importable dotted name of the module to validate.
    """
    # Local import: keeps this function self-contained, without relying on
    # the notebook's import cell.
    import importlib

    print(f"Validating numpy docstrings in the {module_name} module!")
    # import_module returns the already-loaded module when there is one, and
    # otherwise imports it instead of failing with a KeyError on sys.modules.
    module = importlib.import_module(module_name)
    obj_w_hard_errors = set()
    if get_npdoc_val_report(module_name):
        obj_w_hard_errors.add(module_name)
    submodule_prefix = f"{module_name}."
    for name, obj in inspect.getmembers(module, inspect.ismodule):
        if name in module_blacklist:
            continue
        # Only descend into real sub-modules of this package; modules that
        # were merely imported into its namespace are not ours to validate.
        if not getattr(obj, "__name__", "").startswith(submodule_prefix):
            continue
        module_full_name = f"{module_name}.{name}"
        obj_w_hard_errors.update(
            recursively_validate_object(obj, module_full_name)
        )
    if obj_w_hard_errors:
        print("Hard errors were found in the following objects:")
        print("\n".join(sorted(obj_w_hard_errors)))
    else:
        print("No hard errors were found anywhere in the module!")

In [160]:
# Run the full docstring validation over the pdpipe package.
validate_module(module_name="pdpipe")

Validating numpy docstrings in the pdpipe module!

numpydoc validation results for pdpipe:
No errors found.

numpydoc validation results for pdpipe.cfg:
No errors found.

numpydoc validation results for pdpipe.cfg.CfgKey:
A total of 1 errors were found
Out of which 0 are soft errors, (an additional 0 errors were hidden).
pdpipe.cfg.CfgKey:GL08:The object does not have a docstring

numpydoc validation results for pdpipe.cond:
No errors found.

numpydoc validation results for pdpipe.cond.AlwaysTrue:
A total of 1 errors were found
Out of which 0 are soft errors, (an additional 2 errors were hidden).
pdpipe.cond.AlwaysTrue:PR01:Parameters {'**kwargs'} not documented

numpydoc validation results for pdpipe.cond.AlwaysTrue.fit:
A total of 1 errors were found
Out of which 1 are soft errors, (an additional 3 errors were hidden).
pdpipe.cond.AlwaysTrue.fit:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line i

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.fly.DropRowsByQualifier.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.fly.DropRowsByQualifier.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.fly.DropRowsByQualifier.ColumnsBasedPipelineStage:PR01:Parameters {'desc_temp', '**kwargs', 'columns', 'exclude_columns', 'none_columns'} not documented
pdpipe.fly.DropRowsByQualifier.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.fly.DropRowsByQualifier.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.fly.DropRowsByQualifier.ConditionValidator:GL01:Docstring text (summary) should start i

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.fly.KeepRowsByQualifier.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.fly.KeepRowsByQualifier.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.fly.KeepRowsByQualifier.ColumnsBasedPipelineStage:PR01:Parameters {'none_columns', 'exclude_columns', 'columns', '**kwargs', 'desc_temp'} not documented
pdpipe.fly.KeepRowsByQualifier.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.fly.KeepRowsByQualifier.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.fly.KeepRowsByQualifier.ConditionValidator:GL01:Docstring text (summary) should start i

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.fly.PdPipelineStage.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.fly.PdPipelineStage.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.fly.PdPipelineStage.ColumnsBasedPipelineStage:PR01:Parameters {'exclude_columns', 'desc_temp', 'columns', '**kwargs', 'none_columns'} not documented
pdpipe.fly.PdPipelineStage.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.fly.PdPipelineStage.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.fly.PdPipelineStage.ConditionValidator:GL01:Docstring text (summary) should start in the line immediately a

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.lbl.DropLabelsByValues.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.lbl.DropLabelsByValues.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.lbl.DropLabelsByValues.ColumnsBasedPipelineStage:PR01:Parameters {'**kwargs', 'columns', 'exclude_columns', 'none_columns', 'desc_temp'} not documented
pdpipe.lbl.DropLabelsByValues.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.lbl.DropLabelsByValues.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.lbl.DropLabelsByValues.ConditionValidator:GL01:Docstring text (summary) should start in the 

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.lbl.PdPipelineStage.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.lbl.PdPipelineStage.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.lbl.PdPipelineStage.ColumnsBasedPipelineStage:PR01:Parameters {'desc_temp', 'columns', 'none_columns', 'exclude_columns', '**kwargs'} not documented
pdpipe.lbl.PdPipelineStage.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.lbl.PdPipelineStage.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.lbl.PdPipelineStage.ConditionValidator:GL01:Docstring text (summary) should start in the line immediately a

Parameters 
--------- in 
A pipeline stage that operates on a subset of dataframe columns.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 2 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.nltk_stages.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.ColumnsBasedPipelineStage:PR01:Parameters {'**kwargs', 'columns', 'none_columns', 'desc_temp', 'exclude_columns'} not documented

numpydoc validation results for pdpipe.nltk_stages.ColumnsBasedPipelineStage.AdHocStage:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.nltk_stages.ColumnsBasedPipelineStage.AdHocStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.ColumnsBasedPipelineStage.AdHocStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Gener

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.nltk_stages.ColumnsBasedPipelineStage.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.ColumnsBasedPipelineStage.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.nltk_stages.ColumnsBasedPipelineStage.ColumnsBasedPipelineStage:PR01:Parameters {'exclude_columns', 'desc_temp', '**kwargs', 'columns', 'none_columns'} not documented
pdpipe.nltk_stages.ColumnsBasedPipelineStage.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.nltk_stages.ColumnsBasedPipelineStage.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.nltk_stages.Colum

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.nltk_stages.DropRareTokens.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.DropRareTokens.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.nltk_stages.DropRareTokens.ColumnsBasedPipelineStage:PR01:Parameters {'columns', 'exclude_columns', 'none_columns', 'desc_temp', '**kwargs'} not documented
pdpipe.nltk_stages.DropRareTokens.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.nltk_stages.DropRareTokens.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.nltk_stages.DropRareTokens.ConditionValidator:GL01:Docstring text (summa

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.nltk_stages.MapColVals.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.MapColVals.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.nltk_stages.MapColVals.ColumnsBasedPipelineStage:PR01:Parameters {'desc_temp', 'columns', 'none_columns', 'exclude_columns', '**kwargs'} not documented
pdpipe.nltk_stages.MapColVals.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.nltk_stages.MapColVals.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.nltk_stages.MapColVals.ConditionValidator:GL01:Docstring text (summary) should start in the 

Parameters 
--------- in 
Creates and adds a pipeline stage that operates on a subset of dataframe columns to this pipeline stage.
... in the docstring of ColumnsBasedPipelineStage in /Users/shaypalachy/clones/pdpipe/pdpipe/core.py.
  warn(msg)


A total of 4 errors were found
Out of which 1 are soft errors, (an additional 2 errors were hidden).
pdpipe.nltk_stages.RemoveStopwords.ColumnsBasedPipelineStage:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.nltk_stages.RemoveStopwords.ColumnsBasedPipelineStage:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.nltk_stages.RemoveStopwords.ColumnsBasedPipelineStage:PR01:Parameters {'columns', '**kwargs', 'exclude_columns', 'none_columns', 'desc_temp'} not documented
pdpipe.nltk_stages.RemoveStopwords.ColumnsBasedPipelineStage:RT01:No Returns section found

numpydoc validation results for pdpipe.nltk_stages.RemoveStopwords.ConditionValidator:
A total of 4 errors were found
Out of which 1 are soft errors, (an additional 1 errors were hidden).
pdpipe.nltk_stages.RemoveStopwords.ConditionValidator:GL01:Docstring text 

KeyboardInterrupt: 

In [151]:
# Validate a single sub-module; the cell displays the names with hard errors.
recursively_validate_object(
    val_obj=pdpipe.run_time_parameters,
    val_full_name="pdpipe.run_time_parameters",
)


numpydoc validation results for pdpipe.run_time_parameters:
No errors found.

numpydoc validation results for pdpipe.run_time_parameters.DynamicParameter:
No errors found.

numpydoc validation results for pdpipe.run_time_parameters.DynamicParameter.fit_transform:
A total of 7 errors were found
Out of which 1 are soft errors, (an additional 3 errors were hidden).
pdpipe.run_time_parameters.DynamicParameter.fit_transform:GL01:Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between)
pdpipe.run_time_parameters.DynamicParameter.fit_transform:SS05:Summary must start with infinitive verb, not third person (e.g. use "Generate" instead of "Generates")
pdpipe.run_time_parameters.DynamicParameter.fit_transform:PR01:Parameters {'y'} not documented
pdpipe.run_time_parameters.DynamicParameter.fit_transform:PR02:Unknown parameters {'y: optional'}
pdpipe.run_time_parameters.DynamicParameter.fit_transform:PR09:Par

['pdpipe.run_time_parameters.DynamicParameter.fit_transform',
 'pdpipe.run_time_parameters.DynamicParameter.transform',
 'pdpipe.run_time_parameters.dynamic']