Skip to content

Commit

Permalink
[MNT] create build tool to check invalid backticks (#6088)
Browse files Browse the repository at this point in the history
#### Reference Issues/PRs

This PR is related to this work here:
#6023

#### What does this implement/fix? Explain your changes.

It adds a Python script in `build_tools` that can be run to list every file,
line and snippet where single backticks are used incorrectly in docstrings.

#### Any other comments?

If this tool is accepted, we can use it to fix all the remaining
invalid single-backtick uses, and it can later be converted into a pre-commit hook.
  • Loading branch information
geetu040 committed Mar 20, 2024
1 parent c4b24c1 commit d1f79ac
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 9 deletions.
120 changes: 120 additions & 0 deletions build_tools/check_backticks.py
@@ -0,0 +1,120 @@
#!/usr/bin/env python3 -u
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

"""Test script to check for invalid use of single-backticks."""

__author__ = ["geetu040"]

import argparse
import ast
import glob
import re


def find_py_files(folder_path):
    """Collect the Python source files located beneath a folder.

    Parameters
    ----------
    folder_path : str
        Root folder of the search; it is interpolated into the glob pattern
        ``{folder_path}/**/*.py``.

    Returns
    -------
    list of str
        Paths matched by ``glob.glob`` with ``recursive=True``, i.e. ``.py``
        files directly in ``folder_path`` or in any of its subfolders.
    """
    pattern = f"{folder_path}/**/*.py"
    matches = glob.glob(pattern, recursive=True)
    return matches


def extract_docstrings(filename):
    """Extract docstring-like string statements from a Python file.

    Parameters
    ----------
    filename : str
        Path of the Python source file to parse.

    Returns
    -------
    dict of int to str
        Maps the line number on which a string starts to the string value.
        Every expression statement that consists solely of a string constant
        is collected, so bare strings that are not formally docstrings are
        included as well.

    Raises
    ------
    SyntaxError
        If the file cannot be parsed as Python source.
    """
    # Read raw bytes and let the parser detect the encoding (UTF-8 default,
    # BOM, or a PEP 263 coding cookie). Text mode without an explicit encoding
    # would decode with the platform locale and can fail on non-ASCII sources.
    with open(filename, "rb") as f:
        tree = ast.parse(f.read(), filename=filename)

    docstrings = {}
    for node in ast.walk(tree):
        # only expression statements can be docstrings
        if not isinstance(node, ast.Expr):
            continue
        constant = node.value
        # an expression statement whose value is a string constant
        # See https://docs.python.org/3/library/ast.html#abstract-grammar
        if isinstance(constant, ast.Constant) and isinstance(constant.value, str):
            docstrings[constant.lineno] = constant.value

    return docstrings


def find_invalid_backtick_text(docstring):
    """Find invalid single-backtick text in a docstring.

    A single-backtick span is considered valid when it is

    * the content of an interpreted-text role, i.e. the opening backtick is
      immediately preceded by a role marker such as ``:math:`` or
      ``:py:class:``, or
    * an rst hyperlink of the form ``text <http...>`` wrapped in backticks.

    Spans delimited by two or more backticks are ignored altogether.

    Parameters
    ----------
    docstring : str
        Docstring text to inspect.

    Returns
    -------
    set of str
        The distinct invalid spans, each including its enclosing backticks.
    """
    # remove all multiple backticks to avoid interference
    # we are looking only for invalid single-backtick
    docstring = re.sub(r"`{2,}.*?`{2,}", "", docstring, flags=re.DOTALL)

    # One pass over all single-backtick spans. The optional "role" group only
    # matches a role marker that directly precedes the opening backtick, so
    # validity is decided per occurrence: a bare `x` is still reported when
    # :math:`x` appears elsewhere, and unrelated colons earlier in the text
    # no longer make a span look like a role.
    span_pattern = r"(?P<role>(?::[\w.+-]+)+:)?(?P<text>`.*?`)"

    invalid_backtick_text = set()
    for match in re.finditer(span_pattern, docstring, flags=re.DOTALL):
        # expressions like :math:`d(x, y):= (x-y)^2` are valid cases
        if match.group("role") is not None:
            continue
        text = match.group("text")
        # rst hyperlinks are valid cases
        if re.match(r"`.*?<http.*?>`", text, flags=re.DOTALL):
            continue
        invalid_backtick_text.add(text)

    return invalid_backtick_text


def main():
    """Run the backtick check from the command line and print a report.

    Reads an optional positional ``folder_path`` argument (default
    ``"./sktime"``), scans every Python file found below it and prints one
    line per offending docstring in the form ``<file>:<lineno> <snippets>``.

    The report is sorted by file, line number and snippet, so that repeated
    runs on the same tree produce identical output; the snippets are stored
    in sets, whose iteration order for strings is not stable across runs.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(
        description="Test script to check for invalid use of single-backticks."
    )
    parser.add_argument(
        "folder_path",
        nargs="?",
        default="./sktime",
        help="Folder path to search for Python files",
    )
    args = parser.parse_args()
    folder_path = args.folder_path

    # file -> {lineno -> set of invalid snippets}, only for files with findings
    results = {}
    for file in sorted(find_py_files(folder_path)):
        results_on_file = {}
        for lineno, docstring in extract_docstrings(file).items():
            invalid_backtick_text = find_invalid_backtick_text(docstring)
            if invalid_backtick_text:
                results_on_file[lineno] = invalid_backtick_text
        if results_on_file:
            results[file] = results_on_file

    # print the lines along with the invalid backticks text
    print(f'Results in "{folder_path}"')  # noqa: T201
    if not results:
        print("No invalid backticks found")  # noqa: T201
        return

    print(f"Total Files with invalid backticks: {len(results)}")  # noqa: T201
    for filename, result in results.items():
        for lineno in sorted(result):
            errors = " ".join(sorted(result[lineno]))
            print(f"{filename}:{lineno} {errors}")  # noqa: T201


if __name__ == "__main__":
    # Usage: defaults to "./sktime"
    #     python build_tools/check_backticks.py
    # Usage: folder path as argument
    #     python build_tools/check_backticks.py sktime/classification/distance_based
    main()
2 changes: 1 addition & 1 deletion sktime/classification/dictionary_based/_boss.py
Expand Up @@ -112,7 +112,7 @@ class BOSSEnsemble(BaseClassifier):
Notes
-----
For the Java version, see
- ``Original Publication <https://github.com/patrickzib/SFA>``_.
- `Original Publication <https://github.com/patrickzib/SFA>`_.
- `TSML <https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
tsml/classifiers/dictionary_based/BOSS.java>`_.
Expand Down
2 changes: 1 addition & 1 deletion sktime/classification/dictionary_based/_muse.py
Expand Up @@ -101,7 +101,7 @@ class MUSE(BaseClassifier):
Notes
-----
For the Java version, see
- ``Original Publication <https://github.com/patrickzib/SFA>``_.
- `Original Publication <https://github.com/patrickzib/SFA>`_.
- `MUSE
<https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/tsml/
classifiers/multivariate/WEASEL_MUSE.java>`_.
Expand Down
2 changes: 1 addition & 1 deletion sktime/classification/dictionary_based/_weasel.py
Expand Up @@ -103,7 +103,7 @@ class WEASEL(BaseClassifier):
Notes
-----
For the Java version, see
- ``Original Publication <https://github.com/patrickzib/SFA>``_.
- `Original Publication <https://github.com/patrickzib/SFA>`_.
- `TSML <https://github.com/uea-machine-learning/tsml/blob/master/src/main/java
/tsml/classifiers/dictionary_based/WEASEL.java>`_.
Expand Down
2 changes: 1 addition & 1 deletion sktime/classification/feature_based/_catch22_classifier.py
Expand Up @@ -53,7 +53,7 @@ class Catch22Classifier(_DelegatedClassifier):
Notes
-----
Authors ``catch22ForestClassifier <https://github.com/chlubba/sktime-catch22>``_.
Authors `catch22ForestClassifier <https://github.com/chlubba/sktime-catch22>`_.
For the Java version, see `tsml <https://github.com/uea-machine-learning/tsml/blob
/master/src/main/java/tsml/classifiers/hybrids/Catch22Classifier.java>`_.
Expand Down
2 changes: 1 addition & 1 deletion sktime/clustering/k_means/_k_means_tslearn.py
Expand Up @@ -51,7 +51,7 @@ class TimeSeriesKMeansTslearn(_TslearnAdapter, BaseClusterer):
parallelization.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See scikit-learns'
``Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>``_
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
for more details.
dtw_inertia: bool (default: False)
Expand Down
2 changes: 1 addition & 1 deletion sktime/clustering/kernel_k_means.py
Expand Up @@ -48,7 +48,7 @@ class TimeSeriesKernelKMeans(_TslearnAdapter, BaseClusterer):
computations.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See scikit-learns'
``Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>``_
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
for more details.
random_state: int or np.random.RandomState instance or None, defaults = None
Determines random number generation for centroid initialization.
Expand Down
2 changes: 1 addition & 1 deletion sktime/dists_kernels/gak.py
Expand Up @@ -21,7 +21,7 @@ class GAKernel(_TslearnPwTrafoAdapter, BasePairwiseTransformerPanel):
The number of jobs to run in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See scikit-learns'
``Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>``__
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
for more details.
verbose : int, optional, default=0
The verbosity level: if non zero, progress messages are printed.
Expand Down
4 changes: 2 additions & 2 deletions sktime/transformations/series/detrend/_deseasonalize.py
Expand Up @@ -50,7 +50,7 @@ class Deseasonalizer(BaseTransformer):
-----
For further explanation on seasonal components and additive vs.
multiplicative models see
``Forecasting: Principles and Practice <https://otexts.com/fpp3/components.html>``_.
`Forecasting: Principles and Practice <https://otexts.com/fpp3/components.html>`_.
Seasonal decomposition is computed using `statsmodels
<https://www.statsmodels.org/stable/generated/statsmodels.tsa.seasonal.seasonal_decompose.html>`_.
Expand Down Expand Up @@ -286,7 +286,7 @@ class ConditionalDeseasonalizer(Deseasonalizer):
-----
For further explanation on seasonal components and additive vs.
multiplicative models see
``Forecasting: Principles and Practice <https://otexts.com/fpp3/components.html>``_.
`Forecasting: Principles and Practice <https://otexts.com/fpp3/components.html>`_.
Seasonal decomposition is computed using `statsmodels
<https://www.statsmodels.org/stable/generated/statsmodels.tsa.seasonal.seasonal_decompose.html>`_.
Expand Down

0 comments on commit d1f79ac

Please sign in to comment.