python fix docs and check in CI

pola-rs · Dec 18, 2021 · 921d248 · 921d248
1 parent 84892a3
commit 921d248
Show file tree

Hide file tree

Showing 10 changed files with 128 additions and 66 deletions.
diff --git a/.github/workflows/docs_check.yaml b/.github/workflows/docs_check.yaml
@@ -0,0 +1,22 @@
+name: Docs check
+
+on:
+  - pull_request
+jobs:
+  test:
+    name: Docs check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r py-polars/build.requirements.txt
+      - name: Build python reference
+        run: |
+          cd py-polars/docs
+          make html SPHINXOPTS="-W"
diff --git a/py-polars/docs/source/conf.py b/py-polars/docs/source/conf.py
@@ -29,12 +29,12 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    "numpydoc",  # numpy docstrings
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
     "sphinx.ext.doctest",
     "sphinx.ext.extlinks",
     "sphinx.ext.todo",
-    "numpydoc",  # numpy docstrings
     "sphinx.ext.intersphinx",
     "sphinx.ext.coverage",
     "sphinx.ext.mathjax",
@@ -62,7 +62,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
+# html_static_path = ["_static"]
 
 html_logo = "../img/polars_logo.png"
 autosummary_generate = True

diff --git a/py-polars/docs/source/reference/dataframe.rst b/py-polars/docs/source/reference/dataframe.rst
@@ -5,8 +5,6 @@ DataFrame
 
 Constructor
 -----------
-.. autosummary::
-   :toctree: api/
 
    DataFrame
 
@@ -105,6 +103,7 @@ Manipulation/ selection
     DataFrame.hstack
     DataFrame.vstack
     DataFrame.groupby
+    DataFrame.groupby_dynamic
     DataFrame.select
     DataFrame.with_columns
     DataFrame.with_column_renamed

diff --git a/py-polars/docs/source/reference/expression.rst b/py-polars/docs/source/reference/expression.rst
@@ -177,6 +177,7 @@ Manipulation/ selection
     Expr.backward_fill
     Expr.reverse
     Expr.filter
+    Expr.where
     Expr.head
     Expr.tail
     Expr.reinterpret
@@ -195,10 +196,7 @@ Column names
 ------------
    Expressions that help renaming/ selecting columns by name.
 
-   A wildcard `col("*")` selects all columns in a DataFrame.
-
-   Examples
-   --------
+   A wildcard `col("*")`/`pl.all()` selects all columns in a DataFrame.
 
    >>> df.select(col("*"))
 

diff --git a/py-polars/docs/source/reference/series.rst b/py-polars/docs/source/reference/series.rst
@@ -92,7 +92,6 @@ Computations
     Series.cummin
     Series.cummax
     Series.cumprod
-    Series.arg_sort
     Series.arg_true
     Series.arg_unique
     Series.unique

diff --git a/py-polars/polars/internals/expr.py b/py-polars/polars/internals/expr.py
@@ -297,9 +297,9 @@ def exclude(
         columns
             Column(s) to exclude from selection.
             This can be:
-                - a column name, or multiple names
-                - a regular expression starting with `^` and ending with `$`
-                - a dtype or multiple dtypes
+            - a column name, or multiple names
+            - a regular expression starting with `^` and ending with `$`
+            - a dtype or multiple dtypes
 
         Examples
         --------
@@ -1769,20 +1769,20 @@ def rank(self, method: str = "average", reverse: bool = False) -> "Expr":
             {'average', 'min', 'max', 'dense', 'ordinal', 'random'}, optional
             The method used to assign ranks to tied elements.
             The following methods are available (default is 'average'):
-              * 'average': The average of the ranks that would have been assigned to
-                all the tied values is assigned to each value.
-              * 'min': The minimum of the ranks that would have been assigned to all
-                the tied values is assigned to each value.  (This is also
-                referred to as "competition" ranking.)
-              * 'max': The maximum of the ranks that would have been assigned to all
-                the tied values is assigned to each value.
-              * 'dense': Like 'min', but the rank of the next highest element is
-                assigned the rank immediately after those assigned to the tied
-                elements.
-              * 'ordinal': All values are given a distinct rank, corresponding to
-                the order that the values occur in `a`.
-              * 'random': Like 'ordinal', but the rank for ties is not dependent
-                on the order that the values occur in `a`.
+            - 'average': The average of the ranks that would have been assigned to
+            all the tied values is assigned to each value.
+            - 'min': The minimum of the ranks that would have been assigned to all
+            the tied values is assigned to each value.  (This is also
+            referred to as "competition" ranking.)
+            - 'max': The maximum of the ranks that would have been assigned to all
+            the tied values is assigned to each value.
+            - 'dense': Like 'min', but the rank of the next highest element is
+            assigned the rank immediately after those assigned to the tied
+            elements.
+            - 'ordinal': All values are given a distinct rank, corresponding to
+            the order that the values occur in `a`.
+            - 'random': Like 'ordinal', but the rank for ties is not dependent
+            on the order that the values occur in `a`.
         reverse
             reverse the operation
         """
@@ -2444,12 +2444,12 @@ def __init__(self, expr: Expr):
     def buckets(self, every: str, offset: Optional[str] = None) -> Expr:
         """
         .. warning::
-            This API is experimental and will likely change.
+            This API is experimental and may change without it being considered a breaking change.
 
         Divide the date/ datetime range into buckets.
         Data will be sorted by this operation.
 
-        The `every` and `offset` argument are created with the
+        The `every` and `offset` arguments are created with
         the following string language:
 
         1ns # 1 nanosecond

diff --git a/py-polars/polars/internals/frame.py b/py-polars/polars/internals/frame.py
@@ -2259,6 +2259,58 @@ def groupby_dynamic(
         closed: str = "left",
         by: Optional[Union[str, tp.List[str], "pli.Expr", tp.List["pli.Expr"]]] = None,
     ) -> "DynamicGroupBy":
+        """
+        Groups based on a time value. Time windows are calculated and rows are assigned to windows.
+        Unlike a normal groupby, a row can be a member of multiple groups. The time window can
+        be seen as a rolling window, with a window size determined by dates/times instead of slots in the DataFrame.
+
+        A window is defined by:
+        - every: interval of the window
+        - period: length of the window
+        - offset: offset of the window
+
+        The `every`, `period` and `offset` arguments are created with
+        the following string language:
+
+        1ns # 1 nanosecond
+        1us # 1 microsecond
+        1ms # 1 millisecond
+        1s  # 1 second
+        1m  # 1 minute
+        1h  # 1 hour
+        1d  # 1 day
+        1w  # 1 week
+        1mo # 1 calendar month
+        1y  # 1 calendar year
+
+        3d12h4m25s # 3 days, 12 hours, 4 minutes, and 25 seconds
+
+        .. warning::
+            This API is experimental and may change without it being considered a breaking change.
+
+        Parameters
+        ----------
+        time_column
+            Column used to group based on the time window.
+            Often of type Date/Datetime.
+            This column must be sorted; if it is not, the output will not make sense.
+        every
+            interval of the window
+        period
+            length of the window
+        offset
+            offset of the window
+        truncate
+            truncate the time value to the window lower bound
+        include_boundaries
+            add the lower and upper bound of the window to the "_lower_bound" and "_upper_bound" columns
+        closed
+            Defines if the window interval is closed or not.
+            Any of {"left", "right", "both", "none"}
+        by
+            Also group by these column(s)
+        """
+
         return DynamicGroupBy(
             self,
             time_column,
@@ -3616,6 +3668,11 @@ def is_empty(self) -> bool:
 
 
 class DynamicGroupBy:
+    """
+    A dynamic grouper. This has an `.agg` method which will allow you to run all polars expressions
+    in a groupby context.
+    """
+
     def __init__(
         self,
         df: "DataFrame",

diff --git a/py-polars/polars/internals/lazy_frame.py b/py-polars/polars/internals/lazy_frame.py
@@ -26,7 +26,7 @@ def wrap_ldf(ldf: "PyLazyFrame") -> "LazyFrame":
 
 def _prepare_groupby_inputs(
     by: Optional[Union[str, tp.List[str], "pli.Expr", tp.List["pli.Expr"]]],
-) -> tp.List[PyExpr]:
+) -> tp.List["PyExpr"]:
     if isinstance(by, list):
         new_by = []
         for e in by:

diff --git a/py-polars/polars/internals/lazy_functions.py b/py-polars/polars/internals/lazy_functions.py
@@ -40,11 +40,12 @@ def col(
     A column in a DataFrame.
     Can be used to select:
 
-     * a single column by name
-     * all columns by using a wildcard `"*"`
-     * column by regular expression if the regex starts with `^` and ends with `$`
+    - a single column by name
+    - all columns by using a wildcard `"*"`
+    - column by regular expression if the regex starts with `^` and ends with `$`
 
     Parameters
+    ----------
     col
         A string that holds the name of the column
 
@@ -228,8 +229,8 @@ def max(
     column
         Column(s) to be used in aggregation. Will lead to different behavior based on the input.
         input:
-            - Union[str, Series] -> aggregate the maximum value of that column.
-            - tp.List[Expr] -> aggregate the maximum value horizontally.
+        - Union[str, Series] -> aggregate the maximum value of that column.
+        - tp.List[Expr] -> aggregate the maximum value horizontally.
     """
     if isinstance(column, pli.Series):
         return column.max()
@@ -266,8 +267,8 @@ def min(
     column
         Column(s) to be used in aggregation. Will lead to different behavior based on the input.
         input:
-            - Union[str, Series] -> aggregate the sum value of that column.
-            - tp.List[Expr] -> aggregate the sum value horizontally.
+        - Union[str, Series] -> aggregate the minimum value of that column.
+        - tp.List[Expr] -> aggregate the minimum value horizontally.
     """
     if isinstance(column, pli.Series):
         return column.min()
@@ -304,8 +305,8 @@ def sum(
     column
         Column(s) to be used in aggregation. Will lead to different behavior based on the input.
         input:
-            - Union[str, Series] -> aggregate the sum value of that column.
-            - tp.List[Expr] -> aggregate the sum value horizontally.
+        - Union[str, Series] -> aggregate the sum value of that column.
+        - tp.List[Expr] -> aggregate the sum value horizontally.
     """
     if isinstance(column, pli.Series):
         return column.sum()

diff --git a/py-polars/polars/internals/series.py b/py-polars/polars/internals/series.py
@@ -1340,19 +1340,6 @@ def argsort(self, reverse: bool = False) -> "Series":
         """
         return wrap_s(self._s.argsort(reverse))
 
-    def arg_sort(self, reverse: bool = False) -> "Series":
-        """
-        .. deprecated::
-
-        Index location of the sorted variant of this Series.
-
-        Returns
-        -------
-        indexes
-            Indexes that can be used to sort this array.
-        """
-        return wrap_s(self._s.argsort(reverse))
-
     def arg_unique(self) -> "Series":
         """
         Get unique index as Series.
@@ -3002,23 +2989,22 @@ def rank(self, method: str = "average", reverse: bool = False) -> "Series":
             {'average', 'min', 'max', 'dense', 'ordinal', 'random'}, optional
             The method used to assign ranks to tied elements.
             The following methods are available (default is 'average'):
-              * 'average': The average of the ranks that would have been assigned to
-                all the tied values is assigned to each value.
-              * 'min': The minimum of the ranks that would have been assigned to all
-                the tied values is assigned to each value.  (This is also
-                referred to as "competition" ranking.)
-              * 'max': The maximum of the ranks that would have been assigned to all
-                the tied values is assigned to each value.
-              * 'dense': Like 'min', but the rank of the next highest element is
-                assigned the rank immediately after those assigned to the tied
-                elements.
-              * 'ordinal': All values are given a distinct rank, corresponding to
-                the order that the values occur in `a`.
-              * 'random': Like 'ordinal', but the rank for ties is not dependent
-                on the order that the values occur in `a`.
+            - 'average': The average of the ranks that would have been assigned to
+            all the tied values is assigned to each value.
+            - 'min': The minimum of the ranks that would have been assigned to all
+            the tied values is assigned to each value.  (This is also
+            referred to as "competition" ranking.)
+            - 'max': The maximum of the ranks that would have been assigned to all
+            the tied values is assigned to each value.
+            - 'dense': Like 'min', but the rank of the next highest element is
+            assigned the rank immediately after those assigned to the tied
+            elements.
+            - 'ordinal': All values are given a distinct rank, corresponding to
+            the order that the values occur in `a`.
+            - 'random': Like 'ordinal', but the rank for ties is not dependent
+            on the order that the values occur in `a`.
         reverse
             reverse the operation
-
         """
         return wrap_s(self._s.rank(method, reverse))