From 225005578d8c082771ff9dbde33a34db09f58c3f Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 13:43:19 +0100 Subject: [PATCH] chore(python): Remove unused private constructors (#15160) --- .../polars/_utils/construction/dataframe.py | 2 +- py-polars/polars/convert.py | 106 +++++++++++------- py-polars/polars/dataframe/frame.py | 91 --------------- py-polars/polars/series/series.py | 18 --- 4 files changed, 66 insertions(+), 151 deletions(-) diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py index 58db91397cc3..f727d1543546 100644 --- a/py-polars/polars/_utils/construction/dataframe.py +++ b/py-polars/polars/_utils/construction/dataframe.py @@ -1122,7 +1122,7 @@ def numpy_to_pydf( strict: bool = True, nan_to_null: bool = False, ) -> PyDataFrame: - """Construct a PyDataFrame from a numpy ndarray (including structured ndarrays).""" + """Construct a PyDataFrame from a NumPy ndarray (including structured ndarrays).""" shape = data.shape two_d = len(shape) == 2 diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py index add3e0bc015e..7b4dc139c616 100644 --- a/py-polars/polars/convert.py +++ b/py-polars/polars/convert.py @@ -1,14 +1,23 @@ from __future__ import annotations import io +import itertools import re -from itertools import chain, zip_longest from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence, overload import polars._reexport as pl from polars import functions as F +from polars._utils.construction.dataframe import ( + arrow_to_pydf, + dict_to_pydf, + numpy_to_pydf, + pandas_to_pydf, + sequence_to_pydf, +) +from polars._utils.construction.series import arrow_to_pyseries, pandas_to_pyseries from polars._utils.deprecation import deprecate_renamed_parameter from polars._utils.various import _cast_repr_strings_with_schema +from polars._utils.wrap import wrap_df, wrap_s from polars.datatypes import N_INFER_DEFAULT, Categorical, List, Object, String, Struct from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa @@ -76,8 +85,13 @@ def from_dict( │ 2 ┆ 4 │ └─────┴─────┘ """ - return pl.DataFrame._from_dict( - data, schema=schema, schema_overrides=schema_overrides, strict=strict + return wrap_df( + dict_to_pydf( + data, + schema=schema, + schema_overrides=schema_overrides, + strict=strict, + ) ) @@ -254,13 +268,15 @@ def from_records( │ 3 ┆ 6 │ └─────┴─────┘ """ - return pl.DataFrame._from_records( - data, - schema=schema, - schema_overrides=schema_overrides, - strict=strict, - orient=orient, - infer_schema_length=infer_schema_length, + return wrap_df( + sequence_to_pydf( + data, + schema=schema, + schema_overrides=schema_overrides, + strict=strict, + orient=orient, + infer_schema_length=infer_schema_length, + ) ) @@ -272,14 +288,14 @@ def from_numpy( orient: Orientation | None = None, ) -> DataFrame: """ - Construct a DataFrame from a numpy ndarray. This operation clones data. + Construct a DataFrame from a NumPy ndarray. This operation clones data. Note that this is slower than creating from columnar memory. Parameters ---------- data : :class:`numpy.ndarray` - Two-dimensional data represented as a numpy ndarray. + Two-dimensional data represented as a NumPy ndarray. schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict The DataFrame schema may be declared in several ways: @@ -319,8 +335,10 @@ def from_numpy( │ 3 ┆ 6 │ └─────┴─────┘ """ - return pl.DataFrame._from_numpy( - data, schema=schema, orient=orient, schema_overrides=schema_overrides + return wrap_df( + numpy_to_pydf( + data, schema=schema, orient=orient, schema_overrides=schema_overrides + ) ) @@ -378,8 +396,7 @@ def from_arrow( >>> import pyarrow as pa >>> data = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> df = pl.from_arrow(data) - >>> df + >>> pl.from_arrow(data) shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -395,8 +412,7 @@ def from_arrow( >>> import pyarrow as pa >>> data = pa.array([1, 2, 3]) - >>> series = pl.from_arrow(data, schema={"s": pl.Int32}) - >>> series + >>> pl.from_arrow(data, schema={"s": pl.Int32}) shape: (3,) Series: 's' [i32] [ @@ -406,16 +422,19 @@ def from_arrow( ] """ # noqa: W505 if isinstance(data, (pa.Table, pa.RecordBatch)): - return pl.DataFrame._from_arrow( - data=data, - rechunk=rechunk, - schema=schema, - schema_overrides=schema_overrides, + return wrap_df( + arrow_to_pydf( + data=data, + rechunk=rechunk, + schema=schema, + schema_overrides=schema_overrides, + ) ) elif isinstance(data, (pa.Array, pa.ChunkedArray)): name = getattr(data, "_name", "") or "" + s = wrap_s(arrow_to_pyseries(name, data, rechunk=rechunk)) s = pl.DataFrame( - data=pl.Series._from_arrow(name, data, rechunk=rechunk), + data=s, schema=schema, schema_overrides=schema_overrides, ).to_series() @@ -427,15 +446,18 @@ def from_arrow( ) if isinstance(data, Iterable): - return pl.DataFrame._from_arrow( - data=pa.Table.from_batches( - chain.from_iterable( - (b.to_batches() if isinstance(b, pa.Table) else [b]) for b in data - ) - ), - rechunk=rechunk, - schema=schema, - schema_overrides=schema_overrides, + pa_table = pa.Table.from_batches( + itertools.chain.from_iterable( + (b.to_batches() if isinstance(b, pa.Table) else [b]) for b in data + ) + ) + return wrap_df( + arrow_to_pydf( + data=pa_table, + rechunk=rechunk, + schema=schema, + schema_overrides=schema_overrides, + ) ) msg = f"expected PyArrow Table, Array, or one or more RecordBatches; got {type(data).__name__!r}" @@ -535,14 +557,16 @@ def from_pandas( ] """ if isinstance(data, (pd.Series, pd.Index, pd.DatetimeIndex)): - return pl.Series._from_pandas("", data, nan_to_null=nan_to_null) + return wrap_s(pandas_to_pyseries("", data, nan_to_null=nan_to_null)) elif isinstance(data, pd.DataFrame): - return pl.DataFrame._from_pandas( - data, - rechunk=rechunk, - nan_to_null=nan_to_null, - schema_overrides=schema_overrides, - include_index=include_index, + return wrap_df( + pandas_to_pydf( + data, + schema_overrides=schema_overrides, + rechunk=rechunk, + nan_to_null=nan_to_null, + include_index=include_index, + ) ) else: msg = f"expected pandas DataFrame or Series, got {type(data).__name__!r}" @@ -666,7 +690,7 @@ def _from_dataframe_repr(m: re.Match[str]) -> DataFrame: headers = [h[0] for h in header_block] dtypes = [None] * len(headers) else: - headers, dtypes = (list(h) for h in zip_longest(*header_block)) + headers, dtypes = (list(h) for h in itertools.zip_longest(*header_block)) body = rows[table_body_start + 1 :] no_dtypes = all(d is None for d in dtypes) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index a90ab5e06bb1..b1cdd0a244c7 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -441,97 +441,6 @@ def _from_pydf(cls, py_df: PyDataFrame) -> Self: df._df = py_df return df - @classmethod - def _from_dict( - cls, - data: Mapping[str, Sequence[object] | Mapping[str, Sequence[object]] | Series], - schema: SchemaDefinition | None = None, - *, - schema_overrides: SchemaDict | None = None, - strict: bool = True, - ) -> Self: - """ - Construct a DataFrame from a dictionary of sequences. - - See Also - -------- - polars.convert.from_dict - """ - return cls._from_pydf( - dict_to_pydf( - data, schema=schema, schema_overrides=schema_overrides, strict=strict - ) - ) - - @classmethod - def _from_records( - cls, - data: Sequence[Any], - schema: SchemaDefinition | None = None, - *, - schema_overrides: SchemaDict | None = None, - strict: bool = True, - orient: Orientation | None = None, - infer_schema_length: int | None = N_INFER_DEFAULT, - ) -> Self: - """ - Construct a DataFrame from a sequence of sequences. - - See Also - -------- - polars.convert.from_records - """ - return cls._from_pydf( - sequence_to_pydf( - data, - schema=schema, - schema_overrides=schema_overrides, - strict=strict, - orient=orient, - infer_schema_length=infer_schema_length, - ) - ) - - @classmethod - def _from_numpy( - cls, - data: np.ndarray[Any, Any], - schema: SchemaDefinition | None = None, - *, - schema_overrides: SchemaDict | None = None, - orient: Orientation | None = None, - ) -> Self: - """ - Construct a DataFrame from a numpy ndarray. - - Parameters - ---------- - data : numpy ndarray - Two-dimensional data represented as a numpy ndarray. - schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict - The DataFrame schema may be declared in several ways: - - * As a dict of {name:type} pairs; if type is None, it will be auto-inferred. - * As a list of column names; in this case types are automatically inferred. - * As a list of (name,type) pairs; this is equivalent to the dictionary form. - - If you supply a list of column names that does not match the names in the - underlying data, the names given here will overwrite them. The number - of names given in the schema should match the underlying data dimensions. - schema_overrides : dict, default None - Support type specification or override of one or more columns; note that - any dtypes inferred from the columns param will be overridden. - orient : {'col', 'row'}, default None - Whether to interpret two-dimensional data as columns or as rows. If None, - the orientation is inferred by matching the columns and data dimensions. If - this does not yield conclusive results, column orientation is used. - """ - return cls._from_pydf( - numpy_to_pydf( - data, schema=schema, schema_overrides=schema_overrides, orient=orient - ) - ) - @classmethod def _from_arrow( cls, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 7a0328bcba2c..39877d4dc341 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -377,11 +377,6 @@ def _from_pyseries(cls, pyseries: PySeries) -> Self: series._s = pyseries return series - @classmethod - def _from_arrow(cls, name: str, values: pa.Array, *, rechunk: bool = True) -> Self: - """Construct a Series from an Arrow Array.""" - return cls._from_pyseries(arrow_to_pyseries(name, values, rechunk=rechunk)) - @classmethod def _import_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self: """ @@ -394,19 +389,6 @@ def _import_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self: """ return cls._from_pyseries(PySeries._import_from_c(name, pointers)) - @classmethod - def _from_pandas( - cls, - name: str, - values: pd.Series[Any] | pd.Index[Any] | pd.DatetimeIndex, - *, - nan_to_null: bool = True, - ) -> Self: - """Construct a Series from a pandas Series or DatetimeIndex.""" - return cls._from_pyseries( - pandas_to_pyseries(name, values, nan_to_null=nan_to_null) - ) - def _get_buffer_info(self) -> BufferInfo: """ Return pointer, offset, and length information about the underlying buffer.