chore(python): Remove unused private constructors (#15160)
stinodego committed Mar 19, 2024
1 parent bd26ecd commit 2250055
Showing 4 changed files with 66 additions and 151 deletions.
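Reviewer note: the change is mechanical. Each public pl.from_* constructor used to delegate to a private classmethod (DataFrame._from_dict, Series._from_pandas, and so on), which in turn called a construction helper; the classmethods had no remaining callers, so the public functions now call the helpers directly and wrap the result with wrap_df / wrap_s. A minimal sketch of the unchanged public behavior (an illustration, not code from this commit):

import polars as pl

# Public constructors behave exactly as before; only the internal call
# chain changed (pl.from_dict no longer routes through
# pl.DataFrame._from_dict, but straight to wrap_df(dict_to_pydf(...))).
df = pl.from_dict({"a": [1, 2], "b": [3, 4]})
assert df.shape == (2, 2)

# External code that reached into the removed private classmethods
# should switch to the public functions, for example:
#   pl.DataFrame._from_dict(data)  ->  pl.from_dict(data)
#   pl.Series._from_pandas("", s)  ->  pl.from_pandas(s)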
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/construction/dataframe.py
@@ -1122,7 +1122,7 @@ def numpy_to_pydf(
     strict: bool = True,
     nan_to_null: bool = False,
 ) -> PyDataFrame:
-    """Construct a PyDataFrame from a numpy ndarray (including structured ndarrays)."""
+    """Construct a PyDataFrame from a NumPy ndarray (including structured ndarrays)."""
     shape = data.shape
     two_d = len(shape) == 2
 
106 changes: 65 additions & 41 deletions py-polars/polars/convert.py
@@ -1,14 +1,23 @@
 from __future__ import annotations
 
 import io
+import itertools
 import re
-from itertools import chain, zip_longest
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence, overload
 
 import polars._reexport as pl
 from polars import functions as F
+from polars._utils.construction.dataframe import (
+    arrow_to_pydf,
+    dict_to_pydf,
+    numpy_to_pydf,
+    pandas_to_pydf,
+    sequence_to_pydf,
+)
+from polars._utils.construction.series import arrow_to_pyseries, pandas_to_pyseries
 from polars._utils.deprecation import deprecate_renamed_parameter
 from polars._utils.various import _cast_repr_strings_with_schema
+from polars._utils.wrap import wrap_df, wrap_s
 from polars.datatypes import N_INFER_DEFAULT, Categorical, List, Object, String, Struct
 from polars.dependencies import pandas as pd
 from polars.dependencies import pyarrow as pa
@@ -76,8 +85,13 @@ def from_dict(
     │ 2   ┆ 4   │
     └─────┴─────┘
     """
-    return pl.DataFrame._from_dict(
-        data, schema=schema, schema_overrides=schema_overrides, strict=strict
+    return wrap_df(
+        dict_to_pydf(
+            data,
+            schema=schema,
+            schema_overrides=schema_overrides,
+            strict=strict,
+        )
     )
 
 
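The behavior of pl.from_dict is unchanged by the rewrite above; a quick round-trip check (a sketch, not part of the commit's tests):

>>> import polars as pl
>>> pl.from_dict({"a": [1, 2], "b": [3, 4]}).to_dict(as_series=False)
{'a': [1, 2], 'b': [3, 4]}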
@@ -254,13 +268,15 @@ def from_records(
     │ 3   ┆ 6   │
     └─────┴─────┘
     """
-    return pl.DataFrame._from_records(
-        data,
-        schema=schema,
-        schema_overrides=schema_overrides,
-        strict=strict,
-        orient=orient,
-        infer_schema_length=infer_schema_length,
+    return wrap_df(
+        sequence_to_pydf(
+            data,
+            schema=schema,
+            schema_overrides=schema_overrides,
+            strict=strict,
+            orient=orient,
+            infer_schema_length=infer_schema_length,
+        )
     )
 
 
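As with from_dict, only the internal call path of from_records changes; row-oriented input still works as documented (a sketch, not from the commit):

>>> pl.from_records([(1, "x"), (2, "y")], schema=["id", "label"], orient="row").rows()
[(1, 'x'), (2, 'y')]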
@@ -272,14 +288,14 @@ def from_numpy(
     orient: Orientation | None = None,
 ) -> DataFrame:
     """
-    Construct a DataFrame from a numpy ndarray. This operation clones data.
+    Construct a DataFrame from a NumPy ndarray. This operation clones data.
 
     Note that this is slower than creating from columnar memory.
 
     Parameters
     ----------
     data : :class:`numpy.ndarray`
-        Two-dimensional data represented as a numpy ndarray.
+        Two-dimensional data represented as a NumPy ndarray.
     schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
         The DataFrame schema may be declared in several ways:
 
@@ -319,8 +335,10 @@ def from_numpy(
     │ 3   ┆ 6   │
     └─────┴─────┘
     """
-    return pl.DataFrame._from_numpy(
-        data, schema=schema, orient=orient, schema_overrides=schema_overrides
+    return wrap_df(
+        numpy_to_pydf(
+            data, schema=schema, orient=orient, schema_overrides=schema_overrides
+        )
     )
 
 
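The orient parameter documented above decides whether the ndarray's first axis is read as rows or as columns; a sketch of both readings of the same array (not from the commit):

>>> import numpy as np
>>> arr = np.array([[1, 2], [3, 4]])
>>> pl.from_numpy(arr, schema=["a", "b"], orient="row").rows()
[(1, 2), (3, 4)]
>>> pl.from_numpy(arr, schema=["a", "b"], orient="col").rows()
[(1, 3), (2, 4)]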
@@ -378,8 +396,7 @@ def from_arrow(
 
     >>> import pyarrow as pa
     >>> data = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
-    >>> df = pl.from_arrow(data)
-    >>> df
+    >>> pl.from_arrow(data)
     shape: (3, 2)
     ┌─────┬─────┐
     │ a   ┆ b   │
@@ -395,8 +412,7 @@
 
     >>> import pyarrow as pa
     >>> data = pa.array([1, 2, 3])
-    >>> series = pl.from_arrow(data, schema={"s": pl.Int32})
-    >>> series
+    >>> pl.from_arrow(data, schema={"s": pl.Int32})
     shape: (3,)
     Series: 's' [i32]
     [
@@ -406,16 +422,19 @@
     ]
     """  # noqa: W505
     if isinstance(data, (pa.Table, pa.RecordBatch)):
-        return pl.DataFrame._from_arrow(
-            data=data,
-            rechunk=rechunk,
-            schema=schema,
-            schema_overrides=schema_overrides,
+        return wrap_df(
+            arrow_to_pydf(
+                data=data,
+                rechunk=rechunk,
+                schema=schema,
+                schema_overrides=schema_overrides,
+            )
         )
     elif isinstance(data, (pa.Array, pa.ChunkedArray)):
         name = getattr(data, "_name", "") or ""
+        s = wrap_s(arrow_to_pyseries(name, data, rechunk=rechunk))
         s = pl.DataFrame(
-            data=pl.Series._from_arrow(name, data, rechunk=rechunk),
+            data=s,
             schema=schema,
             schema_overrides=schema_overrides,
         ).to_series()
@@ -427,15 +446,18 @@ def from_arrow(
         )
 
     if isinstance(data, Iterable):
-        return pl.DataFrame._from_arrow(
-            data=pa.Table.from_batches(
-                chain.from_iterable(
-                    (b.to_batches() if isinstance(b, pa.Table) else [b]) for b in data
-                )
-            ),
-            rechunk=rechunk,
-            schema=schema,
-            schema_overrides=schema_overrides,
+        pa_table = pa.Table.from_batches(
+            itertools.chain.from_iterable(
+                (b.to_batches() if isinstance(b, pa.Table) else [b]) for b in data
+            )
+        )
+        return wrap_df(
+            arrow_to_pydf(
+                data=pa_table,
+                rechunk=rechunk,
+                schema=schema,
+                schema_overrides=schema_overrides,
+            )
         )
 
     msg = f"expected PyArrow Table, Array, or one or more RecordBatches; got {type(data).__name__!r}"
@@ -535,14 +557,16 @@ def from_pandas(
     ]
     """
     if isinstance(data, (pd.Series, pd.Index, pd.DatetimeIndex)):
-        return pl.Series._from_pandas("", data, nan_to_null=nan_to_null)
+        return wrap_s(pandas_to_pyseries("", data, nan_to_null=nan_to_null))
     elif isinstance(data, pd.DataFrame):
-        return pl.DataFrame._from_pandas(
-            data,
-            rechunk=rechunk,
-            nan_to_null=nan_to_null,
-            schema_overrides=schema_overrides,
-            include_index=include_index,
+        return wrap_df(
+            pandas_to_pydf(
+                data,
+                schema_overrides=schema_overrides,
+                rechunk=rechunk,
+                nan_to_null=nan_to_null,
+                include_index=include_index,
+            )
         )
     else:
         msg = f"expected pandas DataFrame or Series, got {type(data).__name__!r}"
@@ -666,7 +690,7 @@ def _from_dataframe_repr(m: re.Match[str]) -> DataFrame:
         headers = [h[0] for h in header_block]
         dtypes = [None] * len(headers)
     else:
-        headers, dtypes = (list(h) for h in zip_longest(*header_block))
+        headers, dtypes = (list(h) for h in itertools.zip_longest(*header_block))
 
     body = rows[table_body_start + 1 :]
     no_dtypes = all(d is None for d in dtypes)
91 changes: 0 additions & 91 deletions py-polars/polars/dataframe/frame.py
@@ -441,97 +441,6 @@ def _from_pydf(cls, py_df: PyDataFrame) -> Self:
         df._df = py_df
         return df
 
-    @classmethod
-    def _from_dict(
-        cls,
-        data: Mapping[str, Sequence[object] | Mapping[str, Sequence[object]] | Series],
-        schema: SchemaDefinition | None = None,
-        *,
-        schema_overrides: SchemaDict | None = None,
-        strict: bool = True,
-    ) -> Self:
-        """
-        Construct a DataFrame from a dictionary of sequences.
-
-        See Also
-        --------
-        polars.convert.from_dict
-        """
-        return cls._from_pydf(
-            dict_to_pydf(
-                data, schema=schema, schema_overrides=schema_overrides, strict=strict
-            )
-        )
-
-    @classmethod
-    def _from_records(
-        cls,
-        data: Sequence[Any],
-        schema: SchemaDefinition | None = None,
-        *,
-        schema_overrides: SchemaDict | None = None,
-        strict: bool = True,
-        orient: Orientation | None = None,
-        infer_schema_length: int | None = N_INFER_DEFAULT,
-    ) -> Self:
-        """
-        Construct a DataFrame from a sequence of sequences.
-
-        See Also
-        --------
-        polars.convert.from_records
-        """
-        return cls._from_pydf(
-            sequence_to_pydf(
-                data,
-                schema=schema,
-                schema_overrides=schema_overrides,
-                strict=strict,
-                orient=orient,
-                infer_schema_length=infer_schema_length,
-            )
-        )
-
-    @classmethod
-    def _from_numpy(
-        cls,
-        data: np.ndarray[Any, Any],
-        schema: SchemaDefinition | None = None,
-        *,
-        schema_overrides: SchemaDict | None = None,
-        orient: Orientation | None = None,
-    ) -> Self:
-        """
-        Construct a DataFrame from a numpy ndarray.
-
-        Parameters
-        ----------
-        data : numpy ndarray
-            Two-dimensional data represented as a numpy ndarray.
-        schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
-            The DataFrame schema may be declared in several ways:
-
-            * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
-            * As a list of column names; in this case types are automatically inferred.
-            * As a list of (name,type) pairs; this is equivalent to the dictionary form.
-
-            If you supply a list of column names that does not match the names in the
-            underlying data, the names given here will overwrite them. The number
-            of names given in the schema should match the underlying data dimensions.
-
-        schema_overrides : dict, default None
-            Support type specification or override of one or more columns; note that
-            any dtypes inferred from the columns param will be overridden.
-        orient : {'col', 'row'}, default None
-            Whether to interpret two-dimensional data as columns or as rows. If None,
-            the orientation is inferred by matching the columns and data dimensions. If
-            this does not yield conclusive results, column orientation is used.
-        """
-        return cls._from_pydf(
-            numpy_to_pydf(
-                data, schema=schema, schema_overrides=schema_overrides, orient=orient
-            )
-        )
-
     @classmethod
     def _from_arrow(
         cls,
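Note that _from_pydf (the context lines at the top of this hunk) is the one private DataFrame constructor that stays: it is the single bridge from the Rust-backed PyDataFrame to the Python DataFrame, and wrap_df is essentially its module-level counterpart. A sketch of the equivalence, assuming the private internals exactly as shown earlier in this diff:

# Sketch only; these are private polars internals as of this commit.
from polars._utils.construction.dataframe import dict_to_pydf
from polars._utils.wrap import wrap_df

py_df = dict_to_pydf({"a": [1, 2]}, schema=None, schema_overrides=None, strict=True)
df = wrap_df(py_df)  # equivalent to pl.DataFrame._from_pydf(py_df)
assert df.columns == ["a"]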
18 changes: 0 additions & 18 deletions py-polars/polars/series/series.py
@@ -377,11 +377,6 @@ def _from_pyseries(cls, pyseries: PySeries) -> Self:
         series._s = pyseries
         return series
 
-    @classmethod
-    def _from_arrow(cls, name: str, values: pa.Array, *, rechunk: bool = True) -> Self:
-        """Construct a Series from an Arrow Array."""
-        return cls._from_pyseries(arrow_to_pyseries(name, values, rechunk=rechunk))
-
     @classmethod
     def _import_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self:
         """
@@ -394,19 +389,6 @@ def _import_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self:
         """
         return cls._from_pyseries(PySeries._import_from_c(name, pointers))
 
-    @classmethod
-    def _from_pandas(
-        cls,
-        name: str,
-        values: pd.Series[Any] | pd.Index[Any] | pd.DatetimeIndex,
-        *,
-        nan_to_null: bool = True,
-    ) -> Self:
-        """Construct a Series from a pandas Series or DatetimeIndex."""
-        return cls._from_pyseries(
-            pandas_to_pyseries(name, values, nan_to_null=nan_to_null)
-        )
-
     def _get_buffer_info(self) -> BufferInfo:
         """
         Return pointer, offset, and length information about the underlying buffer.
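With Series._from_arrow and Series._from_pandas removed, the public pl.from_arrow and pl.from_pandas entry points (which now call arrow_to_pyseries / pandas_to_pyseries directly) are the supported way in; a sketch (not from the commit):

>>> import pyarrow as pa
>>> s = pl.from_arrow(pa.array([1, 2, 3]))  # replaces pl.Series._from_arrow
>>> s.name, s.dtype
('', Int64)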
