Skip to content

Commit

Permalink
(Python) Refactor Series constructor (#1032)
Browse files Browse the repository at this point in the history
- Moved Series construction logic to construction.py.
- Added some type conversion utils to datatypes.py for use in Series construction
- Deprecated Series.from_arrow. Usage of pl.from_arrow is encouraged, which calls Series._from_arrow.
  • Loading branch information
stinodego authored and ritchie46 committed Jul 25, 2021
1 parent 7c4e0e1 commit 8b494da
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 172 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def from_arrow(
if isinstance(a, pa.Table):
return pl.DataFrame.from_arrow(a, rechunk)
elif isinstance(a, pa.Array):
return pl.Series.from_arrow("", a)
return pl.Series._from_arrow("", a)
else:
raise ValueError(f"expected arrow table / array, got {a}")

Expand Down
104 changes: 103 additions & 1 deletion py-polars/polars/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
import ctypes
import typing as tp
from typing import Any, Dict, Type
from typing import Any, Callable, Dict, Sequence, Type

import numpy as np
import pyarrow as pa
from _ctypes import _SimpleCData

try:
from polars.polars import PySeries
except ImportError:
import warnings

warnings.warn("binary files missing")

__pdoc__ = {
"dtype_to_ctype": False,
"dtype_to_int": False,
Expand Down Expand Up @@ -271,3 +280,96 @@ def pytype_to_polars_type(data_type: Type[Any]) -> Type[DataType]:
else:
polars_type = data_type
return polars_type


# Lookup table: Polars dtype -> PySeries constructor used to build a Series of
# that dtype. Consumed by `polars_type_to_constructor` below.
_POLARS_TYPE_TO_CONSTRUCTOR = {
    Float32: PySeries.new_opt_f32,
    Float64: PySeries.new_opt_f64,
    Int8: PySeries.new_opt_i8,
    Int16: PySeries.new_opt_i16,
    Int32: PySeries.new_opt_i32,
    Int64: PySeries.new_opt_i64,
    UInt8: PySeries.new_opt_u8,
    UInt16: PySeries.new_opt_u16,
    UInt32: PySeries.new_opt_u32,
    UInt64: PySeries.new_opt_u64,
    Date32: PySeries.new_opt_i32,
    # Date64 needs the 64-bit constructor: the 32-bit one cannot hold typical
    # Date64 values.
    # NOTE(review): assumes Date64 follows Arrow semantics (milliseconds since
    # epoch stored as a 64-bit integer) -- confirm against the Rust side.
    Date64: PySeries.new_opt_i64,
    Boolean: PySeries.new_opt_bool,
    Utf8: PySeries.new_str,
    Object: PySeries.new_object,
}


def polars_type_to_constructor(
    dtype: Type[DataType],
) -> Callable[[str, Sequence[Any]], "PySeries"]:
    """
    Get the right PySeries constructor for the given Polars dtype.

    Parameters
    ----------
    dtype
        Polars dtype to look up in `_POLARS_TYPE_TO_CONSTRUCTOR`.

    Returns
    -------
    The PySeries constructor registered for `dtype`.

    Raises
    ------
    ValueError
        If no constructor is registered for `dtype`.
    """
    try:
        return _POLARS_TYPE_TO_CONSTRUCTOR[dtype]
    except KeyError:
        # The KeyError is an implementation detail of the table lookup; hide it
        # so callers only see the descriptive ValueError.
        raise ValueError(f"Cannot construct PySeries for type {dtype}.") from None


# Lookup table: numpy scalar type -> PySeries constructor. Consumed by
# `numpy_type_to_constructor` below.
_NUMPY_TYPE_TO_CONSTRUCTOR = {
    np.float32: PySeries.new_f32,
    np.float64: PySeries.new_f64,
    np.int8: PySeries.new_i8,
    np.int16: PySeries.new_i16,
    np.int32: PySeries.new_i32,
    np.int64: PySeries.new_i64,
    np.uint8: PySeries.new_u8,
    np.uint16: PySeries.new_u16,
    np.uint32: PySeries.new_u32,
    np.uint64: PySeries.new_u64,
    np.str_: PySeries.new_str,
    bool: PySeries.new_bool,
    # Boolean numpy arrays report `np.bool_` (not the builtin `bool`) as their
    # scalar type, so register it as well; otherwise they would silently fall
    # back to the object constructor.
    np.bool_: PySeries.new_bool,
}


def numpy_type_to_constructor(dtype: Type[np.dtype]) -> Callable[..., "PySeries"]:
    """
    Get the right PySeries constructor for the given numpy dtype.

    Parameters
    ----------
    dtype
        Type to look up; the table is keyed by numpy scalar types such as
        `np.float32` (plus the builtin `bool`).

    Returns
    -------
    The PySeries constructor registered for `dtype`, or
    `PySeries.new_object` when `dtype` is not in the table.
    """
    try:
        return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype]
    except KeyError:
        # Unknown types are not an error here: fall back to the object
        # constructor.
        return PySeries.new_object


# Lookup table: builtin Python type -> PySeries constructor. Consumed by
# `py_type_to_constructor` below.
_PY_TYPE_TO_CONSTRUCTOR = {
    float: PySeries.new_opt_f64,
    int: PySeries.new_opt_i64,
    str: PySeries.new_str,
    bool: PySeries.new_opt_bool,
}


def py_type_to_constructor(dtype: Type[Any]) -> Callable[..., "PySeries"]:
    """
    Pick the PySeries constructor that matches a Python type.

    Parameters
    ----------
    dtype
        Python type to look up in `_PY_TYPE_TO_CONSTRUCTOR`.

    Returns
    -------
    The constructor registered for `dtype`, or `PySeries.new_object` when
    the type is not in the table.
    """
    # Types without a dedicated constructor fall back to the object one.
    return _PY_TYPE_TO_CONSTRUCTOR.get(dtype, PySeries.new_object)


# Lookup table: builtin Python type -> Arrow data type instance. Consumed by
# `py_type_to_arrow_type` below.
_PY_TYPE_TO_ARROW_TYPE = {
    float: pa.float64(),
    int: pa.int64(),
    str: pa.large_utf8(),
    bool: pa.bool_(),
}


def py_type_to_arrow_type(dtype: Type[Any]) -> pa.lib.DataType:
    """
    Translate a Python type into its Arrow data type.

    Parameters
    ----------
    dtype
        Python type to look up in `_PY_TYPE_TO_ARROW_TYPE`.

    Returns
    -------
    The Arrow data type registered for `dtype`.

    Raises
    ------
    ValueError
        If `dtype` has no Arrow counterpart in the table.
    """
    if dtype not in _PY_TYPE_TO_ARROW_TYPE:
        raise ValueError(f"Cannot parse dtype {dtype} into arrow dtype.")
    return _PY_TYPE_TO_ARROW_TYPE[dtype]
14 changes: 11 additions & 3 deletions py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def _from_dict(
-------
DataFrame
"""
return cls(data, columns=columns, nullable=nullable)
return cls._from_pydf(dict_to_pydf(data, columns=columns, nullable=nullable))

@classmethod
def _from_records(
Expand Down Expand Up @@ -294,7 +294,15 @@ def _from_records(
-------
DataFrame
"""
return cls(data, columns=columns, orient=orient, nullable=nullable)
if isinstance(data, np.ndarray):
pydf = numpy_to_pydf(
data, columns=columns, orient=orient, nullable=nullable
)
else:
pydf = sequence_to_pydf(
data, columns=columns, orient=orient, nullable=nullable
)
return cls._from_pydf(pydf)

@classmethod
def from_arrow(cls, table: pa.Table, rechunk: bool = True) -> "DataFrame":
Expand Down Expand Up @@ -373,7 +381,7 @@ def _from_pandas(
╰─────┴─────┴─────╯
```
"""
return cls(data, columns=columns, nullable=nullable)
return cls._from_pydf(pandas_to_pydf(data, columns=columns, nullable=nullable))

@classmethod
def from_rows(
Expand Down

0 comments on commit 8b494da

Please sign in to comment.