Skip to content

Commit

Permalink
reduced pyarrow dependency: repeat impl
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 9, 2021
1 parent 60c7a28 commit 10c2aab
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 33 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ def to_parquet(
self,
file: Union[str, Path],
compression: str = "snappy",
use_pyarrow: bool = True,
use_pyarrow: bool = _PYARROW_AVAILABLE,
**kwargs: Any,
) -> None:
"""
Expand Down
9 changes: 4 additions & 5 deletions py-polars/polars/eager/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,10 @@ def _from_pyseries(cls, pyseries: "PySeries") -> "Series":
return series

@classmethod
def _repeat(cls, name: str, val: str, n: int) -> "Series":
"""
Only used for strings.
"""
return cls._from_pyseries(PySeries.repeat(name, val, n))
def _repeat(
cls, name: str, val: Union[int, float, str, bool], n: int, dtype: Type[DataType]
) -> "Series":
return cls._from_pyseries(PySeries.repeat(name, val, n, dtype))

@classmethod
def _from_arrow(cls, name: str, values: "pa.Array") -> "Series":
Expand Down
24 changes: 6 additions & 18 deletions py-polars/polars/functions.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
from typing import Optional, Sequence, Union

try:
import pyarrow as pa

_PYARROW_AVAILABLE = True
except ImportError:
_PYARROW_AVAILABLE = False

import polars as pl

try:
from polars.datatypes import py_type_to_polars_type
from polars.polars import concat_df as _concat_df
from polars.polars import concat_series as _concat_series

Expand Down Expand Up @@ -65,7 +59,7 @@ def concat(


def repeat(
val: Union[int, float, str], n: int, name: Optional[str] = None
val: Union[int, float, str, bool], n: int, name: Optional[str] = None
) -> "pl.Series":
"""
Repeat a single value n times and collect into a Series.
Expand All @@ -81,16 +75,10 @@ def repeat(
"""
if name is None:
name = ""
if isinstance(val, str):
s = pl.Series._repeat(name, val, n)
s.rename(name)
return s
else:
if not _PYARROW_AVAILABLE:
raise ImportError(
"'pyarrow' is required for repeating a int or a float value."
)
return pl.Series._from_arrow(name, pa.repeat(val, n))

dtype = py_type_to_polars_type(type(val))
s = pl.Series._repeat(name, val, n, dtype)
return s


def arg_where(mask: "pl.Series") -> "pl.Series":
Expand Down
3 changes: 0 additions & 3 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,6 @@ def sequence_to_pyseries(
if not _PYARROW_AVAILABLE:
dtype = py_type_to_polars_type(nested_dtype)
return PySeries.new_list(name, values, dtype)
# raise ImportError(
# f"'pyarrow' is required for converting a Sequence of {nested_dtype} to a PySeries."
# )

try:
nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def read_ipc_schema(

def read_ipc(
file: Union[str, BinaryIO, Path, bytes],
use_pyarrow: bool = True,
use_pyarrow: bool = _PYARROW_AVAILABLE,
memory_map: bool = True,
columns: Optional[List[str]] = None,
storage_options: Optional[Dict] = None,
Expand Down Expand Up @@ -484,7 +484,7 @@ def read_ipc(

def read_parquet(
source: Union[str, List[str], Path, BinaryIO, bytes],
use_pyarrow: bool = True,
use_pyarrow: bool = _PYARROW_AVAILABLE,
stop_after_n_rows: Optional[int] = None,
memory_map: bool = True,
columns: Optional[List[str]] = None,
Expand Down
37 changes: 33 additions & 4 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,39 @@ impl PySeries {
}

#[staticmethod]
pub fn repeat(name: &str, val: &str, n: usize) -> Self {
let mut ca: Utf8Chunked = (0..n).map(|_| val).collect_trusted();
ca.rename(name);
ca.into_series().into()
/// Build a series called `name` that holds `val` repeated `n` times.
///
/// The target dtype is resolved by taking the Python string representation
/// of `dtype` and passing it through `str_to_polarstype`. Only `Utf8`,
/// `Int64`, `Float64` and `Boolean` are handled; `val` is extracted as
/// `&str`, `i64`, `f64` or `bool` respectively.
///
/// # Panics
///
/// * if the string representation of `dtype` cannot be obtained,
/// * if `val` cannot be extracted as the Rust type matching the dtype,
/// * if the resolved dtype is not one of the four handled above.
pub fn repeat(name: &str, val: &PyAny, n: usize, dtype: &PyAny) -> Self {
    let dtype_repr = dtype.str().unwrap().to_str().unwrap();

    // Every arm yields a plain `Series`; the conversion into `Self`
    // happens once, after the match.
    let series = match str_to_polarstype(dtype_repr) {
        DataType::Utf8 => {
            let text = val.extract::<&str>().unwrap();
            let mut chunked: Utf8Chunked = (0..n).map(|_| text).collect_trusted();
            chunked.rename(name);
            chunked.into_series()
        }
        DataType::Int64 => {
            let number = val.extract::<i64>().unwrap();
            let mut chunked: NoNull<Int64Chunked> = (0..n).map(|_| number).collect_trusted();
            chunked.rename(name);
            chunked.into_inner().into_series()
        }
        DataType::Float64 => {
            let number = val.extract::<f64>().unwrap();
            let mut chunked: NoNull<Float64Chunked> = (0..n).map(|_| number).collect_trusted();
            chunked.rename(name);
            chunked.into_inner().into_series()
        }
        DataType::Boolean => {
            let flag = val.extract::<bool>().unwrap();
            let mut chunked: BooleanChunked = (0..n).map(|_| flag).collect_trusted();
            chunked.rename(name);
            chunked.into_series()
        }
        unsupported => {
            panic!("cannot create repeat with dtype: {:?}", unsupported);
        }
    };

    series.into()
}

#[staticmethod]
Expand Down
7 changes: 7 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,13 @@ def test_repeat():
s = pl.repeat("foo", 10)
assert s.dtype == pl.Utf8
assert s.len() == 10
s = pl.repeat(1.0, 5)
assert s.dtype == pl.Float64
assert s.len() == 5
assert s == [1.0, 1.0, 1.0, 1.0, 1.0]
s = pl.repeat(True, 5)
assert s.dtype == pl.Boolean
assert s.len() == 5


def test_median():
Expand Down

0 comments on commit 10c2aab

Please sign in to comment.