Skip to content

Commit

Permalink
feat[python]: upcast primitive types on Series construction (#4498)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 19, 2022
1 parent 5dc495c commit a98e0ba
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 25 deletions.
30 changes: 29 additions & 1 deletion py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,35 @@ def sequence_to_pyseries(
except RuntimeError:
return sequence_from_anyvalue_or_object(name, values)

return constructor(name, values, strict)
while True:
try:
return constructor(name, values, strict)
except TypeError as error:
str_val = str(error)

# from x to float
# error message can be:
# - integers: "'float' object cannot be interpreted as an integer"
if "'float'" in str_val:
constructor = py_type_to_constructor(float)

# from x to string
# error message can be:
# - integers: "'str' object cannot be interpreted as an integer"
# - floats: "must be real number, not str"
elif (
"'str'" in str_val or str_val == "must be real number, not str"
):
constructor = py_type_to_constructor(str)

# from x to int
# error message can be:
# - bools: "'int' object cannot be converted to 'PyBool'"
elif str_val == "'int' object cannot be converted to 'PyBool'":
constructor = py_type_to_constructor(int)

else:
raise error


def _pandas_series_to_arrow(
Expand Down
57 changes: 33 additions & 24 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,37 +150,46 @@ impl PySeries {
}
}

/// Builds a numeric `PySeries` called `name` from the Python sequence `obj`.
///
/// Each element is handled individually:
/// - Python `None` is stored as a null.
/// - Anything else is extracted as `T::Native`. When extraction fails, the
///   extraction error is returned if `strict` is set; otherwise a null is
///   stored in its place.
///
/// # Errors
///
/// Propagates errors from `get_pyseq`, from creating or advancing the Python
/// iterator, and (in strict mode only) from a failed element extraction.
fn new_primitive<'a, T>(name: &str, obj: &'a PyAny, strict: bool) -> PyResult<PySeries>
where
    T: PolarsNumericType,
    ChunkedArray<T>: IntoSeries,
    T::Native: FromPyObject<'a>,
{
    // `len` is handed to the builder constructor (presumably as a capacity hint).
    let (seq, len) = get_pyseq(obj)?;
    let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);

    for res in seq.iter()? {
        let item = res?;

        // `None` never goes through extraction; it is always a null slot.
        if item.is_none() {
            builder.append_null();
            continue;
        }

        match item.extract::<T::Native>() {
            Ok(value) => builder.append_value(value),
            // Strict mode surfaces the first element that does not fit `T::Native`.
            Err(err) if strict => return Err(err),
            // Lenient mode keeps going and records the element as missing.
            Err(_) => builder.append_null(),
        }
    }

    Ok(PySeries {
        series: builder.finish().into_series(),
    })
}

// Init with lists that can contain Nones
macro_rules! init_method_opt {
($name:ident, $type:ty, $native: ty) => {
#[pymethods]
impl PySeries {
#[staticmethod]
pub fn $name(name: &str, obj: &PyAny, strict: bool) -> PyResult<PySeries> {
let (seq, len) = get_pyseq(obj)?;
let mut builder = PrimitiveChunkedBuilder::<$type>::new(name, len);

for res in seq.iter()? {
let item = res?;

if item.is_none() {
builder.append_null()
} else {
match item.extract::<$native>() {
Ok(val) => builder.append_value(val),
Err(e) => {
if strict {
return Err(e);
}
builder.append_null()
}
}
}
}
let ca = builder.finish();

let s = ca.into_series();
Ok(PySeries { series: s })
new_primitive::<$type>(name, obj, strict)
}
}
};
Expand Down
13 changes: 13 additions & 0 deletions py-polars/tests/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,16 @@ def test_from_dicts_list_struct_without_inner_dtype() -> None:
],
"days_of_week": [1, 2],
}


def test_upcast_primitive_and_strings() -> None:
    """Check the dtype reported for Series/frames built from mixed Python values."""
    # Each entry pairs the input values with the dtype the Series must report.
    series_cases = [
        ([1, 1.0, 1], pl.Float64),
        ([1, 1, "1.0"], pl.Utf8),
        ([1, 1.0, "1.0"], pl.Utf8),
        ([True, 1], pl.Int64),
        ([True, 1.0], pl.Float64),
        ([True, "1.0"], pl.Utf8),
    ]
    for values, expected_dtype in series_cases:
        assert pl.Series(values).dtype == expected_dtype

    # The same upcasting must apply per column when building from a dict.
    frame = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]})
    assert frame.dtypes == [pl.Float64, pl.Float64]

0 comments on commit a98e0ba

Please sign in to comment.