Skip to content

Commit

Permalink
python lazyframe schema (#3302)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 4, 2022
1 parent b19b6db commit 0b6c625
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 3 deletions.
4 changes: 2 additions & 2 deletions polars/polars-core/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,13 @@ impl Schema {
ArrowSchema::from(fields)
}

// Iterates over the entries of `self.inner`, yielding an owned `Field` built
// from each `(name, dtype)` pair; the dtype is cloned for every item.
// The second signature below additionally declares the returned iterator as
// `ExactSizeIterator`.
pub fn iter_fields(&self) -> impl Iterator<Item = Field> + '_ {
pub fn iter_fields(&self) -> impl Iterator<Item = Field> + ExactSizeIterator + '_ {
self.inner
.iter()
.map(|(name, dtype)| Field::new(name, dtype.clone()))
}

// Iterates over the entries of `self.inner`, yielding a borrowed `DataType`
// for each `(name, dtype)` pair and discarding the name.
// The second signature below additionally declares the returned iterator as
// `ExactSizeIterator`.
pub fn iter_dtypes(&self) -> impl Iterator<Item = &DataType> + '_ {
pub fn iter_dtypes(&self) -> impl Iterator<Item = &DataType> + ExactSizeIterator + '_ {
self.inner.iter().map(|(_name, dtype)| dtype)
}

Expand Down
43 changes: 43 additions & 0 deletions py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,49 @@ def columns(self) -> List[str]:
"""
return self._ldf.columns()

@property
def dtypes(self) -> List[Type[DataType]]:
    """
    Get dtypes of columns in LazyFrame.

    Returns
    -------
    list
        Whatever ``self._ldf.dtypes()`` returns: one dtype per column.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.dtypes
    [<class 'polars.datatypes.Int64'>, <class 'polars.datatypes.Float64'>, <class 'polars.datatypes.Utf8'>]

    See Also
    --------
    schema : Return a dict of [column name, dtype]

    """
    # The lookup is delegated entirely to the wrapped `_ldf` object.
    return self._ldf.dtypes()

@property
def schema(self) -> Dict[str, Type[DataType]]:
    """
    Get a dict[column name, DataType].

    Returns
    -------
    dict
        Whatever ``self._ldf.schema()`` returns: a mapping from column name
        to dtype.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.schema
    {'foo': <class 'polars.datatypes.Int64'>, 'bar': <class 'polars.datatypes.Float64'>, 'ham': <class 'polars.datatypes.Utf8'>}

    """
    # The lookup is delegated entirely to the wrapped `_ldf` object.
    return self._ldf.schema()

def cache(self: LDF) -> LDF:
"""
Cache the result once the execution of the physical plan hits this node.
Expand Down
22 changes: 21 additions & 1 deletion py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use polars_core::prelude::{
};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::PyList;
use pyo3::types::{PyDict, PyList};
use std::io::BufWriter;

#[pyclass]
Expand Down Expand Up @@ -716,6 +716,26 @@ impl PyLazyFrame {
self.ldf.schema().iter_names().cloned().collect()
}

// Returns a Python list holding the dtype of every column in the lazy
// frame's schema, in the order produced by `iter_dtypes`.
pub fn dtypes(&self, py: Python) -> PyObject {
    // Keep the schema in a named binding so the iterator borrowing it stays valid.
    let lf_schema = self.ldf.schema();
    let py_dtypes: Vec<PyObject> = lf_schema
        .iter_dtypes()
        .map(|dtype| Wrap(dtype.clone()).to_object(py))
        .collect();
    PyList::new(py, py_dtypes).to_object(py)
}

// Returns a Python dict that maps each column name in the lazy frame's
// schema to its dtype. Panics if inserting an entry into the dict fails.
pub fn schema(&self, py: Python) -> PyObject {
    let lf_schema = self.ldf.schema();
    let py_schema = PyDict::new(py);

    for field in lf_schema.iter_fields() {
        let dtype = Wrap(field.data_type().clone());
        py_schema.set_item(field.name(), dtype).unwrap();
    }
    py_schema.to_object(py)
}

// Applies `unnest` for the given columns to a clone of the wrapped lazy
// frame and converts the result into a `PyLazyFrame`.
pub fn unnest(&self, cols: Vec<String>) -> PyLazyFrame {
    let unnested = self.ldf.clone().unnest(cols);
    unnested.into()
}
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,3 +1238,23 @@ def test_quantile_filtered_agg() -> None:
.to_list()
== [1.0, 1.0]
)


def test_lazy_schema() -> None:
    """Check that `LazyFrame.schema` and `LazyFrame.dtypes` report the column types."""
    # Both properties are checked against the same frame, so build it once.
    lf = pl.DataFrame(
        {
            "foo": [1, 2, 3],
            "bar": [6.0, 7.0, 8.0],
            "ham": ["a", "b", "c"],
        }
    ).lazy()

    assert lf.schema == {"foo": pl.Int64, "bar": pl.Float64, "ham": pl.Utf8}
    assert lf.dtypes == [pl.Int64, pl.Float64, pl.Utf8]

0 comments on commit 0b6c625

Please sign in to comment.