Skip to content

Commit

Permalink
LazyFrame expose schema; python expose lazyframe columns
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 5, 2021
1 parent 9e21702 commit c6c03a4
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 6 deletions.
6 changes: 6 additions & 0 deletions polars/polars-lazy/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,12 @@ impl Default for OptState {
pub type AllowedOptimizations = OptState;

impl LazyFrame {
/// Return the schema of the computation described by this LazyFrame.
///
/// A logical plan is built from a clone of `self`; the schema handle that
/// plan reports is then cloned and returned to the caller.
pub fn schema(&self) -> SchemaRef {
    self.clone()
        .get_plan_builder()
        .build()
        .schema()
        .clone()
}

/// Create a LazyFrame directly from a parquet scan.
#[cfg(feature = "parquet")]
pub fn new_from_parquet(path: String, stop_after_n_rows: Option<usize>, cache: bool) -> Self {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -947,7 +947,7 @@ fn rewrite_projections(exprs: Vec<Expr>, schema: &Schema) -> Vec<Expr> {
pub struct LogicalPlanBuilder(LogicalPlan);

impl LogicalPlan {
pub(crate) fn schema(&self) -> &Schema {
pub(crate) fn schema(&self) -> &SchemaRef {
use LogicalPlan::*;
match self {
Cache { input } => input.schema(),
Expand Down
21 changes: 21 additions & 0 deletions py-polars/polars/lazy/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,27 @@ def fetch(
)
return pl.eager.frame.wrap_df(ldf.fetch(n_rows))

@property
def columns(self) -> tp.List[str]:
    """
    Get the column names of this LazyFrame.

    Only a getter is defined here, so the property is read-only.

    Returns
    -------
    The list of column names as reported by the wrapped lazy frame
    (``self._ldf.columns()``).

    Examples
    --------
    >>> df = (
    ...     pl.DataFrame({
    ...         "foo": [1, 2, 3],
    ...         "bar": [6, 7, 8],
    ...         "ham": ["a", "b", "c"],
    ...     })
    ...     .lazy()
    ...     .select(["foo", "bar"])
    ... )
    >>> df.columns
    ['foo', 'bar']

    """
    return self._ldf.columns()

def cache(
self,
) -> "LazyFrame":
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ use crate::series::PySeries;
use polars::chunked_array::object::PolarsObjectSafe;
use polars::frame::row::Row;
use polars::prelude::AnyValue;
use polars_core::utils::arrow::datatypes::ArrowNativeType;
use pyo3::basic::CompareOp;
use pyo3::conversion::{FromPyObject, IntoPy};
use pyo3::prelude::*;
use pyo3::types::PySequence;
use pyo3::{PyAny, PyResult};
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use polars_core::utils::arrow::datatypes::ArrowNativeType;

#[repr(transparent)]
pub struct Wrap<T>(pub T);
Expand Down
5 changes: 1 addition & 4 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,10 +517,7 @@ impl PyDataFrame {
pub fn take(&self, indices: Wrap<AlignedVec<u32>>) -> PyResult<Self> {
    // Unwrap the aligned buffer and convert it into a `UInt32Type` primitive array.
    let indices = indices.0;
    let indices = indices.into_primitive_array::<UInt32Type>(None);
    // Call `take` on the wrapped frame exactly once; `indices` is consumed by
    // `into()`, so this statement must not be repeated. Errors are converted
    // through `PyPolarsEr::from` before being propagated with `?`.
    let df = self.df.take(&indices.into()).map_err(PyPolarsEr::from)?;
    // Wrap the resulting frame in a new Python-facing `PyDataFrame`.
    Ok(PyDataFrame::new(df))
}

Expand Down
9 changes: 9 additions & 0 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -412,4 +412,13 @@ impl PyLazyFrame {
pub fn clone(&self) -> PyLazyFrame {
    // Clone the wrapped `ldf` and convert the copy back into a `PyLazyFrame`
    // via `into()`, so the caller gets an independent wrapper value.
    self.ldf.clone().into()
}

/// Collect the field names of the lazy frame's schema as owned strings.
///
/// The names are returned in the order in which the schema lists its fields.
pub fn columns(&self) -> Vec<String> {
    let schema = self.ldf.schema();
    let fields = schema.fields();
    let mut names = Vec::with_capacity(fields.len());
    for field in fields.iter() {
        names.push(field.name().to_string());
    }
    names
}
}
12 changes: 12 additions & 0 deletions py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,15 @@ def test_drop_nulls():
def test_all_expr():
    """Selecting ``pl.all()`` from a frame must give back an equal frame."""
    frame = pl.DataFrame({"nrs": [1, 2, 3, 4, 5, None]})
    selected = frame[[pl.all()]]
    assert selected.frame_equal(frame)


def test_lazy_columns():
    """``columns`` on a lazy frame reflects the projection applied by ``select``."""
    data = {"a": [1], "b": [1], "c": [1]}
    lazy_frame = pl.DataFrame(data).lazy()
    projected = lazy_frame.select(["a", "c"])

    assert projected.columns == ["a", "c"]

0 comments on commit c6c03a4

Please sign in to comment.