Skip to content

Commit

Permalink
Lazy: rename columns
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 24, 2021
1 parent f254b5c commit a47b261
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 28 deletions.
58 changes: 41 additions & 17 deletions polars/polars-lazy/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,23 +407,47 @@ impl LazyFrame {
self.select_local(vec![col("*").reverse()])
}

/// Rename a column in the DataFrame
pub fn with_column_renamed(self, existing_name: &str, new_name: &str) -> Self {
let schema = self.logical_plan.schema();
let schema = schema
.rename(&[existing_name], &[new_name])
.expect("cannot rename non existing column");

// first make sure that the column is projected, then we rename it in a `map` step
let init = self.with_column(col(existing_name));

let existing_name = existing_name.to_string();
let new_name = new_name.to_string();
let f = move |mut df: DataFrame| {
df.rename(&existing_name, &new_name)?;
Ok(df)
};
init.map(f, Some(AllowedOptimizations::default()), Some(schema))
/// Rename columns in the DataFrame. This does not preserve ordering.
///
/// `existing` and `new` are paired positionally. If one of them is longer
/// than the other, the surplus names have no partner and are ignored, so the
/// corresponding columns are left untouched.
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self
where
    I: IntoIterator<Item = T> + Clone,
    J: IntoIterator<Item = S>,
    T: AsRef<str>,
    S: AsRef<str>,
{
    // Pair the names up front so that only columns that actually receive a
    // new name are considered for removal further down.
    let (existing, new): (Vec<String>, Vec<String>) = existing
        .into_iter()
        .zip(new)
        .map(|(old, new)| (old.as_ref().to_string(), new.as_ref().to_string()))
        .unzip();

    let aliased: Vec<_> = existing
        .iter()
        .zip(&new)
        .map(|(old, new)| col(old).alias(new))
        .collect();

    // An old name that is also a rename target (`a -> a`, or a swap such as
    // `a -> b, b -> a`) must be kept: dropping it would remove the column that
    // was just created under that name.
    let stale: Vec<String> = existing
        .into_iter()
        .filter(|old| !new.contains(old))
        .collect();

    let renamed = self.with_columns(aliased);
    if stale.is_empty() {
        renamed
    } else {
        renamed.drop_columns_impl(&stale)
    }
}

/// Removes columns from the DataFrame.
/// Note that it's better to only select the columns you need
/// and let the projection pushdown optimize away the unneeded columns.
pub fn drop_columns<I, T>(self, columns: I) -> Self
where
    I: IntoIterator<Item = T>,
    T: AsRef<str>,
{
    // Copy the names into owned strings, the form the private helper takes.
    let mut names: Vec<String> = Vec::new();
    for column in columns {
        names.push(column.as_ref().to_owned());
    }
    self.drop_columns_impl(&names)
}

/// Selects every column except the ones named in `columns`.
// NOTE(review): the `ptr_arg` allow is presumably needed because `exclude`
// takes a `&Vec<String>` -- confirm against its signature.
#[allow(clippy::ptr_arg)]
fn drop_columns_impl(self, columns: &Vec<String>) -> Self {
    let remaining = col("*").exclude(columns);
    self.select_local(vec![remaining])
}

/// Shift the values by a given period and fill the parts that will be empty due to this operation
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Manipulation/ selection
LazyFrame.drop_columns
LazyFrame.drop_column
LazyFrame.with_column_renamed
LazyFrame.rename
LazyFrame.reverse
LazyFrame.shift
LazyFrame.shift_and_fill
Expand Down
15 changes: 14 additions & 1 deletion py-polars/polars/lazy/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,20 @@ def with_column_renamed(self, existing_name: str, new_name: str) -> "LazyFrame":
"""
Rename a column in the DataFrame
"""
return wrap_ldf(self._ldf.with_column_renamed(existing_name, new_name))
return wrap_ldf(self._ldf.rename([existing_name], [new_name]))

def rename(self, mapping: Dict[str, str]) -> "LazyFrame":
    """
    Rename column names. This does not preserve column order.

    Parameters
    ----------
    mapping
        Key value pairs that map from old name to new name.
    """
    # Split the mapping into two parallel lists: old names and new names.
    old_names = []
    new_names = []
    for old_name, new_name in mapping.items():
        old_names.append(old_name)
        new_names.append(new_name)
    return wrap_ldf(self._ldf.rename(old_names, new_names))

def reverse(self) -> "LazyFrame":
"""
Expand Down
12 changes: 3 additions & 9 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ impl PyLazyFrame {
ldf.with_columns(py_exprs_to_exprs(exprs)).into()
}

pub fn with_column_renamed(&mut self, existing: &str, new: &str) -> PyLazyFrame {
pub fn rename(&mut self, existing: Vec<String>, new: Vec<String>) -> PyLazyFrame {
let ldf = self.ldf.clone();
ldf.with_column_renamed(existing, new).into()
ldf.rename(existing, new).into()
}

pub fn reverse(&self) -> Self {
Expand Down Expand Up @@ -409,13 +409,7 @@ impl PyLazyFrame {

pub fn drop_columns(&self, cols: Vec<String>) -> Self {
let ldf = self.ldf.clone();
let f = move |mut df: DataFrame| {
for col in &cols {
let _ = df.drop_in_place(col);
}
Ok(df)
};
ldf.map(f, None, None).into()
ldf.drop_columns(cols).into()
}

pub fn clone(&self) -> PyLazyFrame {
Expand Down
14 changes: 13 additions & 1 deletion py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def test_datetime_consistency():

def test_clip():
    """Clipping to [2, 4] pins values outside that range to the nearest bound."""
    expected = [2, 2, 3, 4, 4]
    df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    # Each comparison must be asserted: a bare `==` expression is evaluated
    # and discarded, so the test could never fail.
    assert df.select(pl.col("a").clip(2, 4))["a"].to_list() == expected
    assert pl.Series([1, 2, 3, 4, 5]).clip(2, 4).to_list() == expected


Expand Down Expand Up @@ -571,3 +571,15 @@ def test_is_in():
True,
False,
]


def test_rename():
    """Renaming "a" and "b" through a mapping yields the columns c, foo, bar."""
    frame = pl.DataFrame({"a": [1], "b": [2], "c": [3]})
    renamed = frame.lazy().rename({"a": "foo", "b": "bar"}).collect()
    # todo: preserve column order
    assert renamed.columns == ["c", "foo", "bar"]


def test_drop_columns():
    """Dropping "a" and "b" from a three-column frame leaves only "c"."""
    lazy_frame = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).lazy()
    out = lazy_frame.drop_columns(["a", "b"])
    assert out.columns == ["c"]

0 comments on commit a47b261

Please sign in to comment.