Skip to content

Commit

Permalink
fix_rename (#2740)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 23, 2022
1 parent c101f83 commit 2872ada
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 18 deletions.
93 changes: 79 additions & 14 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,24 +281,57 @@ impl LazyFrame {
self.select_local(vec![col("*").reverse()])
}

/// Rename columns in the DataFrame.
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self
where
I: IntoIterator<Item = T> + Clone,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
{
let existing: Vec<String> = existing
.into_iter()
.map(|name| name.as_ref().to_string())
.collect();
/// Rename implementation for the case where a new name collides with a column
/// that already exists (e.g. `a -> b` together with `b -> a`).
///
/// Steps visible in this body:
/// 1. Compute the post-rename schema up front from the current schema.
/// 2. Add, via `with_columns`, a copy of every `existing` column aliased to
///    `__POLARS_TEMP_<new name>`, so the new names cannot clash while both
///    versions are present.
/// 3. In a `map` closure, swap each temporary column into the slot of the
///    column it replaces, strip the temporary prefix from its name, and drop
///    the trailing `existing.len()` columns.
///
/// # Panics
/// Panics if the schema rename fails, or if any `existing` / temporary name
/// cannot be found among the frame's columns inside the closure (all of these
/// are `unwrap`ped).
fn rename_impl_swapping(self, existing: Vec<String>, new: Vec<String>) -> Self {
// schema after renaming
let new_schema = self.schema().rename(&existing, &new).unwrap();

// Prefix used to build temporary, non-clashing names for the new columns.
let prefix = "__POLARS_TEMP_";

// Shadow `new` with the temp-prefixed version of every requested name.
let new: Vec<String> = new
// NOTE(review): in this diff view the next two lines look like removed lines of the
// old `rename` body rendered inline; the `.iter()` chain that follows is presumably
// the intended code — confirm against the repository.
.into_iter()
.map(|name| name.as_ref().to_string())
.iter()
.map(|name| format!("{}{}", prefix, name))
.collect();

// Pair each existing column with its temp-prefixed new name and add it as an alias.
self.with_columns(
existing
.iter()
.zip(&new)
.map(|(old, new)| col(old).alias(new))
.collect::<Vec<_>>(),
)
.map(
move |mut df: DataFrame| {
// Take ownership of the column vector, leaving `df` with an empty one.
let mut cols = std::mem::take(df.get_columns_mut());
// we must find the indices before we start swapping,
// because swapping may influence the positions we find if columns are swapped for instance.
// e.g. a -> b
// b -> a
#[allow(clippy::needless_collect)]
let existing_idx = existing
.iter()
.map(|name| cols.iter().position(|s| s.name() == name.as_str()).unwrap())
.collect::<Vec<_>>();
// Positions of the temp-prefixed columns (`new` holds the prefixed names here).
let new_idx = new
.iter()
.map(|name| cols.iter().position(|s| s.name() == name.as_str()).unwrap())
.collect::<Vec<_>>();

for (existing_i, new_i) in existing_idx.into_iter().zip(new_idx) {
// After the swap, the temp column sits in the original column's slot
// and the original column sits where the temp column was.
cols.swap(existing_i, new_i);
let s = &mut cols[existing_i];
// Strip the temp prefix to obtain the final column name.
let name = &s.name()[prefix.len()..].to_string();
s.rename(name);
}
// Drop the last `existing.len()` columns. This assumes `with_columns` appended the
// temp columns at the end, so the displaced originals now occupy the tail — TODO confirm.
cols.truncate(cols.len() - existing.len());
DataFrame::new(cols)
},
// NOTE(review): the remaining arguments are passed through to `map` as-is; presumably
// `new_schema` is the output schema and "RENAME_SWAPPING" a display name — confirm
// against the `LazyFrame::map` signature.
None,
Some(new_schema),
Some("RENAME_SWAPPING"),
)
}

fn rename_imp(self, existing: Vec<String>, new: Vec<String>) -> Self {
self.with_columns(
existing
.iter()
Expand Down Expand Up @@ -326,6 +359,38 @@ impl LazyFrame {
)
}

/// Rename columns in the DataFrame.
///
/// Both name lists are first materialised as owned `String`s. The call is then
/// routed to one of two private implementations:
///
/// * `rename_impl_swapping` when any requested new name is already a column of
///   the current schema (e.g. renaming `a -> b` and `b -> a`);
/// * `rename_imp` otherwise (e.g. `a -> new_name`); this is the variant that
///   allows predicate pushdown.
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Self
where
    I: IntoIterator<Item = T>,
    J: IntoIterator<Item = S>,
    T: AsRef<str>,
    S: AsRef<str>,
{
    let old_names: Vec<String> = existing
        .into_iter()
        .map(|name| name.as_ref().to_owned())
        .collect();
    let new_names: Vec<String> = new
        .into_iter()
        .map(|name| name.as_ref().to_owned())
        .collect();

    // A new name that already exists in the schema means a column gets swapped.
    let clashes_with_schema = {
        let current = &*self.schema();
        new_names
            .iter()
            .any(|name| current.column_with_name(name).is_some())
    };

    if clashes_with_schema {
        return self.rename_impl_swapping(old_names, new_names);
    }
    self.rename_imp(old_names, new_names)
}

/// Removes columns from the DataFrame.
/// Note that it's better to only select the columns you need
/// and let the projection pushdown optimize away the unneeded columns.
Expand Down
18 changes: 18 additions & 0 deletions polars/tests/it/lazy/projection_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,21 @@ fn test_sum_after_filter() -> Result<()> {
assert_eq!(df.column("values")?.get(0), AnyValue::Int32(130));
Ok(())
}

/// Swapping two column names through the lazy API must exchange the labels
/// while each column keeps its own values and position.
#[test]
fn test_swap_rename() -> Result<()> {
    let lazy_input = df![
        "a" => [1],
        "b" => [2],
    ]?
    .lazy();

    let renamed = lazy_input.rename(["a", "b"], ["b", "a"]).collect()?;

    // The first column still holds 1 but is now called "b", and vice versa.
    let expected = df![
        "b" => [1],
        "a" => [2],
    ]?;
    assert!(renamed.frame_equal(&expected));
    Ok(())
}
5 changes: 1 addition & 4 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1571,10 +1571,7 @@ def rename(self, mapping: Dict[str, str]) -> "DataFrame":
└───────┴─────┴─────┘
"""
df = self.clone()
for k, v in mapping.items():
df._df.rename(k, v)
return df
return self.lazy().rename(mapping).collect(no_optimization=True)

def insert_at_idx(self, index: int, series: "pli.Series") -> None:
"""
Expand Down
18 changes: 18 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,24 @@ def test_with_column_renamed() -> None:
assert result.frame_equal(expected)


def test_rename_swap() -> None:
    """Renaming with a mapping that swaps two names exchanges the labels only."""
    source = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1]})
    swap_mapping = {"a": "b", "b": "a"}

    out = source.rename(swap_mapping)

    # Column order and data are untouched; only the names trade places.
    expected = pl.DataFrame({"b": [1, 2, 3, 4, 5], "a": [5, 4, 3, 2, 1]})
    assert out.frame_equal(expected)


def test_fill_null() -> None:
df = pl.DataFrame({"a": [1, 2], "b": [3, None]})
assert df.fill_null(4).frame_equal(pl.DataFrame({"a": [1, 2], "b": [3, 4]}))
Expand Down

0 comments on commit 2872ada

Please sign in to comment.