Skip to content

Commit

Permalink
fix(rust, python): fix lazy swapping rename (#5884)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 22, 2022
1 parent f6d0a9a commit c266dff
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 50 deletions.
6 changes: 6 additions & 0 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,12 @@ impl DataFrame {
self.columns.iter().position(|s| s.name() == name)
}

/// Look up the position of the column called `name`.
///
/// Fallible counterpart of `find_idx_by_name`: the "not found" case is
/// reported as an error instead of `None`, so callers can propagate it with `?`.
///
/// # Errors
///
/// Returns `PolarsError::NotFound` carrying `name` when no column has that name.
pub fn try_find_idx_by_name(&self, name: &str) -> PolarsResult<usize> {
    match self.find_idx_by_name(name) {
        Some(idx) => Ok(idx),
        None => Err(PolarsError::NotFound(name.to_string().into())),
    }
}

/// Select a single column by name.
///
/// # Example
Expand Down
66 changes: 16 additions & 50 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,69 +268,35 @@ impl LazyFrame {
self.select_local(vec![col("*").reverse()])
}

fn rename_impl_swapping(self, mut existing: Vec<String>, mut new: Vec<String>) -> Self {
fn rename_impl_swapping(self, existing: Vec<String>, new: Vec<String>) -> Self {
assert_eq!(new.len(), existing.len());
let mut removed = 0;
for mut idx in 0..existing.len() {
// remove "name" -> "name
// these are no ops.
idx -= removed;
if existing[idx] == new[idx] {
existing.swap_remove(idx);
new.swap_remove(idx);
removed += 1;
}
}

let existing2 = existing.clone();
let new2 = new.clone();
let udf_schema = move |s: &Schema| {
let udf_schema = move |old_schema: &Schema| {
let mut new_schema = old_schema.clone();

// schema after renaming
let mut new_schema = s.clone();
for (old, new) in existing2.iter().zip(new2.iter()) {
new_schema
.rename(old, new.to_string())
.ok_or_else(|| PolarsError::NotFound(old.to_string().into()))?
let dtype = old_schema.try_get(old)?;
new_schema.with_column(new.clone(), dtype.clone());
}
Ok(Arc::new(new_schema))
};

let prefix = "__POLARS_TEMP_";

let new: Vec<String> = new.iter().map(|name| format!("{prefix}{name}")).collect();

self.with_columns(
existing
.iter()
.zip(&new)
.map(|(old, new)| col(old).alias(new))
.collect::<Vec<_>>(),
)
.map(
self.map(
move |mut df: DataFrame| {
let mut cols = std::mem::take(df.get_columns_mut());
// we must find the indices before we start swapping,
// because swapping may influence the positions we find if columns are swapped for instance.
// e.g. a -> b
// b -> a
#[allow(clippy::needless_collect)]
let existing_idx = existing
let positions = existing
.iter()
.map(|name| cols.iter().position(|s| s.name() == name.as_str()).unwrap())
.collect::<Vec<_>>();
let new_idx = new
.iter()
.map(|name| cols.iter().position(|s| s.name() == name.as_str()).unwrap())
.collect::<Vec<_>>();

for (existing_i, new_i) in existing_idx.into_iter().zip(new_idx) {
cols.swap(existing_i, new_i);
let s = &mut cols[existing_i];
let name = &s.name()[prefix.len()..].to_string();
s.rename(name);
.map(|old| df.try_find_idx_by_name(old))
.collect::<PolarsResult<Vec<_>>>()?;

for (pos, name) in positions.iter().zip(new.iter()) {
df.get_columns_mut()[*pos].rename(name);
}
cols.truncate(cols.len() - existing.len());
DataFrame::new(cols)
// recreate dataframe so we check duplicates
let columns = std::mem::take(df.get_columns_mut());
DataFrame::new(columns)
},
None,
Some(Arc::new(udf_schema)),
Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,11 @@ def test_fill_null_f32_with_lit() -> None:
# ensure the literal integer does not upcast the f32 to an f64
df = pl.DataFrame({"a": [1.1, 1.2]}, columns=[("a", pl.Float32)])
assert df.fill_null(value=0).dtypes == [pl.Float32]


def test_lazy_rename() -> None:
    """A lazy rename that swaps two column names must exchange their data."""
    frame = pl.DataFrame({"x": [1], "y": [2]})

    # "y" -> "x" and "x" -> "y" at the same time: each name ends up on the
    # other column's values.
    swapped = frame.lazy().rename({"y": "x", "x": "y"}).select(["x", "y"]).collect()

    assert swapped.to_dict(False) == {"x": [2], "y": [1]}

0 comments on commit c266dff

Please sign in to comment.