Skip to content

Commit

Permalink
overwrite existing column in outer join
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 5, 2021
1 parent a0e63d1 commit 9fece9a
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 32 deletions.
6 changes: 3 additions & 3 deletions polars/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ impl DataFrame {
// below will panic
let mut columns = self.select_series(columns)?;
columns.sort_by(|sa, sb| {
self.name_to_idx(sa.name())
self.check_name_to_idx(sa.name())
.expect("checked above")
.partial_cmp(&self.name_to_idx(sb.name()).expect("checked above"))
.partial_cmp(&self.check_name_to_idx(sb.name()).expect("checked above"))
.expect("cmp usize -> Ordering")
});

Expand All @@ -93,7 +93,7 @@ impl DataFrame {
// Safety:
// offsets are not longer than the Series.
if let Ok((exploded, offsets)) = get_exploded(s) {
let col_idx = self.name_to_idx(s.name())?;
let col_idx = self.check_name_to_idx(s.name())?;

// expand all the other columns based on the exploded first column
if i == 0 {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ impl DataFrame {
for (s_left, s_right) in selected_left.iter().zip(&selected_right) {
let mut s = s_left.zip_outer_join_column(s_right, &opt_join_tuples);
s.rename(s_left.name());
df_left.hstack_mut(&[s])?;
df_left.with_column(s)?;
}
self.finish_join(df_left, df_right, suffix)
}
Expand Down
39 changes: 12 additions & 27 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,22 +135,12 @@ fn duplicate_err(name: &str) -> Result<()> {

impl DataFrame {
/// Get the index of the column.
fn name_to_idx(&self, name: &str) -> Result<usize> {
let mut idx = 0;
for column in &self.columns {
if column.name() == name {
break;
}
idx += 1;
}
if idx == self.columns.len() {
Err(PolarsError::NotFound(name.into()))
} else {
Ok(idx)
}
fn check_name_to_idx(&self, name: &str) -> Result<usize> {
self.find_idx_by_name(name)
.ok_or_else(|| PolarsError::NotFound(name.into()))
}

fn has_column(&self, name: &str) -> Result<()> {
fn check_already_present(&self, name: &str) -> Result<()> {
if self.columns.iter().any(|s| s.name() == name) {
Err(PolarsError::Duplicate(
format!("column with name: '{}' already present in DataFrame", name).into(),
Expand Down Expand Up @@ -847,10 +837,8 @@ impl DataFrame {
/// # Ok::<(), PolarsError>(())
/// ```
pub fn drop_in_place(&mut self, name: &str) -> Result<Series> {
let idx = self.name_to_idx(name)?;
let result = Ok(self.columns.remove(idx));
self.rechunk();
result
let idx = self.check_name_to_idx(name)?;
Ok(self.columns.remove(idx))
}

/// Return a new `DataFrame` where all null values are dropped.
Expand Down Expand Up @@ -923,7 +911,7 @@ impl DataFrame {
/// # Ok::<(), PolarsError>(())
/// ```
pub fn drop(&self, name: &str) -> Result<Self> {
let idx = self.name_to_idx(name)?;
let idx = self.check_name_to_idx(name)?;
let mut new_cols = Vec::with_capacity(self.columns.len() - 1);

self.columns.iter().enumerate().for_each(|(i, s)| {
Expand Down Expand Up @@ -955,17 +943,16 @@ impl DataFrame {
/// Insert a new column at a given index.
pub fn insert_at_idx<S: IntoSeries>(&mut self, index: usize, column: S) -> Result<&mut Self> {
let series = column.into_series();
self.has_column(series.name())?;
self.check_already_present(series.name())?;
self.insert_at_idx_no_name_check(index, series)
}

/// Add a new column to this `DataFrame` or replace an existing one.
pub fn with_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self> {
let series = column.into_series();
if series.len() == self.height() || self.is_empty() {
if self.has_column(series.name()).is_err() {
let name = series.name().to_string();
self.apply(&name, |_| series)?;
if let Some(idx) = self.find_idx_by_name(series.name()) {
self.replace_at_idx(idx, series)?;
} else {
self.columns.push(series);
self.rechunk();
Expand Down Expand Up @@ -1638,14 +1625,12 @@ impl DataFrame {
/// | "egg" | 3 |
/// +--------+-------+
/// ```
pub fn apply<F, S>(&mut self, column: &str, f: F) -> Result<&mut Self>
pub fn apply<F, S>(&mut self, name: &str, f: F) -> Result<&mut Self>
where
F: FnOnce(&Series) -> S,
S: IntoSeries,
{
let idx = self
.find_idx_by_name(column)
.ok_or_else(|| PolarsError::NotFound(column.to_string()))?;
let idx = self.check_name_to_idx(name)?;
self.apply_at_idx(idx, f)
}

Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/frame/upstream_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl Index<&str> for DataFrame {
type Output = Series;

fn index(&self, index: &str) -> &Self::Output {
let idx = self.name_to_idx(index).unwrap();
let idx = self.check_name_to_idx(index).unwrap();
&self.columns[idx]
}
}

0 comments on commit 9fece9a

Please sign in to comment.