Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor drop columns to fix issues #10903

Merged
merged 13 commits into from
Nov 9, 2023
215 changes: 107 additions & 108 deletions crates/nu-command/src/filters/drop/column.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use nu_engine::CallExt;
use nu_protocol::ast::{Call, CellPath};
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
record, Category, Example, FromValue, IntoInterruptiblePipelineData, IntoPipelineData,
PipelineData, Record, ShellError, Signature, Span, SyntaxShape, Type, Value,
record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value,
};

use std::collections::HashSet;

#[derive(Clone)]
pub struct DropColumn;

Expand All @@ -16,7 +18,10 @@ impl Command for DropColumn {

fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![(Type::Table(vec![]), Type::Table(vec![]))])
.input_output_types(vec![
(Type::Table(vec![]), Type::Table(vec![])),
(Type::Record(vec![]), Type::Record(vec![])),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Good catch!

])
.optional(
"columns",
SyntaxShape::Int,
Expand All @@ -41,148 +46,142 @@ impl Command for DropColumn {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
// the number of columns to drop
let columns: Option<i64> = call.opt(engine_state, stack, 0)?;
let span = call.head;
let columns: Option<Spanned<i64>> = call.opt(engine_state, stack, 0)?;

let columns_to_drop = if let Some(quantity) = columns {
quantity
let columns = if let Some(columns) = columns {
if columns.item < 0 {
return Err(ShellError::NeedsPositiveValue(columns.span));
} else {
columns.item as usize
}
} else {
1
};

// Make columns to drop is positive
if columns_to_drop < 0 {
if let Some(expr) = call.positional_nth(0) {
return Err(ShellError::NeedsPositiveValue(expr.span));
}
}

dropcol(engine_state, span, input, columns_to_drop)
drop_cols(engine_state, input, call.head, columns)
}

fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Remove the last column of a table",
example: "[[lib, extension]; [nu-lib, rs] [nu-core, rb]] | drop column",
result: Some(Value::list(
vec![
Value::test_record(record!("lib" =>Value::test_string("nu-lib"))),
Value::test_record(record!("lib" =>Value::test_string("nu-core"))),
],
Span::test_data(),
)),
}]
vec![
Example {
description: "Remove the last column of a table",
example: "[[lib, extension]; [nu-lib, rs] [nu-core, rb]] | drop column",
result: Some(Value::test_list(vec![
Value::test_record(record! { "lib" => Value::test_string("nu-lib") }),
Value::test_record(record! { "lib" => Value::test_string("nu-core") }),
])),
},
Example {
description: "Remove the last column of a record",
example: "{lib: nu-lib, extension: rs} | drop column",
result: Some(Value::test_record(
record! { "lib" => Value::test_string("nu-lib") },
)),
},
]
}
}

fn dropcol(
fn drop_cols(
engine_state: &EngineState,
span: Span,
input: PipelineData,
columns: i64, // the number of columns to drop
head: Span,
columns: usize,
) -> Result<PipelineData, ShellError> {
let mut keep_columns = vec![];

// For simplicity and performance, we use the first row's columns
// as the columns for the whole table, and assume that later rows/records
// have these same columns. However, this can give weird results like:
// `[{a: 1}, {b: 2}] | drop column`
// This will drop the column "a" instead of "b" even though column "b"
// is displayed farther to the right.
match input {
PipelineData::ListStream(stream, ..) => {
let mut output = vec![];

let v: Vec<_> = stream.into_iter().collect();
let input_cols = get_input_cols(v.clone());
let kc = get_keep_columns(input_cols, columns);
keep_columns = get_cellpath_columns(kc, span);

for input_val in v {
let mut record = Record::new();

for path in &keep_columns {
let fetcher = input_val.clone().follow_cell_path(&path.members, false)?;
record.push(path.into_string(), fetcher);
}
output.push(Value::record(record, span))
PipelineData::ListStream(mut stream, ..) => {
if let Some(mut first) = stream.next() {
let drop_cols = drop_cols_set(&mut first, head, columns)?;

Ok(std::iter::once(first)
.chain(stream.map(move |mut v| {
match drop_record_cols(&mut v, head, &drop_cols) {
Ok(()) => v,
Err(e) => Value::error(e, head),
}
}))
.into_pipeline_data(engine_state.ctrlc.clone()))
} else {
Ok(PipelineData::Empty)
}

Ok(output
.into_iter()
.into_pipeline_data(engine_state.ctrlc.clone()))
}
PipelineData::Value(v, ..) => {
let val_span = v.span();
if let Value::List {
vals: input_vals, ..
} = v
{
let mut output = vec![];
let input_cols = get_input_cols(input_vals.clone());
let kc = get_keep_columns(input_cols, columns);
keep_columns = get_cellpath_columns(kc, val_span);

for input_val in input_vals {
let mut record = Record::new();

for path in &keep_columns {
let fetcher = input_val.clone().follow_cell_path(&path.members, false)?;
record.push(path.into_string(), fetcher);
let span = v.span();
match v {
Value::List { mut vals, .. } => {
if let Some((first, rest)) = vals.split_first_mut() {
let drop_cols = drop_cols_set(first, head, columns)?;
for val in rest {
drop_record_cols(val, head, &drop_cols)?
}
}
output.push(Value::record(record, val_span))
Ok(Value::list(vals, span).into_pipeline_data())
}

Ok(output
.into_iter()
.into_pipeline_data(engine_state.ctrlc.clone()))
} else {
let mut record = Record::new();

for cell_path in &keep_columns {
let result = v.clone().follow_cell_path(&cell_path.members, false)?;
record.push(cell_path.into_string(), result);
Value::Record {
val: mut record, ..
} => {
let len = record.len().saturating_sub(columns);
record.truncate(len);
Ok(Value::record(record, span).into_pipeline_data())
}

Ok(Value::record(record, span).into_pipeline_data())
// Propagate errors
Value::Error { error, .. } => Err(*error),
val => Err(unsupported_value_error(&val, head)),
}
}
x => Ok(x),
PipelineData::Empty => Ok(PipelineData::Empty),
PipelineData::ExternalStream { span, .. } => Err(ShellError::OnlySupportsThisInputType {
exp_input_type: "table or record".into(),
wrong_type: "raw data".into(),
dst_span: head,
src_span: span,
}),
}
}

fn get_input_cols(input: Vec<Value>) -> Vec<String> {
let rec = input.first();
match rec {
Some(Value::Record { val, .. }) => val.cols.to_vec(),
_ => vec!["".to_string()],
fn drop_cols_set(val: &mut Value, head: Span, drop: usize) -> Result<HashSet<String>, ShellError> {
if let Value::Record { val: record, .. } = val {
let len = record.len().saturating_sub(drop);
Ok(record.drain(len..).map(|(col, _)| col).collect())
} else {
Err(unsupported_value_error(val, head))
}
}

fn get_cellpath_columns(keep_cols: Vec<String>, span: Span) -> Vec<CellPath> {
let mut output = vec![];
for keep_col in keep_cols {
let val = Value::string(keep_col, span);
let cell_path = match CellPath::from_value(val) {
Ok(v) => v,
Err(_) => return vec![],
};
output.push(cell_path);
fn drop_record_cols(
val: &mut Value,
head: Span,
drop_cols: &HashSet<String>,
) -> Result<(), ShellError> {
if let Value::Record { val, .. } = val {
val.retain(|col, _| !drop_cols.contains(col));
Ok(())
} else {
Err(unsupported_value_error(val, head))
}
output
}

fn get_keep_columns(input: Vec<String>, mut num_of_columns_to_drop: i64) -> Vec<String> {
let vlen: i64 = input.len() as i64;

if num_of_columns_to_drop > vlen {
num_of_columns_to_drop = vlen;
fn unsupported_value_error(val: &Value, head: Span) -> ShellError {
ShellError::OnlySupportsThisInputType {
exp_input_type: "table or record".into(),
wrong_type: val.get_type().to_string(),
dst_span: head,
src_span: val.span(),
}

let num_of_columns_to_keep = (vlen - num_of_columns_to_drop) as usize;
input[0..num_of_columns_to_keep].to_vec()
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn test_examples() {
use super::DropColumn;
use crate::test_examples;
test_examples(DropColumn {})
crate::test_examples(DropColumn)
}
}
42 changes: 42 additions & 0 deletions crates/nu-command/tests/commands/drop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,45 @@ fn fail_on_non_iterator() {

assert!(actual.err.contains("command doesn't support"));
}

#[test]
fn disjoint_columns_first_row_becomes_empty() {
let actual = nu!(pipeline(
"
[{a: 1}, {b: 2, c: 3}]
| drop column
| columns
| to nuon
"
));

assert_eq!(actual.out, "[b, c]");
}

#[test]
fn disjoint_columns() {
let actual = nu!(pipeline(
"
[{a: 1, b: 2}, {c: 3}]
| drop column
| columns
| to nuon
"
));
Comment on lines +98 to +120
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotta admit I find both of those behaviors absolutely cursed based on what the user sees with the default table output.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lol, I agree. Perhaps we could:

  • drop the first columns? Then it would be skip columns instead of drop columns
  • compute the set of all columns (via nu_engine::column::get_columns). Unfortunately, this means we would have to collect the ListStream.


assert_eq!(actual.out, "[a, c]");
}

#[test]
fn record() {
let actual = nu!("{a: 1, b: 2} | drop column | to nuon");

assert_eq!(actual.out, "{a: 1}");
}

#[test]
fn more_columns_than_record_has() {
let actual = nu!("{a: 1, b: 2} | drop column 3 | to nuon");

assert_eq!(actual.out, "{}");
}