Skip to content

Commit

Permalink
group-by now returns a table instead of a record (nushell#10848)
Browse files Browse the repository at this point in the history
# Description

Previously `group-by` returned a record containing each group as a
column. This data layout is hard to work with for some tasks because you
have to further manipulate the result to do things like determine the
number of items in each group, or the number of groups. `transpose` will
turn the record returned by `group-by` into a table, but this is
expensive when `group-by` is run on a large input.

In a discussion with @fdncred [several
workarounds](nushell#10462) to
common tasks were discussed, but they seem unsatisfying in general.

Now when `group-by --to-table` is used a table is returned with the
columns "groups" and "items" making it easier to do things like count
the number of groups (`| length`) or count the number of items in each
group (`| each {|g| $g.items | length`)

# User-Facing Changes

* `group-by` returns a `table` with "group" and "items" columns instead
of a `record` with one column per group name

# Tests + Formatting

Tests for `group-by` were updated

# After Submitting

* No breaking changes were made. The new `--to-table` switch should be
added automatically to the [`group-by`
documentation](https://www.nushell.sh/commands/docs/group-by.html)
  • Loading branch information
drbrain committed Oct 28, 2023
1 parent c87bac0 commit 3dfe1a4
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 34 deletions.
117 changes: 88 additions & 29 deletions crates/nu-command/src/filters/group_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@ impl Command for GroupBy {
// example. Perhaps Table should be a subtype of List, in which case
// the current signature would suffice even when a Table example
// exists.
.input_output_types(vec![(
Type::List(Box::new(Type::Any)),
Type::Record(vec![]),
)])
.input_output_types(vec![
(Type::List(Box::new(Type::Any)), Type::Record(vec![])),
(Type::List(Box::new(Type::Any)), Type::Table(vec![])),
])
.switch(
"to-table",
"Return a table with \"groups\" and \"items\" columns",
None,
)
.optional(
"grouper",
SyntaxShape::OneOf(vec![
Expand Down Expand Up @@ -80,7 +85,6 @@ impl Command for GroupBy {
),
})),
},

Example {
description: "You can also group by raw values by leaving out the argument",
example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
Expand All @@ -101,6 +105,46 @@ impl Command for GroupBy {
),
})),
},
Example {
description: "You can also output a table instead of a record",
example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
result: Some(Value::test_list(vec![
Value::test_record(
record! {
"group" => Value::test_string("1"),
"items" => Value::test_list(
vec![
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
]
)
}
),
Value::test_record(
record! {
"group" => Value::test_string("3"),
"items" => Value::test_list(
vec![
Value::test_string("3"),
Value::test_string("3"),
]
)
}
),
Value::test_record(
record! {
"group" => Value::test_string("2"),
"items" => Value::test_list(
vec![
Value::test_string("2"),
]
)
}
),
])),
},
]
}
}
Expand All @@ -123,11 +167,11 @@ pub fn group_by(
));
}

let group_value = match grouper {
let groups = match grouper {
Some(v) => {
let span = v.span();
match v {
Value::CellPath { val, .. } => group_cell_path(val, values, span)?,
Value::CellPath { val, .. } => group_cell_path(val, values)?,
Value::Block { .. } | Value::Closure { .. } => {
let block: Option<Closure> = call.opt(engine_state, stack, 0)?;
group_closure(&values, span, block, stack, engine_state, call)?
Expand All @@ -141,17 +185,22 @@ pub fn group_by(
}
}
}
None => group_no_grouper(values, span)?,
None => group_no_grouper(values)?,
};

Ok(PipelineData::Value(group_value, None))
let value = if call.has_flag("to-table") {
groups_to_table(groups, span)
} else {
groups_to_record(groups, span)
};

Ok(PipelineData::Value(value, None))
}

pub fn group_cell_path(
column_name: CellPath,
values: Vec<Value>,
span: Span,
) -> Result<Value, ShellError> {
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();

for value in values.into_iter() {
Expand All @@ -167,16 +216,10 @@ pub fn group_cell_path(
group.push(value);
}

Ok(Value::record(
groups
.into_iter()
.map(|(k, v)| (k, Value::list(v, span)))
.collect(),
span,
))
Ok(groups)
}

pub fn group_no_grouper(values: Vec<Value>, span: Span) -> Result<Value, ShellError> {
pub fn group_no_grouper(values: Vec<Value>) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();

for value in values.into_iter() {
Expand All @@ -185,13 +228,7 @@ pub fn group_no_grouper(values: Vec<Value>, span: Span) -> Result<Value, ShellEr
group.push(value);
}

Ok(Value::record(
groups
.into_iter()
.map(|(k, v)| (k, Value::list(v, span)))
.collect(),
span,
))
Ok(groups)
}

// TODO: refactor this, it's a bit of a mess
Expand All @@ -202,7 +239,7 @@ fn group_closure(
stack: &mut Stack,
engine_state: &EngineState,
call: &Call,
) -> Result<Value, ShellError> {
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let error_key = "error";
let mut keys: Vec<Result<String, ShellError>> = vec![];
let value_list = Value::list(values.to_vec(), span);
Expand Down Expand Up @@ -268,13 +305,35 @@ fn group_closure(
group.push(value);
}

Ok(Value::record(
Ok(groups)
}

fn groups_to_record(groups: IndexMap<String, Vec<Value>>, span: Span) -> Value {
Value::record(
groups
.into_iter()
.map(|(k, v)| (k, Value::list(v, span)))
.collect(),
span,
))
)
}

fn groups_to_table(groups: IndexMap<String, Vec<Value>>, span: Span) -> Value {
Value::list(
groups
.into_iter()
.map(|(group, items)| {
Value::record(
record! {
"group" => Value::string(group, span),
"items" => Value::list(items, span),
},
span,
)
})
.collect(),
span,
)
}

#[cfg(test)]
Expand Down
9 changes: 4 additions & 5 deletions crates/nu-std/std/testing.nu
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,10 @@ def create-test-record [] nothing -> record<before-each: string, after-each: str
valid-annotations
| get $x.annotation
}
| group-by annotation
| transpose key value
| update value {|x|
$x.value.function_name
| if $x.key in ["test", "test-skip"] {
| group-by --to-table annotation
| update items {|x|
$x.items.function_name
| if $x.group in ["test", "test-skip"] {
$in
} else {
get 0
Expand Down

0 comments on commit 3dfe1a4

Please sign in to comment.