diff --git a/src/execute/position.rs b/src/execute/position.rs index 7202d8da..53cdd4f7 100644 --- a/src/execute/position.rs +++ b/src/execute/position.rs @@ -58,6 +58,7 @@ pub fn apply_position_adjustments( #[cfg(test)] mod tests { use super::*; + use crate::plot::facet::{Facet, FacetLayout}; use crate::plot::layer::{Geom, Position}; use crate::plot::{AestheticValue, Mappings, ParameterValue, Scale, ScaleType}; use polars::prelude::*; @@ -322,4 +323,182 @@ mod tests { assert!((-0.3..=0.3).contains(&v)); } } + + #[test] + fn test_stack_resets_per_facet_panel() { + // Stacking should compute independently within each facet panel. + // Without this, bars in the second facet panel stack on top of + // cumulative values from the first panel (see issue #244). + // + // Two facet panels (F1, F2) each with the same x="A" and two + // fill groups (X, Y). Stacking within each panel should start from 0. + let df = df! { + "__ggsql_aes_pos1__" => ["A", "A", "A", "A"], + "__ggsql_aes_pos2__" => [10.0, 20.0, 30.0, 40.0], + "__ggsql_aes_pos2end__" => [0.0, 0.0, 0.0, 0.0], + "__ggsql_aes_fill__" => ["X", "Y", "X", "Y"], + "__ggsql_aes_facet1__" => ["F1", "F1", "F2", "F2"], + } + .unwrap(); + + let mut layer = crate::plot::Layer::new(Geom::bar()); + layer.mappings = { + let mut m = Mappings::new(); + m.insert( + "pos1", + AestheticValue::standard_column("__ggsql_aes_pos1__"), + ); + m.insert( + "pos2", + AestheticValue::standard_column("__ggsql_aes_pos2__"), + ); + m.insert( + "pos2end", + AestheticValue::standard_column("__ggsql_aes_pos2end__"), + ); + m.insert( + "fill", + AestheticValue::standard_column("__ggsql_aes_fill__"), + ); + m.insert( + "facet1", + AestheticValue::standard_column("__ggsql_aes_facet1__"), + ); + m + }; + layer.partition_by = vec![ + "__ggsql_aes_fill__".to_string(), + "__ggsql_aes_facet1__".to_string(), + ]; + layer.position = Position::stack(); + layer.data_key = Some("__ggsql_layer_0__".to_string()); + + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + spec.facet = Some(Facet::new(FacetLayout::Wrap { + variables: vec!["facet_var".to_string()], + })); + let mut data_map = HashMap::new(); + data_map.insert("__ggsql_layer_0__".to_string(), df); + + let mut spec_with_layer = spec; + spec_with_layer.layers.push(layer); + + apply_position_adjustments(&mut spec_with_layer, &mut data_map).unwrap(); + + let result_df = data_map.get("__ggsql_layer_0__").unwrap(); + + // Sort by facet then fill so we can assert in predictable order + let result_df = result_df + .clone() + .lazy() + .sort_by_exprs( + [col("__ggsql_aes_facet1__"), col("__ggsql_aes_fill__")], + SortMultipleOptions::default(), + ) + .collect() + .unwrap(); + + let pos2 = result_df + .column("__ggsql_aes_pos2__") + .unwrap() + .f64() + .unwrap(); + let pos2end = result_df + .column("__ggsql_aes_pos2end__") + .unwrap() + .f64() + .unwrap(); + + let pos2_vals: Vec = pos2.into_iter().flatten().collect(); + let pos2end_vals: Vec = pos2end.into_iter().flatten().collect(); + + // Expected (sorted by facet, fill): + // F1/X: pos2end=0, pos2=10 (first in panel, starts at 0) + // F1/Y: pos2end=10, pos2=30 (stacks on X) + // F2/X: pos2end=0, pos2=30 (first in panel, should reset to 0) + // F2/Y: pos2end=30, pos2=70 (stacks on X) + assert_eq!( + pos2end_vals[2], 0.0, + "F2 panel first bar should start at 0, not carry over from F1. pos2end={:?}, pos2={:?}", + pos2end_vals, pos2_vals + ); + } + + #[test] + fn test_dodge_ignores_facet_columns_in_group_count() { + // Dodge should compute n_groups per facet panel, not globally. + // With fill=["X","Y"] and facet=["F1","F2"], dodge should see + // 2 groups (X, Y) not 4 (X-F1, X-F2, Y-F1, Y-F2). + // + // With 2 groups and default width 0.9: + // adjusted_width = 0.9 / 2 = 0.45 + // offsets: -0.225 (group X), +0.225 (group Y) + // + // If facet columns incorrectly inflate n_groups to 4: + // adjusted_width = 0.9 / 4 = 0.225 + // offsets would be different (spread across 4 positions) + let df = df! { + "__ggsql_aes_pos1__" => ["A", "A", "A", "A"], + "__ggsql_aes_pos2__" => [10.0, 20.0, 30.0, 40.0], + "__ggsql_aes_pos2end__" => [0.0, 0.0, 0.0, 0.0], + "__ggsql_aes_fill__" => ["X", "Y", "X", "Y"], + "__ggsql_aes_facet1__" => ["F1", "F1", "F2", "F2"], + } + .unwrap(); + + let mut layer = crate::plot::Layer::new(Geom::bar()); + layer.mappings = { + let mut m = Mappings::new(); + m.insert( + "pos1", + AestheticValue::standard_column("__ggsql_aes_pos1__"), + ); + m.insert( + "pos2", + AestheticValue::standard_column("__ggsql_aes_pos2__"), + ); + m.insert( + "pos2end", + AestheticValue::standard_column("__ggsql_aes_pos2end__"), + ); + m.insert( + "fill", + AestheticValue::standard_column("__ggsql_aes_fill__"), + ); + m.insert( + "facet1", + AestheticValue::standard_column("__ggsql_aes_facet1__"), + ); + m + }; + layer.partition_by = vec![ + "__ggsql_aes_fill__".to_string(), + "__ggsql_aes_facet1__".to_string(), + ]; + layer.position = Position::dodge(); + layer.data_key = Some("__ggsql_layer_0__".to_string()); + + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + spec.facet = Some(Facet::new(FacetLayout::Wrap { + variables: vec!["facet_var".to_string()], + })); + let mut data_map = HashMap::new(); + data_map.insert("__ggsql_layer_0__".to_string(), df); + + spec.layers.push(layer); + + apply_position_adjustments(&mut spec, &mut data_map).unwrap(); + + // With 2 groups (X, Y), adjusted_width should be 0.45 + let adjusted = spec.layers[0].adjusted_width.unwrap(); + assert!( + (adjusted - 0.45).abs() < 0.001, + "adjusted_width should be 0.45 (2 groups), got {} (facet columns inflated group count)", + adjusted + ); + } } diff --git a/src/plot/layer/position/dodge.rs b/src/plot/layer/position/dodge.rs index ec63ba72..09e9dfe9 100644 --- a/src/plot/layer/position/dodge.rs +++ b/src/plot/layer/position/dodge.rs @@ -6,7 +6,10 @@ //! - If only pos2 is discrete → dodge vertically (pos2offset) //! - If both are discrete → 2D grid dodge (both offsets, arranged in a grid) -use super::{compute_dodge_offsets, is_continuous_scale, Layer, PositionTrait, PositionType}; +use super::{ + compute_dodge_offsets, is_continuous_scale, non_facet_partition_cols, Layer, PositionTrait, + PositionType, +}; use crate::plot::types::{DefaultParamValue, ParamConstraint, ParamDefinition, ParameterValue}; use crate::{naming, DataFrame, GgsqlError, Plot, Result}; use polars::prelude::*; @@ -159,8 +162,10 @@ fn apply_dodge_with_width( return Ok((df, None)); } - // Compute group indices - let group_info = match compute_group_indices(&df, &layer.partition_by)? { + // Compute group indices, excluding facet columns so group count + // reflects within-panel groups (not cross-panel composites) + let group_cols = non_facet_partition_cols(&layer.partition_by, spec); + let group_info = match compute_group_indices(&df, &group_cols)? { Some(info) => info, None => return Ok((df, None)), }; diff --git a/src/plot/layer/position/jitter.rs b/src/plot/layer/position/jitter.rs index 6dcc9c80..852b05ae 100644 --- a/src/plot/layer/position/jitter.rs +++ b/src/plot/layer/position/jitter.rs @@ -15,8 +15,8 @@ //! - `normal`: normal/Gaussian distribution with ~95% of points within the width use super::{ - compute_dodge_offsets, compute_group_indices, is_continuous_scale, Layer, PositionTrait, - PositionType, + compute_dodge_offsets, compute_group_indices, is_continuous_scale, non_facet_partition_cols, + Layer, PositionTrait, PositionType, }; use crate::plot::types::{DefaultParamValue, ParamConstraint, ParamDefinition, ParameterValue}; use crate::{naming, DataFrame, GgsqlError, Plot, Result}; @@ -491,9 +491,11 @@ fn apply_jitter(df: DataFrame, layer: &Layer, spec: &Plot) -> Result let mut rng = rand::thread_rng(); let n_rows = df.height(); - // Compute group info for dodge-first behavior + // Compute group info for dodge-first behavior, excluding facet columns + // so group count reflects within-panel groups + let group_cols = non_facet_partition_cols(&layer.partition_by, spec); let group_info = if dodge { - compute_group_indices(&df, &layer.partition_by)? + compute_group_indices(&df, &group_cols)? } else { None }; diff --git a/src/plot/layer/position/mod.rs b/src/plot/layer/position/mod.rs index af11965e..a865e575 100644 --- a/src/plot/layer/position/mod.rs +++ b/src/plot/layer/position/mod.rs @@ -104,6 +104,32 @@ pub fn compute_dodge_offsets( } } +/// Filter facet columns out of partition_by for position adjustments that +/// compute group indices (dodge, jitter). +/// +/// Facet columns in partition_by inflate the group count — e.g., 2 fill groups +/// across 2 facet panels would be seen as 4 composite groups instead of 2. +/// Position adjustments should operate per-panel, so facet columns must be excluded. +pub fn non_facet_partition_cols(partition_by: &[String], spec: &Plot) -> Vec { + let facet_cols: std::collections::HashSet = spec + .facet + .as_ref() + .map(|f| { + f.layout + .internal_facet_names() + .into_iter() + .map(|aes| crate::naming::aesthetic_column(&aes)) + .collect() + }) + .unwrap_or_default(); + + partition_by + .iter() + .filter(|col| !facet_cols.contains(*col)) + .cloned() + .collect() +} + // Re-export position implementations pub use dodge::{compute_group_indices, Dodge, GroupIndices}; pub use identity::Identity; diff --git a/src/plot/layer/position/stack.rs b/src/plot/layer/position/stack.rs index f2081e12..2cef1e0e 100644 --- a/src/plot/layer/position/stack.rs +++ b/src/plot/layer/position/stack.rs @@ -266,6 +266,17 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re // 2. stack_end_col = lag(stack_col, 1, 0) - the bar bottom/start (previous stack top) // The cumsum naturally stacks across the grouping column values + // Build the partition columns for .over(): group column + facet columns. + // Facet columns must be included so stacking resets per facet panel, + // matching ggplot2 where position adjustments are computed per-panel. + let mut over_cols: Vec = vec![col(&group_col)]; + if let Some(ref facet) = spec.facet { + for aes in facet.layout.internal_facet_names() { + let facet_col = naming::aesthetic_column(&aes); + over_cols.push(col(&facet_col)); + } + } + // Treat NA heights as 0 for stacking // Compute cumulative sums (shared by all modes) let lf = lf @@ -273,7 +284,7 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re .with_column( col(&stack_col) .cum_sum(false) - .over([col(&group_col)]) + .over(&over_cols) .alias("__cumsum__"), ) .with_column( @@ -281,7 +292,7 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re .cum_sum(false) .shift(lit(1)) .fill_null(lit(0.0)) - .over([col(&group_col)]) + .over(&over_cols) .alias("__cumsum_lag__"), ); @@ -293,7 +304,7 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re vec!["__cumsum__", "__cumsum_lag__"], ), StackMode::Fill(target) => { - let total = col(&stack_col).sum().over([col(&group_col)]); + let total = col(&stack_col).sum().over(&over_cols); ( (col("__cumsum__") / total.clone() * lit(target)).alias(&stack_col), (col("__cumsum_lag__") / total * lit(target)).alias(&stack_end_col), @@ -301,7 +312,7 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re ) } StackMode::Center => { - let half_total = col(&stack_col).sum().over([col(&group_col)]) / lit(2.0); + let half_total = col(&stack_col).sum().over(&over_cols) / lit(2.0); ( (col("__cumsum__") - half_total.clone()).alias(&stack_col), (col("__cumsum_lag__") - half_total).alias(&stack_end_col),