Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 1787 extended stats #2247

Merged
merged 32 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
8d4f2db
first version of extended stats along with its tests
Nov 4, 2023
86bdb8b
using IntermediateExtendStats instead of IntermediateStats with all t…
Nov 4, 2023
db91df9
Created struct for request and response
Nov 4, 2023
bd7d7e3
first test with extended_stats
Nov 4, 2023
86f3b56
kahan summation and tests with approximate equality
Nov 10, 2023
9efb1f7
version ready for merge
Nov 10, 2023
061b56b
Merge branch 'main' into issue_1787_extended_stats
Nov 10, 2023
3e83f4a
removed approx dependency
Nov 11, 2023
e923daa
refactor for using ExtendedStats only when needed
Nov 19, 2023
dfe46d5
interim version
Nov 20, 2023
8c7df7a
refined version with code formatted
Nov 20, 2023
2548b79
refactored a struct
Nov 23, 2023
1dd6182
cosmetic refactor
Nov 24, 2023
4c6c7bb
Merge branch 'main' into issue_1781_extended_stats_boxed_result
Nov 24, 2023
7763efb
Merge branch 'main' into issue_1787_extended_stats
Apr 20, 2024
8f1409c
fix after merge
Apr 20, 2024
21e5937
fix format
Apr 20, 2024
b8b409c
added extended_stat bench
May 1, 2024
5fdd48f
Merge branch 'main' into issue_1787_extended_stats
May 4, 2024
8ed81d8
Merge branch 'main' into issue_1787_extended_stats
May 9, 2024
bd0759c
merge and new benchmark for extended stats
May 11, 2024
47a1219
Merge branch 'main' into issue_1787_extended_stats
May 11, 2024
ac29ab7
split stat segment collectors
May 18, 2024
5b4aa9f
wrapped intermediate extended stat with a box to limit memory usage
May 18, 2024
ef47a27
Revert "wrapped intermediate extended stat with a box to limit memory…
May 18, 2024
8e4ce2c
some code reformat, commented kahan summation
May 18, 2024
f8c816a
refactor after review
May 19, 2024
9b495cd
refactor after code review
May 19, 2024
1a3413c
fix after incorrectly restoring kahan summation
May 19, 2024
c9dad19
modifications for code review + bug fix in merge_fruit
May 19, 2024
0e3eff5
refactor assert_nearly_equals macro
May 20, 2024
a3718c2
update after code review
Jun 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion benches/agg_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fn bench_agg(mut group: InputGroup<Index>) {
register!(group, average_f64);
register!(group, average_f64_u64);
register!(group, stats_f64);
register!(group, extendedstats_f64);
register!(group, percentiles_f64);
register!(group, terms_few);
register!(group, terms_many);
Expand Down Expand Up @@ -105,7 +106,12 @@ fn stats_f64(index: &Index) {
});
exec_term_with_agg(index, agg_req)
}

fn extendedstats_f64(index: &Index) {
let agg_req = json!({
"extendedstats_f64": { "extended_stats": { "field": "score_f64", } }
});
exec_term_with_agg(index, agg_req)
}
fn percentiles_f64(index: &Index) {
let agg_req = json!({
"mypercentiles": {
Expand Down
8 changes: 7 additions & 1 deletion src/aggregation/agg_req.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
};
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation,
AverageAggregation, CountAggregation, ExtendedStatsAggregation, MaxAggregation, MinAggregation,
PercentilesAggregationReq, StatsAggregation, SumAggregation, TopHitsAggregation,
};

Expand Down Expand Up @@ -146,6 +146,11 @@ pub enum AggregationVariants {
/// extracted values.
#[serde(rename = "stats")]
Stats(StatsAggregation),
/// Computes a collection of estended statistics (`min`, `max`, `sum`, `count`, `avg`,
/// `sum_of_squares`, `variance`, `variance_sampling`, `std_deviation`,
/// `std_deviation_sampling`) over the extracted values.
#[serde(rename = "extended_stats")]
ExtendedStats(ExtendedStatsAggregation),
/// Computes the sum of the extracted values.
#[serde(rename = "sum")]
Sum(SumAggregation),
Expand All @@ -170,6 +175,7 @@ impl AggregationVariants {
AggregationVariants::Max(max) => vec![max.field_name()],
AggregationVariants::Min(min) => vec![min.field_name()],
AggregationVariants::Stats(stats) => vec![stats.field_name()],
AggregationVariants::ExtendedStats(extended_stats) => vec![extended_stats.field_name()],
AggregationVariants::Sum(sum) => vec![sum.field_name()],
AggregationVariants::Percentiles(per) => vec![per.field_name()],
AggregationVariants::TopHits(top_hits) => top_hits.field_names(),
Expand Down
8 changes: 6 additions & 2 deletions src/aggregation/agg_req_with_accessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use super::bucket::{
DateHistogramAggregationReq, HistogramAggregation, RangeAggregation, TermsAggregation,
};
use super::metric::{
AverageAggregation, CountAggregation, MaxAggregation, MinAggregation, StatsAggregation,
SumAggregation,
AverageAggregation, CountAggregation, ExtendedStatsAggregation, MaxAggregation, MinAggregation,
StatsAggregation, SumAggregation,
};
use super::segment_agg_result::AggregationLimits;
use super::VecWithNames;
Expand Down Expand Up @@ -276,6 +276,10 @@ impl AggregationWithAccessor {
field: ref field_name,
..
})
| ExtendedStats(ExtendedStatsAggregation {
field: ref field_name,
..
})
| Sum(SumAggregation {
field: ref field_name,
..
Expand Down
7 changes: 6 additions & 1 deletion src/aggregation/agg_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};

use super::bucket::GetDocCount;
use super::metric::{PercentilesMetricResult, SingleMetricResult, Stats, TopHitsMetricResult};
use super::metric::{
ExtendedStats, PercentilesMetricResult, SingleMetricResult, Stats, TopHitsMetricResult,
};
use super::{AggregationError, Key};
use crate::TantivyError;

Expand Down Expand Up @@ -88,6 +90,8 @@ pub enum MetricResult {
Min(SingleMetricResult),
/// Stats metric result.
Stats(Stats),
/// ExtendedStats metric result.
ExtendedStats(Box<ExtendedStats>),
/// Sum metric result.
Sum(SingleMetricResult),
/// Percentiles metric result.
Expand All @@ -104,6 +108,7 @@ impl MetricResult {
MetricResult::Max(max) => Ok(max.value),
MetricResult::Min(min) => Ok(min.value),
MetricResult::Stats(stats) => stats.get_value(agg_property),
MetricResult::ExtendedStats(extended_stats) => extended_stats.get_value(agg_property),
MetricResult::Sum(sum) => Ok(sum.value),
MetricResult::Percentiles(_) => Err(TantivyError::AggregationError(
AggregationError::InvalidRequest("percentiles can't be used to order".to_string()),
Expand Down
18 changes: 16 additions & 2 deletions src/aggregation/intermediate_agg_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ use super::bucket::{
GetDocCount, Order, OrderTarget, RangeAggregation, TermsAggregation,
};
use super::metric::{
IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats,
IntermediateSum, PercentilesCollector, TopHitsTopNComputer,
IntermediateAverage, IntermediateCount, IntermediateExtendedStats, IntermediateMax,
IntermediateMin, IntermediateStats, IntermediateSum, PercentilesCollector, TopHitsTopNComputer,
};
use super::segment_agg_result::AggregationLimits;
use super::{format_date, AggregationError, Key, SerializedKey};
Expand Down Expand Up @@ -215,6 +215,9 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
Stats(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Stats(
IntermediateStats::default(),
)),
ExtendedStats(_) => IntermediateAggregationResult::Metric(
IntermediateMetricResult::ExtendedStats(IntermediateExtendedStats::default()),
),
Sum(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Sum(
IntermediateSum::default(),
)),
Expand Down Expand Up @@ -282,6 +285,8 @@ pub enum IntermediateMetricResult {
Min(IntermediateMin),
/// Intermediate stats result.
Stats(IntermediateStats),
/// Intermediate stats result.
ExtendedStats(IntermediateExtendedStats),
/// Intermediate sum result.
Sum(IntermediateSum),
/// Intermediate top_hits result
Expand All @@ -306,6 +311,9 @@ impl IntermediateMetricResult {
IntermediateMetricResult::Stats(intermediate_stats) => {
MetricResult::Stats(intermediate_stats.finalize())
}
IntermediateMetricResult::ExtendedStats(intermediate_stats) => {
MetricResult::ExtendedStats(intermediate_stats.finalize())
}
IntermediateMetricResult::Sum(intermediate_sum) => {
MetricResult::Sum(intermediate_sum.finalize().into())
}
Expand Down Expand Up @@ -346,6 +354,12 @@ impl IntermediateMetricResult {
) => {
stats_left.merge_fruits(stats_right);
}
(
IntermediateMetricResult::ExtendedStats(extended_stats_left),
IntermediateMetricResult::ExtendedStats(extended_stats_right),
) => {
extended_stats_left.merge_fruits(extended_stats_right);
}
(IntermediateMetricResult::Sum(sum_left), IntermediateMetricResult::Sum(sum_right)) => {
sum_left.merge_fruits(sum_right);
}
Expand Down
Loading
Loading