Skip to content

Commit

Permalink
MDEV-27229: Estimation for filtered rows less precise ... #5
Browse files Browse the repository at this point in the history
Followup: remove this line from get_column_range_cardinality()

      set_if_bigger(res, col_stats->get_avg_frequency());

and make sure it is only used with the binary histograms.
For JSON histograms, it makes the estimates unnecessarily imprecise.
  • Loading branch information
spetrunia committed Jan 19, 2022
1 parent d3e511d commit db8f15b
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 16 deletions.
17 changes: 13 additions & 4 deletions mysql-test/main/statistics_json.result
Original file line number Diff line number Diff line change
Expand Up @@ -4644,7 +4644,7 @@ Warnings:
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
analyze select * from t1_json where a > 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 0.00 0.00 Using where
drop table ten;
UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
Expand Down Expand Up @@ -4752,7 +4752,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where
explain extended select * from t2 where city < 'Lagos';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
Warnings:
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos'
drop table t1_bin;
Expand Down Expand Up @@ -8164,7 +8164,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
analyze select * from t1 where a < 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.00 0.00 Using where
analyze select * from t1 where a > 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
Expand All @@ -8189,7 +8189,7 @@ test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
analyze select * from t2 where a < 1;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 8.33 0.10 Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
analyze select * from t2 where a =100;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
Expand Down Expand Up @@ -8237,6 +8237,15 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
analyze select COUNT(*) FROM t1 WHERE a >='bar';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
analyze select COUNT(*) FROM t1 WHERE a < 'bar';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
analyze select COUNT(*) FROM t1 WHERE a <='bar';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where
Expand Down
6 changes: 3 additions & 3 deletions mysql-test/main/statistics_json.test
Original file line number Diff line number Diff line change
Expand Up @@ -409,9 +409,9 @@ analyze select COUNT(*) FROM t1 WHERE a > 'bar';
analyze select COUNT(*) FROM t1 WHERE a >='bar';

# Can enable these after get_avg_frequency issue is resolved:
# analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
# analyze select COUNT(*) FROM t1 WHERE a <='aaa';
# analyze select COUNT(*) FROM t1 WHERE a < 'bar';
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
analyze select COUNT(*) FROM t1 WHERE a < 'bar';

analyze select COUNT(*) FROM t1 WHERE a <='bar';

Expand Down
2 changes: 1 addition & 1 deletion sql/opt_histogram_json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ std::string& Histogram_json_hb::get_end_value(int idx)
*/

double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)
key_range *max_endp, double avg_sel)
{
double min, max;

Expand Down
2 changes: 1 addition & 1 deletion sql/opt_histogram_json.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class Histogram_json_hb : public Histogram_base
double avg_selection,
double total_rows) override;
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
key_range *max_endp, double avg_sel) override;

void set_json_text(ulonglong sz, const char *json_text_arg,
size_t json_text_len)
Expand Down
5 changes: 4 additions & 1 deletion sql/opt_range.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3277,7 +3277,10 @@ double records_in_column_ranges(PARAM *param, uint idx,
break;
}
total_rows += rows;
}
}
if (total_rows == 0)
total_rows= MY_MIN(1, param->table->stat_records());

return total_rows;
}

Expand Down
13 changes: 9 additions & 4 deletions sql/sql_statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3910,10 +3910,13 @@ double get_column_range_cardinality(Field *field,
if (col_stats->min_max_values_are_provided())
{
Histogram_base *hist= col_stats->histogram;
double avg_frequency= col_stats->get_avg_frequency();
double sel;
if (hist && hist->is_usable(thd))
{
sel= hist->range_selectivity(field, min_endp, max_endp);
sel= hist->range_selectivity(field, min_endp, max_endp,
avg_frequency / col_non_nulls);
res= col_non_nulls * sel;
}
else
{
Expand All @@ -3938,9 +3941,9 @@ double get_column_range_cardinality(Field *field,
max_mp_pos= 1.0;

sel = (max_mp_pos - min_mp_pos);
res= col_non_nulls * sel;
set_if_bigger(res, avg_frequency);
}
res= col_non_nulls * sel;
set_if_bigger(res, col_stats->get_avg_frequency());
}
else
res= col_non_nulls;
Expand Down Expand Up @@ -4076,7 +4079,8 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,

double Histogram_binary::range_selectivity(Field *field,
key_range *min_endp,
key_range *max_endp)
key_range *max_endp,
double avg_sel)
{
double sel, min_mp_pos, max_mp_pos;
Column_statistics *col_stats= field->read_stats;
Expand Down Expand Up @@ -4105,6 +4109,7 @@ double Histogram_binary::range_selectivity(Field *field,
uint max= find_bucket(max_mp_pos, FALSE);
sel= bucket_sel * (max - min + 1);

set_if_bigger(sel, avg_sel);
return sel;
}

Expand Down
4 changes: 2 additions & 2 deletions sql/sql_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class Histogram_base
double avg_selectivity,
double total_rows)=0;
virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0;
key_range *max_endp, double avg_sel)=0;

/*
Legacy: return the size of the histogram on disk.
Expand Down Expand Up @@ -353,7 +353,7 @@ class Histogram_binary : public Histogram_base
}

double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
key_range *max_endp, double avg_sel) override;

/*
Estimate selectivity of "col=const" using a histogram
Expand Down

0 comments on commit db8f15b

Please sign in to comment.