From 1aea13fe62b1a062cb0bf4aa1a30ee1d0dd525bb Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 28 Jun 2022 12:28:23 +0300 Subject: [PATCH 1/4] Load neighbours with the fss hash, except duplicated neighbours. Rewrite the test for the look-a-like functionality. The current tests contain correlated columns, and the queries have more nodes and description features. Add aqo_k as a custom parameter to define a small number of features for prediction. Its default value is 3. Queries can contain a larger number of features than 3, especially generic queries. Also add the aqo.predict_with_few_neighbors parameter as a switch that makes it possible to predict with fewer than 3 neighbours. This is done so as not to change the previous logic of the code --- aqo.c | 26 +- aqo.h | 1 + cardinality_estimation.c | 2 +- expected/look_a_like.out | 517 ++++++++++++++++++++++++++++----------- machine_learning.c | 5 +- sql/look_a_like.sql | 110 ++++++--- storage.c | 78 ++++-- storage.h | 2 +- 8 files changed, 543 insertions(+), 198 deletions(-) diff --git a/aqo.c b/aqo.c index a80d0a0f..63e46a86 100644 --- a/aqo.c +++ b/aqo.c @@ -34,6 +34,7 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; /* * Show special info in EXPLAIN mode. 
@@ -71,7 +72,7 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* @@ -293,6 +294,29 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.k_neighbors_threshold", + "Set the threshold of number of neighbors for predicting.", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Make prediction with less neighbors than we should have.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 0a373147..9418646c 100644 --- a/aqo.h +++ b/aqo.h @@ -217,6 +217,7 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index aca17f1e..f93e0905 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -93,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) result = -1; else { diff --git a/expected/look_a_like.out b/expected/look_a_like.out index ecd73fb4..b0d3047c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,14 +2,17 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET 
aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. @@ -25,207 +28,425 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; - result ------------------------------------------------- - Seq Scan on public.a (actual rows=100 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +-------------------------------------------------------- + Nested Loop (actual rows=10000 loops=1) AQO not used - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(8 rows) +(16 rows) SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN 
A(x=5) from a neighbour class, created by the - result --------------------------------------------------------- - Nested Loop (actual rows=10000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=100 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.a (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(17 rows) --- query, executed above. SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. 
- result --------------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) AQO not used - Output: a.x, sum(a.x) - Group Key: a.x - -> Nested Loop (actual rows=10000 loops=1) - AQO: rows=10000, error=0% - Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=0 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 = 5)) + Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN - JOINS: 1 -(20 rows) + JOINS: 0 +(17 rows) --- cardinality 100 in the first Seq Scan on a +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; - result ------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) + AQO: rows=50000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + 
Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO: rows=500, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 500 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=70000 loops=1) AQO not used - Output: x, sum(x) - Group Key: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=700 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=700 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(17 rows) --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------- - HashAggregate (actual rows=10 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=40000 loops=1) AQO not used - Output: x - Group Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO not used - Output: x - Filter: (a.x < 10) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(17 rows) --- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------------- - Merge Join (actual rows=100000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x, b.y - Merge Cond: (a.x = b.y) - -> Sort (actual rows=1000 loops=1) - Output: a.x - Sort Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: a.x - Filter: (a.x < 10) - -> Sort (actual rows=99901 loops=1) - 
Output: b.y - Sort Key: b.y - -> Seq Scan on public.b (actual rows=1000 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used - Output: b.y + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(17 rows) --- cardinality 100 in Seq Scan on a and Seq Scan on b +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - HashAggregate (actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=40000 loops=1) + AQO: rows=50000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO: rows=500, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 600 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) AQO not used - Output: a.x - Group Key: a.x - -> Nested Loop (actual rows=0 loops=1) + 
Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) AQO not used - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 - -> Seq Scan on public.a (never executed) - AQO: rows=1000 - Output: a.x - Filter: (a.x < 10) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(19 rows) +(18 rows) --- --- TODO: --- Not executed case. What could we do better here? --- +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; - result ----------------------------------------------------------- - Hash Join (actual rows=0 loops=1) +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) + AQO: rows=2, error=0% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result 
+--------------------------------------------------------------- + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) AQO not used - Output: a.x, b.y - Hash Cond: (a.x = b.y) - -> Seq Scan on public.a (actual rows=1 loops=1) - AQO: rows=1000, error=100% - Output: a.x - Filter: (a.x < 10) - -> Hash (actual rows=0 loops=1) - Output: b.y - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO: rows=1, error=100% - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate 
(actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 (18 rows) -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan 
on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.c b/machine_learning.c index 7138db38..d4f5cbee 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -74,7 +74,7 @@ fs_distance(double *a, double *b, int len) res += (a[i] - b[i]) * (a[i] - b[i]); } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } @@ -148,6 +148,9 @@ OkNNr_predict(OkNNrdata *data, double *features) Assert(data != NULL); + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index be71feff..5a348cd5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -2,15 +2,20 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -26,55 +31,96 @@ $$ LANGUAGE PLPGSQL; -- no one predicted rows. 
we use knowledge cardinalities of the query -- in the next queries with the same fss_hash + SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the --- query, executed above. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; --- cardinality 1000 in Seq Scan on a +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and 
str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- --- TODO: --- Not executed case. What could we do better here? --- SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index fcbe5569..9b92088e 100644 --- a/storage.c +++ b/storage.c @@ -90,6 +90,8 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); +static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); @@ -1409,25 +1411,73 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) return result; } +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res 
= sqrt(res); + return res; +} + +bool +neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +{ + int i; + for (i=0; icols == temp_data->cols); Assert(data->matrix); - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) + if (features != NULL) { - int i; + int old_rows = data->rows; + int k = old_rows; - for (i = 0; i < data->rows; i++) + if (data->cols > 0) { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + int i; + + for (i = 0; i < data->rows; i++) + { + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + } + else + { + if (data->rows > 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } } @@ -1503,7 +1553,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) */ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch) + bool wideSearch, double *features) { DataEntry *entry; bool found; @@ -1538,7 +1588,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, } temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, features); } else /* Iterate across all elements of the table. XXX: Maybe slow. 
*/ @@ -1576,7 +1626,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, else list_free(tmp_oids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, NULL); found = true; } } diff --git a/storage.h b/storage.h index 94891c5d..0e7745e1 100644 --- a/storage.h +++ b/storage.h @@ -101,7 +101,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); + bool wideSearch, double *features); extern void aqo_data_flush(void); extern void aqo_data_load(void); From 09f4d126b11dd89f6b12ec47267880769b1e6ffe Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 17:01:58 +0300 Subject: [PATCH 2/4] Add disabled nestloop and mergejoin parameters to stabilize look-a-like test, besides add two additional cases where look-a-like should not be applied. --- aqo.c | 2 +- expected/look_a_like.out | 400 ++++++++++++++++++++++++--------------- sql/look_a_like.sql | 56 ++++-- storage.c | 2 +- 4 files changed, 290 insertions(+), 170 deletions(-) diff --git a/aqo.c b/aqo.c index 63e46a86..b6a9a9ca 100644 --- a/aqo.c +++ b/aqo.c @@ -314,7 +314,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL); prev_shmem_startup_hook = shmem_startup_hook; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index b0d3047c..5910c8ac 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping @@ -29,7 +31,7 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B 
WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result -------------------------------------------------------- Nested Loop (actual rows=10000 loops=1) @@ -52,49 +54,51 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ - Hash Join (actual rows=50000 loops=1) + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(17 rows) +(19 rows) SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - Hash Join 
(actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1 loops=1) - AQO: rows=1000, error=100% + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=0 loops=1) + -> Hash (actual rows=500 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=0 loops=1) + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 = 5)) - Rows Removed by Filter: 1000 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 @@ -104,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=50000 loops=1) @@ -129,7 +133,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=70000 loops=1) @@ -154,7 +158,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT 
x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=40000 loops=1) @@ -179,7 +183,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=50000 loops=1) @@ -205,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ Hash Join (actual rows=40000 loops=1) @@ -230,216 +234,315 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual 
rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) - AQO: rows=2, error=0% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Sort (actual rows=200000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + 
Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Sort (actual rows=100000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 
loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Sort (actual rows=100000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> 
Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query 
Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Sort (actual rows=140000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Sort Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 > 2) - Rows Removed by Filter: 300 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> 
Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Sort (actual rows=70000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + Sort Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Left Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) - AQO: rows=700, error=0% + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + -> Hash (never executed) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (never executed) + AQO: rows=1000 Output: b.y1, b.y2, b.y3 - 
Filter: (b.y1 > 2) - Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) SELECT 1 FROM aqo_reset(); ?column? 
@@ -449,4 +552,5 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a348cd5..5dc85b7b 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,8 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; - +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; @@ -35,92 +36,107 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' 
SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' 
and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM 
(A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 9b92088e..32446d6c 100644 --- a/storage.c +++ b/storage.c @@ -110,7 +110,7 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - return load_aqo_data(fs, fss, data, reloids, false); + return load_aqo_data(fs, fss, data, reloids, false, NULL); } bool From 3c7f20f5e3627c6bdd8c97879489fe3ca654ee19 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 18:24:04 +0300 Subject: [PATCH 3/4] Drop table c at the end of the look-a-like test. --- expected/look_a_like.out | 1 + sql/look_a_like.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 5910c8ac..8b2e315c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -552,5 +552,6 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5dc85b7b..5a41c24a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -138,5 +138,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; From 4c7390623b090c7b1dddca43ee7e074d8fda0b43 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 24 Jan 2023 20:39:04 +0300 Subject: [PATCH 4/4] Rename the GUC that sets the minimum number of neighbours for predicting, and add more understandable descriptions of the GUCs. 
--- aqo.c | 6 +++--- expected/look_a_like.out | 2 +- sql/look_a_like.sql | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index b6a9a9ca..6d125666 100644 --- a/aqo.c +++ b/aqo.c @@ -294,8 +294,8 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.k_neighbors_threshold", - "Set the threshold of number of neighbors for predicting.", + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set how many neighbors the cardinality prediction will be calculated", NULL, &aqo_k, 3, @@ -307,7 +307,7 @@ _PG_init(void) NULL); DefineCustomBoolVariable("aqo.predict_with_few_neighbors", - "Make prediction with less neighbors than we should have.", + "Establish the ability to make predictions with fewer neighbors than were found.", NULL, &aqo_predict_with_few_neighbors, true, diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 8b2e315c..faa9b0fd 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a41c24a..9705bf1a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off';