Skip to content
Permalink
Browse files

planner: fix wrong selectivity for inner selection in index join (#10633)
  • Loading branch information...
eurekaka authored and lamxTyler committed Jun 12, 2019
1 parent 980f5bb commit fbf58fc9b5b4592477b21fc3be48b4885263b90d
@@ -156,9 +156,9 @@ Projection_10 0.00 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test.d
├─TableReader_41 0.00 root data:Selection_40
│ └─Selection_40 0.00 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592), not(isnull(test.dt.dic))
│ └─TableScan_39 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo
└─IndexLookUp_18 3.33 root
└─IndexLookUp_18 0.00 root
├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [eq(test.rr.aid, test.dt.aid) eq(test.rr.dic, test.dt.dic)], keep order:false, stats:pseudo
└─Selection_17 3.33 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─Selection_17 0.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─TableScan_16 10.00 cop table:rr, keep order:false, stats:pseudo
explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr;
id count task operator info
@@ -132,9 +132,9 @@ Projection_13 424.00 root test.gad.id, test.dd.id, test.gad.aid, test.gad.cm, te
├─TableReader_29 424.00 root data:Selection_28
│ └─Selection_28 424.00 cop eq(test.gad.bm, 0), eq(test.gad.pt, "android"), gt(test.gad.t, 1478143908), not(isnull(test.gad.ip))
│ └─TableScan_27 1999.00 cop table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_23 455.80 root
└─IndexLookUp_23 0.23 root
├─IndexScan_20 1.00 cop table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.gad.aid)], keep order:false
└─Selection_22 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
└─Selection_22 0.23 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
└─TableScan_21 1.00 cop table:dd, keep order:false
explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
id count task operator info
@@ -144,9 +144,9 @@ Projection_10 170.34 root test.gad.id, test.sdk.id, test.gad.aid, test.gad.cm, t
├─TableReader_23 170.34 root data:Selection_22
│ └─Selection_22 170.34 cop eq(test.gad.bm, 0), eq(test.gad.dit, "mac"), eq(test.gad.pt, "ios"), gt(test.gad.t, 1477971479), not(isnull(test.gad.dic))
│ └─TableScan_21 1999.00 cop table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_17 509.04 root
└─IndexLookUp_17 0.25 root
├─IndexScan_14 1.00 cop table:sdk, index:aid, dic, range: decided by [eq(test.sdk.aid, test.gad.aid)], keep order:false
└─Selection_16 509.04 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479), not(isnull(test.sdk.mac)), not(isnull(test.sdk.t))
└─Selection_16 0.25 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479), not(isnull(test.sdk.mac)), not(isnull(test.sdk.t))
└─TableScan_15 1.00 cop table:dd, keep order:false
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
id count task operator info
@@ -164,9 +164,9 @@ Projection_10 428.32 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test
├─TableReader_41 428.32 root data:Selection_40
│ └─Selection_40 428.32 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592), not(isnull(test.dt.dic))
│ └─TableScan_39 2000.00 cop table:dt, range:[0,+inf], keep order:false
└─IndexLookUp_18 970.00 root
└─IndexLookUp_18 0.48 root
├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [eq(test.rr.aid, test.dt.aid) eq(test.rr.dic, test.dt.dic)], keep order:false
└─Selection_17 970.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─Selection_17 0.48 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592)
└─TableScan_16 1.00 cop table:rr, keep order:false
explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr;
id count task operator info
@@ -45,10 +45,10 @@ id count task operator info
IndexJoin_12 4166.67 root left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1
├─TableReader_24 3333.33 root data:TableScan_23
│ └─TableScan_23 3333.33 cop table:t1, range:(1,+inf], keep order:false, stats:pseudo
└─IndexLookUp_11 0.00 root
├─Selection_10 0.00 cop not(isnull(test.t2.c1))
└─IndexLookUp_11 9.99 root
├─Selection_10 9.99 cop not(isnull(test.t2.c1))
│ └─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [eq(test.t2.c1, test.t1.c2)], keep order:false, stats:pseudo
└─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo
└─TableScan_9 9.99 cop table:t2, keep order:false, stats:pseudo
explain update t1 set t1.c2 = 2 where t1.c1 = 1;
id count task operator info
Point_Get_1 1.00 root table:t1, handle:1
@@ -72,10 +72,10 @@ ANALYZE TABLE sgc1, sgc2;
EXPLAIN SELECT /*+ TIDB_INLJ(sgc1, sgc2) */ * from sgc1 join sgc2 on sgc1.a=sgc2.a;
id count task operator info
IndexJoin_17 5.00 root inner join, inner:IndexLookUp_16, outer key:test.sgc2.a, inner key:test.sgc1.a
├─IndexLookUp_16 0.00 root
│ ├─Selection_15 0.00 cop not(isnull(test.sgc1.a))
├─IndexLookUp_16 5.00 root
│ ├─Selection_15 5.00 cop not(isnull(test.sgc1.a))
│ │ └─IndexScan_13 5.00 cop table:sgc1, index:a, range: decided by [eq(test.sgc1.a, test.sgc2.a)], keep order:false
│ └─TableScan_14 0.00 cop table:sgc1, keep order:false, stats:pseudo
│ └─TableScan_14 5.00 cop table:sgc1, keep order:false, stats:pseudo
└─TableReader_20 1.00 root data:Selection_19
└─Selection_19 1.00 cop not(isnull(test.sgc2.a))
└─TableScan_18 1.00 cop table:sgc2, range:[-inf,+inf], keep order:false
@@ -86,10 +86,10 @@ Projection_6 5.00 root test.sgc1.j1, test.sgc1.j2, test.sgc1.a, test.sgc1.b, tes
├─TableReader_39 1.00 root data:Selection_38
│ └─Selection_38 1.00 cop not(isnull(test.sgc2.a))
│ └─TableScan_37 1.00 cop table:sgc2, range:[-inf,+inf], keep order:false
└─IndexLookUp_12 0.00 root
├─Selection_11 0.00 cop not(isnull(test.sgc1.a))
└─IndexLookUp_12 5.00 root
├─Selection_11 5.00 cop not(isnull(test.sgc1.a))
│ └─IndexScan_9 5.00 cop table:sgc1, index:a, range: decided by [eq(test.sgc1.a, test.sgc2.a)], keep order:false
└─TableScan_10 0.00 cop table:sgc1, keep order:false, stats:pseudo
└─TableScan_10 5.00 cop table:sgc1, keep order:false, stats:pseudo
DROP TABLE IF EXISTS sgc3;
CREATE TABLE sgc3 (
j JSON,
@@ -7,10 +7,10 @@ analyze table t1, t2;
explain select /*+ TIDB_INLJ(t1, t2) */ * from t1 join t2 on t1.a=t2.a;
id count task operator info
IndexJoin_16 5.00 root inner join, inner:IndexLookUp_15, outer key:test.t2.a, inner key:test.t1.a
├─IndexLookUp_15 0.00 root
│ ├─Selection_14 0.00 cop not(isnull(test.t1.a))
├─IndexLookUp_15 5.00 root
│ ├─Selection_14 5.00 cop not(isnull(test.t1.a))
│ │ └─IndexScan_12 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false
│ └─TableScan_13 0.00 cop table:t1, keep order:false, stats:pseudo
│ └─TableScan_13 5.00 cop table:t1, keep order:false, stats:pseudo
└─TableReader_19 1.00 root data:Selection_18
└─Selection_18 1.00 cop not(isnull(test.t2.a))
└─TableScan_17 1.00 cop table:t2, range:[-inf,+inf], keep order:false
@@ -21,10 +21,10 @@ Projection_6 5.00 root test.t1.a, test.t1.b, test.t2.a, test.t2.b
├─TableReader_30 1.00 root data:Selection_29
│ └─Selection_29 1.00 cop not(isnull(test.t2.a))
│ └─TableScan_28 1.00 cop table:t2, range:[-inf,+inf], keep order:false
└─IndexLookUp_11 0.00 root
├─Selection_10 0.00 cop not(isnull(test.t1.a))
└─IndexLookUp_11 5.00 root
├─Selection_10 5.00 cop not(isnull(test.t1.a))
│ └─IndexScan_8 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false
└─TableScan_9 0.00 cop table:t1, keep order:false, stats:pseudo
└─TableScan_9 5.00 cop table:t1, keep order:false, stats:pseudo
drop table if exists t1, t2;
create table t1(a int not null, b int not null);
create table t2(a int not null, b int not null, key a(a));
@@ -177,12 +177,12 @@ Projection_13 0.00 root test.te.expect_time
│ │ │ └─IndexScan_70 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo
│ │ └─Selection_73 0.00 cop eq(test.tr.brand_identy, 32314), eq(test.tr.domain_type, 2)
│ │ └─TableScan_71 0.00 cop table:tr, keep order:false, stats:pseudo
│ └─IndexLookUp_35 250.00 root
│ └─IndexLookUp_35 0.25 root
│ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [eq(test.te.trade_id, test.tr.id)], keep order:false, stats:pseudo
│ └─Selection_34 250.00 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000)
│ └─Selection_34 0.25 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000)
│ └─TableScan_33 10.00 cop table:te, keep order:false, stats:pseudo
└─IndexReader_91 0.00 root index:Selection_90
└─Selection_90 0.00 cop not(isnull(test.p.relate_id))
└─IndexReader_91 9.99 root index:Selection_90
└─Selection_90 9.99 cop not(isnull(test.p.relate_id))
└─IndexScan_89 10.00 cop table:p, index:relate_id, range: decided by [eq(test.p.relate_id, test.tr.id)], keep order:false, stats:pseudo
desc select 1 as a from dual order by a limit 1;
id count task operator info
@@ -260,9 +260,9 @@ Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd
│ └─TableReader_52 36870000.00 root data:Selection_51
│ └─Selection_51 36870000.00 cop lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000)
│ └─TableScan_50 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─IndexLookUp_28 162945114.27 root
└─IndexLookUp_28 0.54 root
├─IndexScan_25 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false
└─Selection_27 162945114.27 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─Selection_27 0.54 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
└─TableScan_26 1.00 cop table:lineitem, keep order:false
/*
Q4 Order Priority Checking Query
@@ -301,9 +301,9 @@ Sort_10 1.00 root tpch.orders.o_orderpriority:asc
├─TableReader_33 2925937.50 root data:Selection_32
│ └─Selection_32 2925937.50 cop ge(tpch.orders.o_orderdate, 1995-01-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1995-04-01)
│ └─TableScan_31 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─IndexLookUp_20 240004648.80 root
└─IndexLookUp_20 0.80 root
├─IndexScan_17 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false
└─Selection_19 240004648.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate)
└─Selection_19 0.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate)
└─TableScan_18 1.00 cop table:lineitem, keep order:false
/*
Q5 Local Supplier Volume Query
@@ -672,9 +672,9 @@ Projection_17 20.00 root tpch.customer.c_custkey, tpch.customer.c_name, 9_col_0,
│ └─TableReader_48 3017307.69 root data:Selection_47
│ └─Selection_47 3017307.69 cop ge(tpch.orders.o_orderdate, 1993-08-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1993-11-01)
│ └─TableScan_46 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─IndexLookUp_31 73916005.00 root
└─IndexLookUp_31 0.25 root
├─IndexScan_28 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false
└─Selection_30 73916005.00 cop eq(tpch.lineitem.l_returnflag, "R")
└─Selection_30 0.25 cop eq(tpch.lineitem.l_returnflag, "R")
└─TableScan_29 1.00 cop table:lineitem, keep order:false
/*
Q11 Important Stock Identification Query
@@ -1241,9 +1241,9 @@ Projection_25 1.00 root tpch.supplier.s_name, 17_col_0
│ └─IndexLookUp_55 1.00 root
│ ├─IndexScan_53 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.l2.l_orderkey, tpch.l1.l_orderkey)], keep order:false
│ └─TableScan_54 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_39 240004648.80 root
└─IndexLookUp_39 0.80 root
├─IndexScan_36 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.l3.l_orderkey, tpch.l1.l_orderkey)], keep order:false
└─Selection_38 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate)
└─Selection_38 0.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate)
└─TableScan_37 1.00 cop table:lineitem, keep order:false
/*
Q22 Global Sales Opportunity Query
@@ -67,11 +67,11 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) {
"│ └─TableReader_17 9990.00 root data:Selection_16",
"│ └─Selection_16 9990.00 cop not(isnull(test.t1.a))",
"│ └─TableScan_15 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
"└─UnionScan_12 0.00 root not(isnull(test.t2.a))",
" └─IndexLookUp_11 0.00 root ",
" ├─Selection_10 0.00 cop not(isnull(test.t2.a))",
"└─UnionScan_12 9.99 root not(isnull(test.t2.a))",
" └─IndexLookUp_11 9.99 root ",
" ├─Selection_10 9.99 cop not(isnull(test.t2.a))",
" │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo",
" └─TableScan_9 9.99 cop table:t2, keep order:false, stats:pseudo",
))
tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
"2 2 2 2 2",
@@ -85,9 +85,9 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) {
" │ └─TableReader_16 9990.00 root data:Selection_15",
" │ └─Selection_15 9990.00 cop not(isnull(test.t1.a))",
" │ └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
" └─UnionScan_11 0.00 root not(isnull(test.t2.a))",
" └─IndexReader_10 0.00 root index:Selection_9",
" └─Selection_9 0.00 cop not(isnull(test.t2.a))",
" └─UnionScan_11 9.99 root not(isnull(test.t2.a))",
" └─IndexReader_10 9.99 root index:Selection_9",
" └─Selection_9 9.99 cop not(isnull(test.t2.a))",
" └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
))
tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows(
@@ -114,9 +114,9 @@ func (s *testSuite1) TestBatchIndexJoinUnionScan(c *C) {
" │ └─TableReader_22 9990.00 root data:Selection_21",
" │ └─Selection_21 9990.00 cop not(isnull(test.t1.a))",
" │ └─TableScan_20 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
" └─UnionScan_26 0.00 root not(isnull(test.t2.a))",
" └─IndexReader_25 0.00 root index:Selection_24",
" └─Selection_24 0.00 cop not(isnull(test.t2.a))",
" └─UnionScan_26 9.99 root not(isnull(test.t2.a))",
" └─IndexReader_25 9.99 root index:Selection_24",
" └─Selection_24 9.99 cop not(isnull(test.t2.a))",
" └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
))
tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.id").Check(testkit.Rows(
@@ -908,8 +908,8 @@ func (s *testAnalyzeSuite) TestIssue9562(c *C) {
"├─TableReader_12 9980.01 root data:Selection_11",
"│ └─Selection_11 9980.01 cop not(isnull(test.t1.a)), not(isnull(test.t1.c))",
"│ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo",
"└─IndexReader_8 0.00 root index:Selection_7",
" └─Selection_7 0.00 cop not(isnull(test.t2.a)), not(isnull(test.t2.c))",
"└─IndexReader_8 9.98 root index:Selection_7",
" └─Selection_7 9.98 cop not(isnull(test.t2.a)), not(isnull(test.t2.c))",
" └─IndexScan_6 10.00 cop table:t2, index:a, b, c, range: decided by [eq(test.t2.a, test.t1.a) gt(test.t2.b, minus(test.t1.b, 1)) lt(test.t2.b, plus(test.t1.b, 1))], keep order:false, stats:pseudo",
))

@@ -588,8 +588,23 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn

is.initSchema(ds.id, idx, cop.tablePlan != nil)
indexConds, tblConds := splitIndexFilterConditions(filterConds, idx.Columns, ds.tableInfo)
path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64}
is.addPushedDownSelection(cop, ds, math.MaxFloat64, path)
path := &accessPath{
indexFilters: indexConds,
tableFilters: tblConds,
countAfterAccess: rowCount,
}
// Assume equal conditions used by index join and other conditions are independent.
if len(indexConds) > 0 {
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds)
if err != nil {
logutil.Logger(context.Background()).Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
}
path.countAfterIndex = rowCount * selectivity
}
selectivity := ds.stats.RowCount / ds.tableStats.RowCount
finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount)
is.addPushedDownSelection(cop, ds, path, finalStats)
t := finishCopTask(ds.ctx, cop)
reader := t.plan()
return p.constructInnerUnionScan(us, reader)

0 comments on commit fbf58fc

Please sign in to comment.
You can’t perform that action at this time.