Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: support using clause in join statement. #3372

Merged
merged 6 commits into from
Jun 12, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ast/dml.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ type Join struct {
Tp JoinType
// On represents join on condition.
On *OnCondition
// Using represents join using clause.
Using []*ColumnName
}

// Accept implements Node Accept interface.
Expand Down
39 changes: 39 additions & 0 deletions executor/join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,45 @@ func (s *testSuite) TestJoin(c *C) {

}

// TestUsing checks the result sets of joins written with a USING clause:
// inner, left and right joins, chained joins, a multi-column USING list,
// and USING inside parenthesized sub-joins.
func (s *testSuite) TestUsing(c *C) {
// Deferred cleanup: s.cleanEnv runs first, then the function returned by
// testleak.AfterTest.
defer func() {
s.cleanEnv(c)
testleak.AfterTest(c)()
}()
tk := testkit.NewTestKit(c, s.store)

// Fixture: t1(a, c) and t2(a, d) share only a=2; t3 holds a=1, which is in
// t1 but not in t2; t4 is left empty.
tk.MustExec("use test")
tk.MustExec("drop table if exists t1, t2, t3, t4")
tk.MustExec("create table t1 (a int, c int)")
tk.MustExec("create table t2 (a int, d int)")
tk.MustExec("create table t3 (a int)")
tk.MustExec("create table t4 (a int)")
tk.MustExec("insert t1 values (2, 4), (1, 3)")
tk.MustExec("insert t2 values (2, 5), (3, 6)")
tk.MustExec("insert t3 values (1)")

// Inner join: `select *` shows the USING column a once (a, c, d), while
// t1.a and t2.a can still be selected individually.
tk.MustQuery("select * from t1 join t2 using (a)").Check(testkit.Rows("2 4 5"))
tk.MustQuery("select t1.a, t2.a from t1 join t2 using (a)").Check(testkit.Rows("2 2"))

// Right join: `select *` lists t2's columns first (a, d, then c); the t2 row
// without a partner in t1 gets <nil> for t1's columns.
tk.MustQuery("select * from t1 right join t2 using (a) order by a").Check(testkit.Rows("2 5 4", "3 6 <nil>"))
tk.MustQuery("select t1.a, t2.a from t1 right join t2 using (a) order by t2.a").Check(testkit.Rows("2 2", "<nil> 3"))

// Left join: t1's columns come first (a, c, then d); the t1 row without a
// partner in t2 gets <nil> for t2's columns.
tk.MustQuery("select * from t1 left join t2 using (a) order by a").Check(testkit.Rows("1 3 <nil>", "2 4 5"))
tk.MustQuery("select t1.a, t2.a from t1 left join t2 using (a) order by t1.a").Check(testkit.Rows("1 <nil>", "2 2"))

// Chained joins: t1 join t2 yields only a=2, so the single t3 row (a=1) has
// no partner and every column coming from t1/t2 is <nil>. The qualified
// columns t1.a and t2.a of the inner USING join are still resolvable from
// the outer join.
tk.MustQuery("select * from t1 join t2 using (a) right join t3 using (a)").Check(testkit.Rows("1 <nil> <nil>"))
tk.MustQuery("select * from t1 join t2 using (a) right join t3 on (t2.a = t3.a)").Check(testkit.Rows("<nil> <nil> <nil> 1"))
tk.MustQuery("select t2.a from t1 join t2 using (a) right join t3 on (t1.a = t3.a)").Check(testkit.Rows("<nil>"))
tk.MustQuery("select t1.a, t2.a, t3.a from t1 join t2 using (a) right join t3 using (a)").Check(testkit.Rows("<nil> <nil> 1"))
tk.MustQuery("select t1.c, t2.d from t1 join t2 using (a) right join t3 using (a)").Check(testkit.Rows("<nil> <nil>"))

// Multi-column USING: after adding b (default 1) right after a in both
// tables, `using (b, a)` is expected to output a, b, c, d — the shared
// columns follow t1's column order, not the order written in the USING list.
tk.MustExec("alter table t1 add column b int default 1 after a")
tk.MustExec("alter table t2 add column b int default 1 after a")
tk.MustQuery("select * from t1 join t2 using (b, a)").Check(testkit.Rows("2 1 4 5"))

// USING inside both parenthesized sub-joins, combined with ON. No rows are
// compared here (t4 is empty); presumably MustExec only asserts that the
// statement runs without error — confirm against testkit.
tk.MustExec("select * from (t1 join t2 using (a)) join (t3 join t4 using (a)) on (t2.a = t4.a and t1.a = t3.a)")
}

func (s *testSuite) TestMultiJoin(c *C) {
defer func() {
s.cleanEnv(c)
Expand Down
9 changes: 9 additions & 0 deletions expression/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@ func (s *Schema) ColumnsIndices(cols []*Column) (ret []int) {

// MergeSchema will merge two schema into one schema.
func MergeSchema(lSchema, rSchema *Schema) *Schema {
if lSchema == nil && rSchema == nil {
return nil
}
if lSchema == nil {
return rSchema.Clone()
}
if rSchema == nil {
return lSchema.Clone()
}
tmpL := lSchema.Clone()
tmpR := rSchema.Clone()
ret := NewSchema(append(tmpL.Columns, tmpR.Columns...)...)
Expand Down
11 changes: 9 additions & 2 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ import (
/* A dummy token to force the priority of TableRef production in a join. */
%left tableRefPriority
%precedence lowerThanOn
%precedence on
%precedence on using
%right assignmentEq
%left oror or
%left xor
Expand Down Expand Up @@ -4437,12 +4437,19 @@ JoinTable:
on := &ast.OnCondition{Expr: $5.(ast.ExprNode)}
$$ = &ast.Join{Left: $1.(ast.ResultSetNode), Right: $3.(ast.ResultSetNode), Tp: ast.CrossJoin, On: on}
}
| TableRef CrossOpt TableRef "USING" '(' ColumnNameList ')'
{
$$ = &ast.Join{Left: $1.(ast.ResultSetNode), Right: $3.(ast.ResultSetNode), Tp: ast.CrossJoin, Using: $6.([]*ast.ColumnName)}
}
| TableRef JoinType OuterOpt "JOIN" TableRef "ON" Expression
{
on := &ast.OnCondition{Expr: $7.(ast.ExprNode)}
$$ = &ast.Join{Left: $1.(ast.ResultSetNode), Right: $5.(ast.ResultSetNode), Tp: $2.(ast.JoinType), On: on}
}
/* Support Using */
| TableRef JoinType OuterOpt "JOIN" TableRef "USING" '(' ColumnNameList ')'
{
$$ = &ast.Join{Left: $1.(ast.ResultSetNode), Right: $5.(ast.ResultSetNode), Tp: $2.(ast.JoinType), Using: $8.([]*ast.ColumnName)}
}

JoinType:
"LEFT"
Expand Down
3 changes: 3 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ func (s *testParserSuite) TestDMLStmt(c *C) {
{"select * from t1 join t2 left join t3 on t2.id = t3.id", true},
{"select * from t1 right join t2 on t1.id = t2.id left join t3 on t3.id = t2.id", true},
{"select * from t1 right join t2 on t1.id = t2.id left join t3", false},
{"select * from t1 join t2 left join t3 using (id)", true},
{"select * from t1 right join t2 using (id) left join t3 using (id)", true},
{"select * from t1 right join t2 using (id) left join t3", false},

// for admin
{"admin show ddl;", true},
Expand Down
11 changes: 11 additions & 0 deletions plan/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -999,5 +999,16 @@ func (er *expressionRewriter) toColumn(v *ast.ColumnName) {
return
}
}
if join, ok := er.p.(*LogicalJoin); ok && join.coalescedSchema != nil {
column, err := join.coalescedSchema.FindColumn(v)
if err != nil {
er.err = ErrAmbiguous.GenByArgs(v.Name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just errors.Trace(err)?
And it's impossible for coalesced schema to return an ambiguous error.

return
}
if column != nil {
er.ctxStack = append(er.ctxStack, column.Clone())
return
}
}
er.err = errors.Errorf("Unknown column %s %s %s.", v.Schema.L, v.Table.L, v.Name.L)
}
82 changes: 81 additions & 1 deletion plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package plan

import (
"fmt"
"sort"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
Expand Down Expand Up @@ -227,6 +228,15 @@ func (b *planBuilder) buildJoin(join *ast.Join) LogicalPlan {
addChild(joinPlan, rightPlan)
joinPlan.SetSchema(newSchema)

var lCoalesced, rCoalesced *expression.Schema
if left, ok := leftPlan.(*LogicalJoin); ok && left.coalescedSchema != nil {
lCoalesced = left.coalescedSchema
}
if right, ok := rightPlan.(*LogicalJoin); ok && right.coalescedSchema != nil {
rCoalesced = right.coalescedSchema
}
joinPlan.coalescedSchema = expression.MergeSchema(lCoalesced, rCoalesced)

if b.TableHints() != nil {
joinPlan.preferMergeJoin = b.TableHints().ifPreferMergeJoin(leftAlias, rightAlias)
if b.TableHints().ifPreferINLJ(leftAlias) {
Expand All @@ -240,7 +250,12 @@ func (b *planBuilder) buildJoin(join *ast.Join) LogicalPlan {
}
}

if join.On != nil {
if join.Using != nil {
if err := b.buildUsingClause(joinPlan, leftPlan, rightPlan, join); err != nil {
b.err = err
return nil
}
} else if join.On != nil {
onExpr, _, err := b.rewrite(join.On.Expr, joinPlan, nil, false)
if err != nil {
b.err = err
Expand All @@ -266,6 +281,71 @@ func (b *planBuilder) buildJoin(join *ast.Join) LogicalPlan {
return joinPlan
}

func (b *planBuilder) buildUsingClause(p *LogicalJoin, leftPlan, rightPlan LogicalPlan, join *ast.Join) error {
lsc := leftPlan.Schema().Clone()
rsc := rightPlan.Schema().Clone()

schemaCols := make([]*expression.Column, 0, len(lsc.Columns)+len(rsc.Columns)-len(join.Using))
coalescedCols := make([]*expression.Column, 0, len(join.Using))
conds := make([]*expression.ScalarFunction, 0, len(join.Using))

coalesced := make(map[string]bool, len(join.Using))
for _, col := range join.Using {
var (
err error
lc, rc *expression.Column
cond expression.Expression
)

if lc, err = lsc.FindColumn(col); err != nil {
return errors.Trace(err)
}
if rc, err = rsc.FindColumn(col); err != nil {
return errors.Trace(err)
}
coalesced[col.Name.L] = true
if lc == nil || rc == nil {
return ErrUnknownColumn.GenByArgs(col.Name, "from clause")
}

if cond, err = expression.NewFunction(b.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), lc, rc); err != nil {
return errors.Trace(err)
}
conds = append(conds, cond.(*expression.ScalarFunction))
if join.Tp == ast.RightJoin {
schemaCols = append(schemaCols, rc)
coalescedCols = append(coalescedCols, lc)
} else {
schemaCols = append(schemaCols, lc)
coalescedCols = append(coalescedCols, rc)
}
}

sort.Slice(schemaCols, func(i, j int) bool {
return schemaCols[i].Position < schemaCols[j].Position
})

if join.Tp == ast.RightJoin {
lsc, rsc = rsc, lsc
}
for _, col := range lsc.Columns {
if !coalesced[col.ColName.L] {
schemaCols = append(schemaCols, col)
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just schemaCols = append(schemaCols, lsc.Columns...) will do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to MySQL

Redundant column elimination and column ordering occurs according to standard SQL, producing this display order:

First, coalesced common columns of the two joined tables, in the order in which they occur in the first table

Second, columns unique to the first table, in order in which they occur in that table

Third, columns unique to the second table, in order in which they occur in that table

And in a right join, the first table is the right-hand-side table.

So I first find the columns that appear in both tables, sort them by their position in the source table, swap lsc and rsc if it is a right join, and finally append the remaining columns of lsc and rsc.

for _, col := range rsc.Columns {
if !coalesced[col.ColName.L] {
schemaCols = append(schemaCols, col)
}
}

p.SetSchema(expression.NewSchema(schemaCols...))
p.EqualConditions = append(conds, p.EqualConditions...)
p.coalescedSchema = expression.MergeSchema(p.coalescedSchema, expression.NewSchema(coalescedCols...))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the coalescedSchema empty, why do we need to merge it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

coalescedSchema is not empty in

select * from (t1 join t2 using (a)) join (t3 join t4 using (a)) on (t2.a = t4.a and t1.a = t3.a)

L231-L238 merge the sub join's coalescedSchema.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to copy sub join's coalescedSchema to outer join?
For example.

select * from (t1 join t2 using (a)) join t3 using (b)

The outer join's coalescedSchema should be b rather than a, b.

If t3 doesn't have column a.

select * from (t1 join t2 using (a)) join t3 using (a)

should return error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I see that we can access coalesced columns explicitly in MySQL:

select t2.a from (t1 join t2 using (a)) join t3 using (a);
select * from (t1 join t2 using (a) join t4 using (a)) join t3 on (t1.a = t2.a);

t2.a is coalesced in t1 join t2 using (a), but we can still access it in the outer join. So if we don't merge the coalescedSchema into the outer join, we must search every sub-join's coalescedSchema in toColumn.

Copy link
Member

@coocood coocood Jun 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bobotu
Then why not just search every sub-join's coalescedSchema if we can't find the column in the normal join schema?
I think this is simpler and clearer.

Copy link
Member

@coocood coocood Jun 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bobotu
OK, I think I misunderstood coalescedSchema: it holds the columns that will not be used.
The name is misleading.
For example, when a and b are coalesced into a, a should be called the coalesced column, not b.
The comment on it should also be more specific.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'll change its name and make the comment more specific.


return nil
}

func (b *planBuilder) buildSelection(p LogicalPlan, where ast.ExprNode, AggMapper map[*ast.AggregateFuncExpr]int) LogicalPlan {
b.optFlag = b.optFlag | flagPredicatePushDown
conditions := splitWhere(where)
Expand Down
3 changes: 3 additions & 0 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ type LogicalJoin struct {
// DefaultValues is only used for outer join, which stands for the default values when the outer table cannot find join partner
// instead of null padding.
DefaultValues []types.Datum

// coalescedSchema is used for select column when specified table name.
coalescedSchema *expression.Schema
}

func (p *LogicalJoin) columnSubstitute(schema *expression.Schema, exprs []expression.Expression) {
Expand Down