forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 11
/
Expression.scala
240 lines (209 loc) · 9.11 KB
/
Expression.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, Code, CodeGenContext}
import org.apache.spark.sql.catalyst.trees
import org.apache.spark.sql.catalyst.trees.TreeNode
import org.apache.spark.sql.types._
/**
 * Base class of the nodes that make up an expression tree.
 *
 * Concrete subclasses must also be a [[Product]] (see the self-type below); `semanticEquals`
 * depends on that to iterate over the elements of an expression.
 */
abstract class Expression extends TreeNode[Expression] {
  self: Product =>

  /**
   * Tells whether this expression is a candidate for static evaluation, i.e. whether it can be
   * computed before the query is executed.
   *
   * Suitability for constant folding is decided by the following conditions:
   *  - A [[Coalesce]] is foldable if all of its children are foldable
   *  - A [[BinaryExpression]] is foldable if both its left and right children are foldable
   *  - A [[Not]], [[IsNull]], or [[IsNotNull]] is foldable if its child is foldable
   *  - A [[Literal]] is foldable
   *  - A [[Cast]] or [[UnaryMinus]] is foldable if its child is foldable
   */
  def foldable: Boolean = false

  /**
   * Returns true when this expression always returns the same result for fixed input values.
   */
  // TODO: Need to define explicit input values vs implicit input values.
  def deterministic: Boolean = true

  def nullable: Boolean

  /** By default, the attributes gathered from the `references` of every child. */
  def references: AttributeSet =
    AttributeSet(children.flatMap(child => child.references.iterator))

  /** Evaluates this expression against the given input Row and returns the result. */
  def eval(input: Row = null): Any

  /**
   * Produces a [[GeneratedExpressionCode]] holding Java source code that computes the result of
   * this expression for an input row.
   *
   * Fresh names for the null, primitive and object terms are requested from `ctx` (in that
   * order) and handed to `genCode`, whose result becomes the `code` of the returned value.
   *
   * @param ctx a [[CodeGenContext]]
   * @return [[GeneratedExpressionCode]]
   */
  def gen(ctx: CodeGenContext): GeneratedExpressionCode = {
    // Arguments are evaluated left to right, so the fresh names are drawn in a fixed order.
    val generated = GeneratedExpressionCode(
      "",
      ctx.freshName("nullTerm"),
      ctx.freshName("primitiveTerm"),
      ctx.freshName("objectTerm"))
    generated.code = genCode(ctx, generated)
    generated
  }

  /**
   * Returns Java source code for this expression.
   *
   * This default implementation appends the expression to `ctx.references` and emits Java that
   * looks it up in the `expressions` array by its position, calls `eval(i)` on it, records
   * whether the result is null, and otherwise casts the result to the boxed type of `dataType`.
   *
   * @param ctx a [[CodeGenContext]]
   * @param ev an [[GeneratedExpressionCode]] with unique terms.
   * @return Java source code
   */
  def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): Code = {
    val fallback: Expression = this
    ctx.references += fallback
    // Position of this expression inside `ctx.references`; it was appended just above.
    val slot = ctx.references.size - 1
    // The template lines are kept flush-left on purpose: their whitespace is part of the
    // emitted source text.
    s"""
/* expression: ${this} */
Object ${ev.objectTerm} = expressions[${slot}].eval(i);
boolean ${ev.nullTerm} = ${ev.objectTerm} == null;
${ctx.primitiveType(fallback.dataType)} ${ev.primitiveTerm} =
${ctx.defaultValue(fallback.dataType)};
if (!${ev.nullTerm}) ${ev.primitiveTerm} =
(${ctx.boxedType(fallback.dataType)})${ev.objectTerm};
"""
  }

  /**
   * `true` once this expression and all of its children have been resolved to a specific schema
   * and the input data type check has passed; `false` while any unresolved placeholder remains
   * or the data types do not match.
   * Expression implementations should override this when resolving them involves more than the
   * resolution of their children plus type checking.
   */
  lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess

  /**
   * The [[DataType]] of the value produced by evaluating this expression. Querying the dataType
   * of an unresolved expression (i.e., when `resolved` == false) is invalid.
   */
  def dataType: DataType

  /**
   * Returns true if every child of this expression has been resolved to a specific schema, and
   * false if any of them still contains unresolved placeholders.
   */
  def childrenResolved: Boolean = !children.exists(child => !child.resolved)

  /**
   * Renders this expression as a string without developer centric debugging information such
   * as the expression id.
   */
  def prettyString: String = {
    val toPretty: PartialFunction[Expression, Expression] = {
      case attr: AttributeReference => PrettyAttribute(attr.name)
      case unresolved: UnresolvedAttribute => PrettyAttribute(unresolved.name)
    }
    transform(toPretty).toString
  }

  /**
   * Returns true when two expressions will always compute the same result, even if they differ
   * cosmetically (i.e. capitalization of names in attributes may be different).
   *
   * Two expressions qualify when they are of the same class and their product elements match
   * pairwise: nested expressions are compared with `semanticEquals`, anything else with `==`.
   */
  def semanticEquals(other: Expression): Boolean = {
    def sameElement(pair: (Any, Any)): Boolean = pair match {
      case (mine: Expression, theirs: Expression) => mine.semanticEquals(theirs)
      case (mine, theirs) => mine == theirs
    }

    if (this.getClass != other.getClass) {
      false
    } else {
      val ownElements = this.productIterator.toSeq
      val otherElements = other.asInstanceOf[Product].productIterator.toSeq
      ownElements.length == otherElements.length &&
        ownElements.zip(otherElements).forall(sameElement)
    }
  }

  /**
   * Validates the input data types: yields `TypeCheckResult.success` when they are valid, or a
   * `TypeCheckResult` carrying an error message when they are not.
   * Note: it's not valid to call this method until `childrenResolved == true`
   * TODO: we should remove the default implementation and implement it for all
   * expressions with proper error message.
   */
  def checkInputDataTypes(): TypeCheckResult = TypeCheckResult.TypeCheckSuccess
}
/**
 * Base class for expressions built from a `left` and a `right` operand.
 */
abstract class BinaryExpression extends Expression with trees.BinaryNode[Expression] {
  self: Product =>

  /**
   * Operator text that the default `toString` places between the two operands. The default
   * implementation raises an error, so subclasses must override either this or `toString`.
   */
  def symbol: String = sys.error("BinaryExpressions must override either toString or symbol")

  /** Foldable only when both operands are foldable. */
  override def foldable: Boolean = left.foldable && right.foldable

  /** Nullable as soon as one of the operands is nullable. */
  override def nullable: Boolean = left.nullable || right.nullable

  override def toString: String = s"(${left} ${symbol} ${right})"

  /**
   * Short hand for generating binary evaluation code, which depends on two sub-evaluations of
   * the same type. If either of the sub-expressions is null, the result of this computation
   * is assumed to be null.
   *
   * The emitted Java runs the left operand's code first and only runs the right operand's code
   * when the left result is not null.
   *
   * @param ctx the [[CodeGenContext]] used to generate the operands and to look up Java types
   * @param ev the terms under which the result of this expression is declared
   * @param f a function from two primitive term names to a tree that evaluates them.
   * @return Java source code
   */
  def evaluate(ctx: CodeGenContext,
      ev: GeneratedExpressionCode,
      f: (String, String) => String): String = {
    // TODO: Right now some timestamp tests fail if we enforce this...
    if (left.dataType != right.dataType) {
      // log.warn(s"${left.dataType} != ${right.dataType}")
    }

    val lhs = left.gen(ctx)
    val rhs = right.gen(ctx)
    val combined = f(lhs.primitiveTerm, rhs.primitiveTerm)
    val javaType = ctx.primitiveType(dataType)
    val initialValue = ctx.defaultValue(dataType)
    // The template lines are kept flush-left on purpose: their whitespace is part of the
    // emitted source text.
    s"""
${lhs.code}
boolean ${ev.nullTerm} = ${lhs.nullTerm};
${javaType} ${ev.primitiveTerm} = ${initialValue};
if (!${ev.nullTerm}) {
${rhs.code}
if(!${rhs.nullTerm}) {
${ev.primitiveTerm} = (${javaType})(${combined});
} else {
${ev.nullTerm} = true;
}
}
"""
  }
}
/**
 * Base class for expressions that mix in `trees.LeafNode`. Subclasses must also be a
 * [[Product]], as required by the self-type.
 * NOTE(review): presumably these are expressions without children - confirm against `LeafNode`.
 */
abstract class LeafExpression extends Expression with trees.LeafNode[Expression] {
self: Product =>
}
/**
 * Base class for expressions that operate on a single `child` operand.
 */
abstract class UnaryExpression extends Expression with trees.UnaryNode[Expression] {
  self: Product =>

  /**
   * Generates code that derives the result of this expression from its child: the child's code
   * runs first, the result is null whenever the child's result is null, and otherwise the
   * primitive result is set to whatever `f` produces for the child's primitive term.
   *
   * @param ctx the [[CodeGenContext]] used to generate the child and to look up Java types
   * @param ev the terms under which the result of this expression is declared
   * @param f a function from the child's primitive term name to the Java expression to assign
   * @return Java source code
   */
  def castOrNull(ctx: CodeGenContext,
      ev: GeneratedExpressionCode,
      f: String => String): String = {
    val childGen = child.gen(ctx)
    // The template lines are kept flush-left on purpose: their whitespace is part of the
    // emitted source text.
    val nullSafeAssignment = s"""
boolean ${ev.nullTerm} = ${childGen.nullTerm};
${ctx.primitiveType(dataType)} ${ev.primitiveTerm} = ${ctx.defaultValue(dataType)};
if (!${ev.nullTerm}) {
${ev.primitiveTerm} = ${f(childGen.primitiveTerm)};
}
"""
    childGen.code + nullSafeAssignment
  }
}
// TODO: Semantically we probably do not need GroupExpression.
// All we need is something that holds the Seq[Expression], and it is ONLY used to make
// expression transformations work correctly. It will probably be removed, since it is
// not really an expression.
/**
 * Holds a sequence of expressions as the children of a single node.
 *
 * It can neither be evaluated nor asked for its data type: `eval` and `dataType` both throw an
 * `UnsupportedOperationException`.
 */
case class GroupExpression(children: Seq[Expression]) extends Expression {
  self: Product =>

  override def foldable: Boolean = false

  override def nullable: Boolean = false

  override def dataType: DataType = {
    throw new UnsupportedOperationException
  }

  override def eval(input: Row): Any = {
    throw new UnsupportedOperationException
  }
}
/**
 * Mixed into expressions that require a specific `DataType` as input, so that the analyzer can
 * perform the proper type conversions.
 */
trait ExpectsInputTypes {
  self: Expression =>

  /** The data types this expression requires of its children. */
  def expectedChildTypes: Seq[DataType]

  // `HiveTypeCoercion` always does type casting for an `ExpectsInputTypes`, so a type mismatch
  // error is never reported here; it is reported for the underlying `Cast`s instead.
  override def checkInputDataTypes(): TypeCheckResult = TypeCheckResult.TypeCheckSuccess
}