Skip to content

Commit

Permalink
Add an analysis rule to convert aggregate function to the new version.
Browse files Browse the repository at this point in the history
  • Loading branch information
yhuai committed Jul 13, 2015
1 parent 5c00f3f commit b7720ba
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.aggregate2.{AggregateExpression2, AggregateFunction2}
import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
Expand Down Expand Up @@ -483,11 +482,7 @@ class Analyzer(
q transformExpressions {
case u @ UnresolvedFunction(name, children) =>
withPosition(u) {
registry.lookupFunction(name, children) match {
case agg2: AggregateFunction2 =>
AggregateExpression2(agg2, aggregate2.Complete, false)
case other => other
}
registry.lookupFunction(name, children)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ object FunctionRegistry {

// aggregate functions
expression[Average]("avg"),
expression[aggregate2.Average]("avg2"),
expression[Count]("count"),
expression[First]("first"),
expression[Last]("last"),
Expand Down
3 changes: 3 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import java.beans.Introspector
import java.util.Properties
import java.util.concurrent.atomic.AtomicReference

import org.apache.spark.sql.execution.aggregate2.ConvertAggregateFunction

import scala.collection.JavaConversions._
import scala.collection.immutable
import scala.language.implicitConversions
Expand Down Expand Up @@ -148,6 +150,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
override val extendedResolutionRules =
ExtractPythonUDFs ::
sources.PreInsertCastAndRename ::
ConvertAggregateFunction(self) ::
Nil

override val extendedCheckRules = Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
* limitations under the License.
*/

package org.apache.spark.sql.execution
package org.apache.spark.sql.execution.aggregate2

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.errors._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate2._
import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, AllTuples, UnspecifiedDistribution, Distribution}
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, UnspecifiedDistribution}
import org.apache.spark.sql.execution.{SparkPlan, UnaryNode}
import org.apache.spark.sql.types.NullType

case class Aggregate2Sort(
Expand Down Expand Up @@ -71,7 +72,7 @@ case class Aggregate2Sort(
case PartialMerge | Final => func
}
bufferOffset = aggregateExpressions(i).mode match {
case Partial | PartialMerge => bufferOffset + func.bufferValueDataTypes.length
case Partial | PartialMerge => bufferOffset + func.bufferSchema.length
case Final | Complete => bufferOffset + 1
}
i += 1
Expand All @@ -88,7 +89,7 @@ case class Aggregate2Sort(
var i = 0
var size = 0
while (i < aggregateFunctions.length) {
size += aggregateFunctions(i).bufferValueDataTypes.length
size += aggregateFunctions(i).bufferSchema.length
i += 1
}
if (preShuffle) {
Expand Down Expand Up @@ -132,7 +133,7 @@ case class Aggregate2Sort(

lazy val updateProjection = {
val bufferSchema = aggregateFunctions.flatMap {
case ae: AlgebraicAggregate => ae.bufferSchema
case ae: AlgebraicAggregate => ae.bufferAttributes
}
val updateExpressions = aggregateFunctions.flatMap {
case ae: AlgebraicAggregate => ae.updateExpressions
Expand All @@ -145,7 +146,7 @@ case class Aggregate2Sort(
val mergeProjection = {
val bufferSchemata =
offsetAttributes ++ aggregateFunctions.flatMap {
case ae: AlgebraicAggregate => ae.bufferSchema
case ae: AlgebraicAggregate => ae.bufferAttributes
} ++ offsetAttributes ++ aggregateFunctions.flatMap {
case ae: AlgebraicAggregate => ae.rightBufferSchema
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.aggregate2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.expressions.{Average => Average1}
import org.apache.spark.sql.catalyst.expressions.aggregate2.{Average => Average2, AggregateExpression2, Complete}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

case class ConvertAggregateFunction(context: SQLContext) extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case p: LogicalPlan if !p.childrenResolved => p

case p if context.conf.useSqlAggregate2 => p.transformExpressionsUp {
case Average1(child) => AggregateExpression2(Average2(child), Complete, false)
}
}
}

0 comments on commit b7720ba

Please sign in to comment.