Skip to content

Commit

Permalink
added documentation on repartitions
Browse files Browse the repository at this point in the history
  • Loading branch information
brkyvz committed Apr 29, 2015
1 parent 5807e35 commit b1e76dd
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,12 @@ case class Distinct(child: LogicalPlan) extends UnaryNode {
override def output: Seq[Attribute] = child.output
}

/**
* Returns a new RDD that has exactly `numPartitions` partitions. Differs from
* [[RepartitionByExpression]] in that this operator is used directly by DataFrame when the user
* asks for `coalesce` or `repartition`. [[RepartitionByExpression]] is used when the consumer
* of the output requires some specific ordering or distribution of the data.
*/
case class Repartition(numPartitions: Int, shuffle: Boolean, child: LogicalPlan)
extends UnaryNode {
override def output: Seq[Attribute] = child.output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,11 @@ abstract class RedistributeData extends UnaryNode {
/**
 * A [[RedistributeData]] node that pairs a sequence of [[SortOrder]]s with a child plan.
 *
 * NOTE(review): the name suggests the sort is applied within each partition of `child`
 * rather than globally -- confirm against the planner before relying on this.
 *
 * @param sortExpressions the sort orders carried by this node
 * @param child the child logical plan
 */
case class SortPartitions(
    sortExpressions: Seq[SortOrder],
    child: LogicalPlan) extends RedistributeData

/**
 * Logical operator that repartitions data using the given [[Expression]]s. The number of
 * partitions is not a parameter of this node; that information is received during execution.
 * Used when the consumer of the query result expects a specific ordering or distribution.
 * For RDD-like `coalesce` and `repartition`, use [[Repartition]] instead.
 *
 * @param partitionExpressions the expressions used to repartition the data
 * @param child the child logical plan
 */
case class RepartitionByExpression(
    partitionExpressions: Seq[Expression],
    child: LogicalPlan) extends RedistributeData

0 comments on commit b1e76dd

Please sign in to comment.