Skip to content

Commit

Permalink
Add flag to turn off locality for shuffle deps
Browse files Browse the repository at this point in the history
  • Loading branch information
shivaram committed Jun 5, 2015
1 parent 6cfae98 commit e5d56bd
Showing 1 changed file with 4 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ class DAGScheduler(
private[scheduler] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
taskScheduler.setDAGScheduler(this)

// Flag to control if reduce tasks are assigned preferred locations
private val shuffleLocalityEnabled = sc.getConf.getBoolean("spark.shuffle.locality.enabled", true)
// Number of map, reduce tasks above which we do not assign preferred locations
// based on map output sizes. We limit the size of jobs for which assign preferred locations
// as sorting the locations by size becomes expensive.
Expand Down Expand Up @@ -1410,7 +1412,8 @@ class DAGScheduler(
case s: ShuffleDependency[_, _, _] =>
// For shuffle dependencies, pick the 5 locations with the largest map outputs as preferred
// locations
if (rdd.partitions.size < SHUFFLE_PREF_REDUCE_THRESHOLD &&
if (shuffleLocalityEnabled &&
rdd.partitions.size < SHUFFLE_PREF_REDUCE_THRESHOLD &&
s.rdd.partitions.size < SHUFFLE_PREF_MAP_THRESHOLD) {
// Get the preferred map output locations for this reducer
val topLocsForReducer = mapOutputTracker.getLocationsWithLargestOutputs(s.shuffleId,
Expand Down

0 comments on commit e5d56bd

Please sign in to comment.