Skip to content

Commit

Permalink
Make FileOutputCommitter the default (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
ajayborra authored and tovbinm committed Aug 25, 2018
1 parent cf812f7 commit e515e32
Show file tree
Hide file tree
Showing 7 changed files with 3 additions and 149 deletions.
2 changes: 0 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,8 @@ configure(allProjs) {
ignoreFailures = true
include '**/*.java', '**/*.scala'
exclude '**/org/apache/spark/ml/SparkDefaultParamsReadWrite.scala',
'**/com/salesforce/op/utils/io/DirectMapreduceOutputCommitter.scala',
'**/com/salesforce/op/test/TestSparkContext.scala',
'**/com/salesforce/op/test/TempDirectoryTest.scala',
'**/com/salesforce/op/utils/io/DirectOutputCommitter.scala',
'**/com/salesforce/op/stages/impl/tuning/OpCrossValidation.scala',
'**/com/salesforce/op/stages/impl/tuning/OpTrainValidationSplit.scala',
'**/com/salesforce/op/test/*.java',
Expand Down
3 changes: 1 addition & 2 deletions gradle/spark.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ task sparkSubmit(type: Exec, dependsOn: copyLog4jToSpark) {
"spark.hadoop.avro.output.codec=deflate",
"spark.hadoop.avro.mapred.deflate.level=6",
"spark.hadoop.validateOutputSpecs=false",
"spark.hadoop.mapred.output.committer.class=com.salesforce.op.utils.io.DirectOutputCommitter",
"spark.hadoop.spark.sql.sources.outputCommitterClass=com.salesforce.op.utils.io.DirectMapreduceOutputCommitter"
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2"
].collect { ["--conf", it] }.flatten()

environment SPARK_HOME: sparkHome
Expand Down
3 changes: 1 addition & 2 deletions helloworld/gradle/spark.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ task sparkSubmit(type: Exec, dependsOn: copyLog4jToSpark) {
"spark.hadoop.avro.output.codec=deflate",
"spark.hadoop.avro.mapred.deflate.level=6",
"spark.hadoop.validateOutputSpecs=false",
"spark.hadoop.mapred.output.committer.class=com.salesforce.op.utils.io.DirectOutputCommitter",
"spark.hadoop.spark.sql.sources.outputCommitterClass=com.salesforce.op.utils.io.DirectMapreduceOutputCommitter"
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2"
].collect { ["--conf", it] }.flatten()

environment SPARK_HOME: sparkHome
Expand Down
3 changes: 1 addition & 2 deletions templates/simple/spark.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ task sparkSubmit(dependsOn: copyLog4jToSparkNoInstall) {
"spark.hadoop.avro.output.codec=deflate",
"spark.hadoop.avro.mapred.deflate.level=6",
"spark.hadoop.validateOutputSpecs=false",
"spark.hadoop.mapred.output.committer.class=com.salesforce.op.utils.io.DirectOutputCommitter",
"spark.hadoop.spark.sql.sources.outputCommitterClass=com.salesforce.op.utils.io.DirectMapreduceOutputCommitter"
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2"
].collect { ["--conf", it] }.flatten()

def hadoopConfDir = System.env.HOME + "/.fake_hadoop_conf"
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ package com.salesforce.op.utils.io.avro

import java.net.URI

import com.salesforce.op.utils.io.DirectOutputCommitter
import com.salesforce.op.utils.spark.RichRDD._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
Expand Down Expand Up @@ -156,7 +155,6 @@ object AvroInOut {

private def createJobConfFromContext(schema: String)(implicit sc: SparkSession) = {
val jobConf = new JobConf(sc.sparkContext.hadoopConfiguration)
jobConf.setOutputCommitter(classOf[DirectOutputCommitter])
AvroJob.setOutputSchema(jobConf, new Schema.Parser().parse(schema))
jobConf
}
Expand Down

0 comments on commit e515e32

Please sign in to comment.