Skip to content

Commit

Permalink
[SPARK-25200][YARN] Allow specifying HADOOP_CONF_DIR as spark property
Browse files Browse the repository at this point in the history
We use the InProcessLauncher internally [1] to launch applications on different YARN
clusters. The clusters might need different configuration files, which
we can't keep apart if the InProcessLauncher discovers config folders
from the same HADOOP_CONF_DIR environment variable.

This change allows us to specify different config directories using
Spark config.

See upstream PR [2] and ticket [3].

[1] https://pl.ntr/1UK
[2] apache#22289
[3] https://issues.apache.org/jira/browse/SPARK-25200

Co-authored-by: Adam Balogh <abalogh@palantir.com>
Co-authored-by: Robert Kruszewski <robertk@palantir.com>
Co-authored-by: Josh Casale <jcasale@palantir.com>
Co-authored-by: Will Raschkowski <wraschkowski@palantir.com>
  • Loading branch information
4 people committed Feb 23, 2021
1 parent b5ff90f commit 4d2d123
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S

// Validate that a Hadoop/YARN configuration source is available when submitting to YARN.
// The config directory may come from the environment (HADOOP_CONF_DIR / YARN_CONF_DIR)
// or, per SPARK-25200, from the "spark.yarn.conf.dir" Spark property so that an
// InProcessLauncher can target different clusters without sharing env variables.
if (master.startsWith("yarn")) {
  val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR")
  val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir")
  // Tests deliberately run without any Hadoop conf dir, so skip the check when testing.
  if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) {
    // NOTE: the scraped diff contained a stray '+' inside the string literal
    // ("...environment, +"), which would have leaked a literal '+' into the
    // user-facing error message; it is removed here.
    error(s"When running with master '$master' " +
      "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment, " +
      "or spark.yarn.conf.dir must be set in the spark properties.")
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ List<String> buildClassPath(String appClassPath) throws IOException {

addToClassPath(cp, getenv("HADOOP_CONF_DIR"));
addToClassPath(cp, getenv("YARN_CONF_DIR"));
addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir"));
addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH"));
return new ArrayList<>(cp);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -710,11 +710,11 @@ private[spark] class Client(
// SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs so that user's
// environments do not interfere with tests. This allows a special env variable during
// tests so that custom conf dirs can be used by unit tests.
val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++
(if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil)
val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").flatMap(sys.env.get) ++
sparkConf.getOption("spark.yarn.conf.dir")

confDirs.foreach { envKey =>
sys.env.get(envKey).foreach { path =>
confDirs.foreach {
path => {
val dir = new File(path)
if (dir.isDirectory()) {
val files = dir.listFiles()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
val finalState = runSpark(false,
mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass),
appArgs = Seq("key=value", "spark.test.key=testvalue", result.getAbsolutePath()),
extraConf = Map("spark.hadoop.key" -> "value"),
extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> customConf.getAbsolutePath()))
extraConf = Map(
"spark.hadoop.key" -> "value",
"spark.yarn.conf.dir" -> customConf.getAbsolutePath))
checkResult(finalState, result)
}

Expand Down

0 comments on commit 4d2d123

Please sign in to comment.