From c3d8352ca1a59ce5cc37840919c0e799f5150efa Mon Sep 17 00:00:00 2001 From: Norbert Schultz Date: Wed, 20 Jan 2021 09:39:13 +0900 Subject: [PATCH] [SPARK-34115][CORE] Check SPARK_TESTING as lazy val to avoid slowdown ### What changes were proposed in this pull request? Check SPARK_TESTING through the Java `System.getenv` API instead of Scala's `sys.env` to avoid a slowdown when there are many environment variables. ### Why are the changes needed? If there are many environment variables, sys.env is very slow. As Utils.isTesting is called very often during DataFrame optimization, this can slow down evaluation considerably. An example that triggers the problem can be found in the bug ticket https://issues.apache.org/jira/browse/SPARK-34115. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? With the example provided in the ticket. Closes #31244 from nob13/bug/34115. Lead-authored-by: Norbert Schultz Co-authored-by: Norbert Schultz Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 5e68dcd9df7fc..13fcfe4aed023 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1939,7 +1939,9 @@ private[spark] object Utils extends Logging { * Indicates whether Spark is currently running unit tests. */ def isTesting: Boolean = { - sys.env.contains("SPARK_TESTING") || sys.props.contains(IS_TESTING.key) + // Scala's `sys.env` creates a ton of garbage by constructing Scala immutable maps, so + // we directly use the Java APIs instead. + System.getenv("SPARK_TESTING") != null || System.getProperty(IS_TESTING.key) != null } /**