From 3f56e1b37c24e99a3ab5d00ff2ddc2836b298bae Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Sun, 31 Mar 2024 21:27:33 -0700
Subject: [PATCH] [SPARK-47645][BUILD][CORE][SQL][YARN] Make Spark build with
 `-release` instead of `-target`

### What changes were proposed in this pull request?

This PR makes the following changes so that Spark builds with `-release` instead of `-target`:

1. Use a `MethodHandle` instead of direct calls to `sun.security.action.GetBooleanAction` and `sun.util.calendar.ZoneInfo`, because they are not exported APIs.
2. Use `Channels.newReader` instead of `StreamDecoder.forDecoder`, because `StreamDecoder.forDecoder` is also not an exported API; `Channels.newReader` delegates to it internally:

```java
public static Reader newReader(ReadableByteChannel ch, CharsetDecoder dec, int minBufferCap) {
    Objects.requireNonNull(ch, "ch");
    return StreamDecoder.forDecoder(ch, dec.reset(), minBufferCap);
}
```

3. Adjust the import of `java.io._` in `yarn/Client.scala` to fix the compilation error:

```
Error: ] /home/runner/work/spark/spark/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:20: object FileSystem is not a member of package java.io
```

4. Replace `-target` with `-release` in `pom.xml` and `SparkBuild.scala`, and remove the `-source` option, because `-release` alone is sufficient.
5. Upgrade `scala-maven-plugin` from 4.7.1 to 4.8.1 to fix the error `[ERROR] -release cannot be less than -target` when executing `build/mvn clean install -DskipTests -Djava.version=21`.

### Why are the changes needed?

Since Scala 2.13.9, the compiler option `-target` has been deprecated and `-release` is recommended instead:

- https://github.com/scala/scala/pull/9982

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Pass GitHub Actions

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #45716 from LuciferYang/scala-maven-plugin-491.
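For illustration, a minimal self-contained sketch of the `Channels.newReader` substitution from item 2; the object name, sample bytes, and buffer cap below are invented for the example, but the call shape matches the patched `getStreamDecoder` helpers:

```scala
import java.io.ByteArrayInputStream
import java.nio.channels.Channels
import java.nio.charset.Charset

// Hypothetical standalone example: build a Reader over an in-memory byte array via
// Channels.newReader, the public API that stands in for the JDK-internal
// StreamDecoder.forDecoder call.
object NewReaderSketch {
  def main(args: Array[String]): Unit = {
    val bytes = """{"key": "value"}""".getBytes("UTF-8")
    val byteChannel = Channels.newChannel(new ByteArrayInputStream(bytes))
    val decoder = Charset.forName("UTF-8").newDecoder()
    val reader = Channels.newReader(byteChannel, decoder, math.min(bytes.length, 8192))
    val buf = new Array[Char](bytes.length)
    val n = reader.read(buf)
    println(new String(buf, 0, n)) // prints: {"key": "value"}
  }
}
```

The diff below applies the same swap in `CreateJacksonParser` and `CreateXmlParser`.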
Authored-by: yangjie01
Signed-off-by: Dongjoon Hyun
---
 .../serializer/SerializationDebugger.scala    | 13 ++++++++++---
 pom.xml                                       | 16 ++++++----------
 project/SparkBuild.scala                      |  5 ++---
 .../org/apache/spark/deploy/yarn/Client.scala |  2 +-
 .../catalyst/util/SparkDateTimeUtils.scala    | 19 +++++++++++++++----
 .../catalyst/json/CreateJacksonParser.scala   |  7 +++----
 .../sql/catalyst/xml/CreateXmlParser.scala    |  7 +++----
 7 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
index 2879124902356..b05babdce1699 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala
@@ -18,14 +18,16 @@
 package org.apache.spark.serializer
 
 import java.io._
+import java.lang.invoke.MethodHandles
 import java.lang.reflect.{Field, Method}
-import java.security.AccessController
+import java.security.{AccessController, PrivilegedAction}
 
 import scala.annotation.tailrec
 import scala.collection.mutable
 import scala.util.control.NonFatal
 
 import org.apache.spark.internal.Logging
+import org.apache.spark.util.SparkClassUtils
 
 private[spark] object SerializationDebugger extends Logging {
@@ -68,8 +70,13 @@ private[spark] object SerializationDebugger extends Logging {
   }
 
   private[serializer] var enableDebugging: Boolean = {
-    !AccessController.doPrivileged(new sun.security.action.GetBooleanAction(
-      "sun.io.serialization.extendedDebugInfo")).booleanValue()
+    val lookup = MethodHandles.lookup()
+    val clazz = SparkClassUtils.classForName("sun.security.action.GetBooleanAction")
+    val constructor = clazz.getConstructor(classOf[String])
+    val mh = lookup.unreflectConstructor(constructor)
+    val action = mh.invoke("sun.io.serialization.extendedDebugInfo")
+      .asInstanceOf[PrivilegedAction[Boolean]]
+    !AccessController.doPrivileged(action).booleanValue()
   }
 
   private class SerializationDebugger {
diff --git a/pom.xml b/pom.xml
index d4e0a2d840de2..f7c104749e0d5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -114,8 +114,7 @@
     UTF-8
     UTF-8
     17
-    ${java.version}
-    ${java.version}
+    ${java.version}
     3.9.6
     3.1.0
     spark
@@ -175,8 +174,7 @@
     2.13.13
     2.13
     2.2.0
-
-    4.7.1
+    4.8.1
     false
     2.16.2
@@ -3060,7 +3058,8 @@
             -deprecation
             -feature
             -explaintypes
-            -target:17
+            -release
+            17
             -Wconf:cat=deprecation:wv,any:e
             -Wunused:imports
             -Wconf:cat=scaladoc:wv
@@ -3092,9 +3091,7 @@
               -XX:ReservedCodeCacheSize=${CodeCacheSize}
-              -source
-              ${java.version}
-              -target
+              --release
               ${java.version}
               -Xlint:all,-serial,-path,-try
@@ -3105,8 +3102,7 @@
         maven-compiler-plugin
         3.12.1
-        ${java.version}
-        ${java.version}
+        ${java.version}
         true
         true
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index cfe9a9cd224c7..c2b1bc03a967f 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -311,18 +311,17 @@ object SparkBuild extends PomBuild {
 
     (Compile / javacOptions) ++= Seq(
       "-encoding", UTF_8.name(),
-      "-source", javaVersion.value
+      "--release", javaVersion.value
     ),
     // This -target and Xlint:unchecked options cannot be set in the Compile configuration scope since
     // `javadoc` doesn't play nicely with them; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629
    // for additional discussion and explanation.
     (Compile / compile / javacOptions) ++= Seq(
-      "-target", javaVersion.value,
       "-Xlint:unchecked"
     ),
 
     (Compile / scalacOptions) ++= Seq(
-      s"-target:${javaVersion.value}",
+      "-release", javaVersion.value,
       "-sourcepath", (ThisBuild / baseDirectory).value.getAbsolutePath // Required for relative source links in scaladoc
     ),
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 1c762b98c4d04..1d10e85485130 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.deploy.yarn
 
-import java.io.{FileSystem => _, _}
+import java.io.{File, FileFilter, FileNotFoundException, FileOutputStream, InterruptedIOException, IOException, OutputStreamWriter}
 import java.net.{InetAddress, UnknownHostException, URI, URL}
 import java.nio.ByteBuffer
 import java.nio.charset.StandardCharsets
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index ed4d68f553f19..8db8c3cd39d74 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.spark.sql.catalyst.util
 
+import java.lang.invoke.{MethodHandles, MethodType}
 import java.sql.{Date, Timestamp}
 import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZonedDateTime, ZoneId, ZoneOffset}
 import java.util.TimeZone
@@ -24,14 +25,13 @@ import java.util.regex.Pattern
 
 import scala.util.control.NonFatal
 
-import sun.util.calendar.ZoneInfo
-
 import org.apache.spark.QueryContext
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
 import org.apache.spark.sql.errors.ExecutionErrors
 import org.apache.spark.sql.types.{DateType, TimestampType}
 import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.SparkClassUtils
 
 trait SparkDateTimeUtils {
@@ -197,6 +197,15 @@ trait SparkDateTimeUtils {
     rebaseJulianToGregorianDays(julianDays)
   }
 
+  private val zoneInfoClassName = "sun.util.calendar.ZoneInfo"
+  private val getOffsetsByWallHandle = {
+    val lookup = MethodHandles.lookup()
+    val classType = SparkClassUtils.classForName(zoneInfoClassName)
+    val methodName = "getOffsetsByWall"
+    val methodType = MethodType.methodType(classOf[Int], classOf[Long], classOf[Array[Int]])
+    lookup.findVirtual(classType, methodName, methodType)
+  }
+
   /**
    * Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar to a local date
   * at the default JVM time zone in the hybrid calendar (Julian + Gregorian). It rebases the given
@@ -215,8 +224,10 @@ trait SparkDateTimeUtils {
     val rebasedDays = rebaseGregorianToJulianDays(days)
     val localMillis = Math.multiplyExact(rebasedDays, MILLIS_PER_DAY)
     val timeZoneOffset = TimeZone.getDefault match {
-      case zoneInfo: ZoneInfo => zoneInfo.getOffsetsByWall(localMillis, null)
-      case timeZone: TimeZone => timeZone.getOffset(localMillis - timeZone.getRawOffset)
+      case zoneInfo: TimeZone if zoneInfo.getClass.getName == zoneInfoClassName =>
+        getOffsetsByWallHandle.invoke(zoneInfo, localMillis, null).asInstanceOf[Int]
+      case timeZone: TimeZone =>
+        timeZone.getOffset(localMillis - timeZone.getRawOffset)
     }
     new Date(localMillis - timeZoneOffset)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
index 61ef14a3f103f..ba7b54fc04e84 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
@@ -17,14 +17,13 @@
 
 package org.apache.spark.sql.catalyst.json
 
-import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
+import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, Reader}
 import java.nio.channels.Channels
 import java.nio.charset.Charset
 import java.nio.charset.StandardCharsets
 
 import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
 import org.apache.hadoop.io.Text
-import sun.nio.cs.StreamDecoder
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.unsafe.types.UTF8String
@@ -58,13 +57,13 @@ object CreateJacksonParser extends Serializable {
   // a reader with specific encoding.
   // The method creates a reader for an array with given encoding and sets size of internal
   // decoding buffer according to size of input array.
-  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
+  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): Reader = {
     val bais = new ByteArrayInputStream(in, 0, length)
     val byteChannel = Channels.newChannel(bais)
     val decodingBufferSize = Math.min(length, 8192)
     val decoder = Charset.forName(enc).newDecoder()
-    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
+    Channels.newReader(byteChannel, decoder, decodingBufferSize)
   }
 
   def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/CreateXmlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/CreateXmlParser.scala
index 553c09da3326b..f4e5bac337996 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/CreateXmlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/CreateXmlParser.scala
@@ -17,14 +17,13 @@
 
 package org.apache.spark.sql.catalyst.xml
 
-import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, StringReader}
+import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, Reader, StringReader}
 import java.nio.channels.Channels
 import java.nio.charset.{Charset, StandardCharsets}
 import javax.xml.stream.{EventFilter, XMLEventReader, XMLInputFactory, XMLStreamConstants}
 import javax.xml.stream.events.XMLEvent
 
 import org.apache.hadoop.io.Text
-import sun.nio.cs.StreamDecoder
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.unsafe.types.UTF8String
@@ -75,13 +74,13 @@ object CreateXmlParser extends Serializable {
   // a reader with specific encoding.
   // The method creates a reader for an array with given encoding and sets size of internal
   // decoding buffer according to size of input array.
-  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
+  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): Reader = {
     val bais = new ByteArrayInputStream(in, 0, length)
     val byteChannel = Channels.newChannel(bais)
     val decodingBufferSize = Math.min(length, 8192)
     val decoder = Charset.forName(enc).newDecoder()
-    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
+    Channels.newReader(byteChannel, decoder, decodingBufferSize)
   }
 
   def text(enc: String, xmlInputFactory: XMLInputFactory, record: Text): XMLEventReader = {