diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index a3724a46585b6..81425b84ce3aa 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -354,6 +354,7 @@ object Assembly {
 object PySparkAssembly {
   import sbtassembly.Plugin._
   import AssemblyKeys._
+  import java.util.zip.{ZipOutputStream, ZipEntry}
 
   lazy val settings = Seq(
     unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
@@ -365,12 +366,7 @@ object PySparkAssembly {
 
       val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
       zipFile.delete()
-      def entries(f: File): List[File] =
-        f :: (if (f.isDirectory) IO.listFiles(f).toList.flatMap(entries(_)) else Nil)
-      val sources = entries(src).map { d =>
-        (d, d.getAbsolutePath.substring(src.getParent.length +1))
-      }
-      IO.zip(sources, zipFile)
+      zipRecursive(src, zipFile)
 
       val dst = new File(outDir, "pyspark")
       if (!dst.isDirectory()) {
@@ -380,6 +376,38 @@ object PySparkAssembly {
     }
   )
 
+  private def zipRecursive(source: File, destZipFile: File) = {
+    // NOTE(review): no try/finally around the stream; an I/O failure leaks the
+    // handle, which is tolerable for a one-shot sbt build task.
+    val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile))
+    addFilesToZipStream("", source, destOutput)
+    destOutput.flush()
+    destOutput.close()
+  }
+
+  private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = {
+    if (source.isDirectory()) {
+      output.putNextEntry(new ZipEntry(parent + source.getName()))
+      for (file <- source.listFiles()) {
+        // Always use '/' in entry names: the ZIP spec mandates it, and
+        // File.separator would produce broken archives on Windows.
+        addFilesToZipStream(parent + source.getName() + "/", file, output)
+      }
+    } else {
+      val in = new FileInputStream(source)
+      output.putNextEntry(new ZipEntry(parent + source.getName()))
+      val buf = new Array[Byte](8192)
+      var n = 0
+      while (n != -1) {
+        n = in.read(buf)
+        if (n != -1) {
+          output.write(buf, 0, n)
+        }
+      }
+      in.close()
+    }
+  }
+
   private def copy(src: File, dst: File): Seq[File] = {
     src.listFiles().flatMap { f =>
       val child = new File(dst, f.getName())