diff --git a/bin/run-example b/bin/run-example
index e1b0d5789bed6..dd0e3c4120260 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -21,56 +21,5 @@ if [ -z "${SPARK_HOME}" ]; then
   export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
 fi
 
-EXAMPLES_DIR="${SPARK_HOME}"/examples
-
-. "${SPARK_HOME}"/bin/load-spark-env.sh
-
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
-  echo "  - set MASTER=XX to use a specific master" 1>&2
-  echo "  - can use abbreviated example class name relative to com.apache.spark.examples" 1>&2
-  echo "     (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)" 1>&2
-  exit 1
-fi
-
-if [ -f "${SPARK_HOME}/RELEASE" ]; then
-  JAR_PATH="${SPARK_HOME}/lib"
-else
-  JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-fi
-
-JAR_COUNT=0
-
-for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
-  if [[ ! -e "$f" ]]; then
-    echo "Failed to find Spark examples assembly in ${SPARK_HOME}/lib or ${SPARK_HOME}/examples/target" 1>&2
-    echo "You need to build Spark before running this program" 1>&2
-    exit 1
-  fi
-  SPARK_EXAMPLES_JAR="$f"
-  JAR_COUNT=$((JAR_COUNT+1))
-done
-
-if [ "$JAR_COUNT" -gt "1" ]; then
-  echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
-  ls "${JAR_PATH}"/spark-examples-*hadoop*.jar 1>&2
-  echo "Please remove all but one jar." 1>&2
-  exit 1
-fi
-
-export SPARK_EXAMPLES_JAR
-
-EXAMPLE_MASTER=${MASTER:-"local[*]"}
-
-if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
-  EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
-fi
-
-exec "${SPARK_HOME}"/bin/spark-submit \
-  --master $EXAMPLE_MASTER \
-  --class $EXAMPLE_CLASS \
-  "$SPARK_EXAMPLES_JAR" \
-  "$@"
+export _SPARK_CMD_USAGE="Usage: ./bin/run-example [options] example-class [example args]"
+exec "${SPARK_HOME}"/bin/spark-submit run-example "$@"
diff --git a/bin/run-example.cmd b/bin/run-example.cmd
index 64f6bc3728d07..f9b786e92b823 100644
--- a/bin/run-example.cmd
+++ b/bin/run-example.cmd
@@ -17,7 +17,6 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
-rem This is the entry point for running a Spark example. To avoid polluting
-rem the environment, it just launches a new cmd to do the real work.
-
-cmd /V /E /C "%~dp0run-example2.cmd" %*
+set SPARK_HOME=%~dp0..
+set _SPARK_CMD_USAGE=Usage: ./bin/run-example [options] example-class [example args]
+cmd /V /E /C "%~dp0spark-submit.cmd" run-example %*
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
deleted file mode 100644
index fada43581d184..0000000000000
--- a/bin/run-example2.cmd
+++ /dev/null
@@ -1,85 +0,0 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements.  See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License.  You may obtain a copy of the License at
-rem
-rem    http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-set SCALA_VERSION=2.10
-
-rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
-
-call "%SPARK_HOME%\bin\load-spark-env.cmd"
-
-rem Test that an argument was given
-if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [example-args]
-  echo   - set MASTER=XX to use a specific master
-  echo   - can use abbreviated example class name relative to com.apache.spark.examples
-  echo      (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)
-  goto exit
-:arg_given
-
-set EXAMPLES_DIR=%SPARK_HOME%\examples
-
-rem Figure out the JAR file that our examples were packaged into.
-set SPARK_EXAMPLES_JAR=
-if exist "%SPARK_HOME%\RELEASE" (
-  for %%d in ("%SPARK_HOME%\lib\spark-examples*.jar") do (
-    set SPARK_EXAMPLES_JAR=%%d
-  )
-) else (
-  for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
-    set SPARK_EXAMPLES_JAR=%%d
-  )
-)
-if "x%SPARK_EXAMPLES_JAR%"=="x" (
-  echo Failed to find Spark examples assembly JAR.
-  echo You need to build Spark before running this program.
-  goto exit
-)
-
-rem Set master from MASTER environment variable if given
-if "x%MASTER%"=="x" (
-  set EXAMPLE_MASTER=local[*]
-) else (
-  set EXAMPLE_MASTER=%MASTER%
-)
-
-rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
-set EXAMPLE_CLASS=%1
-set PREFIX=%EXAMPLE_CLASS:~0,25%
-if not %PREFIX%==org.apache.spark.examples (
-  set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
-)
-
-rem Get the tail of the argument list, to skip the first one. This is surprisingly
-rem complicated on Windows.
-set "ARGS="
-:top
-shift
-if "%~1" neq "" (
-  set ARGS=%ARGS% "%~1"
-  goto :top
-)
-if defined ARGS set ARGS=%ARGS:~1%
-
-call "%SPARK_HOME%\bin\spark-submit.cmd" ^
-  --master %EXAMPLE_MASTER% ^
-  --class %EXAMPLE_CLASS% ^
-  "%SPARK_EXAMPLES_JAR%" %ARGS%
-
-:exit
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ac4e9b90f0177..dbdd42ff9e087 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -166,11 +166,14 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 
 # Copy jars
 cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
-cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
 # This will fail if the -Pyarn profile is not provided
 # In this case, silence the error and ignore the return code of this command
 cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
 
+# Copy examples and dependencies
+mkdir -p "$DISTDIR/examples/jars"
+cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
+
 # Copy example sources (needed for python and SQL)
 mkdir -p "$DISTDIR/examples/src/main"
 cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
diff --git a/examples/pom.xml b/examples/pom.xml
index 92bb373c7382d..1aa730c0dcdac 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -322,36 +322,36 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>prepare-test-jar</id>
+            <phase>none</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
-          <shadedArtifactAttached>false</shadedArtifactAttached>
-          <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
-          <artifactSet>
-            <includes>
-              <include>*:*</include>
-            </includes>
-          </artifactSet>
-          <filters>
-            <filter>
-              <artifact>*:*</artifact>
-              <excludes>
-                <exclude>META-INF/*.SF</exclude>
-                <exclude>META-INF/*.DSA</exclude>
-                <exclude>META-INF/*.RSA</exclude>
-              </excludes>
-            </filter>
-          </filters>
-          <transformers>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-              <resource>reference.conf</resource>
-            </transformer>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
-              <resource>log4j.properties</resource>
-            </transformer>
-          </transformers>
+          <outputDirectory>${jars.target.dir}</outputDirectory>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>runtime</includeScope>
+              <outputDirectory>${jars.target.dir}</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index b2dd6ac4c3982..56e4107c5a0c7 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -30,7 +30,8 @@
  * driver-side options and special parsing behavior needed for the special-casing certain internal
  * Spark applications.
  * <p>
- * This class has also some special features to aid launching pyspark.
+ * This class has also some special features to aid launching shells (pyspark and sparkR) and also
+ * examples.
  */
 class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
 
@@ -62,6 +63,17 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
    */
   static final String SPARKR_SHELL_RESOURCE = "sparkr-shell";
 
+  /**
+   * Name of app resource used to identify examples. When running examples, args[0] should be
+   * this name. The app resource will identify the example class to run.
+   */
+  static final String RUN_EXAMPLE = "run-example";
+
+  /**
+   * Prefix for example class names.
+   */
+  static final String EXAMPLE_CLASS_PREFIX = "org.apache.spark.examples.";
+
   /**
    * This map must match the class names for available special classes, since this modifies the way
    * command line parsing works. This maps the class name to the resource to use when calling
@@ -78,6 +90,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
 
   final List<String> sparkArgs;
   private final boolean printInfo;
+  private final boolean isExample;
 
   /**
    * Controls whether mixing spark-submit arguments with app arguments is allowed. This is needed
@@ -89,10 +102,13 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
   SparkSubmitCommandBuilder() {
     this.sparkArgs = new ArrayList<>();
     this.printInfo = false;
+    this.isExample = false;
   }
 
   SparkSubmitCommandBuilder(List<String> args) {
-    this.sparkArgs = new ArrayList<>();
+    this.allowsMixedArguments = false;
+
+    boolean isExample = false;
     List<String> submitArgs = args;
     if (args.size() > 0 && args.get(0).equals(PYSPARK_SHELL)) {
       this.allowsMixedArguments = true;
@@ -102,10 +118,14 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       this.allowsMixedArguments = true;
       appResource = SPARKR_SHELL_RESOURCE;
       submitArgs = args.subList(1, args.size());
-    } else {
-      this.allowsMixedArguments = false;
+    } else if (args.size() > 0 && args.get(0).equals(RUN_EXAMPLE)) {
+      isExample = true;
+      submitArgs = args.subList(1, args.size());
     }
 
+    this.sparkArgs = new ArrayList<>();
+    this.isExample = isExample;
+
     OptionParser parser = new OptionParser();
     parser.parse(submitArgs);
     this.printInfo = parser.infoRequested;
@@ -155,6 +175,10 @@ List<String> buildSparkSubmitArgs() {
       args.add(propertiesFile);
     }
 
+    if (isExample) {
+      jars.addAll(findExamplesJars());
+    }
+
     if (!jars.isEmpty()) {
       args.add(parser.JARS);
       args.add(join(",", jars));
@@ -170,6 +194,9 @@ List<String> buildSparkSubmitArgs() {
       args.add(join(",", pyFiles));
     }
 
+    if (!printInfo) {
+      checkArgument(!isExample || mainClass != null, "Missing example class name.");
+    }
     if (mainClass != null) {
       args.add(parser.CLASS);
       args.add(mainClass);
@@ -308,6 +335,25 @@ private boolean isThriftServer(String mainClass) {
       mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"));
   }
 
+  private List<String> findExamplesJars() {
+    List<String> examplesJars = new ArrayList<>();
+    String sparkHome = getSparkHome();
+
+    File jarsDir;
+    if (new File(sparkHome, "RELEASE").isFile()) {
+      jarsDir = new File(sparkHome, "examples/jars");
+    } else {
+      jarsDir = new File(sparkHome,
+        String.format("examples/target/scala-%s/jars", getScalaVersion()));
+    }
+    checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
+      jarsDir.getAbsolutePath());
+
+    for (File f: jarsDir.listFiles()) {
+      examplesJars.add(f.getAbsolutePath());
+    }
+    return examplesJars;
+  }
+
   private class OptionParser extends SparkSubmitOptionParser {
 
@@ -367,6 +413,14 @@ protected boolean handleUnknown(String opt) {
       if (allowsMixedArguments) {
         appArgs.add(opt);
         return true;
+      } else if (isExample) {
+        String className = opt;
+        if (!className.startsWith(EXAMPLE_CLASS_PREFIX)) {
+          className = EXAMPLE_CLASS_PREFIX + className;
+        }
+        mainClass = className;
+        appResource = "spark-internal";
+        return false;
       } else {
         checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt);
         sparkArgs.add(opt);
@@ -376,8 +430,10 @@ protected boolean handleUnknown(String opt) {
 
     @Override
     protected void handleExtraArgs(List<String> extra) {
-      for (String arg : extra) {
-        sparkArgs.add(arg);
+      if (isExample) {
+        appArgs.addAll(extra);
+      } else {
+        sparkArgs.addAll(extra);
       }
     }
 
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index 00f967122bd70..b7f4f2efc5d84 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -151,6 +151,24 @@ public void testPySparkFallback() throws Exception {
     assertEquals("arg1", cmd.get(cmd.size() - 1));
   }
 
+  @Test
+  public void testExamplesRunner() throws Exception {
+    List<String> sparkSubmitArgs = Arrays.asList(
+      SparkSubmitCommandBuilder.RUN_EXAMPLE,
+      parser.MASTER + "=foo",
+      parser.DEPLOY_MODE + "=bar",
+      "SparkPi",
+      "42");
+
+    Map<String, String> env = new HashMap<>();
+    List<String> cmd = buildCommand(sparkSubmitArgs, env);
+    assertEquals("foo", findArgValue(cmd, parser.MASTER));
+    assertEquals("bar", findArgValue(cmd, parser.DEPLOY_MODE));
+    assertEquals(SparkSubmitCommandBuilder.EXAMPLE_CLASS_PREFIX + "SparkPi",
+      findArgValue(cmd, parser.CLASS));
+    assertEquals("42", cmd.get(cmd.size() - 1));
+  }
+
   private void testCmdBuilder(boolean isDriver, boolean useDefaultPropertyFile) throws Exception {
     String deployMode = isDriver ? "client" : "cluster";
 
diff --git a/pom.xml b/pom.xml
index 0faa691c5e78b..92a32e7797bbc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -178,6 +178,9 @@
     <test.java.home>${java.home}</test.java.home>
+
+    <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
+
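
Taken together, these changes turn run-example into a thin shim: the launcher treats the
run-example marker as args[0], prepends org.apache.spark.examples. to abbreviated class
names, and adds the jars under examples/jars (or the examples build directory in a dev
tree) to the submission. A quick sanity check against a built tree; the example class
and its arguments below are illustrative:

    ./bin/run-example --master local[4] SparkPi 100

    # Equivalent, since run-example now just execs spark-submit with the marker:
    ./bin/spark-submit run-example --master local[4] SparkPi 100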