From b1dba921ca8736d1f766e96559cbe3bd5a28d2bb Mon Sep 17 00:00:00 2001 From: Peter Rudenko Date: Wed, 10 Jun 2020 17:47:05 +0300 Subject: [PATCH] Update README and prepare release action. (#22) --- .github/workflows/sparkucx-ci.yml | 9 ++---- .github/workflows/sparkucx-release.yml | 42 ++++++++++++++++++++++++++ README.md | 21 +++++++------ pom.xml | 2 +- 4 files changed, 58 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/sparkucx-release.yml diff --git a/.github/workflows/sparkucx-ci.yml b/.github/workflows/sparkucx-ci.yml index 01339492..bc060fd1 100755 --- a/.github/workflows/sparkucx-ci.yml +++ b/.github/workflows/sparkucx-ci.yml @@ -1,18 +1,15 @@ name: SparkUCX CI on: - push: - branches: - - master pull_request: branches: - master -jobs: +jobs: build-sparkucx: strategy: matrix: - spark_version: [2.4, 3.0] + spark_version: ["2.4", "3.0"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 @@ -21,7 +18,7 @@ jobs: with: java-version: 1.8 - name: Build with Maven - run: mvn -B package -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + run: mvn -B package -Pspark-${{ matrix.spark_version }} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn --file pom.xml - name: Run Sonar code analysis run: mvn -B sonar:sonar -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Dsonar.projectKey=openucx:spark-ucx -Dsonar.organization=openucx -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=97f4df88ff4fa04e2d5b061acf07315717f1f08b -Pspark-${{ matrix.spark_version }} diff --git a/.github/workflows/sparkucx-release.yml b/.github/workflows/sparkucx-release.yml new file mode 100644 index 00000000..09766096 --- /dev/null +++ b/.github/workflows/sparkucx-release.yml @@ -0,0 +1,42 @@ +on: + push: + # Sequence of patterns matched against refs/tags + tags: + - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 + +name: Upload Release Asset + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +jobs: + release: + strategy: + matrix: + spark_version: ["2.4", "3.0"] + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up JDK 1.8 + uses: actions/setup-java@v1 + with: + java-version: 1.8 + + - name: Build with Maven + id: maven_package + run: | + mvn -B -Pspark-${{ matrix.spark_version }} clean package \ + -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ + --file pom.xml + cd target + echo "::set-output name=jar_name::$(echo spark-ucx-*-jar-with-dependencies.jar)" + + - name: Upload Release Jars + uses: svenstaro/upload-release-action@v1-release + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: ./target/${{ steps.maven_package.outputs.jar_name }} + asset_name: ${{ steps.maven_package.outputs.jar_name }} + tag: ${{ github.ref }} diff --git a/README.md b/README.md index a3531ef4..e7a2b9a7 100755 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ for your spark version (e.g. spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies Put SparkUCX jar file in $SPARK_UCX_HOME on all the nodes in your cluster.
If you would like to build the project yourself, please refer to the ["Build"](https://github.com/openucx/sparkucx#build) section below. -Ucx binaries **must** be in `java.library.path` on every Spark Master and Worker. +Ucx binaries **must** be on the Spark classpath on every Spark Master and Worker. It can be obtained by installing latest version of [Mellanox OFED](http://www.mellanox.com/page/products_dyn?product_family=26) or following [ucx build instruction](https://github.com/openucx/ucx#using-ucx). E.g.: @@ -37,19 +37,16 @@ Provide Spark the location of the SparkUCX plugin jars and ucx shared binaries b spark.driver.extraClassPath $SPARK_UCX_HOME/spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies.jar:$UCX_PREFIX/lib spark.executor.extraClassPath $SPARK_UCX_HOME/spark-ucx-1.0-for-spark-2.4.0-jar-with-dependencies.jar:$UCX_PREFIX/lib ``` - -Add UCX shared binaries to `java.library.path` for Spark driver and executors: -``` -spark.driver.extraJavaOptions -Djava.library.path=$UCX_PREFIX/lib -spark.executor.extraJavaOptions -Djava.library.path=$UCX_PREFIX/lib -``` - To enable the SparkUCX Shuffle Manager plugin, add the following configuration property to spark (e.g. in $SPARK_HOME/conf/spark-defaults.conf): ``` spark.shuffle.manager org.apache.spark.shuffle.UcxShuffleManager ``` +For Spark 3.0, also add the SparkUCX ShuffleIO plugin: +``` +spark.shuffle.sort.io.plugin.class org.apache.spark.shuffle.compat.spark_3_0.UcxLocalDiskShuffleDataIO +``` ### Build @@ -60,6 +57,12 @@ Build instructions: ``` % git clone https://github.com/openucx/sparkucx % cd sparkucx -% mvn -DskipTests clean package -Pspark-2.3 +% mvn -DskipTests clean package -Pspark-2.4 ``` +### Performance + +The SparkUCX plugin is built to provide the best performance out-of-the-box, and provides multiple configuration options to further tune SparkUCX per-job.
For more information on how to set up the [HiBench](https://github.com/Intel-bigdata/HiBench) benchmark and reproduce the results, please refer to [Accelerated Apache SparkUCX 2.4/3.0 cluster deployment](https://docs.mellanox.com/pages/releaseview.action?pageId=19819236). + +![Performance results](https://docs.mellanox.com/download/attachments/19819236/image2020-1-23_15-39-14.png) + diff --git a/pom.xml b/pom.xml index f251d82c..42336715 100755 --- a/pom.xml +++ b/pom.xml @@ -70,7 +70,7 @@ See file LICENSE for terms. org.openucx jucx - 1.9.0-SNAPSHOT + 1.8.0