downgrade 3.1
serena-ruan committed Sep 8, 2022
1 parent 8d55274 commit 7abf839
Showing 19 changed files with 37 additions and 84 deletions.
12 changes: 7 additions & 5 deletions build.sbt
@@ -9,31 +9,33 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}

val condaEnvName = "synapseml"
val sparkVersion = "3.2.2"
val sparkVersion = "3.1.3"
name := "synapseml"
ThisBuild / organization := "com.microsoft.azure"
ThisBuild / scalaVersion := "2.12.15"
ThisBuild / scalaVersion := "2.12.10"

val scalaMajorVersion = 2.12

val excludes = Seq(
ExclusionRule("org.apache.spark", s"spark-tags_$scalaMajorVersion"),
ExclusionRule("org.scalatest")
ExclusionRule("org.scalatest"),
ExclusionRule("org.json4s", s"json4s-ast_$scalaMajorVersion"),
)

val coreDependencies = Seq(
"com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.12.5",
"org.apache.spark" %% "spark-core" % sparkVersion % "compile",
"org.apache.spark" %% "spark-mllib" % sparkVersion % "compile",
"org.apache.spark" %% "spark-avro" % sparkVersion % "provided",
"org.apache.spark" %% "spark-tags" % sparkVersion % "test",
"org.scalatest" %% "scalatest" % "3.0.5" % "test")
val extraDependencies = Seq(
"org.scalactic" %% "scalactic" % "3.0.5",
"io.spray" %% "spray-json" % "1.3.5",
"io.spray" %% "spray-json" % "1.3.2",
"com.jcraft" % "jsch" % "0.1.54",
"org.apache.httpcomponents" % "httpclient" % "4.5.6",
"org.apache.httpcomponents" % "httpmime" % "4.5.6",
"com.linkedin.isolation-forest" %% "isolation-forest_3.2.0" % "2.0.8"
"com.linkedin.isolation-forest" %% "isolation-forest_3.0.0" % "1.0.1"
).map(d => d excludeAll (excludes: _*))
val dependencies = coreDependencies ++ extraDependencies

@@ -12,8 +12,6 @@ import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{DataFrame, Row}

-import java.util.concurrent.TimeoutException


trait TextEndpoint {
lazy val textKey: String = sys.env.getOrElse("TEXT_API_KEY", Secrets.CognitiveApiKey)
@@ -127,7 +127,7 @@ class CleanMissingData(override val uid: String) extends Estimator[CleanMissingD
// Verify columns are supported for imputation
verifyColumnsSupported(dataset, colsToClean)
val row =
-dataset.select(columns.map(column => call_udf("percentile_approx",
+dataset.select(columns.map(column => callUDF("percentile_approx",
column, lit(0.5))): _*)
.collect()(0)
rowToValues(row)
@@ -104,7 +104,7 @@ class PatchedImageFileFormat extends ImageFileFormat with Serializable with Logg
val bytes = try {
IOUtils.toByteArray(stream)
} finally {
-IOUtils.close(stream)
+IOUtils.closeQuietly(stream)
}

val resultOpt = catchFlakiness(5)(ImageSchema.decode(origin, bytes))
@@ -57,10 +57,10 @@ def test_zip_with_index_sort_by_column_within_partitions(self):
order_by_col="user",
)
expected = [
("OrgB", "Joe", 0),
("OrgA", "Alice", 0),
("OrgA", "Bob", 1),
("OrgA", "Joe", 2),
("OrgB", "Joe", 0),
]
assert result.collect() == expected

@@ -105,7 +105,7 @@ object RTestGen {
| "spark.sql.shuffle.partitions=10",
| "spark.sql.crossJoin.enabled=true")
|
|sc <- spark_connect(master = "local", version = "3.2.2", config = conf)
|sc <- spark_connect(master = "local", version = "3.1.3", config = conf)
|
|""".stripMargin, StandardOpenOption.CREATE)

@@ -29,10 +29,10 @@ object DatabricksUtilities {

// ADB Info
val Region = "eastus"
val PoolName = "synapseml-build-10.4"
val GpuPoolName = "synapseml-build-10.4-gpu"
val AdbRuntime = "10.4.x-scala2.12"
val AdbGpuRuntime = "10.4.x-gpu-ml-scala2.12"
val PoolName = "synapseml-build-9.1"
val GpuPoolName = "synapseml-build-9.1-gpu"
val AdbRuntime = "9.1.x-scala2.12"
val AdbGpuRuntime = "9.1.x-gpu-ml-scala2.12"
val NumWorkers = 5
val AutoTerminationMinutes = 15

@@ -81,7 +81,9 @@

val ParallelizableNotebooks: Seq[File] = NotebookFiles.filterNot(_.isDirectory)

-val CPUNotebooks: Seq[File] = ParallelizableNotebooks.filterNot(_.getAbsolutePath.contains("simple_deep_learning"))
+val CPUNotebooks: Seq[File] = ParallelizableNotebooks
+.filterNot(_.getAbsolutePath.contains("simple_deep_learning"))
+.filterNot(_.getAbsolutePath.contains("GeospatialServices")) // This service doesn't work on databricks for spark3.1

val GPUNotebooks: Seq[File] = ParallelizableNotebooks.filter(_.getAbsolutePath.contains("simple_deep_learning"))

@@ -272,7 +272,7 @@ object SynapseUtilities {
| "nodeSizeFamily": "MemoryOptimized",
| "provisioningState": "Succeeded",
| "sessionLevelPackagesEnabled": "true",
-| "sparkVersion": "3.2"
+| "sparkVersion": "3.1"
| }
|}
|""".stripMargin
2 changes: 1 addition & 1 deletion environment.yml
@@ -4,7 +4,7 @@ channels:
- default
dependencies:
- python=3.8.8
-- pyspark=3.2.2
+- pyspark=3.1.3
- requests=2.26.0
- pip=21.3
- r-base=4.1.1
@@ -398,58 +398,6 @@
"source": [
"display(df_p)"
]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Step 4: Log and Load Model with MLFlow\n",
-"We can now store the trained model for later use."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# setup mlflow\n",
-"import mlflow\n",
-"import trident.mlflow\n",
-"from trident.mlflow import get_sds_url\n",
-"\n",
-"EXPERIMENT_NAME = \"aisample-timeseries\"\n",
-"\n",
-"mlflow.set_tracking_uri(get_sds_url())\n",
-"mlflow.set_registry_uri(get_sds_url())\n",
-"mlflow.set_experiment(EXPERIMENT_NAME)"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# log the model and parameters\n",
-"model_name = f\"{EXPERIMENT_NAME}-prophet\"\n",
-"with mlflow.start_run() as run:\n",
-" mlflow.prophet.log_model(m, model_name, registered_model_name=model_name)\n",
-" mlflow.log_params({\"seasonality_mode\": \"multiplicative\", \"mcmc_samples\": 1000})\n",
-" model_uri = f\"runs:/{run.info.run_id}/{model_name}\"\n",
-" print(\"Model saved in run %s\" % run.info.run_id)\n",
-" print(f\"Model URI: {model_uri}\")"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# load the model back\n",
-"loaded_model = mlflow.prophet.load_model(model_uri)"
-]
}
],
"metadata": {
@@ -471,4 +419,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
+}
7 changes: 5 additions & 2 deletions pipeline.yaml
@@ -5,6 +5,7 @@ trigger:
branches:
include:
- master
+- spark3.1
paths:
exclude:
- README.md
@@ -16,6 +17,7 @@ pr:
branches:
include:
- master
+- spark3.1
paths:
exclude:
- README.md
@@ -99,8 +101,6 @@ jobs:
matrix:
databricks-cpu:
TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests"
-databricks-gpu:
-TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests"
synapse:
TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
steps:
@@ -449,6 +449,9 @@
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
echo "removing ivy2 cache"
sudo rm -rf ~/.ivy2/cache
sbt publishLocal
export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT"
source activate synapseml
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
2 changes: 1 addition & 1 deletion start
@@ -1,7 +1,7 @@
#!/bin/bash

export OPENMPI_VERSION="3.1.2"
export SPARK_VERSION="3.2.2"
export SPARK_VERSION="3.1.3"
export HADOOP_VERSION="2.7"
export SYNAPSEML_VERSION="0.10.1" # Binder compatibility version

2 changes: 1 addition & 1 deletion tools/docker/demo/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.10.1
ARG DEBIAN_FRONTEND=noninteractive

-ENV SPARK_VERSION=3.2.2
+ENV SPARK_VERSION=3.1.3
ENV HADOOP_VERSION=2.7
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
2 changes: 1 addition & 1 deletion tools/docker/minimal/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.10.1
ARG DEBIAN_FRONTEND=noninteractive

-ENV SPARK_VERSION=3.2.2
+ENV SPARK_VERSION=3.1.3
ENV HADOOP_VERSION=2.7
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
8 changes: 4 additions & 4 deletions tools/dotnet/dotnetSetup.sh
@@ -20,11 +20,11 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
# Install Sleet
dotnet tool install -g sleet

-# Install Apache Spark-3.2
-curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
+# Install Apache Spark-3.1
+curl https://archive.apache.org/dist/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz -o spark-3.1.2-bin-hadoop3.2.tgz
mkdir ~/bin
-tar -xzvf spark-3.2.0-bin-hadoop3.2.tgz -C ~/bin
-export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/
+tar -xzvf spark-3.1.2-bin-hadoop3.2.tgz -C ~/bin
+export SPARK_HOME=~/bin/spark-3.1.2-bin-hadoop3.2/
export PATH=$SPARK_HOME/bin:$PATH
echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
2 changes: 1 addition & 1 deletion tools/helm/livy/Dockerfile
@@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
-ENV HADOOP_VERSION 3.2.1
+ENV HADOOP_VERSION 3.1.3

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
2 changes: 1 addition & 1 deletion tools/helm/spark/Dockerfile
@@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
-ENV HADOOP_VERSION 3.2.1
+ENV HADOOP_VERSION 3.1.3

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
2 changes: 1 addition & 1 deletion tools/helm/zeppelin/Dockerfile
@@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
-ENV HADOOP_VERSION 3.2.1
+ENV HADOOP_VERSION 3.1.3

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
2 changes: 1 addition & 1 deletion tools/tests/run_r_tests.R
@@ -3,6 +3,6 @@ if (!require("sparklyr")) {
library("sparklyr")
}

spark_install(version = "3.2.2", hadoop_version = "3.2")
spark_install(version = "3.1.3", hadoop_version = "3.2")
options("testthat.output_file" = "../../../../r-test-results.xml")
devtools::test(reporter = JunitReporter$new())
