diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala index c59c133b3e..f2b2907f9e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala @@ -26,14 +26,6 @@ object FileUtilities { val CREATE = S.CREATE } - def recursiveListFiles(f: File): Array[File] = { - val these = f.listFiles() - these ++ these - .filter(_.isDirectory) - .flatMap(recursiveListFiles) - .filter(!_.isDirectory) - } - def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = { def loop(dir: File): Array[File] = { val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala index cdd409f520..54009bfdf5 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala @@ -16,16 +16,14 @@ import scala.sys.process.Process /** Tests to validate fuzzing of modules. 
*/ class SynapseTests extends TestBase { - test("Synapse") { + ignore("Synapse") { val os = sys.props("os.name").toLowerCase os match { case x if x contains "windows" => - exec("conda activate synapseml " + - "&& jupyter nbconvert --to script .\\notebooks\\features\\**\\*.ipynb") + exec("conda activate synapseml && jupyter nbconvert --to script .\\notebooks\\*.ipynb") case _ => - Process(s"conda init bash; conda activate synapseml; " + - "jupyter nbconvert --to script ./notebooks/features/**/*.ipynb") + Process(s"conda init bash; conda activate synapseml; jupyter nbconvert --to script ./notebooks/*.ipynb") } SynapseUtilities.listPythonFiles().map(f => { @@ -35,13 +33,8 @@ class SynapseTests extends TestBase { new File(f).renameTo(new File(newPath)) }) - val workspaceName = "mmlsparkppe" - val sparkPools = Array( - "e2etstspark32i1", - "e2etstspark32i2", - "e2etstspark32i3", - "e2etstspark32i4", - "e2etstspark32i5") + val workspaceName = "mmlspark" + val sparkPools = Array("buildpool", "buildpool2", "buildpool3") val livyBatchJobs = SynapseUtilities.listPythonJobFiles() .filterNot(_.contains(" ")) @@ -50,7 +43,7 @@ class SynapseTests extends TestBase { val poolName = SynapseUtilities.monitorPool(workspaceName, sparkPools) val livyUrl = "https://" + workspaceName + - ".dev.azuresynapse-dogfood.net/livyApi/versions/2019-11-01-preview/sparkPools/" + + ".dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/" + poolName + "/batches" val livyBatch: LivyBatch = SynapseUtilities.uploadAndSubmitNotebook(livyUrl, f) diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala index 7581553917..580435161f 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala @@ -51,19 +51,18 @@ object SynapseUtilities extends HasHttpClient 
{ lazy val Token: String = getSynapseToken val Folder = s"build_${BuildInfo.version}/scripts" - val TimeoutInMillis: Int = 30 * 60 * 1000 // 30 minutes + val TimeoutInMillis: Int = 20 * 60 * 1000 val StorageAccount: String = "mmlsparkeuap" - val StorageContainer: String = "mmlsparkppefs" + val StorageContainer: String = "synapse" val TenantId: String = "72f988bf-86f1-41af-91ab-2d7cd011db47" val ClientId: String = "85dde348-dd2b-43e5-9f5a-22262af45332" def listPythonFiles(): Array[String] = { - Option({ - val rootDirectory = FileUtilities - .join(BuildInfo.baseDirectory.getParent, "notebooks/features") + Option( + FileUtilities + .join(BuildInfo.baseDirectory.getParent, "notebooks") .getCanonicalFile - - FileUtilities.recursiveListFiles(rootDirectory) + .listFiles() .filter(_.getAbsolutePath.endsWith(".py")) .filter(_.getAbsolutePath.contains("-")) .filterNot(_.getAbsolutePath.contains("CyberML")) @@ -74,40 +73,35 @@ object SynapseUtilities extends HasHttpClient { .filterNot(_.getAbsolutePath.contains("Overview")) .filterNot(_.getAbsolutePath.contains("ModelInterpretation")) .filterNot(_.getAbsolutePath.contains("Interpretability")) - .map(file => file.getAbsolutePath) - }) - .get - .sorted + .map(file => file.getAbsolutePath)) + .get + .sorted } def listPythonJobFiles(): Array[String] = { - Option({ - val rootDirectory = FileUtilities - .join(BuildInfo.baseDirectory.getParent, "notebooks/features") - .getCanonicalFile - - FileUtilities.recursiveListFiles(rootDirectory) - .filter(_.getAbsolutePath.endsWith(".py")) - .filterNot(_.getAbsolutePath.contains("-")) - .filterNot(_.getAbsolutePath.contains(" ")) - .map(file => file.getAbsolutePath) - }) - .get - .sorted + Option( + FileUtilities + .join(BuildInfo.baseDirectory.getParent, "notebooks") + .getCanonicalFile + .listFiles() + .filter(_.getAbsolutePath.endsWith(".py")) + .filterNot(_.getAbsolutePath.contains("-")) + .filterNot(_.getAbsolutePath.contains(" ")) + .map(file => file.getAbsolutePath)) + .get + .sorted 
} def listNoteBookFiles(): Array[String] = { - Option({ - val rootDirectory = FileUtilities - .join(BuildInfo.baseDirectory.getParent, "notebooks/features") + Option( + FileUtilities + .join(BuildInfo.baseDirectory.getParent, "notebooks") .getCanonicalFile - - FileUtilities.recursiveListFiles(rootDirectory) + .listFiles() .filter(_.getAbsolutePath.endsWith(".ipynb")) - .map(file => file.getAbsolutePath) - }) - .get - .sorted + .map(file => file.getAbsolutePath)) + .get + .sorted } def postMortem(batch: LivyBatch, livyUrl: String): LivyBatch = { @@ -128,7 +122,7 @@ object SynapseUtilities extends HasHttpClient { def showSubmittingJobs(workspaceName: String, poolName: String): Applications = { val uri: String = "https://" + - s"$workspaceName.dev.azuresynapse-dogfood.net" + + s"$workspaceName.dev.azuresynapse.net" + "/monitoring/workloadTypes/spark/applications" + "?api-version=2020-10-01-preview" + "&filter=(((state%20eq%20%27Queued%27)%20or%20(state%20eq%20%27Submitting%27))" + @@ -158,7 +152,7 @@ object SynapseUtilities extends HasHttpClient { readyPool } else { - println(s"No spark pool is ready to submit a new job, waiting 10s") + println(s"None spark pool is ready to submit job, waiting 10s") blocking { Thread.sleep(10000) } @@ -249,8 +243,7 @@ object SynapseUtilities extends HasHttpClient { val excludes: String = "org.scala-lang:scala-reflect," + "org.apache.spark:spark-tags_2.12," + "org.scalactic:scalactic_2.12," + - "org.scalatest:scalatest_2.12," + - "org.slf4j:slf4j-api" + "org.scalatest:scalatest_2.12" val livyPayload: String = s""" @@ -264,7 +257,7 @@ object SynapseUtilities extends HasHttpClient { | "numExecutors" : 2, | "conf" : | { - | "spark.jars.packages" : "com.microsoft.azure:synapseml_2.12:${BuildInfo.version}", + | "spark.jars.packages" : "com.microsoft.azure:synapseml:${BuildInfo.version}", | "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven", | "spark.jars.excludes": "$excludes", | "spark.driver.userClassPathFirst": "true", 
diff --git a/website/docs/reference/developer-readme.md b/website/docs/reference/developer-readme.md index 897b5828f0..d711c04b5f 100644 --- a/website/docs/reference/developer-readme.md +++ b/website/docs/reference/developer-readme.md @@ -8,32 +8,20 @@ description: SynapseML Development Setup # SynapseML Development Setup 1) [Install SBT](https://www.scala-sbt.org/1.x/docs/Setup.html) - - Make sure to download [JDK 11](https://www.oracle.com/java/technologies/javase/jdk11-archive-downloads.html) if you don't have it -2) Fork the repository on github - - See how to here: [Fork a repo - GitHub Docs](https://docs.github.com/en/get-started/quickstart/fork-a-repo) -3) Clone your fork - - `git clone https://github.com//SynapseML.git` - - This will automatically add your fork as the default remote, called `origin` -4) Add another Git Remote to track the original SynapseML repo. It's recommended to call it `upstream`: - - `git remote add upstream https://github.com/microsoft/SynapseML.git` - - See more about Git remotes here: [Git - Working with remotes](https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes) -5) Run sbt to compile and grab datasets + - Make sure to download JDK 11 if you don't have it +2) Fork the repository on GitHub + - This is required if you would like to make PRs. If you choose the fork option, replace the clone link below with that of your fork. 
+3) Git Clone your fork, or the repo directly + - `git clone https://github.com/Microsoft/SynapseML.git` + - NOTE: If you would like to contribute to SynapseML regularly, add your fork as a remote named ``origin`` and Microsoft/SynapseML as a remote named ``upstream`` +4) Run sbt to compile and grab datasets - `cd synapseml` - `sbt setup` -6) [Install IntelliJ](https://www.jetbrains.com/idea/download) +5) [Install IntelliJ](https://www.jetbrains.com/idea/download) - Install Scala plugins during install -7) Configure IntelliJ +6) Configure IntelliJ - **OPEN** the synapseml directory - If the project does not automatically import,click on `build.sbt` and import project -8) Prepare your Python Environment - - Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) - - Activate the `synapseml` conda environment by running `conda env create -f environment.yaml` from the `synapseml` directory. - -> NOTE -> -> If you will be regularly contributing to the SynapseML repo, you'll want to keep your fork synced with the -> upstream repository. Please read [this GitHub doc](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork) -> to know more and learn techniques about how to do it. # Publishing and Using Build Secrets