
Commit bf9dc13
Revert "fix: Make SynapseE2E Tests work now with Spark 3.2 (#1362)"
This reverts commit 0840e31.
serena-ruan committed Feb 7, 2022
1 parent f070c2e commit bf9dc13
Showing 4 changed files with 46 additions and 80 deletions.
Changed file: FileUtilities

```diff
@@ -26,14 +26,6 @@ object FileUtilities {
     val CREATE = S.CREATE
   }
 
-  def recursiveListFiles(f: File): Array[File] = {
-    val these = f.listFiles()
-    these ++ these
-      .filter(_.isDirectory)
-      .flatMap(recursiveListFiles)
-      .filter(!_.isDirectory)
-  }
-
   def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = {
     def loop(dir: File): Array[File] = {
       val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory)
```
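For context, a minimal sketch (illustrative names, not repo code) of the difference between the removed recursive walk and the plain `listFiles()` call the reverted listing utilities fall back to:

```scala
import java.io.File

// Flat listing: sees only entries directly under dir (post-revert behavior).
def flatPyFiles(dir: File): Array[File] =
  dir.listFiles().filter(f => f.isFile && f.getName.endsWith(".py"))

// Recursive listing: also walks subdirectories, which is what
// recursiveListFiles enabled for the nested notebooks/features tree.
def recursivePyFiles(dir: File): Array[File] = {
  val these = dir.listFiles()
  these.filter(f => f.isFile && f.getName.endsWith(".py")) ++
    these.filter(_.isDirectory).flatMap(recursivePyFiles)
}
```

Dropping the recursive walk is why the reverted test below also moves its globs from `./notebooks/features/**/*.ipynb` back to `./notebooks/*.ipynb`.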
Changed file: SynapseTests

```diff
@@ -16,16 +16,14 @@ import scala.sys.process.Process
 /** Tests to validate fuzzing of modules. */
 class SynapseTests extends TestBase {
 
-  test("Synapse") {
+  ignore("Synapse") {
 
     val os = sys.props("os.name").toLowerCase
     os match {
       case x if x contains "windows" =>
-        exec("conda activate synapseml " +
-          "&& jupyter nbconvert --to script .\\notebooks\\features\\**\\*.ipynb")
+        exec("conda activate synapseml && jupyter nbconvert --to script .\\notebooks\\*.ipynb")
       case _ =>
-        Process(s"conda init bash; conda activate synapseml; " +
-          "jupyter nbconvert --to script ./notebooks/features/**/*.ipynb")
+        Process(s"conda init bash; conda activate synapseml; jupyter nbconvert --to script ./notebooks/*.ipynb")
     }
 
     SynapseUtilities.listPythonFiles().map(f => {
@@ -35,13 +33,8 @@ class SynapseTests extends TestBase {
       new File(f).renameTo(new File(newPath))
     })
 
-    val workspaceName = "mmlsparkppe"
-    val sparkPools = Array(
-      "e2etstspark32i1",
-      "e2etstspark32i2",
-      "e2etstspark32i3",
-      "e2etstspark32i4",
-      "e2etstspark32i5")
+    val workspaceName = "mmlspark"
+    val sparkPools = Array("buildpool", "buildpool2", "buildpool3")
 
     val livyBatchJobs = SynapseUtilities.listPythonJobFiles()
       .filterNot(_.contains(" "))
@@ -50,7 +43,7 @@ class SynapseTests extends TestBase {
       val poolName = SynapseUtilities.monitorPool(workspaceName, sparkPools)
       val livyUrl = "https://" +
         workspaceName +
-        ".dev.azuresynapse-dogfood.net/livyApi/versions/2019-11-01-preview/sparkPools/" +
+        ".dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/" +
         poolName +
         "/batches"
       val livyBatch: LivyBatch = SynapseUtilities.uploadAndSubmitNotebook(livyUrl, f)
```
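One note on the conversion step: a `;`-separated command string only chains if it is handed to a shell, and `Process(...)` alone only constructs a `ProcessBuilder` — `.!` is what runs it. A hedged sketch of how the non-Windows branch could be executed for real (the `bash -c` wrapper is an assumption, not what the test does):

```scala
import scala.sys.process._

// Run the conversion through bash so ';' chaining and `conda activate`
// take effect; assumes conda and jupyter are installed and on PATH.
val cmd = "conda init bash; conda activate synapseml; " +
  "jupyter nbconvert --to script ./notebooks/*.ipynb"
val exitCode: Int = Process(Seq("bash", "-c", cmd)).!
```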
Changed file: SynapseUtilities

```diff
@@ -51,19 +51,18 @@ object SynapseUtilities extends HasHttpClient {
   lazy val Token: String = getSynapseToken
 
   val Folder = s"build_${BuildInfo.version}/scripts"
-  val TimeoutInMillis: Int = 30 * 60 * 1000 // 30 minutes
+  val TimeoutInMillis: Int = 20 * 60 * 1000
   val StorageAccount: String = "mmlsparkeuap"
-  val StorageContainer: String = "mmlsparkppefs"
+  val StorageContainer: String = "synapse"
   val TenantId: String = "72f988bf-86f1-41af-91ab-2d7cd011db47"
   val ClientId: String = "85dde348-dd2b-43e5-9f5a-22262af45332"
 
   def listPythonFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
         .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
+        .listFiles()
         .filter(_.getAbsolutePath.endsWith(".py"))
         .filter(_.getAbsolutePath.contains("-"))
         .filterNot(_.getAbsolutePath.contains("CyberML"))
@@ -74,40 +73,35 @@ object SynapseUtilities extends HasHttpClient {
         .filterNot(_.getAbsolutePath.contains("Overview"))
         .filterNot(_.getAbsolutePath.contains("ModelInterpretation"))
         .filterNot(_.getAbsolutePath.contains("Interpretability"))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def listPythonJobFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
-        .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
-        .filter(_.getAbsolutePath.endsWith(".py"))
-        .filterNot(_.getAbsolutePath.contains("-"))
-        .filterNot(_.getAbsolutePath.contains(" "))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
+        .getCanonicalFile
+        .listFiles()
+        .filter(_.getAbsolutePath.endsWith(".py"))
+        .filterNot(_.getAbsolutePath.contains("-"))
+        .filterNot(_.getAbsolutePath.contains(" "))
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def listNoteBookFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
         .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
+        .listFiles()
         .filter(_.getAbsolutePath.endsWith(".ipynb"))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def postMortem(batch: LivyBatch, livyUrl: String): LivyBatch = {
@@ -128,7 +122,7 @@ object SynapseUtilities extends HasHttpClient {
   def showSubmittingJobs(workspaceName: String, poolName: String): Applications = {
     val uri: String =
       "https://" +
-        s"$workspaceName.dev.azuresynapse-dogfood.net" +
+        s"$workspaceName.dev.azuresynapse.net" +
         "/monitoring/workloadTypes/spark/applications" +
         "?api-version=2020-10-01-preview" +
         "&filter=(((state%20eq%20%27Queued%27)%20or%20(state%20eq%20%27Submitting%27))" +
```
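The `filter` parameter in this URI is URL-encoded OData. Decoding the fragment visible above shows what the monitoring endpoint is being asked for; a quick standard-library sketch:

```scala
import java.net.URLDecoder

// Decode the visible (truncated) filter fragment from the URI above.
val fragment = "(((state%20eq%20%27Queued%27)%20or%20(state%20eq%20%27Submitting%27))"
println(URLDecoder.decode(fragment, "UTF-8"))
// prints: (((state eq 'Queued') or (state eq 'Submitting'))
```

The filter continues past the lines shown in this hunk, so only this prefix is reproduced.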
```diff
@@ -158,7 +152,7 @@ object SynapseUtilities extends HasHttpClient {
       readyPool
     }
     else {
-      println(s"No spark pool is ready to submit a new job, waiting 10s")
+      println(s"None spark pool is ready to submit job, waiting 10s")
       blocking {
         Thread.sleep(10000)
       }
```
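This hunk is the retry branch of `monitorPool`, whose full body isn't shown. A hedged sketch of the polling pattern it implies, with a caller-supplied `isBusy` standing in for the check against `showSubmittingJobs`:

```scala
import scala.annotation.tailrec
import scala.concurrent.blocking

// Illustrative only: poll the pools until one has no queued or
// submitting applications, sleeping 10s between rounds.
@tailrec
def waitForReadyPool(pools: Seq[String], isBusy: String => Boolean): String =
  pools.find(p => !isBusy(p)) match {
    case Some(ready) => ready
    case None =>
      println("No spark pool is ready to submit a new job, waiting 10s")
      blocking { Thread.sleep(10000) }
      waitForReadyPool(pools, isBusy)
  }
```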
```diff
@@ -249,8 +243,7 @@ object SynapseUtilities extends HasHttpClient {
     val excludes: String = "org.scala-lang:scala-reflect," +
       "org.apache.spark:spark-tags_2.12," +
       "org.scalactic:scalactic_2.12," +
-      "org.scalatest:scalatest_2.12," +
-      "org.slf4j:slf4j-api"
+      "org.scalatest:scalatest_2.12"
 
     val livyPayload: String =
       s"""
@@ -264,7 +257,7 @@ object SynapseUtilities extends HasHttpClient {
       |    "numExecutors" : 2,
       |    "conf" :
       |      {
-      |        "spark.jars.packages" : "com.microsoft.azure:synapseml_2.12:${BuildInfo.version}",
+      |        "spark.jars.packages" : "com.microsoft.azure:synapseml:${BuildInfo.version}",
       |        "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven",
       |        "spark.jars.excludes": "$excludes",
       |        "spark.driver.userClassPathFirst": "true",
```
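The payload above is POSTed to the workspace's Livy `/batches` endpoint built in `SynapseTests`. A minimal sketch of such a submission, assuming Apache HttpClient 4.x (the object extends `HasHttpClient`, but this particular helper is hypothetical and not part of the diff):

```scala
import org.apache.http.client.methods.HttpPost
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.HttpClients

// Hypothetical helper: POST the JSON payload with a bearer token
// and return the HTTP status code.
def submitBatch(livyUrl: String, token: String, payload: String): Int = {
  val post = new HttpPost(livyUrl)
  post.setHeader("Authorization", s"Bearer $token")
  post.setHeader("Content-Type", "application/json")
  post.setEntity(new StringEntity(payload))
  val client = HttpClients.createDefault()
  try client.execute(post).getStatusLine.getStatusCode
  finally client.close()
}
```

Note that the restored coordinate `com.microsoft.azure:synapseml` has no `_2.12` Scala-version suffix, and the `slf4j-api` exclude added for Spark 3.2 is dropped.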
30 changes: 9 additions & 21 deletions website/docs/reference/developer-readme.md
```diff
@@ -8,32 +8,20 @@ description: SynapseML Development Setup
 # SynapseML Development Setup
 
 1) [Install SBT](https://www.scala-sbt.org/1.x/docs/Setup.html)
-    - Make sure to download [JDK 11](https://www.oracle.com/java/technologies/javase/jdk11-archive-downloads.html) if you don't have it
-2) Fork the repository on github
-    - See how to here: [Fork a repo - GitHub Docs](https://docs.github.com/en/get-started/quickstart/fork-a-repo)
-3) Clone your fork
-    - `git clone https://github.com/<your GitHub handle>/SynapseML.git`
-    - This will automatically add your fork as the default remote, called `origin`
-4) Add another Git Remote to track the original SynapseML repo. It's recommended to call it `upstream`:
-    - `git remote add upstream https://github.com/microsoft/SynapseML.git`
-    - See more about Git remotes here: [Git - Working with remotes](https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes)
-5) Run sbt to compile and grab datasets
+    - Make sure to download JDK 11 if you don't have it
+3) Fork the repository on github
+    - This is required if you would like to make PRs. If you choose the fork option, replace the clone link below with that of your fork.
+2) Git Clone your fork, or the repo directly
+    - `git clone https://github.com/Microsoft/SynapseML.git`
+    - NOTE: If you would like to contribute to synapseml regularly, add your fork as a remote named ``origin`` and Microsoft/SynapseML as a remote named ``upstream``
+3) Run sbt to compile and grab datasets
     - `cd synapseml`
     - `sbt setup`
-6) [Install IntelliJ](https://www.jetbrains.com/idea/download)
+4) [Install IntelliJ](https://www.jetbrains.com/idea/download)
     - Install Scala plugins during install
-7) Configure IntelliJ
+5) Configure IntelliJ
     - **OPEN** the synapseml directory
     - If the project does not automatically import,click on `build.sbt` and import project
-8) Prepare your Python Environment
-    - Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
-    - Activate the `synapseml` conda environment by running `conda env create -f environment.yaml` from the `synapseml` directory.
-
-> NOTE
->
-> If you will be regularly contributing to the SynapseML repo, you'll want to keep your fork synced with the
-> upstream repository. Please read [this GitHub doc](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork)
-> to know more and learn techniques about how to do it.
 
 # Publishing and Using Build Secrets
```
