
Commit bf9dc13
Revert "fix: Make SynapseE2E Tests work now with Spark 3.2 (#1362)"
This reverts commit 0840e31.
serena-ruan committed Feb 7, 2022
1 parent f070c2e commit bf9dc13
Showing 4 changed files with 46 additions and 80 deletions.
Changed file: FileUtilities

```diff
@@ -26,14 +26,6 @@ object FileUtilities {
     val CREATE = S.CREATE
   }
 
-  def recursiveListFiles(f: File): Array[File] = {
-    val these = f.listFiles()
-    these ++ these
-      .filter(_.isDirectory)
-      .flatMap(recursiveListFiles)
-      .filter(!_.isDirectory)
-  }
-
   def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = {
     def loop(dir: File): Array[File] = {
       val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory)
```
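For context, a minimal sketch (illustrative names, not repo code) of the difference between the removed recursive walk and the plain `listFiles()` call the reverted listing utilities fall back to:

```scala
import java.io.File

// Flat listing: sees only entries directly under dir (post-revert behavior).
def flatPyFiles(dir: File): Array[File] =
  dir.listFiles().filter(f => f.isFile && f.getName.endsWith(".py"))

// Recursive listing: also walks subdirectories, which is what
// recursiveListFiles enabled for the nested notebooks/features tree.
def recursivePyFiles(dir: File): Array[File] = {
  val these = dir.listFiles()
  these.filter(f => f.isFile && f.getName.endsWith(".py")) ++
    these.filter(_.isDirectory).flatMap(recursivePyFiles)
}
```

Dropping the recursive walk is why the reverted test below also moves its globs from `./notebooks/features/**/*.ipynb` back to `./notebooks/*.ipynb`.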
Changed file: SynapseTests

```diff
@@ -16,16 +16,14 @@ import scala.sys.process.Process
 /** Tests to validate fuzzing of modules. */
 class SynapseTests extends TestBase {
 
-  test("Synapse") {
+  ignore("Synapse") {
 
     val os = sys.props("os.name").toLowerCase
     os match {
       case x if x contains "windows" =>
-        exec("conda activate synapseml " +
-          "&& jupyter nbconvert --to script .\\notebooks\\features\\**\\*.ipynb")
+        exec("conda activate synapseml && jupyter nbconvert --to script .\\notebooks\\*.ipynb")
       case _ =>
-        Process(s"conda init bash; conda activate synapseml; " +
-          "jupyter nbconvert --to script ./notebooks/features/**/*.ipynb")
+        Process(s"conda init bash; conda activate synapseml; jupyter nbconvert --to script ./notebooks/*.ipynb")
     }
 
     SynapseUtilities.listPythonFiles().map(f => {
@@ -35,13 +33,8 @@ class SynapseTests extends TestBase {
       new File(f).renameTo(new File(newPath))
     })
 
-    val workspaceName = "mmlsparkppe"
-    val sparkPools = Array(
-      "e2etstspark32i1",
-      "e2etstspark32i2",
-      "e2etstspark32i3",
-      "e2etstspark32i4",
-      "e2etstspark32i5")
+    val workspaceName = "mmlspark"
+    val sparkPools = Array("buildpool", "buildpool2", "buildpool3")
 
     val livyBatchJobs = SynapseUtilities.listPythonJobFiles()
       .filterNot(_.contains(" "))
@@ -50,7 +43,7 @@ class SynapseTests extends TestBase {
       val poolName = SynapseUtilities.monitorPool(workspaceName, sparkPools)
       val livyUrl = "https://" +
         workspaceName +
-        ".dev.azuresynapse-dogfood.net/livyApi/versions/2019-11-01-preview/sparkPools/" +
+        ".dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/" +
         poolName +
         "/batches"
       val livyBatch: LivyBatch = SynapseUtilities.uploadAndSubmitNotebook(livyUrl, f)
```
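One note on the conversion step: a `;`-separated command string only chains if it is handed to a shell, and `Process(...)` alone only constructs a `ProcessBuilder` — `.!` is what runs it. A hedged sketch of how the non-Windows branch could be executed for real (the `bash -c` wrapper is an assumption, not what the test does):

```scala
import scala.sys.process._

// Run the conversion through bash so ';' chaining and `conda activate`
// take effect; assumes conda and jupyter are installed and on PATH.
val cmd = "conda init bash; conda activate synapseml; " +
  "jupyter nbconvert --to script ./notebooks/*.ipynb"
val exitCode: Int = Process(Seq("bash", "-c", cmd)).!
```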
Changed file: SynapseUtilities

```diff
@@ -51,19 +51,18 @@ object SynapseUtilities extends HasHttpClient {
   lazy val Token: String = getSynapseToken
 
   val Folder = s"build_${BuildInfo.version}/scripts"
-  val TimeoutInMillis: Int = 30 * 60 * 1000 // 30 minutes
+  val TimeoutInMillis: Int = 20 * 60 * 1000
   val StorageAccount: String = "mmlsparkeuap"
-  val StorageContainer: String = "mmlsparkppefs"
+  val StorageContainer: String = "synapse"
   val TenantId: String = "72f988bf-86f1-41af-91ab-2d7cd011db47"
   val ClientId: String = "85dde348-dd2b-43e5-9f5a-22262af45332"
 
   def listPythonFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
         .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
+        .listFiles()
         .filter(_.getAbsolutePath.endsWith(".py"))
         .filter(_.getAbsolutePath.contains("-"))
         .filterNot(_.getAbsolutePath.contains("CyberML"))
@@ -74,40 +73,35 @@ object SynapseUtilities extends HasHttpClient {
         .filterNot(_.getAbsolutePath.contains("Overview"))
         .filterNot(_.getAbsolutePath.contains("ModelInterpretation"))
         .filterNot(_.getAbsolutePath.contains("Interpretability"))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def listPythonJobFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
-        .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
-        .filter(_.getAbsolutePath.endsWith(".py"))
-        .filterNot(_.getAbsolutePath.contains("-"))
-        .filterNot(_.getAbsolutePath.contains(" "))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
+        .getCanonicalFile
+        .listFiles()
+        .filter(_.getAbsolutePath.endsWith(".py"))
+        .filterNot(_.getAbsolutePath.contains("-"))
+        .filterNot(_.getAbsolutePath.contains(" "))
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def listNoteBookFiles(): Array[String] = {
-    Option({
-      val rootDirectory = FileUtilities
-        .join(BuildInfo.baseDirectory.getParent, "notebooks/features")
+    Option(
+      FileUtilities
+        .join(BuildInfo.baseDirectory.getParent, "notebooks")
         .getCanonicalFile
-
-      FileUtilities.recursiveListFiles(rootDirectory)
+        .listFiles()
         .filter(_.getAbsolutePath.endsWith(".ipynb"))
-        .map(file => file.getAbsolutePath)
-    })
-      .get
-      .sorted
+        .map(file => file.getAbsolutePath))
+      .get
+      .sorted
   }
 
   def postMortem(batch: LivyBatch, livyUrl: String): LivyBatch = {
@@ -128,7 +122,7 @@ object SynapseUtilities extends HasHttpClient {
   def showSubmittingJobs(workspaceName: String, poolName: String): Applications = {
     val uri: String =
       "https://" +
-        s"$workspaceName.dev.azuresynapse-dogfood.net" +
+        s"$workspaceName.dev.azuresynapse.net" +
         "/monitoring/workloadTypes/spark/applications" +
         "?api-version=2020-10-01-preview" +
         "&filter=(((state%20eq%20%27Queued%27)%20or%20(state%20eq%20%27Submitting%27))" +
```
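The `filter` parameter in this URI is URL-encoded OData. Decoding the fragment visible above shows what the monitoring endpoint is being asked for; a quick standard-library sketch:

```scala
import java.net.URLDecoder

// Decode the visible (truncated) filter fragment from the URI above.
val fragment = "(((state%20eq%20%27Queued%27)%20or%20(state%20eq%20%27Submitting%27))"
println(URLDecoder.decode(fragment, "UTF-8"))
// prints: (((state eq 'Queued') or (state eq 'Submitting'))
```

The filter continues past the lines shown in this hunk, so only this prefix is reproduced.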
```diff
@@ -158,7 +152,7 @@ object SynapseUtilities extends HasHttpClient {
       readyPool
     }
     else {
-      println(s"No spark pool is ready to submit a new job, waiting 10s")
+      println(s"None spark pool is ready to submit job, waiting 10s")
       blocking {
         Thread.sleep(10000)
       }
```
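This hunk is the retry branch of `monitorPool`, whose full body isn't shown. A hedged sketch of the polling pattern it implies, with a caller-supplied `isBusy` standing in for the check against `showSubmittingJobs`:

```scala
import scala.annotation.tailrec
import scala.concurrent.blocking

// Illustrative only: poll the pools until one has no queued or
// submitting applications, sleeping 10s between rounds.
@tailrec
def waitForReadyPool(pools: Seq[String], isBusy: String => Boolean): String =
  pools.find(p => !isBusy(p)) match {
    case Some(ready) => ready
    case None =>
      println("No spark pool is ready to submit a new job, waiting 10s")
      blocking { Thread.sleep(10000) }
      waitForReadyPool(pools, isBusy)
  }
```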
```diff
@@ -249,8 +243,7 @@ object SynapseUtilities extends HasHttpClient {
     val excludes: String = "org.scala-lang:scala-reflect," +
       "org.apache.spark:spark-tags_2.12," +
       "org.scalactic:scalactic_2.12," +
-      "org.scalatest:scalatest_2.12," +
-      "org.slf4j:slf4j-api"
+      "org.scalatest:scalatest_2.12"
 
     val livyPayload: String =
       s"""
@@ -264,7 +257,7 @@ object SynapseUtilities extends HasHttpClient {
       |    "numExecutors" : 2,
       |    "conf" :
       |      {
-      |        "spark.jars.packages" : "com.microsoft.azure:synapseml_2.12:${BuildInfo.version}",
+      |        "spark.jars.packages" : "com.microsoft.azure:synapseml:${BuildInfo.version}",
       |        "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven",
       |        "spark.jars.excludes": "$excludes",
       |        "spark.driver.userClassPathFirst": "true",
```
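The payload above is POSTed to the workspace's Livy `/batches` endpoint built in `SynapseTests`. A minimal sketch of such a submission, assuming Apache HttpClient 4.x (the object extends `HasHttpClient`, but this particular helper is hypothetical and not part of the diff):

```scala
import org.apache.http.client.methods.HttpPost
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.HttpClients

// Hypothetical helper: POST the JSON payload with a bearer token
// and return the HTTP status code.
def submitBatch(livyUrl: String, token: String, payload: String): Int = {
  val post = new HttpPost(livyUrl)
  post.setHeader("Authorization", s"Bearer $token")
  post.setHeader("Content-Type", "application/json")
  post.setEntity(new StringEntity(payload))
  val client = HttpClients.createDefault()
  try client.execute(post).getStatusLine.getStatusCode
  finally client.close()
}
```

Note that the restored coordinate `com.microsoft.azure:synapseml` has no `_2.12` Scala-version suffix, and the `slf4j-api` exclude added for Spark 3.2 is dropped.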
30 changes: 9 additions & 21 deletions website/docs/reference/developer-readme.md
```diff
@@ -8,32 +8,20 @@ description: SynapseML Development Setup
 # SynapseML Development Setup
 
 1) [Install SBT](https://www.scala-sbt.org/1.x/docs/Setup.html)
-    - Make sure to download [JDK 11](https://www.oracle.com/java/technologies/javase/jdk11-archive-downloads.html) if you don't have it
-2) Fork the repository on github
-    - See how to here: [Fork a repo - GitHub Docs](https://docs.github.com/en/get-started/quickstart/fork-a-repo)
-3) Clone your fork
-    - `git clone https://github.com/<your GitHub handle>/SynapseML.git`
-    - This will automatically add your fork as the default remote, called `origin`
-4) Add another Git Remote to track the original SynapseML repo. It's recommended to call it `upstream`:
-    - `git remote add upstream https://github.com/microsoft/SynapseML.git`
-    - See more about Git remotes here: [Git - Working with remotes](https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes)
-5) Run sbt to compile and grab datasets
+    - Make sure to download JDK 11 if you don't have it
+3) Fork the repository on github
+    - This is required if you would like to make PRs. If you choose the fork option, replace the clone link below with that of your fork.
+2) Git Clone your fork, or the repo directly
+    - `git clone https://github.com/Microsoft/SynapseML.git`
+    - NOTE: If you would like to contribute to synapseml regularly, add your fork as a remote named ``origin`` and Microsoft/SynapseML as a remote named ``upstream``
+3) Run sbt to compile and grab datasets
     - `cd synapseml`
     - `sbt setup`
-6) [Install IntelliJ](https://www.jetbrains.com/idea/download)
+4) [Install IntelliJ](https://www.jetbrains.com/idea/download)
     - Install Scala plugins during install
-7) Configure IntelliJ
+5) Configure IntelliJ
     - **OPEN** the synapseml directory
     - If the project does not automatically import,click on `build.sbt` and import project
-8) Prepare your Python Environment
-    - Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
-    - Activate the `synapseml` conda environment by running `conda env create -f environment.yaml` from the `synapseml` directory.
-
-> NOTE
->
-> If you will be regularly contributing to the SynapseML repo, you'll want to keep your fork synced with the
-> upstream repository. Please read [this GitHub doc](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork)
-> to know more and learn techniques about how to do it.
 
 # Publishing and Using Build Secrets
```
