chore: Update to version 1.0.3 (#2183)
Co-authored-by: Mark Hamilton <mhamilton723@gmail.com>
BrendanWalsh and mhamilton723 committed Mar 22, 2024
1 parent 9aea411 commit 964ebfc
Showing 137 changed files with 22,596 additions and 128 deletions.
14 changes: 7 additions & 7 deletions README.md
@@ -120,7 +120,7 @@ In Azure Synapse notebooks please place the following in the first cell of your
{
"name": "synapseml",
"conf": {
"spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.2",
"spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.3",
"spark.jars.repositories": "https://mmlspark.azureedge.net/maven",
"spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind",
"spark.yarn.user.classpath.first": "true",
@@ -175,7 +175,7 @@ the above example, or from python:
```python
import pyspark
spark = pyspark.sql.SparkSession.builder.appName("MyApp") \
.config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.2") \
.config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.3") \
.getOrCreate()
import synapse.ml
```
@@ -186,9 +186,9 @@ SynapseML can be conveniently installed on existing Spark clusters via the
`--packages` option, examples:

```bash
-spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.2
-pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.2
-spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.2 MyApp.jar
+spark-shell --packages com.microsoft.azure:synapseml_2.12:1.0.3
+pyspark --packages com.microsoft.azure:synapseml_2.12:1.0.3
+spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.3 MyApp.jar
```

### SBT
@@ -197,7 +197,7 @@ If you are building a Spark application in Scala, add the following lines to
your `build.sbt`:

```scala
-libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.2"
+libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.3"
```

### Apache Livy and HDInsight
@@ -211,7 +211,7 @@ Excluding certain packages from the library may be necessary due to current issu
{
"name": "synapseml",
"conf": {
"spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.2",
"spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.3",
"spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind"
}
}
2 changes: 1 addition & 1 deletion build.sbt
@@ -224,7 +224,7 @@ publishDotnetBase := {
packDotnetAssemblyCmd(join(dotnetBaseDir, "target").getAbsolutePath, dotnetBaseDir)
val packagePath = join(dotnetBaseDir,
// Update the version whenever there's a new release
"target", s"SynapseML.DotnetBase.${dotnetedVersion("1.0.2")}.nupkg").getAbsolutePath
"target", s"SynapseML.DotnetBase.${dotnetedVersion("1.0.3")}.nupkg").getAbsolutePath
publishDotnetAssemblyCmd(packagePath, genSleetConfig.value)
}

2 changes: 1 addition & 1 deletion core/src/main/dotnet/src/dotnetBase.csproj
@@ -7,7 +7,7 @@
<IsPackable>true</IsPackable>

<Description>SynapseML .NET Base</Description>
-<Version>1.0.2</Version>
+<Version>1.0.3</Version>
</PropertyGroup>

<ItemGroup>
@@ -53,7 +53,7 @@ object DotnetCodegen {
|
| <ItemGroup>
| <PackageReference Include="Microsoft.Spark" Version="2.1.1" />
-| <PackageReference Include="SynapseML.DotnetBase" Version="1.0.2" />
+| <PackageReference Include="SynapseML.DotnetBase" Version="1.0.3" />
| <PackageReference Include="IgnoresAccessChecksToGenerator" Version="0.4.0" PrivateAssets="All" />
| $newtonsoftDep
| </ItemGroup>
@@ -89,7 +89,7 @@ object DotnetTestGen {
| <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
| </PackageReference>
| <PackageReference Include="Microsoft.Spark" Version="2.1.1" />
-| <PackageReference Include="SynapseML.DotnetBase" Version="1.0.2" />
+| <PackageReference Include="SynapseML.DotnetBase" Version="1.0.3" />
| <PackageReference Include="SynapseML.DotnetE2ETest" Version="${conf.dotnetVersion}" />
| <PackageReference Include="SynapseML.$curProject" Version="${conf.dotnetVersion}" />
| $referenceCore
78 changes: 39 additions & 39 deletions docs/Explore Algorithms/AI Services/Overview.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/Explore Algorithms/Deep Learning/Getting Started.md
@@ -21,12 +21,12 @@ Restarting the cluster automatically installs horovod v0.25.0 with pytorch_light
You could install the single synapseml-deep-learning wheel package to get the full functionality of deep vision classification.
Run the following command:
```powershell
-pip install synapseml==1.0.2
+pip install synapseml==1.0.3
```

An alternative is to install the SynapseML jar package in the library management section, by adding:
```
-Coordinate: com.microsoft.azure:synapseml_2.12:1.0.2
+Coordinate: com.microsoft.azure:synapseml_2.12:1.0.3
Repository: https://mmlspark.azureedge.net/maven
```
:::note
24 changes: 12 additions & 12 deletions docs/Explore Algorithms/Other Algorithms/Cyber ML.md
@@ -18,65 +18,65 @@ sidebar_label: CyberML
In other words, it returns a sample from the complement set.

## feature engineering: [indexers.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/feature/indexers.py)
-1. [IdIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexer)
+1. [IdIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexer)
is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html).
Given a dataframe, it creates an IdIndexerModel (described next) for categorical features. The model
maps each partition and column value seen in the given dataframe to an ID, either independently per partition or as one consecutive range over all partition and column values.
-2. [IdIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexerModel)
+2. [IdIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.IdIndexerModel)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
Given a dataframe, it maps each partition and column field to a consecutive integer ID.
Partitions or column values not encountered in the estimator are mapped to 0.
The model can operate in two modes: either create consecutive integer IDs independently per partition, or use one consecutive range across all partitions.
-3. [MultiIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexer)
+3. [MultiIndexer](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexer)
is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html).
Uses multiple IdIndexers to generate a MultiIndexerModel (described next) for categorical features. The model
contains multiple IdIndexers for multiple partitions and columns.
-4. [MultiIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexerModel)
+4. [MultiIndexerModel](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.indexers.MultiIndexerModel)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
Given a dataframe, it maps each partition and column field to a consecutive integer ID.
Partitions or column values not encountered in the estimator are mapped to 0.
The model can operate in two modes: either create consecutive integer IDs independently per partition, or use one consecutive range across all partitions.
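To make the Estimator-to-Model flow above concrete, here is a minimal PySpark sketch of fitting an IdIndexer and applying the resulting IdIndexerModel. The toy data, column names, and constructor parameters (`inputCol`, `partitionKey`, `outputCol`, `resetPerPartition`) are assumptions based on the linked pyspark docs, not values taken from this commit.

```python
# Minimal sketch of the IdIndexer -> IdIndexerModel flow described above.
# Parameter names are assumptions based on the linked pyspark docs.
from pyspark.sql import SparkSession
from synapse.ml.cyber.feature.indexers import IdIndexer

spark = SparkSession.builder.appName("IdIndexerSketch").getOrCreate()

df = spark.createDataFrame(
    [("tenant1", "alice"), ("tenant1", "bob"), ("tenant2", "alice")],
    ["tenant", "user"],
)

indexer = IdIndexer(
    inputCol="user",         # categorical column to map to integer IDs
    partitionKey="tenant",   # IDs are tracked per partition value
    outputCol="user_id",
    resetPerPartition=True,  # independent consecutive IDs within each partition
)

model = indexer.fit(df)     # returns an IdIndexerModel
model.transform(df).show()  # values not seen during fit would map to 0
```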

## feature engineering: [scalers.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/feature/scalers.py)
-1. [StandardScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScaler)
+1. [StandardScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScaler)
is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html).
Given a dataframe it creates a StandardScalarScalerModel (described next) which normalizes
any given dataframe according to the mean and standard deviation calculated on the
dataframe given to the estimator.
-2. [StandardScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScalerModel)
+2. [StandardScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.StandardScalarScalerModel)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
Given a dataframe with a value column x, the transformer changes its value as follows:
x'=(x-mean)/stddev. That is, if the transformer is given the same dataframe the estimator
was given then the value column will have a mean of 0.0 and a standard deviation of 1.0.
-3. [LinearScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScaler)
+3. [LinearScalarScaler](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScaler)
is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html).
Given a dataframe it creates a LinearScalarScalerModel (described next) which normalizes
any given dataframe according to the minimum and maximum values calculated on the
dataframe given to the estimator.
-4. [LinearScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScalerModel)
+4. [LinearScalarScalerModel](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.feature.html#synapse.ml.cyber.feature.scalers.LinearScalarScalerModel)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
Given a dataframe with a value column x, the transformer changes its value such that
if the transformer is given the same dataframe the estimator
was given then the value column will be scaled linearly to the given ranges.
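The scalers follow the same fit-then-transform pattern. Below is a short, hedged sketch using StandardScalarScaler; the parameter names (`inputCol`, `partitionKey`, `outputCol`) and toy data are assumptions based on the linked pyspark docs.

```python
# Sketch of StandardScalarScaler: per-partition z-score normalization.
# Parameter names are assumptions based on the linked pyspark docs.
from pyspark.sql import SparkSession
from synapse.ml.cyber.feature.scalers import StandardScalarScaler

spark = SparkSession.builder.appName("ScalerSketch").getOrCreate()

df = spark.createDataFrame(
    [("tenant1", 1.0), ("tenant1", 3.0), ("tenant2", 10.0), ("tenant2", 30.0)],
    ["tenant", "score"],
)

scaler = StandardScalarScaler(
    inputCol="score",
    partitionKey="tenant",
    outputCol="score_scaled",
)

model = scaler.fit(df)      # computes per-partition mean and stddev
model.transform(df).show()  # x' = (x - mean) / stddev within each tenant
```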

## access anomalies: [collaborative_filtering.py](https://github.com/microsoft/SynapseML/blob/master/core/src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py)
-1. [AccessAnomaly](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomaly)
+1. [AccessAnomaly](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomaly)
is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html).
Given a dataframe, the estimator generates an AccessAnomalyModel (described next). The model
can detect anomalous access of users to resources where the access
is outside of the user's or resource's profile. For instance, a user from HR accessing
a resource from Finance. This result is based solely on access patterns rather than explicit features.
Internally, the code is based on Collaborative Filtering as implemented in Spark, using
Matrix Factorization with Alternating Least Squares.
-2. [AccessAnomalyModel](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyModel)
+2. [AccessAnomalyModel](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyModel)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
Given a dataframe, the transformer computes a value between (-inf, inf) where positive
values indicate an anomaly score. Anomaly scores are computed to have a mean of 0.0
and a standard deviation of 1.0 over the original dataframe given to the estimator.
-3. [ModelNormalizeTransformer](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.ModelNormalizeTransformer)
+3. [ModelNormalizeTransformer](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.ModelNormalizeTransformer)
is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html).
This transformer is used internally by AccessAnomaly to normalize a model to generate
anomaly scores with mean 0.0 and standard deviation of 1.0.
-4. [AccessAnomalyConfig](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyConfig)
+4. [AccessAnomalyConfig](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.cyber.anomaly.html#synapse.ml.cyber.anomaly.collaborative_filtering.AccessAnomalyConfig)
contains the default values for AccessAnomaly.
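As a hedged, end-to-end illustration of the access-anomaly flow above: fit AccessAnomaly on an access log, then score accesses with the resulting AccessAnomalyModel. The constructor parameters (`tenantCol`, `userCol`, `resCol`, `likelihoodCol`) and the toy access log are assumptions for illustration only.

```python
# Sketch of AccessAnomaly: collaborative-filtering-based access anomaly scoring.
# Parameter names are assumptions based on the linked pyspark docs; the toy
# dataframe is far too small for meaningful ALS training and is illustrative only.
from pyspark.sql import SparkSession
from synapse.ml.cyber.anomaly.collaborative_filtering import AccessAnomaly

spark = SparkSession.builder.appName("AccessAnomalySketch").getOrCreate()

access_df = spark.createDataFrame(
    [
        ("tenant1", "hr_user_1", "hr_portal", 10.0),
        ("tenant1", "hr_user_2", "hr_portal", 8.0),
        ("tenant1", "fin_user_1", "fin_ledger", 12.0),
        ("tenant1", "fin_user_2", "fin_ledger", 9.0),
    ],
    ["tenant", "user", "res", "likelihood"],
)

anomaly = AccessAnomaly(
    tenantCol="tenant",
    userCol="user",
    resCol="res",
    likelihoodCol="likelihood",
)

model = anomaly.fit(access_df)     # AccessAnomalyModel
model.transform(access_df).show()  # adds a normalized anomaly score column
```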
@@ -34,7 +34,7 @@
"# Create an Azure Databricks cluster and install the following libs\n",
"\n",
"1. In Cluster Libraries install from library source Maven:\n",
"Coordinates: com.microsoft.azure:synapseml_2.12:1.0.2\n",
"Coordinates: com.microsoft.azure:synapseml_2.12:1.0.3\n",
"Repository: https://mmlspark.azureedge.net/maven\n",
"\n",
"2. In Cluster Libraries install from PyPI the library called plotly"
@@ -16,11 +16,11 @@
"\n",
"This sample demonstrates how to use the following APIs:\n",
"- [`TrainRegressor`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n",
"- [`ComputePerInstanceStatistics`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputePerInstanceStatistics)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputePerInstanceStatistics)\n",
"- [`DataConversion`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.DataConversion)"
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.DataConversion)"
]
},
{
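For context on the APIs whose documentation links are bumped above, here is a minimal, hedged sketch of the TrainRegressor and ComputePerInstanceStatistics flow; the toy dataframe and column names are illustrative assumptions, not taken from the notebook.

```python
# Sketch of TrainRegressor + ComputePerInstanceStatistics (illustrative only).
from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression
from synapse.ml.train import ComputePerInstanceStatistics, TrainRegressor

spark = SparkSession.builder.appName("TrainRegressorSketch").getOrCreate()

df = spark.createDataFrame(
    [(1.0, 2.0, 5.0), (2.0, 1.0, 4.0), (3.0, 4.0, 11.0), (4.0, 3.0, 10.0)],
    ["feature_a", "feature_b", "label"],
)

# TrainRegressor wraps a SparkML regressor and handles feature assembly.
model = TrainRegressor(model=LinearRegression(), labelCol="label").fit(df)

scored = model.transform(df)
ComputePerInstanceStatistics().transform(scored).show()  # per-row error metrics
```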
@@ -15,15 +15,15 @@
"\n",
"This sample demonstrates the use of several members of the synapseml library:\n",
"- [`TrainRegressor`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.train.html?#module-synapse.ml.train.TrainRegressor)\n",
"- [`SummarizeData`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.stages.html?#module-synapse.ml.stages.SummarizeData)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.stages.html?#module-synapse.ml.stages.SummarizeData)\n",
"- [`CleanMissingData`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.CleanMissingData)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.featurize.html?#module-synapse.ml.featurize.CleanMissingData)\n",
"- [`ComputeModelStatistics`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputeModelStatistics)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.train.html?#module-synapse.ml.train.ComputeModelStatistics)\n",
"- [`FindBestModel`\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.2/pyspark/synapse.ml.automl.html?#module-synapse.ml.automl.FindBestModel)\n",
" ](https://mmlspark.blob.core.windows.net/docs/1.0.3/pyspark/synapse.ml.automl.html?#module-synapse.ml.automl.FindBestModel)\n",
"\n",
"First, import the pandas package so that we can read and parse the datafile\n",
"using `pandas.read_csv()`"
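Likewise, a hedged sketch of combining CleanMissingData, TrainRegressor, and FindBestModel is shown below; the toy data, column names, and the "RMSE" metric string are assumptions based on the linked docs rather than this notebook's contents.

```python
# Sketch of CleanMissingData + TrainRegressor + FindBestModel (illustrative only;
# parameter names and the metric string are assumptions based on the linked docs).
from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression
from synapse.ml.automl import FindBestModel
from synapse.ml.featurize import CleanMissingData
from synapse.ml.train import TrainRegressor

spark = SparkSession.builder.appName("FindBestModelSketch").getOrCreate()

df = spark.createDataFrame(
    [(1.0, None, 5.0), (2.0, 1.0, 4.0), (3.0, 4.0, 11.0), (4.0, 2.0, 9.0)],
    ["feature_a", "feature_b", "label"],
)

# Impute missing feature values with the column mean.
cols = ["feature_a", "feature_b"]
clean = CleanMissingData(inputCols=cols, outputCols=cols, cleaningMode="Mean")
cleaned = clean.fit(df).transform(df)

# In practice you would evaluate on a held-out split; the tiny toy frame is
# reused here purely to keep the sketch short.
candidates = [
    TrainRegressor(model=LinearRegression(regParam=0.1), labelCol="label").fit(cleaned),
    TrainRegressor(model=LinearRegression(regParam=0.01), labelCol="label").fit(cleaned),
]
best = FindBestModel(models=candidates, evaluationMetric="RMSE").fit(cleaned)
best.transform(cleaned).show()
```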
