From aa166b55cfb4d38a7964dc7e51d5879f310999fb Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Fri, 20 Aug 2021 17:01:49 -0700 Subject: [PATCH 01/32] Initial PDP version. --- .../ml/spark/explainers/ICEExplainer.scala | 171 ++++++++++++++++++ .../ml/spark/explainers/LocalExplainer.scala | 32 ---- .../ml/spark/explainers/SharedParams.scala | 40 +++- .../explainers/split1/ICEExplainerSuite.scala | 72 ++++++++ 4 files changed, 282 insertions(+), 33 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala create mode 100644 core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala new file mode 100644 index 0000000000..4c479d139b --- /dev/null +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -0,0 +1,171 @@ +package com.microsoft.ml.spark.explainers +import com.microsoft.ml.spark.core.contracts.HasOutputCol +import com.microsoft.ml.spark.core.schema.DatasetExtensions +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.linalg.SQLDataTypes.VectorType +import org.apache.spark.ml.param.{DoubleParam, IntParam, ParamMap, Params} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.ml.param._ +import org.apache.spark.ml.stat.Summarizer + + +trait ICEFeatureParams extends Params { + val feature = new Param[String] (this, "feature", "Feature to explain") + def getFeature: String = $(feature) + def setFeature(value: String): this.type = set(feature, value) + + val featureType = new Param[String] (this, "featureType", "Type of feature to explain") + def getFeatureType: String = $(featureType) + def setFeatureType(value: String): this.type = set(featureType, value) + + val topNValues = new IntParam (this, "topNValues", "topNValues") + def getTopNValues: Int = $(topNValues) + def setTopNValues(value: Int): this.type = set(topNValues, value) + + val nSplits = new IntParam (this, "nSplits", "nSplits") + def getNSplits: Int = $(nSplits) + def setNSplits(value: Int): this.type = set(nSplits, value) + + val rangeMax = new DoubleParam(this, "rangeMax", "rangeMax") + def getRangeMax: Double = $(rangeMax) + def setRangeMax(value: Double): this.type = set(rangeMax, value) + + val rangeMin = new DoubleParam(this, "rangeMin", "rangeMin") + def getRangeMin: Double = $(rangeMin) + def setRangeMin(value: Double): this.type = set(rangeMin, value) + + setDefault(featureType -> "discrete", topNValues -> 100, nSplits -> 20) + +} + +class ICETransformer(override val uid: String) extends Transformer + with HasNumSamples + with HasExplainTarget + with HasModel + with ICEFeatureParams + with HasOutputCol { + + /* transform: + 1) gives feature values + 2) individual series plots + + */ + def this() = { + this(Identifiable.randomUID("ICETransformer")) + } + + def transform(instances: Dataset[_]): DataFrame = { + + val df = instances.toDF + val idCol = DatasetExtensions.findUnusedColumnName("id", df) + val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) + val dfWithId = df + .withColumn(idCol, monotonically_increasing_id()) + .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) + + + val values = $(featureType).toLowerCase match { 
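+      // Descriptive note (added): discrete features keep the topNValues most frequent
+      // values observed in the data; continuous features split the [rangeMin, rangeMax]
+      // range into nSplits even intervals, with the bounds defaulting to the column's
+      // observed min/max when not supplied.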
+ case "discrete" => + collectDiscreteValues(dfWithId, $(feature), $(topNValues)) + case "continuous" => + collectSplits(dfWithId, $(feature), $(nSplits), get(rangeMin), get(rangeMax)) + case other => + throw new IllegalArgumentException( + s"The feature type can only be 'discrete' or 'continuous'. Instead it is set to '$other'" + ) + } + + val dataType = dfWithId.schema($(feature)).dataType + val explodeFunc = explode(array(values.map(v => lit(v).cast(dataType)): _*)) + + val predicted = getModel.transform(dfWithId.withColumn($(feature), explodeFunc)) + val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) + + val explainTarget = extractTarget(predicted.schema, targetClasses) + val result = predicted.withColumn(targetCol, explainTarget) + + result + .groupBy($(feature)) + .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) + .withColumnRenamed($(feature), "__feature__value__") + .withColumn("__feature__name__", lit($(feature))) + .select("__feature__name__", "__feature__value__", "__feature__importance__") + } + + private def collectDiscreteValues[_](df: DataFrame, feature: String, topNValues: Int): Array[_] = { + val values = df + .groupBy(col(feature)) + .agg(count("*").as("__feature__count__")) + .orderBy(col("__feature__count__").desc) + .head(topNValues) + .map(row => row.get(0)) + values + } + + private def collectSplits(df: DataFrame, feature: String, nSplits: Int, + rangeMin: Option[Double], rangeMax: Option[Double]): Array[Double] = { + def createNSplits(n: Int)(from: Double, to: Double): Seq[Double] = { + (0 to n) map { + i => (to - from) / n * i + from + } + } + + val featureCol = df.schema(feature) + + val createSplits = createNSplits(nSplits) _ + + val values = if (rangeMin.isDefined && rangeMax.isDefined) { + val (mi, ma) = (rangeMin.get, rangeMax.get) + + // The ranges are defined + featureCol.dataType match { + case _@(ByteType | IntegerType | LongType | ShortType) => + if (ma.toLong - mi.toLong <= nSplits) { + // For integral types, no need to create more splits than needed. + (mi.toLong to ma.toLong) map (_.toDouble) + } else { + createSplits(mi, ma) + } + case _ => + createSplits(mi, ma) + } + } else { + // The ranges need to be calculated from background dataset. + featureCol.dataType match { + case _@(ByteType | IntegerType | LongType | ShortType) => + val Row(minValue: Long, maxValue: Long) = df + .agg(min(col(feature)).cast(LongType), max(col(feature)).cast(LongType)) + .head + + val mi = rangeMin.map(_.toLong).getOrElse(minValue) + val ma = rangeMax.map(_.toLong).getOrElse(maxValue) + + if (ma - mi <= nSplits) { + // For integral types, no need to create more splits than needed. 
+ (mi to ma) map (_.toDouble) + } else { + createSplits(mi, ma) + } + case _ => + val Row(minValue: Double, maxValue: Double) = df + .agg(min(col(feature)).cast(DoubleType), max(col(feature)).cast(DoubleType)) + .head + + val mi = rangeMin.getOrElse(minValue) + val ma = rangeMax.getOrElse(maxValue) + createSplits(mi, ma) + } + } + values.toArray + } + + override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) + + override def transformSchema(schema: StructType): StructType = { + this.validateSchema(schema) + schema.add(getOutputCol, ArrayType(VectorType)) + } +} diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala index dd1578fb75..4c3bd98c20 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala @@ -31,38 +31,6 @@ trait LocalExplainer } protected def preprocess(df: DataFrame): DataFrame = df - - /** - * This function supports a variety of target column types: - * - NumericType: in the case of a regression model - * - VectorType: in the case of a typical Spark ML classification model with probability output - * - ArrayType(NumericType): in the case where the output was converted to an array of numeric types. - * - MapType(IntegerType, NumericType): this is to support ZipMap type of output for sklearn models via ONNX runtime. - */ - private[explainers] def extractTarget(schema: StructType, targetClassesCol: String): Column = { - val toVector = UDFUtils.oldUdf( - (values: Seq[Double]) => Vectors.dense(values.toArray), - VectorType - ) - - val target = schema(getTargetCol).dataType match { - case _: NumericType => - toVector(array(col(getTargetCol))) - case VectorType => - SlicerFunctions.vectorSlicer(col(getTargetCol), col(targetClassesCol)) - case ArrayType(elementType: NumericType, _) => - SlicerFunctions.arraySlicer(elementType)(col(getTargetCol), col(targetClassesCol)) - case MapType(_: IntegerType, valueType: NumericType, _) => - SlicerFunctions.mapSlicer(valueType)(col(getTargetCol), col(targetClassesCol)) - case other => - throw new IllegalArgumentException( - s"Only numeric types, vector type, array of numeric types and map types with numeric value type " + - s"are supported as target column. The current type is $other." 
- ) - } - - target - } } object LocalExplainer { diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala index 5502cfa35b..f8205157fd 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala @@ -3,9 +3,14 @@ package com.microsoft.ml.spark.explainers +import com.microsoft.ml.spark.core.utils.SlicerFunctions +import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.Transformer +import org.apache.spark.ml.linalg.SQLDataTypes.VectorType +import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.param._ -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.{array, col} +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.types._ trait CanValidateSchema { @@ -149,4 +154,37 @@ trait HasExplainTarget extends Params with CanValidateSchema { } setDefault(targetClasses -> Array.empty[Int]) + + /** + * This function supports a variety of target column types: + * - NumericType: in the case of a regression model + * - VectorType: in the case of a typical Spark ML classification model with probability output + * - ArrayType(NumericType): in the case where the output was converted to an array of numeric types. + * - MapType(IntegerType, NumericType): this is to support ZipMap type of output for sklearn models via ONNX runtime. + */ + + private[explainers] def extractTarget(schema: StructType, targetClassesCol: String): Column = { + val toVector = UDFUtils.oldUdf( + (values: Seq[Double]) => Vectors.dense(values.toArray), + VectorType + ) + + val target = schema(getTargetCol).dataType match { + case _: NumericType => + toVector(array(col(getTargetCol))) + case VectorType => + SlicerFunctions.vectorSlicer(col(getTargetCol), col(targetClassesCol)) + case ArrayType(elementType: NumericType, _) => + SlicerFunctions.arraySlicer(elementType)(col(getTargetCol), col(targetClassesCol)) + case MapType(_: IntegerType, valueType: NumericType, _) => + SlicerFunctions.mapSlicer(valueType)(col(getTargetCol), col(targetClassesCol)) + case other => + throw new IllegalArgumentException( + s"Only numeric types, vector type, array of numeric types and map types with numeric value type " + + s"are supported as target column. The current type is $other." 
+ ) + } + + target + } } diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala new file mode 100644 index 0000000000..e1165253bd --- /dev/null +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -0,0 +1,72 @@ +package com.microsoft.ml.spark.explainers.split1 + +import org.apache.spark.ml.{Pipeline, PipelineModel} +import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} +import com.microsoft.ml.spark.core.test.base.TestBase +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions._ +import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} +import com.microsoft.ml.spark.explainers.ICETransformer + + +class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { + + import spark.implicits._ + val data: DataFrame = (1 to 100).flatMap(_ => Seq( + (-5d, "a", -5d, 0), + (-5d, "b", -5d, 0), + (5d, "a", 5d, 1), + (5d, "b", 5d, 1) + )).toDF("col1", "col2", "col3", "label") + + val new_data = data.withColumn("col4", rand()*100) + + new_data.show() + + val pipeline: Pipeline = new Pipeline().setStages(Array( + new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), + new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), + new VectorAssembler().setInputCols(Array("col1", "col2_enc", "col3", "col4")).setOutputCol("features"), + new LogisticRegression().setLabelCol("label").setFeaturesCol("features") + )) + + + val model: PipelineModel = pipeline.fit(new_data) + + val ice = new ICETransformer() + + ice.setModel(model).setOutputCol("iceValues").setTargetCol("probability").setFeature("col1") + .setTargetClasses(Array(1)) + + + val output = ice.transform(new_data) + output.show() + + val iceCon = new ICETransformer() + + iceCon.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setFeature("col4") + .setFeatureType("continuous") + .setTargetClasses(Array(1)) + + val outputCon = iceCon.transform(new_data) + outputCon.show() + + + val iceCon1 = new ICETransformer() + + iceCon1.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setFeature("col4") + .setFeatureType("continuous") + .setRangeMin(0.0) + .setRangeMax(100.0) + .setTargetClasses(Array(1)) + + val outputCon1 = iceCon.transform(new_data) + outputCon1.show() + +} From 151ef9926d230dca238d6caf2257f8de732d0c03 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Tue, 21 Sep 2021 11:28:31 -0700 Subject: [PATCH 02/32] Apply suggestions --- .../ml/spark/explainers/ICEExplainer.scala | 58 +++++++++++++------ .../explainers/split1/ICEExplainerSuite.scala | 2 + 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 4c479d139b..96531b4bcf 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -12,37 +12,65 @@ import org.apache.spark.ml.param._ import org.apache.spark.ml.stat.Summarizer -trait ICEFeatureParams extends Params { - val feature = new Param[String] (this, "feature", "Feature to explain") +trait ICEFeatureParams extends Params with HasNumSamples { + val feature = new Param[String] ( + this, + "feature", + "Feature to explain" + ) 
def getFeature: String = $(feature) def setFeature(value: String): this.type = set(feature, value) - val featureType = new Param[String] (this, "featureType", "Type of feature to explain") + val featureType = new Param[String] ( + this, + "featureType", + "Type of feature to explain", + ParamValidators.inArray(Array("discrete", "continuous")) + ) def getFeatureType: String = $(featureType) def setFeatureType(value: String): this.type = set(featureType, value) - val topNValues = new IntParam (this, "topNValues", "topNValues") + val topNValues = new IntParam ( + this, + "topNValues", + "topNValues", + ParamValidators.gt(0) + ) def getTopNValues: Int = $(topNValues) def setTopNValues(value: Int): this.type = set(topNValues, value) - val nSplits = new IntParam (this, "nSplits", "nSplits") + val nSplits = new IntParam ( + this, + "nSplits", + "nSplits", + ParamValidators.gt(0) + ) def getNSplits: Int = $(nSplits) def setNSplits(value: Int): this.type = set(nSplits, value) - val rangeMax = new DoubleParam(this, "rangeMax", "rangeMax") + val rangeMax = new DoubleParam( + this, + "rangeMax", + "rangeMax", + ParamValidators.gtEq(0.0) + ) def getRangeMax: Double = $(rangeMax) def setRangeMax(value: Double): this.type = set(rangeMax, value) - val rangeMin = new DoubleParam(this, "rangeMin", "rangeMin") + val rangeMin = new DoubleParam( + this, + "rangeMin", + "rangeMin", + ParamValidators.gtEq(0.0) + ) def getRangeMin: Double = $(rangeMin) def setRangeMin(value: Double): this.type = set(rangeMin, value) - setDefault(featureType -> "discrete", topNValues -> 100, nSplits -> 20) + setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, nSplits -> 20) } class ICETransformer(override val uid: String) extends Transformer - with HasNumSamples with HasExplainTarget with HasModel with ICEFeatureParams @@ -57,25 +85,20 @@ class ICETransformer(override val uid: String) extends Transformer this(Identifiable.randomUID("ICETransformer")) } - def transform(instances: Dataset[_]): DataFrame = { + def transform(ds: Dataset[_]): DataFrame = { - val df = instances.toDF - val idCol = DatasetExtensions.findUnusedColumnName("id", df) + val df = ds.toDF val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) val dfWithId = df - .withColumn(idCol, monotonically_increasing_id()) .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) + transformSchema(df.schema) val values = $(featureType).toLowerCase match { case "discrete" => collectDiscreteValues(dfWithId, $(feature), $(topNValues)) case "continuous" => collectSplits(dfWithId, $(feature), $(nSplits), get(rangeMin), get(rangeMax)) - case other => - throw new IllegalArgumentException( - s"The feature type can only be 'discrete' or 'continuous'. 
Instead it is set to '$other'" - ) } val dataType = dfWithId.schema($(feature)).dataType @@ -165,6 +188,7 @@ class ICETransformer(override val uid: String) extends Transformer override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) override def transformSchema(schema: StructType): StructType = { + assert(!schema.fieldNames.contains(feature.name), s"The schema does not contain column ${feature.name}") this.validateSchema(schema) schema.add(getOutputCol, ArrayType(VectorType)) } diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index e1165253bd..c1b916cc4d 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -69,4 +69,6 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val outputCon1 = iceCon.transform(new_data) outputCon1.show() + println("Finished") + } From b47d4103e023339782046eedb0463d420d0be016 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 23 Sep 2021 15:15:45 -0700 Subject: [PATCH 03/32] Added ICE --- .../ml/spark/explainers/ICEExplainer.scala | 64 +++++++++++++------ .../explainers/split1/ICEExplainerSuite.scala | 25 ++++++-- 2 files changed, 65 insertions(+), 24 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 96531b4bcf..2ef92f464d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -3,12 +3,11 @@ import com.microsoft.ml.spark.core.contracts.HasOutputCol import com.microsoft.ml.spark.core.schema.DatasetExtensions import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType -import org.apache.spark.ml.param.{DoubleParam, IntParam, ParamMap, Params} +import org.apache.spark.ml.param.{DoubleParam, IntParam, ParamMap, ParamValidators, Params, _} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.apache.spark.ml.param._ import org.apache.spark.ml.stat.Summarizer @@ -66,7 +65,16 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getRangeMin: Double = $(rangeMin) def setRangeMin(value: Double): this.type = set(rangeMin, value) - setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, nSplits -> 20) + val kind = new Param[String] ( + this, + "kind", + "pdp or ice", + ParamValidators.inArray(Array("average", "individual")) + ) + def getKind: String = $(kind) + def setKind(value: String): this.type = set(kind, value) + + setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, nSplits -> 20, kind -> "individual") } @@ -88,55 +96,72 @@ class ICETransformer(override val uid: String) extends Transformer def transform(ds: Dataset[_]): DataFrame = { val df = ds.toDF + val idCol = DatasetExtensions.findUnusedColumnName("id", df) val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) val dfWithId = df + .withColumn(idCol, monotonically_increasing_id()) .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) transformSchema(df.schema) + 
val feature = this.getFeature - val values = $(featureType).toLowerCase match { + val values = getFeatureType.toLowerCase match { case "discrete" => - collectDiscreteValues(dfWithId, $(feature), $(topNValues)) + collectDiscreteValues(dfWithId) case "continuous" => - collectSplits(dfWithId, $(feature), $(nSplits), get(rangeMin), get(rangeMax)) + collectSplits(dfWithId, get(rangeMin), get(rangeMax)) } - val dataType = dfWithId.schema($(feature)).dataType + val dataType = dfWithId.schema(feature).dataType val explodeFunc = explode(array(values.map(v => lit(v).cast(dataType)): _*)) - val predicted = getModel.transform(dfWithId.withColumn($(feature), explodeFunc)) + val predicted = getModel.transform(dfWithId.withColumn(feature, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) val explainTarget = extractTarget(predicted.schema, targetClasses) val result = predicted.withColumn(targetCol, explainTarget) - result - .groupBy($(feature)) - .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) - .withColumnRenamed($(feature), "__feature__value__") - .withColumn("__feature__name__", lit($(feature))) - .select("__feature__name__", "__feature__value__", "__feature__importance__") + //result.show() + + getKind.toLowerCase match { + case "average" => + result + .groupBy(feature) + .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) + .withColumnRenamed(feature, "__feature__value__") + .withColumn("__feature__name__", lit(feature)) + .select("__feature__name__", "__feature__value__", "__feature__importance__") + case "individual" => + // storing as a map feature -> target value + result.groupBy("id") + .agg(collect_list(feature).alias("feature_list"), collect_list(targetCol).alias("target_list")) + .withColumn("__feature__importance__", map_from_arrays(col("feature_list"), col("target_list"))) + .select(idCol, "__feature__importance__") + .orderBy(idCol) + + } } - private def collectDiscreteValues[_](df: DataFrame, feature: String, topNValues: Int): Array[_] = { + private def collectDiscreteValues[_](df: DataFrame): Array[_] = { val values = df - .groupBy(col(feature)) + .groupBy(col(getFeature)) .agg(count("*").as("__feature__count__")) .orderBy(col("__feature__count__").desc) - .head(topNValues) + .head(getTopNValues) .map(row => row.get(0)) values } - private def collectSplits(df: DataFrame, feature: String, nSplits: Int, - rangeMin: Option[Double], rangeMax: Option[Double]): Array[Double] = { + private def collectSplits(df: DataFrame, rangeMin: Option[Double], rangeMax: Option[Double]): Array[Double] = { def createNSplits(n: Int)(from: Double, to: Double): Seq[Double] = { (0 to n) map { i => (to - from) / n * i + from } } + val feature = getFeature val featureCol = df.schema(feature) + val nSplits = getNSplits val createSplits = createNSplits(nSplits) _ @@ -182,6 +207,7 @@ class ICETransformer(override val uid: String) extends Transformer createSplits(mi, ma) } } + values.toArray } diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index c1b916cc4d..bc4b7dfec0 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -40,7 +40,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val output = ice.transform(new_data) - output.show() 
+ output.show(false) val iceCon = new ICETransformer() @@ -52,7 +52,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor .setTargetClasses(Array(1)) val outputCon = iceCon.transform(new_data) - outputCon.show() + outputCon.show(false) val iceCon1 = new ICETransformer() @@ -66,9 +66,24 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor .setRangeMax(100.0) .setTargetClasses(Array(1)) - val outputCon1 = iceCon.transform(new_data) - outputCon1.show() + val outputCon1 = iceCon1.transform(new_data) + outputCon1.show(false) - println("Finished") + + val pdp = new ICETransformer() + + pdp.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setFeature("col4") + .setFeatureType("continuous") + .setRangeMin(0.0) + .setRangeMax(100.0) + .setNSplits(3) + .setTargetClasses(Array(1)) + .setKind("average") + + val pdpOutput = pdp.transform(new_data) + pdpOutput.show(false) } From 7d701100888deb2acf3ef43b01941fba9a1bcba9 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Mon, 4 Oct 2021 15:51:38 -0700 Subject: [PATCH 04/32] Apply suggestions and fix --- .../ml/spark/explainers/ICEExplainer.scala | 70 ++++++++++--------- .../explainers/split1/ICEExplainerSuite.scala | 69 ++++++++++-------- 2 files changed, 76 insertions(+), 63 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 2ef92f464d..7e708af037 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -15,7 +15,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { val feature = new Param[String] ( this, "feature", - "Feature to explain" + "The feature to explain." ) def getFeature: String = $(feature) def setFeature(value: String): this.type = set(feature, value) @@ -23,7 +23,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { val featureType = new Param[String] ( this, "featureType", - "Type of feature to explain", + "Whether the feature is discrete or continuous.", ParamValidators.inArray(Array("discrete", "continuous")) ) def getFeatureType: String = $(featureType) @@ -32,7 +32,8 @@ trait ICEFeatureParams extends Params with HasNumSamples { val topNValues = new IntParam ( this, "topNValues", - "topNValues", + "At most how many discrete values do we collect for discrete features. " + + "The features are ranked by occurrence in descending order.", ParamValidators.gt(0) ) def getTopNValues: Int = $(topNValues) @@ -41,7 +42,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { val nSplits = new IntParam ( this, "nSplits", - "nSplits", + "How many ways to split the value range for continuous feature.", ParamValidators.gt(0) ) def getNSplits: Int = $(nSplits) @@ -50,31 +51,45 @@ trait ICEFeatureParams extends Params with HasNumSamples { val rangeMax = new DoubleParam( this, "rangeMax", - "rangeMax", + "Specifies the max value of the range for continuous features. " + + "If not specified, it will be computed from the background dataset.", ParamValidators.gtEq(0.0) ) - def getRangeMax: Double = $(rangeMax) + def getRangeMax: Option[Double] = get(rangeMax) def setRangeMax(value: Double): this.type = set(rangeMax, value) val rangeMin = new DoubleParam( this, "rangeMin", - "rangeMin", + "Specifies the min value of the range for continuous features. 
" + + "If not specified, it will be computed from the background dataset.", ParamValidators.gtEq(0.0) ) - def getRangeMin: Double = $(rangeMin) + def getRangeMin: Option[Double] = get(rangeMin) def setRangeMin(value: Double): this.type = set(rangeMin, value) val kind = new Param[String] ( this, "kind", - "pdp or ice", + "Whether to return the partial dependence averaged across all the samples in the dataset or one line per sample.", ParamValidators.inArray(Array("average", "individual")) ) def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) - setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, nSplits -> 20, kind -> "individual") + def setDiscreteFeature(feature: String, topN: Int): this.type = { + this.setFeature(feature).setFeatureType("discrete").setTopNValues(topN) + } + + def setContinuousFeature(feature: String, nSplits: Int, + rangeMin: Option[Double] = None, + rangeMax: Option[Double] = None): this.type = { + rangeMin.foreach(this.setRangeMin) + rangeMax.foreach(this.setRangeMax) + this.setFeature(feature).setFeatureType("continuous").setNSplits(nSplits) + } + + setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, kind -> "individual") } @@ -84,11 +99,6 @@ class ICETransformer(override val uid: String) extends Transformer with ICEFeatureParams with HasOutputCol { - /* transform: - 1) gives feature values - 2) individual series plots - - */ def this() = { this(Identifiable.randomUID("ICETransformer")) } @@ -96,7 +106,7 @@ class ICETransformer(override val uid: String) extends Transformer def transform(ds: Dataset[_]): DataFrame = { val df = ds.toDF - val idCol = DatasetExtensions.findUnusedColumnName("id", df) + val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) val dfWithId = df .withColumn(idCol, monotonically_increasing_id()) @@ -109,20 +119,19 @@ class ICETransformer(override val uid: String) extends Transformer case "discrete" => collectDiscreteValues(dfWithId) case "continuous" => - collectSplits(dfWithId, get(rangeMin), get(rangeMax)) + collectSplits(dfWithId) } val dataType = dfWithId.schema(feature).dataType - val explodeFunc = explode(array(values.map(v => lit(v).cast(dataType)): _*)) + val explodeFunc = explode(array(values.map(v => lit(v)): _*).cast(ArrayType(dataType))) - val predicted = getModel.transform(dfWithId.withColumn(feature, explodeFunc)) + val sampled = dfWithId.orderBy(rand()).limit(getNumSamples).cache() + val predicted = getModel.transform(sampled.withColumn(feature, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) val explainTarget = extractTarget(predicted.schema, targetClasses) val result = predicted.withColumn(targetCol, explainTarget) - //result.show() - getKind.toLowerCase match { case "average" => result @@ -133,12 +142,11 @@ class ICETransformer(override val uid: String) extends Transformer .select("__feature__name__", "__feature__value__", "__feature__importance__") case "individual" => // storing as a map feature -> target value - result.groupBy("id") + val iceFeatures = result.groupBy("idCol") .agg(collect_list(feature).alias("feature_list"), collect_list(targetCol).alias("target_list")) .withColumn("__feature__importance__", map_from_arrays(col("feature_list"), col("target_list"))) .select(idCol, "__feature__importance__") - .orderBy(idCol) - + sampled.join(iceFeatures, idCol) } } @@ -152,22 +160,20 @@ class 
ICETransformer(override val uid: String) extends Transformer values } - private def collectSplits(df: DataFrame, rangeMin: Option[Double], rangeMax: Option[Double]): Array[Double] = { - def createNSplits(n: Int)(from: Double, to: Double): Seq[Double] = { - (0 to n) map { - i => (to - from) / n * i + from - } + private def createNSplits(n: Int)(from: Double, to: Double): Seq[Double] = { + (0 to n) map { + i => (to - from) / n * i + from } + } - val feature = getFeature + private def collectSplits(df: DataFrame): Array[Double] = { + val (feature, nSplits, rangeMin, rangeMax) = (getFeature, getNSplits, getRangeMin, getRangeMax) val featureCol = df.schema(feature) - val nSplits = getNSplits val createSplits = createNSplits(nSplits) _ val values = if (rangeMin.isDefined && rangeMax.isDefined) { val (mi, ma) = (rangeMin.get, rangeMax.get) - // The ranges are defined featureCol.dataType match { case _@(ByteType | IntegerType | LongType | ShortType) => diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index bc4b7dfec0..8b09fc1ebd 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -12,16 +12,15 @@ import com.microsoft.ml.spark.explainers.ICETransformer class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { import spark.implicits._ - val data: DataFrame = (1 to 100).flatMap(_ => Seq( + val dataDF: DataFrame = (1 to 100).flatMap(_ => Seq( (-5d, "a", -5d, 0), (-5d, "b", -5d, 0), (5d, "a", 5d, 1), (5d, "b", 5d, 1) )).toDF("col1", "col2", "col3", "label") - val new_data = data.withColumn("col4", rand()*100) - - new_data.show() + val data: DataFrame = dataDF.withColumn("col4", rand()*100) + data.show() val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), @@ -29,61 +28,69 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor new VectorAssembler().setInputCols(Array("col1", "col2_enc", "col3", "col4")).setOutputCol("features"), new LogisticRegression().setLabelCol("label").setFeaturesCol("features") )) + val model: PipelineModel = pipeline.fit(data) - val model: PipelineModel = pipeline.fit(new_data) - val ice = new ICETransformer() - - ice.setModel(model).setOutputCol("iceValues").setTargetCol("probability").setFeature("col1") + ice.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setFeature("col1") .setTargetClasses(Array(1)) - - - val output = ice.transform(new_data) + val output: DataFrame = ice.transform(data) output.show(false) val iceCon = new ICETransformer() - iceCon.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setFeature("col4") - .setFeatureType("continuous") + .setContinuousFeature(feature = "col4", nSplits = 20) .setTargetClasses(Array(1)) - - val outputCon = iceCon.transform(new_data) + val outputCon: DataFrame = iceCon.transform(data) outputCon.show(false) val iceCon1 = new ICETransformer() - iceCon1.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setFeature("col4") - .setFeatureType("continuous") - .setRangeMin(0.0) - .setRangeMax(100.0) + .setContinuousFeature( + feature = "col4", + nSplits = 20, + rangeMin = Some(0.0), + rangeMax = Some(100.0) + ) .setTargetClasses(Array(1)) - - val outputCon1 = 
iceCon1.transform(new_data) + val outputCon1: DataFrame = iceCon1.transform(data) outputCon1.show(false) val pdp = new ICETransformer() - pdp.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setFeature("col4") - .setFeatureType("continuous") - .setRangeMin(0.0) - .setRangeMax(100.0) - .setNSplits(3) + .setContinuousFeature( + feature = "col4", + nSplits = 3, + rangeMin = Some(0.0), + rangeMax = Some(100.0) + ) .setTargetClasses(Array(1)) .setKind("average") - - val pdpOutput = pdp.transform(new_data) + val pdpOutput: DataFrame = pdp.transform(data) pdpOutput.show(false) + val pdpDisc = new ICETransformer() + pdpDisc.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setDiscreteFeature( + feature = "col4", + topN = 2 + ) + .setTargetClasses(Array(1)) + .setKind("average") + val pdpOutputDisc: DataFrame = pdpDisc.transform(data) + pdpOutputDisc.show(false) + } From f5049e303a1ad56dfa6edb1c520e2b6221dd9d63 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Fri, 15 Oct 2021 17:37:01 -0700 Subject: [PATCH 05/32] Added discrete --- .../ml/spark/explainers/ICEExplainer.scala | 259 +++++++++++------- .../explainers/split1/ICEExplainerSuite.scala | 122 +++++---- 2 files changed, 229 insertions(+), 152 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 7e708af037..101852b3e8 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -9,64 +9,103 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer +import spray.json.{JsValue, JsonFormat, JsNumber, JsString, JsObject} +case class DiscreteFeature(name: String, numTopValue: Int) { + def validate: Boolean = { + numTopValue > 0 + } +} -trait ICEFeatureParams extends Params with HasNumSamples { - val feature = new Param[String] ( - this, - "feature", - "The feature to explain." - ) - def getFeature: String = $(feature) - def setFeature(value: String): this.type = set(feature, value) +object DiscreteFeature { + implicit val JsonFormat: JsonFormat[DiscreteFeature] = new JsonFormat[DiscreteFeature] { + override def read(json: JsValue): DiscreteFeature = { + val fields = json.asJsObject.fields + val name = fields("name") match { + case JsString(value) => value + case _ => throw new Exception("The name field must be a JsString.") + } + val numTopValues = fields("numTopValues") match { + case JsNumber(value) => value.toInt + case _ => throw new Exception("The numTopValues field must be a JsNumber.") + } - val featureType = new Param[String] ( - this, - "featureType", - "Whether the feature is discrete or continuous.", - ParamValidators.inArray(Array("discrete", "continuous")) - ) - def getFeatureType: String = $(featureType) - def setFeatureType(value: String): this.type = set(featureType, value) + DiscreteFeature(name, numTopValues) - val topNValues = new IntParam ( - this, - "topNValues", - "At most how many discrete values do we collect for discrete features. 
" + - "The features are ranked by occurrence in descending order.", - ParamValidators.gt(0) - ) - def getTopNValues: Int = $(topNValues) - def setTopNValues(value: Int): this.type = set(topNValues, value) + } + override def write(obj: DiscreteFeature): JsValue = { + JsObject(Map("name" -> JsString(obj.name), "numTopValues" -> JsNumber(obj.numTopValue))) + } + } +} - val nSplits = new IntParam ( - this, - "nSplits", - "How many ways to split the value range for continuous feature.", - ParamValidators.gt(0) - ) - def getNSplits: Int = $(nSplits) - def setNSplits(value: Int): this.type = set(nSplits, value) +case class ContinuousFeature(name: String, numSplits: Option[Int], rangeMin: Option[Double], rangeMax: Option[Double]) { + def validate: Boolean = { + (numSplits.isEmpty || numSplits.get > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) + } +} - val rangeMax = new DoubleParam( +object ContinuousFeature { + implicit val JsonFormat: JsonFormat[ContinuousFeature] = new JsonFormat[ContinuousFeature] { + override def read(json: JsValue): ContinuousFeature = { + val fields = json.asJsObject.fields + val name = fields("name") match { + case JsString(value) => value + case _ => throw new Exception("The name field must be a JsString.") + } + val numSplits = fields.get("numSplits").map { + case JsNumber(value) => value.toInt + case _ => 10 + } +// val numSplits = fields("numSplits") match { +// case JsNumber(value) => value.toInt +// case _ => throw new Exception("The numSplits field must be a JsNumber.") +// } + val rangeMin = fields.get("rangeMin").map { + case JsNumber(value) => value.toDouble + } + + val rangeMax = fields.get("rangeMax").map { + case JsNumber(value) => value.toDouble + } + + ContinuousFeature(name, numSplits, rangeMin, rangeMax) + + } + + override def write(obj: ContinuousFeature): JsValue = { + val map = Map("name" -> JsString(obj.name))++ + obj.numSplits.map("numSplits" -> JsNumber(_))++ + // "numSplits" -> JsNumber(obj.numSplits))++ + obj.rangeMin.map("rangeMin" -> JsNumber(_))++ + obj.rangeMax.map("rangeMax" -> JsNumber(_)) + JsObject(map) + } + } +} + + +trait ICEFeatureParams extends Params with HasNumSamples { + + val discreteFeatures = new TypedArrayParam[DiscreteFeature] ( this, - "rangeMax", - "Specifies the max value of the range for continuous features. " + - "If not specified, it will be computed from the background dataset.", - ParamValidators.gtEq(0.0) + "discreteFeatures", + "The list of discrete features to explain.", + {_.forall(_.validate)} ) - def getRangeMax: Option[Double] = get(rangeMax) - def setRangeMax(value: Double): this.type = set(rangeMax, value) - val rangeMin = new DoubleParam( + def setDiscreteFeatures(values: Seq[DiscreteFeature]): this.type = this.set(discreteFeatures, values) + def getDiscreteFeatures: Seq[DiscreteFeature] = $(discreteFeatures) + + val continuousFeatures = new TypedArrayParam[ContinuousFeature] ( this, - "rangeMin", - "Specifies the min value of the range for continuous features. 
" + - "If not specified, it will be computed from the background dataset.", - ParamValidators.gtEq(0.0) + "continuousFeatures", + "The list of continuous features to explain.", + {_.forall(_.validate)} ) - def getRangeMin: Option[Double] = get(rangeMin) - def setRangeMin(value: Double): this.type = set(rangeMin, value) + + def setContinuousFeatures(values: Seq[ContinuousFeature]): this.type = this.set(continuousFeatures, values) + def getContinuousFeatures: Seq[ContinuousFeature] = $(continuousFeatures) val kind = new Param[String] ( this, @@ -77,19 +116,21 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) - def setDiscreteFeature(feature: String, topN: Int): this.type = { - this.setFeature(feature).setFeatureType("discrete").setTopNValues(topN) - } +// def setDiscreteFeature(feature: String, topN: Int): this.type = { +// this.setFeature(feature).setFeatureType("discrete").setTopNValues(topN) +// } +// +// def setContinuousFeature(feature: String, nSplits: Int, +// rangeMin: Option[Double] = None, +// rangeMax: Option[Double] = None): this.type = { +// rangeMin.foreach(this.setRangeMin) +// rangeMax.foreach(this.setRangeMax) +// this.setFeature(feature).setFeatureType("continuous").setNSplits(nSplits) +// } - def setContinuousFeature(feature: String, nSplits: Int, - rangeMin: Option[Double] = None, - rangeMax: Option[Double] = None): this.type = { - rangeMin.foreach(this.setRangeMin) - rangeMax.foreach(this.setRangeMax) - this.setFeature(feature).setFeatureType("continuous").setNSplits(nSplits) - } + setDefault(numSamples -> 1000, kind -> "individual") - setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, kind -> "individual") + //setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, kind -> "individual") } @@ -103,59 +144,83 @@ class ICETransformer(override val uid: String) extends Transformer this(Identifiable.randomUID("ICETransformer")) } - def transform(ds: Dataset[_]): DataFrame = { - - val df = ds.toDF - val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) - val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) - val dfWithId = df - .withColumn(idCol, monotonically_increasing_id()) - .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) - - transformSchema(df.schema) - val feature = this.getFeature + def processDiscreteFeature(sampledTotal: DataFrame, idCol: String, targetClassesColumn: String, + feature: DiscreteFeature, values: Array[_]): DataFrame = { - val values = getFeatureType.toLowerCase match { - case "discrete" => - collectDiscreteValues(dfWithId) - case "continuous" => - collectSplits(dfWithId) - } + val sampled = sampledTotal.limit(feature.numTopValue).cache() - val dataType = dfWithId.schema(feature).dataType + val dataType = sampled.schema(feature.name).dataType val explodeFunc = explode(array(values.map(v => lit(v)): _*).cast(ArrayType(dataType))) - val sampled = dfWithId.orderBy(rand()).limit(getNumSamples).cache() - val predicted = getModel.transform(sampled.withColumn(feature, explodeFunc)) + val predicted = getModel.transform(sampled.withColumn(feature.name, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) - val explainTarget = extractTarget(predicted.schema, targetClasses) + val explainTarget = extractTarget(predicted.schema, targetClassesColumn) val result = predicted.withColumn(targetCol, 
explainTarget) + val featImpName = feature.name + "__imp" + + result.show() + getKind.toLowerCase match { case "average" => result - .groupBy(feature) - .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) - .withColumnRenamed(feature, "__feature__value__") - .withColumn("__feature__name__", lit(feature)) - .select("__feature__name__", "__feature__value__", "__feature__importance__") + .groupBy(feature.name) + .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) + //.withColumnRenamed(feature.name, "__feature__value__") + .withColumn(featImpName, lit(feature.name)) + .select(featImpName, "__feature__importance__") case "individual" => // storing as a map feature -> target value val iceFeatures = result.groupBy("idCol") - .agg(collect_list(feature).alias("feature_list"), collect_list(targetCol).alias("target_list")) - .withColumn("__feature__importance__", map_from_arrays(col("feature_list"), col("target_list"))) - .select(idCol, "__feature__importance__") - sampled.join(iceFeatures, idCol) + .agg(collect_list(feature.name).alias("feature_list"), collect_list(targetCol).alias("target_list")) + .withColumn(featImpName, map_from_arrays(col("feature_list"), col("target_list"))) + .select(idCol, featImpName) + //sampled.join(iceFeatures, idCol) + iceFeatures.select(idCol, featImpName) + } + } + + + + def transform(ds: Dataset[_]): DataFrame = { + + val df = ds.toDF + val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) + val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) + val dfWithId = df + .withColumn(idCol, monotonically_increasing_id()) + .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) + transformSchema(df.schema) + + // collect feature values for all features from original fataset - dfWithId + val discreteFeatures = this.getDiscreteFeatures + + val collectedFeatureValues: Map[DiscreteFeature, Array[_]] = discreteFeatures.map{ + feature => (feature, collectDiscreteValues(dfWithId, feature)) + }.toMap + + val sampled = dfWithId.orderBy(rand()) + + val processFunc: DiscreteFeature => DataFrame = { + f: DiscreteFeature => + processDiscreteFeature(sampled, idCol, targetClasses, f, collectedFeatureValues(f)) } + val stage1 = discreteFeatures map (processFunc) + + val stage2: DataFrame = + stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.join(currDF, Seq(idCol), "inner")) + + sampled.join(stage2, idCol).drop(idCol) + } - private def collectDiscreteValues[_](df: DataFrame): Array[_] = { + private def collectDiscreteValues[_](df: DataFrame, feature: DiscreteFeature): Array[_] = { val values = df - .groupBy(col(getFeature)) + .groupBy(col(feature.name)) .agg(count("*").as("__feature__count__")) .orderBy(col("__feature__count__").desc) - .head(getTopNValues) + .head(feature.numTopValue) .map(row => row.get(0)) values } @@ -166,18 +231,19 @@ class ICETransformer(override val uid: String) extends Transformer } } - private def collectSplits(df: DataFrame): Array[Double] = { - val (feature, nSplits, rangeMin, rangeMax) = (getFeature, getNSplits, getRangeMin, getRangeMax) + private def collectSplits(df: DataFrame, continuousFeature: ContinuousFeature): Array[Double] = { + val (feature, nSplits, rangeMin, rangeMax) = (continuousFeature.name, continuousFeature.numSplits, + continuousFeature.rangeMin, continuousFeature.rangeMax) val featureCol = df.schema(feature) - val createSplits = createNSplits(nSplits) _ + val createSplits = createNSplits(nSplits.get) _ val values = if 
(rangeMin.isDefined && rangeMax.isDefined) { val (mi, ma) = (rangeMin.get, rangeMax.get) // The ranges are defined featureCol.dataType match { case _@(ByteType | IntegerType | LongType | ShortType) => - if (ma.toLong - mi.toLong <= nSplits) { + if (ma.toLong - mi.toLong <= nSplits.get) { // For integral types, no need to create more splits than needed. (mi.toLong to ma.toLong) map (_.toDouble) } else { @@ -197,7 +263,7 @@ class ICETransformer(override val uid: String) extends Transformer val mi = rangeMin.map(_.toLong).getOrElse(minValue) val ma = rangeMax.map(_.toLong).getOrElse(maxValue) - if (ma - mi <= nSplits) { + if (ma - mi <= nSplits.get) { // For integral types, no need to create more splits than needed. (mi to ma) map (_.toDouble) } else { @@ -220,7 +286,6 @@ class ICETransformer(override val uid: String) extends Transformer override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) override def transformSchema(schema: StructType): StructType = { - assert(!schema.fieldNames.contains(feature.name), s"The schema does not contain column ${feature.name}") this.validateSchema(schema) schema.add(getOutputCol, ArrayType(VectorType)) } diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index 8b09fc1ebd..8effeea49a 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -6,7 +6,7 @@ import com.microsoft.ml.spark.core.test.base.TestBase import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} -import com.microsoft.ml.spark.explainers.ICETransformer +import com.microsoft.ml.spark.explainers.{DiscreteFeature, ICETransformer} class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { @@ -31,66 +31,78 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val model: PipelineModel = pipeline.fit(data) - val ice = new ICETransformer() - ice.setModel(model) - .setOutputCol("iceValues") - .setTargetCol("probability") - .setFeature("col1") - .setTargetClasses(Array(1)) - val output: DataFrame = ice.transform(data) - output.show(false) - - val iceCon = new ICETransformer() - iceCon.setModel(model) - .setOutputCol("iceValues") - .setTargetCol("probability") - .setContinuousFeature(feature = "col4", nSplits = 20) - .setTargetClasses(Array(1)) - val outputCon: DataFrame = iceCon.transform(data) - outputCon.show(false) +// val ice = new ICETransformer() +// ice.setModel(model) +// .setOutputCol("iceValues") +// .setTargetCol("probability") +// .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) +// .setTargetClasses(Array(1)) +// val output: DataFrame = ice.transform(data) +// output.show(false) - val iceCon1 = new ICETransformer() - iceCon1.setModel(model) + val iceAvg = new ICETransformer() + iceAvg.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setContinuousFeature( - feature = "col4", - nSplits = 20, - rangeMin = Some(0.0), - rangeMax = Some(100.0) - ) - .setTargetClasses(Array(1)) - val outputCon1: DataFrame = iceCon1.transform(data) - outputCon1.show(false) - - - val pdp = new ICETransformer() - pdp.setModel(model) - .setOutputCol("iceValues") - .setTargetCol("probability") - 
.setContinuousFeature( - feature = "col4", - nSplits = 3, - rangeMin = Some(0.0), - rangeMax = Some(100.0) - ) + .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) .setTargetClasses(Array(1)) .setKind("average") - val pdpOutput: DataFrame = pdp.transform(data) - pdpOutput.show(false) + val outputAvg: DataFrame = iceAvg.transform(data) + outputAvg.show(false) - val pdpDisc = new ICETransformer() - pdpDisc.setModel(model) - .setOutputCol("iceValues") - .setTargetCol("probability") - .setDiscreteFeature( - feature = "col4", - topN = 2 - ) - .setTargetClasses(Array(1)) - .setKind("average") - val pdpOutputDisc: DataFrame = pdpDisc.transform(data) - pdpOutputDisc.show(false) + +// val iceCon = new ICETransformer() +// iceCon.setModel(model) +// .setOutputCol("iceValues") +// .setTargetCol("probability") +// .setContinuousFeature(feature = "col4", nSplits = 20) +// .setTargetClasses(Array(1)) +// val outputCon: DataFrame = iceCon.transform(data) +// outputCon.show(false) +// +// +// val iceCon1 = new ICETransformer() +// iceCon1.setModel(model) +// .setOutputCol("iceValues") +// .setTargetCol("probability") +// .setContinuousFeature( +// feature = "col4", +// nSplits = 20, +// rangeMin = Some(0.0), +// rangeMax = Some(100.0) +// ) +// .setTargetClasses(Array(1)) +// val outputCon1: DataFrame = iceCon1.transform(data) +// outputCon1.show(false) +// +// +// val pdp = new ICETransformer() +// pdp.setModel(model) +// .setOutputCol("iceValues") +// .setTargetCol("probability") +// .setContinuousFeature( +// feature = "col4", +// nSplits = 3, +// rangeMin = Some(0.0), +// rangeMax = Some(100.0) +// ) +// .setTargetClasses(Array(1)) +// .setKind("average") +// val pdpOutput: DataFrame = pdp.transform(data) +// pdpOutput.show(false) +// +// val pdpDisc = new ICETransformer() +// pdpDisc.setModel(model) +// .setOutputCol("iceValues") +// .setTargetCol("probability") +// .setDiscreteFeature( +// feature = "col4", +// topN = 2 +// ) +// .setTargetClasses(Array(1)) +// .setKind("average") +// val pdpOutputDisc: DataFrame = pdpDisc.transform(data) +// pdpOutputDisc.show(false) } From e6e985e2380f3f1f76651d092bd9aaef32289289 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Mon, 18 Oct 2021 19:34:50 -0700 Subject: [PATCH 06/32] Added logic for discrete features --- .../ml/spark/explainers/ICEExplainer.scala | 43 ++++++----- .../explainers/split1/ICEExplainerSuite.scala | 73 +++---------------- 2 files changed, 30 insertions(+), 86 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 101852b3e8..77ad96eb1d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -53,6 +53,9 @@ object ContinuousFeature { case JsString(value) => value case _ => throw new Exception("The name field must be a JsString.") } + + // I don't know how to pass default value. If I make Option, I need to specify it anyway. 
+ val numSplits = fields.get("numSplits").map { case JsNumber(value) => value.toInt case _ => 10 @@ -116,22 +119,8 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) -// def setDiscreteFeature(feature: String, topN: Int): this.type = { -// this.setFeature(feature).setFeatureType("discrete").setTopNValues(topN) -// } -// -// def setContinuousFeature(feature: String, nSplits: Int, -// rangeMin: Option[Double] = None, -// rangeMax: Option[Double] = None): this.type = { -// rangeMin.foreach(this.setRangeMin) -// rangeMax.foreach(this.setRangeMax) -// this.setFeature(feature).setFeatureType("continuous").setNSplits(nSplits) -// } - setDefault(numSamples -> 1000, kind -> "individual") - //setDefault(numSamples -> 1000, featureType -> "discrete", topNValues -> 100, kind -> "individual") - } class ICETransformer(override val uid: String) extends Transformer @@ -160,23 +149,21 @@ class ICETransformer(override val uid: String) extends Transformer val featImpName = feature.name + "__imp" - result.show() getKind.toLowerCase match { case "average" => result .groupBy(feature.name) .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) - //.withColumnRenamed(feature.name, "__feature__value__") - .withColumn(featImpName, lit(feature.name)) - .select(featImpName, "__feature__importance__") + .agg(collect_list(feature.name).alias("feature_value_list"), + collect_list("__feature__importance__").alias("feature_imp_list")) + .withColumn(featImpName, map_from_arrays(col("feature_value_list"), col("feature_imp_list"))) + .select(featImpName) case "individual" => - // storing as a map feature -> target value val iceFeatures = result.groupBy("idCol") .agg(collect_list(feature.name).alias("feature_list"), collect_list(targetCol).alias("target_list")) .withColumn(featImpName, map_from_arrays(col("feature_list"), col("target_list"))) .select(idCol, featImpName) - //sampled.join(iceFeatures, idCol) iceFeatures.select(idCol, featImpName) } } @@ -193,8 +180,9 @@ class ICETransformer(override val uid: String) extends Transformer .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) transformSchema(df.schema) - // collect feature values for all features from original fataset - dfWithId + // collect feature values for all features from original dataset - dfWithId val discreteFeatures = this.getDiscreteFeatures + //val continuousFeature = this.getContinuousFeatures val collectedFeatureValues: Map[DiscreteFeature, Array[_]] = discreteFeatures.map{ feature => (feature, collectDiscreteValues(dfWithId, feature)) @@ -206,13 +194,24 @@ class ICETransformer(override val uid: String) extends Transformer f: DiscreteFeature => processDiscreteFeature(sampled, idCol, targetClasses, f, collectedFeatureValues(f)) } - val stage1 = discreteFeatures map (processFunc) + val stage1 = discreteFeatures map (processFunc) + + // I don't know how it's better to handle this 2 cases. 
For pdp we don't have idCol + // and also don't merge it with input data + + getKind.toLowerCase match { + case "individual" => val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.join(currDF, Seq(idCol), "inner")) sampled.join(stage2, idCol).drop(idCol) + case "average" => + val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.crossJoin(currDF)) + stage2 + } + } private def collectDiscreteValues[_](df: DataFrame, feature: DiscreteFeature): Array[_] = { diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index 8effeea49a..973482706d 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -20,7 +20,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor )).toDF("col1", "col2", "col3", "label") val data: DataFrame = dataDF.withColumn("col4", rand()*100) - data.show() + val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), @@ -31,15 +31,14 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val model: PipelineModel = pipeline.fit(data) - -// val ice = new ICETransformer() -// ice.setModel(model) -// .setOutputCol("iceValues") -// .setTargetCol("probability") -// .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) -// .setTargetClasses(Array(1)) -// val output: DataFrame = ice.transform(data) -// output.show(false) + val ice = new ICETransformer() + ice.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) + .setTargetClasses(Array(1)) + val output: DataFrame = ice.transform(data) + output.show(false) val iceAvg = new ICETransformer() iceAvg.setModel(model) @@ -51,58 +50,4 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val outputAvg: DataFrame = iceAvg.transform(data) outputAvg.show(false) - -// val iceCon = new ICETransformer() -// iceCon.setModel(model) -// .setOutputCol("iceValues") -// .setTargetCol("probability") -// .setContinuousFeature(feature = "col4", nSplits = 20) -// .setTargetClasses(Array(1)) -// val outputCon: DataFrame = iceCon.transform(data) -// outputCon.show(false) -// -// -// val iceCon1 = new ICETransformer() -// iceCon1.setModel(model) -// .setOutputCol("iceValues") -// .setTargetCol("probability") -// .setContinuousFeature( -// feature = "col4", -// nSplits = 20, -// rangeMin = Some(0.0), -// rangeMax = Some(100.0) -// ) -// .setTargetClasses(Array(1)) -// val outputCon1: DataFrame = iceCon1.transform(data) -// outputCon1.show(false) -// -// -// val pdp = new ICETransformer() -// pdp.setModel(model) -// .setOutputCol("iceValues") -// .setTargetCol("probability") -// .setContinuousFeature( -// feature = "col4", -// nSplits = 3, -// rangeMin = Some(0.0), -// rangeMax = Some(100.0) -// ) -// .setTargetClasses(Array(1)) -// .setKind("average") -// val pdpOutput: DataFrame = pdp.transform(data) -// pdpOutput.show(false) -// -// val pdpDisc = new ICETransformer() -// pdpDisc.setModel(model) -// .setOutputCol("iceValues") -// .setTargetCol("probability") -// .setDiscreteFeature( -// feature = "col4", -// topN = 2 -// ) -// 
.setTargetClasses(Array(1)) -// .setKind("average") -// val pdpOutputDisc: DataFrame = pdpDisc.transform(data) -// pdpOutputDisc.show(false) - } From a23df5ca53f192cc0efa642ffbc72cf95ffc2ac3 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Wed, 20 Oct 2021 10:43:35 -0700 Subject: [PATCH 07/32] New logic (without unit tests) --- .../ml/spark/explainers/ICEExplainer.scala | 165 ++++++++++-------- .../explainers/split1/ICEExplainerSuite.scala | 9 +- 2 files changed, 94 insertions(+), 80 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 77ad96eb1d..16e2c41056 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -3,7 +3,7 @@ import com.microsoft.ml.spark.core.contracts.HasOutputCol import com.microsoft.ml.spark.core.schema.DatasetExtensions import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType -import org.apache.spark.ml.param.{DoubleParam, IntParam, ParamMap, ParamValidators, Params, _} +import org.apache.spark.ml.param.{ParamMap, ParamValidators, Params, _} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ @@ -11,59 +11,67 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import spray.json.{JsValue, JsonFormat, JsNumber, JsString, JsObject} -case class DiscreteFeature(name: String, numTopValue: Int) { +case class CategoricalFeature(name: String, numTopValues: Option[Int] = None) { def validate: Boolean = { - numTopValue > 0 + numTopValues.forall(_ > 0) + } + + private val defaultNumTopValue = 100 + def getNumTopValue: Int = { + this.numTopValues.getOrElse(defaultNumTopValue) } } -object DiscreteFeature { - implicit val JsonFormat: JsonFormat[DiscreteFeature] = new JsonFormat[DiscreteFeature] { - override def read(json: JsValue): DiscreteFeature = { +object CategoricalFeature { + implicit val JsonFormat: JsonFormat[CategoricalFeature] = new JsonFormat[CategoricalFeature] { + override def read(json: JsValue): CategoricalFeature = { val fields = json.asJsObject.fields val name = fields("name") match { case JsString(value) => value case _ => throw new Exception("The name field must be a JsString.") } - val numTopValues = fields("numTopValues") match { - case JsNumber(value) => value.toInt - case _ => throw new Exception("The numTopValues field must be a JsNumber.") + val numTopValues = fields.get("numTopValues") match { + case Some(JsNumber(value)) => Some(value.toInt) + case _ => None } - DiscreteFeature(name, numTopValues) + CategoricalFeature(name, numTopValues) } - override def write(obj: DiscreteFeature): JsValue = { - JsObject(Map("name" -> JsString(obj.name), "numTopValues" -> JsNumber(obj.numTopValue))) + override def write(obj: CategoricalFeature): JsValue = { + val map = Map("name" -> JsString(obj.name))++ + obj.numTopValues.map("numTopValues" -> JsNumber(_)) + JsObject(map) } } } -case class ContinuousFeature(name: String, numSplits: Option[Int], rangeMin: Option[Double], rangeMax: Option[Double]) { +case class NumericFeature(name: String, numSplits: Option[Int] = None, + rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) { def validate: Boolean = { - (numSplits.isEmpty || numSplits.get > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) + 
numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) + } + + private val defaultNumSplits = 10 + def getNumSplits: Int = { + this.numSplits.getOrElse(defaultNumSplits) } } -object ContinuousFeature { - implicit val JsonFormat: JsonFormat[ContinuousFeature] = new JsonFormat[ContinuousFeature] { - override def read(json: JsValue): ContinuousFeature = { +object NumericFeature { + implicit val JsonFormat: JsonFormat[NumericFeature] = new JsonFormat[NumericFeature] { + override def read(json: JsValue): NumericFeature = { val fields = json.asJsObject.fields val name = fields("name") match { case JsString(value) => value case _ => throw new Exception("The name field must be a JsString.") } - // I don't know how to pass default value. If I make Option, I need to specify it anyway. - - val numSplits = fields.get("numSplits").map { - case JsNumber(value) => value.toInt - case _ => 10 + val numSplits = fields.get("numSplits") match { + case Some(JsNumber(value)) => Some(value.toInt) + case _ => None } -// val numSplits = fields("numSplits") match { -// case JsNumber(value) => value.toInt -// case _ => throw new Exception("The numSplits field must be a JsNumber.") -// } + val rangeMin = fields.get("rangeMin").map { case JsNumber(value) => value.toDouble } @@ -72,14 +80,13 @@ object ContinuousFeature { case JsNumber(value) => value.toDouble } - ContinuousFeature(name, numSplits, rangeMin, rangeMax) + NumericFeature(name, numSplits, rangeMin, rangeMax) } - override def write(obj: ContinuousFeature): JsValue = { + override def write(obj: NumericFeature): JsValue = { val map = Map("name" -> JsString(obj.name))++ obj.numSplits.map("numSplits" -> JsNumber(_))++ - // "numSplits" -> JsNumber(obj.numSplits))++ obj.rangeMin.map("rangeMin" -> JsNumber(_))++ obj.rangeMax.map("rangeMax" -> JsNumber(_)) JsObject(map) @@ -90,37 +97,38 @@ object ContinuousFeature { trait ICEFeatureParams extends Params with HasNumSamples { - val discreteFeatures = new TypedArrayParam[DiscreteFeature] ( + val categoricalFeatures = new TypedArrayParam[CategoricalFeature] ( this, - "discreteFeatures", - "The list of discrete features to explain.", + "categoricalFeatures", + "The list of categorical features to explain.", {_.forall(_.validate)} ) - def setDiscreteFeatures(values: Seq[DiscreteFeature]): this.type = this.set(discreteFeatures, values) - def getDiscreteFeatures: Seq[DiscreteFeature] = $(discreteFeatures) + def setCategoricalFeatures(values: Seq[CategoricalFeature]): this.type = this.set(categoricalFeatures, values) + def getCategoricalFeatures: Seq[CategoricalFeature] = $(categoricalFeatures) - val continuousFeatures = new TypedArrayParam[ContinuousFeature] ( + val numericFeatures = new TypedArrayParam[NumericFeature] ( this, - "continuousFeatures", - "The list of continuous features to explain.", + "numericFeatures", + "The list of numeric features to explain.", {_.forall(_.validate)} ) - def setContinuousFeatures(values: Seq[ContinuousFeature]): this.type = this.set(continuousFeatures, values) - def getContinuousFeatures: Seq[ContinuousFeature] = $(continuousFeatures) + def setNumericFeatures(values: Seq[NumericFeature]): this.type = this.set(numericFeatures, values) + def getNumericFeatures: Seq[NumericFeature] = $(numericFeatures) val kind = new Param[String] ( this, "kind", - "Whether to return the partial dependence averaged across all the samples in the dataset or one line per sample.", + "Whether to return the partial dependence averaged across all the samples in the " + + 
"dataset or individual feature importance per sample.", ParamValidators.inArray(Array("average", "individual")) ) def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) - setDefault(numSamples -> 1000, kind -> "individual") - + setDefault(kind -> "individual", numericFeatures -> Seq.empty[NumericFeature], + categoricalFeatures -> Seq.empty[CategoricalFeature]) } class ICETransformer(override val uid: String) extends Transformer @@ -133,35 +141,34 @@ class ICETransformer(override val uid: String) extends Transformer this(Identifiable.randomUID("ICETransformer")) } - def processDiscreteFeature(sampledTotal: DataFrame, idCol: String, targetClassesColumn: String, - feature: DiscreteFeature, values: Array[_]): DataFrame = { + def processFeature(sampled: DataFrame, idCol: String, targetClassesColumn: String, + feature: String, values: Array[_]): DataFrame = { - val sampled = sampledTotal.limit(feature.numTopValue).cache() - - val dataType = sampled.schema(feature.name).dataType + val dataType = sampled.schema(feature).dataType val explodeFunc = explode(array(values.map(v => lit(v)): _*).cast(ArrayType(dataType))) - val predicted = getModel.transform(sampled.withColumn(feature.name, explodeFunc)) + val predicted = getModel.transform(sampled.withColumn(feature, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) val explainTarget = extractTarget(predicted.schema, targetClassesColumn) val result = predicted.withColumn(targetCol, explainTarget) - val featImpName = feature.name + "__imp" - + val featImpName = feature + "__imp" + // output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) getKind.toLowerCase match { case "average" => result - .groupBy(feature.name) + .groupBy(feature) .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) - .agg(collect_list(feature.name).alias("feature_value_list"), + .agg(collect_list(feature).alias("feature_value_list"), collect_list("__feature__importance__").alias("feature_imp_list")) .withColumn(featImpName, map_from_arrays(col("feature_value_list"), col("feature_imp_list"))) .select(featImpName) + // output schema: rows * (cols + 1) (ice for the given feature: array(feature_value -> explanations)) case "individual" => - val iceFeatures = result.groupBy("idCol") - .agg(collect_list(feature.name).alias("feature_list"), collect_list(targetCol).alias("target_list")) + val iceFeatures = result.groupBy(idCol) + .agg(collect_list(feature).alias("feature_list"), collect_list(targetCol).alias("target_list")) .withColumn(featImpName, map_from_arrays(col("feature_list"), col("target_list"))) .select(idCol, featImpName) iceFeatures.select(idCol, featImpName) @@ -169,9 +176,7 @@ class ICETransformer(override val uid: String) extends Transformer } - def transform(ds: Dataset[_]): DataFrame = { - val df = ds.toDF val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) @@ -181,24 +186,32 @@ class ICETransformer(override val uid: String) extends Transformer transformSchema(df.schema) // collect feature values for all features from original dataset - dfWithId - val discreteFeatures = this.getDiscreteFeatures - //val continuousFeature = this.getContinuousFeatures + val categoricalFeatures = this.getCategoricalFeatures + val numericFeatures = this.getNumericFeatures - val collectedFeatureValues: Map[DiscreteFeature, Array[_]] = discreteFeatures.map{ - feature => (feature, 
collectDiscreteValues(dfWithId, feature)) + val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { + feature => (feature.name, collectCategoricalValues(dfWithId, feature)) + }.toMap + + val collectedNumFeatureValues: Map[String, Array[_]] = numericFeatures.map { + feature => (feature.name, collectSplits(dfWithId, feature)) }.toMap - val sampled = dfWithId.orderBy(rand()) + val sampled = this.get(numSamples).map { + s => dfWithId.orderBy(rand()).limit(s) + }.getOrElse(dfWithId).cache() - val processFunc: DiscreteFeature => DataFrame = { - f: DiscreteFeature => - processDiscreteFeature(sampled, idCol, targetClasses, f, collectedFeatureValues(f)) + val processCategoricalFunc: CategoricalFeature => DataFrame = { + f: CategoricalFeature => + processFeature(sampled, idCol, targetClasses, f.name, collectedCatFeatureValues(f.name)) } - val stage1 = discreteFeatures map (processFunc) + val processNumericFunc: NumericFeature => DataFrame = { + f: NumericFeature => + processFeature(sampled, idCol, targetClasses, f.name, collectedNumFeatureValues(f.name)) + } - // I don't know how it's better to handle this 2 cases. For pdp we don't have idCol - // and also don't merge it with input data + val stage1 = (categoricalFeatures map processCategoricalFunc) union (numericFeatures map processNumericFunc) getKind.toLowerCase match { case "individual" => @@ -211,15 +224,14 @@ class ICETransformer(override val uid: String) extends Transformer val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.crossJoin(currDF)) stage2 } - } - private def collectDiscreteValues[_](df: DataFrame, feature: DiscreteFeature): Array[_] = { + private def collectCategoricalValues[_](df: DataFrame, feature: CategoricalFeature): Array[_] = { val values = df .groupBy(col(feature.name)) .agg(count("*").as("__feature__count__")) .orderBy(col("__feature__count__").desc) - .head(feature.numTopValue) + .head(feature.getNumTopValue) .map(row => row.get(0)) values } @@ -230,19 +242,19 @@ class ICETransformer(override val uid: String) extends Transformer } } - private def collectSplits(df: DataFrame, continuousFeature: ContinuousFeature): Array[Double] = { - val (feature, nSplits, rangeMin, rangeMax) = (continuousFeature.name, continuousFeature.numSplits, - continuousFeature.rangeMin, continuousFeature.rangeMax) + private def collectSplits(df: DataFrame, numericFeature: NumericFeature): Array[Double] = { + val (feature, nSplits, rangeMin, rangeMax) = (numericFeature.name, numericFeature.getNumSplits, + numericFeature.rangeMin, numericFeature.rangeMax) val featureCol = df.schema(feature) - val createSplits = createNSplits(nSplits.get) _ + val createSplits = createNSplits(nSplits) _ val values = if (rangeMin.isDefined && rangeMax.isDefined) { val (mi, ma) = (rangeMin.get, rangeMax.get) // The ranges are defined featureCol.dataType match { case _@(ByteType | IntegerType | LongType | ShortType) => - if (ma.toLong - mi.toLong <= nSplits.get) { + if (ma.toLong - mi.toLong <= nSplits) { // For integral types, no need to create more splits than needed. (mi.toLong to ma.toLong) map (_.toDouble) } else { @@ -262,7 +274,7 @@ class ICETransformer(override val uid: String) extends Transformer val mi = rangeMin.map(_.toLong).getOrElse(minValue) val ma = rangeMax.map(_.toLong).getOrElse(maxValue) - if (ma - mi <= nSplits.get) { + if (ma - mi <= nSplits) { // For integral types, no need to create more splits than needed. 
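          // e.g. with mi = 0, ma = 3 and nSplits = 10 this yields 0.0, 1.0, 2.0, 3.0
          // rather than 10 fractional split points.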
(mi to ma) map (_.toDouble) } else { @@ -278,7 +290,6 @@ class ICETransformer(override val uid: String) extends Transformer createSplits(mi, ma) } } - values.toArray } diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index 973482706d..81a36464a2 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -6,7 +6,7 @@ import com.microsoft.ml.spark.core.test.base.TestBase import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} -import com.microsoft.ml.spark.explainers.{DiscreteFeature, ICETransformer} +import com.microsoft.ml.spark.explainers.{CategoricalFeature, ICETransformer, NumericFeature} class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { @@ -35,7 +35,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor ice.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) + .setCategoricalFeatures(Array(CategoricalFeature("col1", Some(100)), CategoricalFeature("col4", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) output.show(false) @@ -44,10 +44,13 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor iceAvg.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setDiscreteFeatures(Array(DiscreteFeature("col1", 100), DiscreteFeature("col4", 4))) + .setCategoricalFeatures(Array(CategoricalFeature("col1", Some(100)), CategoricalFeature("col2"))) + .setNumericFeatures(Array(NumericFeature("col4"), NumericFeature("col4", Some(3), Some(0.0), Some(100.0)))) .setTargetClasses(Array(1)) .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) outputAvg.show(false) + + } From 43d16481b6fdbd10407430ff49819164078698c6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 20 Oct 2021 15:31:59 -0700 Subject: [PATCH 08/32] WIP --- .../ml/spark/explainers/ICEExplainer.scala | 168 +++++------------- .../ml/spark/explainers/ICEFeature.scala | 41 +++++ .../explainers/split1/ICEExplainerSuite.scala | 8 +- 3 files changed, 93 insertions(+), 124 deletions(-) create mode 100644 core/src/main/scala/com/microsoft/ml/spark/explainers/ICEFeature.scala diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index 16e2c41056..a5c28061fd 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -9,126 +9,42 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer -import spray.json.{JsValue, JsonFormat, JsNumber, JsString, JsObject} - -case class CategoricalFeature(name: String, numTopValues: Option[Int] = None) { - def validate: Boolean = { - numTopValues.forall(_ > 0) - } - - private val defaultNumTopValue = 100 - def getNumTopValue: Int = { - this.numTopValues.getOrElse(defaultNumTopValue) - } -} - -object CategoricalFeature { - implicit val JsonFormat: 
JsonFormat[CategoricalFeature] = new JsonFormat[CategoricalFeature] { - override def read(json: JsValue): CategoricalFeature = { - val fields = json.asJsObject.fields - val name = fields("name") match { - case JsString(value) => value - case _ => throw new Exception("The name field must be a JsString.") - } - val numTopValues = fields.get("numTopValues") match { - case Some(JsNumber(value)) => Some(value.toInt) - case _ => None - } - - CategoricalFeature(name, numTopValues) - - } - override def write(obj: CategoricalFeature): JsValue = { - val map = Map("name" -> JsString(obj.name))++ - obj.numTopValues.map("numTopValues" -> JsNumber(_)) - JsObject(map) - } - } -} - -case class NumericFeature(name: String, numSplits: Option[Int] = None, - rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) { - def validate: Boolean = { - numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) - } - - private val defaultNumSplits = 10 - def getNumSplits: Int = { - this.numSplits.getOrElse(defaultNumSplits) - } -} - -object NumericFeature { - implicit val JsonFormat: JsonFormat[NumericFeature] = new JsonFormat[NumericFeature] { - override def read(json: JsValue): NumericFeature = { - val fields = json.asJsObject.fields - val name = fields("name") match { - case JsString(value) => value - case _ => throw new Exception("The name field must be a JsString.") - } - - val numSplits = fields.get("numSplits") match { - case Some(JsNumber(value)) => Some(value.toInt) - case _ => None - } - - val rangeMin = fields.get("rangeMin").map { - case JsNumber(value) => value.toDouble - } - - val rangeMax = fields.get("rangeMax").map { - case JsNumber(value) => value.toDouble - } - - NumericFeature(name, numSplits, rangeMin, rangeMax) - - } - - override def write(obj: NumericFeature): JsValue = { - val map = Map("name" -> JsString(obj.name))++ - obj.numSplits.map("numSplits" -> JsNumber(_))++ - obj.rangeMin.map("rangeMin" -> JsNumber(_))++ - obj.rangeMax.map("rangeMax" -> JsNumber(_)) - JsObject(map) - } - } -} - trait ICEFeatureParams extends Params with HasNumSamples { - - val categoricalFeatures = new TypedArrayParam[CategoricalFeature] ( + val categoricalFeatures = new TypedArrayParam[ICECategoricalFeature] ( this, "categoricalFeatures", "The list of categorical features to explain.", {_.forall(_.validate)} ) - def setCategoricalFeatures(values: Seq[CategoricalFeature]): this.type = this.set(categoricalFeatures, values) - def getCategoricalFeatures: Seq[CategoricalFeature] = $(categoricalFeatures) + def setCategoricalFeatures(values: Seq[ICECategoricalFeature]): this.type = this.set(categoricalFeatures, values) + def getCategoricalFeatures: Seq[ICECategoricalFeature] = $(categoricalFeatures) - val numericFeatures = new TypedArrayParam[NumericFeature] ( + val numericFeatures = new TypedArrayParam[ICENumericFeature] ( this, "numericFeatures", "The list of numeric features to explain.", {_.forall(_.validate)} ) - def setNumericFeatures(values: Seq[NumericFeature]): this.type = this.set(numericFeatures, values) - def getNumericFeatures: Seq[NumericFeature] = $(numericFeatures) + def setNumericFeatures(values: Seq[ICENumericFeature]): this.type = this.set(numericFeatures, values) + def getNumericFeatures: Seq[ICENumericFeature] = $(numericFeatures) val kind = new Param[String] ( this, "kind", - "Whether to return the partial dependence averaged across all the samples in the " + - "dataset or individual feature importance per sample.", + "Whether to return the partial 
dependence plot (PDP) averaged across all the samples in the " + + "dataset or individual feature importance (ICE) per sample. " + + "Allowed values are \"average\" for PDP and \"individual\" for ICE.", ParamValidators.inArray(Array("average", "individual")) ) + def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) - setDefault(kind -> "individual", numericFeatures -> Seq.empty[NumericFeature], - categoricalFeatures -> Seq.empty[CategoricalFeature]) + setDefault(kind -> "individual", numericFeatures -> Seq.empty[ICENumericFeature], + categoricalFeatures -> Seq.empty[ICECategoricalFeature]) } class ICETransformer(override val uid: String) extends Transformer @@ -141,13 +57,13 @@ class ICETransformer(override val uid: String) extends Transformer this(Identifiable.randomUID("ICETransformer")) } - def processFeature(sampled: DataFrame, idCol: String, targetClassesColumn: String, - feature: String, values: Array[_]): DataFrame = { + private def processFeature(df: DataFrame, idCol: String, targetClassesColumn: String, + feature: String, values: Array[_]): DataFrame = { - val dataType = sampled.schema(feature).dataType + val dataType = df.schema(feature).dataType val explodeFunc = explode(array(values.map(v => lit(v)): _*).cast(ArrayType(dataType))) - val predicted = getModel.transform(sampled.withColumn(feature, explodeFunc)) + val predicted = getModel.transform(df.withColumn(feature, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) val explainTarget = extractTarget(predicted.schema, targetClassesColumn) @@ -155,23 +71,31 @@ class ICETransformer(override val uid: String) extends Transformer val featImpName = feature + "__imp" - // output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) getKind.toLowerCase match { case "average" => + // PDP output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) + + // TODO: define the temp string column names from DatasetExtensions.findUnusedColumnName result .groupBy(feature) - .agg(Summarizer.mean(col(targetCol)).alias("__feature__importance__")) - .agg(collect_list(feature).alias("feature_value_list"), - collect_list("__feature__importance__").alias("feature_imp_list")) - .withColumn(featImpName, map_from_arrays(col("feature_value_list"), col("feature_imp_list"))) - .select(featImpName) - // output schema: rows * (cols + 1) (ice for the given feature: array(feature_value -> explanations)) + .agg(Summarizer.mean(col(targetCol)).alias("__feature__dependence__")) + .agg( + map_from_arrays( + collect_list(feature), + collect_list("__feature__dependence__") + ).alias(feature) + ) + case "individual" => - val iceFeatures = result.groupBy(idCol) - .agg(collect_list(feature).alias("feature_list"), collect_list(targetCol).alias("target_list")) - .withColumn(featImpName, map_from_arrays(col("feature_list"), col("target_list"))) - .select(idCol, featImpName) - iceFeatures.select(idCol, featImpName) + // ICE output schema: n rows * 2 cols (idCol + ice for the given feature: map(feature_value -> explanations)) + result + .groupBy(idCol) + .agg( + map_from_arrays( + collect_list(feature), + collect_list(targetCol) + ).alias(featImpName) + ) } } @@ -201,24 +125,28 @@ class ICETransformer(override val uid: String) extends Transformer s => dfWithId.orderBy(rand()).limit(s) }.getOrElse(dfWithId).cache() - val processCategoricalFunc: CategoricalFeature => DataFrame = { - f: CategoricalFeature => + val processCategoricalFunc: 
ICECategoricalFeature => DataFrame = { + f: ICECategoricalFeature => processFeature(sampled, idCol, targetClasses, f.name, collectedCatFeatureValues(f.name)) } - val processNumericFunc: NumericFeature => DataFrame = { - f: NumericFeature => + val processNumericFunc: ICENumericFeature => DataFrame = { + f: ICENumericFeature => processFeature(sampled, idCol, targetClasses, f.name, collectedNumFeatureValues(f.name)) } - val stage1 = (categoricalFeatures map processCategoricalFunc) union (numericFeatures map processNumericFunc) + val stage1 = (categoricalFeatures map processCategoricalFunc) ++ (numericFeatures map processNumericFunc) getKind.toLowerCase match { case "individual" => val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.join(currDF, Seq(idCol), "inner")) - sampled.join(stage2, idCol).drop(idCol) + val stage3 = (categoricalFeatures ++ numericFeatures).foldLeft(stage2){ + case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dep") + } + + sampled.join(stage3, idCol).drop(idCol) case "average" => val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.crossJoin(currDF)) @@ -226,7 +154,7 @@ class ICETransformer(override val uid: String) extends Transformer } } - private def collectCategoricalValues[_](df: DataFrame, feature: CategoricalFeature): Array[_] = { + private def collectCategoricalValues[_](df: DataFrame, feature: ICECategoricalFeature): Array[_] = { val values = df .groupBy(col(feature.name)) .agg(count("*").as("__feature__count__")) @@ -242,7 +170,7 @@ class ICETransformer(override val uid: String) extends Transformer } } - private def collectSplits(df: DataFrame, numericFeature: NumericFeature): Array[Double] = { + private def collectSplits(df: DataFrame, numericFeature: ICENumericFeature): Array[Double] = { val (feature, nSplits, rangeMin, rangeMax) = (numericFeature.name, numericFeature.getNumSplits, numericFeature.rangeMin, numericFeature.rangeMax) val featureCol = df.schema(feature) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEFeature.scala new file mode 100644 index 0000000000..9249f05786 --- /dev/null +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEFeature.scala @@ -0,0 +1,41 @@ +package com.microsoft.ml.spark.explainers + +import spray.json._ +import DefaultJsonProtocol._ + +private[explainers] abstract class ICEFeature(val name: String) { + def validate: Boolean +} + +case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) + extends ICEFeature(name) { + override def validate: Boolean = { + numTopValues.forall(_ > 0) + } + + private val defaultNumTopValue = 100 + def getNumTopValue: Int = { + this.numTopValues.getOrElse(defaultNumTopValue) + } +} + +object ICECategoricalFeature { + implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = jsonFormat2(ICECategoricalFeature.apply) +} + +case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, + rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) + extends ICEFeature(name) { + override def validate: Boolean = { + numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) + } + + private val defaultNumSplits = 10 + def getNumSplits: Int = { + this.numSplits.getOrElse(defaultNumSplits) + } +} + +object ICENumericFeature { + implicit val JsonFormat: JsonFormat[ICENumericFeature] = 
jsonFormat4(ICENumericFeature.apply) +} \ No newline at end of file diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala index 81a36464a2..fb202beac6 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/ICEExplainerSuite.scala @@ -6,7 +6,7 @@ import com.microsoft.ml.spark.core.test.base.TestBase import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} -import com.microsoft.ml.spark.explainers.{CategoricalFeature, ICETransformer, NumericFeature} +import com.microsoft.ml.spark.explainers.{ICECategoricalFeature, ICETransformer, ICENumericFeature} class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { @@ -35,7 +35,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor ice.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setCategoricalFeatures(Array(CategoricalFeature("col1", Some(100)), CategoricalFeature("col4", Some(4)))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col4", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) output.show(false) @@ -44,8 +44,8 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor iceAvg.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setCategoricalFeatures(Array(CategoricalFeature("col1", Some(100)), CategoricalFeature("col2"))) - .setNumericFeatures(Array(NumericFeature("col4"), NumericFeature("col4", Some(3), Some(0.0), Some(100.0)))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"))) + .setNumericFeatures(Array(ICENumericFeature("col4"), ICENumericFeature("col4", Some(3), Some(0.0), Some(100.0)))) .setTargetClasses(Array(1)) .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) From 9b379e8b294c79f602a284706964ce0e03ca9ea8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 20 Oct 2021 17:12:04 -0700 Subject: [PATCH 09/32] WIP --- .../ml/spark/explainers/ICEExplainer.scala | 71 ++++++++++++------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala index a5c28061fd..d7d8246e6e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/ml/spark/explainers/ICEExplainer.scala @@ -11,6 +11,10 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer trait ICEFeatureParams extends Params with HasNumSamples { + + val averageKind = "average" + val individualKind = "individual" + val categoricalFeatures = new TypedArrayParam[ICECategoricalFeature] ( this, "categoricalFeatures", @@ -37,7 +41,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { "Whether to return the partial dependence plot (PDP) averaged across all the samples in the " + "dataset or individual feature importance (ICE) per sample. 
" + "Allowed values are \"average\" for PDP and \"individual\" for ICE.", - ParamValidators.inArray(Array("average", "individual")) + ParamValidators.inArray(Array(averageKind, individualKind)) ) def getKind: String = $(kind) @@ -57,7 +61,7 @@ class ICETransformer(override val uid: String) extends Transformer this(Identifiable.randomUID("ICETransformer")) } - private def processFeature(df: DataFrame, idCol: String, targetClassesColumn: String, + private def calcDependence(df: DataFrame, idCol: String, targetClassesColumn: String, feature: String, values: Array[_]): DataFrame = { val dataType = df.schema(feature).dataType @@ -69,10 +73,8 @@ class ICETransformer(override val uid: String) extends Transformer val explainTarget = extractTarget(predicted.schema, targetClassesColumn) val result = predicted.withColumn(targetCol, explainTarget) - val featImpName = feature + "__imp" - getKind.toLowerCase match { - case "average" => + case super.averageKind => // PDP output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) // TODO: define the temp string column names from DatasetExtensions.findUnusedColumnName @@ -86,7 +88,7 @@ class ICETransformer(override val uid: String) extends Transformer ).alias(feature) ) - case "individual" => + case super.individualKind => // ICE output schema: n rows * 2 cols (idCol + ice for the given feature: map(feature_value -> explanations)) result .groupBy(idCol) @@ -94,25 +96,35 @@ class ICETransformer(override val uid: String) extends Transformer map_from_arrays( collect_list(feature), collect_list(targetCol) - ).alias(featImpName) + ).alias(feature) ) } } - def transform(ds: Dataset[_]): DataFrame = { + transformSchema(ds.schema) + val df = ds.toDF val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) val dfWithId = df .withColumn(idCol, monotonically_increasing_id()) .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) - transformSchema(df.schema) + // collect feature values for all features from original dataset - dfWithId val categoricalFeatures = this.getCategoricalFeatures val numericFeatures = this.getNumericFeatures + // TODO: Move the check into transformSchema + // Check for duplicate feature specification + val featureNames = categoricalFeatures.map(_.name) ++ numericFeatures.map(_.name) + + val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 0).keys.toArray + if (duplicateFeatureNames.nonEmpty) { + throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") + } + val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { feature => (feature.name, collectCategoricalValues(dfWithId, feature)) }.toMap @@ -125,32 +137,37 @@ class ICETransformer(override val uid: String) extends Transformer s => dfWithId.orderBy(rand()).limit(s) }.getOrElse(dfWithId).cache() - val processCategoricalFunc: ICECategoricalFeature => DataFrame = { + val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => - processFeature(sampled, idCol, targetClasses, f.name, collectedCatFeatureValues(f.name)) + calcDependence(sampled, idCol, targetClasses, f.name, collectedCatFeatureValues(f.name)) } - val processNumericFunc: ICENumericFeature => DataFrame = { + val calcNumericFunc: ICENumericFeature => DataFrame = { f: ICENumericFeature => - processFeature(sampled, idCol, targetClasses, f.name, 
collectedNumFeatureValues(f.name)) + calcDependence(sampled, idCol, targetClasses, f.name, collectedNumFeatureValues(f.name)) } - val stage1 = (categoricalFeatures map processCategoricalFunc) ++ (numericFeatures map processNumericFunc) + val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) getKind.toLowerCase match { - case "individual" => - val stage2: DataFrame = - stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.join(currDF, Seq(idCol), "inner")) - - val stage3 = (categoricalFeatures ++ numericFeatures).foldLeft(stage2){ - case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dep") - } - - sampled.join(stage3, idCol).drop(idCol) - - case "average" => - val stage2: DataFrame = stage1.tail.foldLeft(stage1.head)((accDF, currDF) => accDF.crossJoin(currDF)) - stage2 + case super.individualKind => + dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) + .map { + df => + (categoricalFeatures ++ numericFeatures).foldLeft(df) { + case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dependence") + } + } + .map(sampled.join(_, idCol)).getOrElse( + throw new Exception("No categorical features or numeric features are set to the explainer. " + + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") + ) + + case super.averageKind => + dependenceDfs.reduceOption(_ crossJoin _).getOrElse( + throw new Exception("No categorical features or numeric features are set to the explainer. " + + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") + ) } } From 5ba0bece255cf47125047bbdba88f581eb29ce1a Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 21 Oct 2021 17:35:40 -0700 Subject: [PATCH 10/32] small fix --- .../synapse/ml/explainers/ICEExplainer.scala | 31 +++++++++---------- .../synapse/ml/explainers/SharedParams.scala | 2 +- .../explainers/split1/ICEExplainerSuite.scala | 6 ++-- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 18ac671c1c..55774c9946 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -70,26 +70,25 @@ class ICETransformer(override val uid: String) extends Transformer val predicted = getModel.transform(df.withColumn(feature, explodeFunc)) val targetCol = DatasetExtensions.findUnusedColumnName("target", predicted) + val dependenceCol = DatasetExtensions.findUnusedColumnName("feature__dependence", predicted) val explainTarget = extractTarget(predicted.schema, targetClassesColumn) val result = predicted.withColumn(targetCol, explainTarget) getKind.toLowerCase match { - case super.averageKind => + case this.averageKind => // PDP output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) - - // TODO: define the temp string column names from DatasetExtensions.findUnusedColumnName result .groupBy(feature) - .agg(Summarizer.mean(col(targetCol)).alias("__feature__dependence__")) + .agg(Summarizer.mean(col(targetCol)).alias(dependenceCol)) .agg( map_from_arrays( collect_list(feature), - collect_list("__feature__dependence__") + collect_list(dependenceCol) ).alias(feature) ) - case super.individualKind => + case this.individualKind => // ICE output schema: n rows 
* 2 cols (idCol + ice for the given feature: map(feature_value -> explanations)) result .groupBy(idCol) @@ -117,15 +116,6 @@ class ICETransformer(override val uid: String) extends Transformer val categoricalFeatures = this.getCategoricalFeatures val numericFeatures = this.getNumericFeatures - // TODO: Move the check into transformSchema - // Check for duplicate feature specification - val featureNames = categoricalFeatures.map(_.name) ++ numericFeatures.map(_.name) - - val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 0).keys.toArray - if (duplicateFeatureNames.nonEmpty) { - throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") - } - val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { feature => (feature.name, collectCategoricalValues(dfWithId, feature)) }.toMap @@ -151,7 +141,7 @@ class ICETransformer(override val uid: String) extends Transformer val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) getKind.toLowerCase match { - case super.individualKind => + case this.individualKind => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) .map { df => @@ -164,7 +154,7 @@ class ICETransformer(override val uid: String) extends Transformer "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") ) - case super.averageKind => + case this.averageKind => dependenceDfs.reduceOption(_ crossJoin _).getOrElse( throw new Exception("No categorical features or numeric features are set to the explainer. " + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") @@ -242,6 +232,13 @@ class ICETransformer(override val uid: String) extends Transformer override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) override def transformSchema(schema: StructType): StructType = { + // Check for duplicate feature specification + val featureNames = getCategoricalFeatures.map(_.name) ++ getNumericFeatures.map(_.name) + val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 0).keys.toArray + if (duplicateFeatureNames.nonEmpty) { + throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") + } + this.validateSchema(schema) schema.add(getOutputCol, ArrayType(VectorType)) } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala index a895115f42..3a3884baa5 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala @@ -3,7 +3,7 @@ package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.ml.spark.core.utils.SlicerFunctions +import com.microsoft.azure.synapse.ml.core.utils.SlicerFunctions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index fb202beac6..5296700edf 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -1,12 +1,12 @@ -package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} -import com.microsoft.ml.spark.explainers.{ICECategoricalFeature, ICETransformer, ICENumericFeature} +import com.microsoft.azure.synapse.ml.explainers.{ICETransformer, ICECategoricalFeature, ICENumericFeature} class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { From c0c9ddf4399e86f6700d4cedce091b4185a6ef99 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Fri, 5 Nov 2021 17:42:13 -0700 Subject: [PATCH 11/32] added some unit tests --- .../synapse/ml/explainers/ICEExplainer.scala | 14 +-- .../synapse/ml/explainers/ICEFeature.scala | 93 ++++++++++++++++++- .../explainers/split1/ICEExplainerSuite.scala | 46 +++++++-- 3 files changed, 132 insertions(+), 21 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 55774c9946..878c1ad4a6 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -103,7 +103,6 @@ class ICETransformer(override val uid: String) extends Transformer def transform(ds: Dataset[_]): DataFrame = { transformSchema(ds.schema) - val df = ds.toDF val idCol = DatasetExtensions.findUnusedColumnName("idCol", df) val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) @@ -111,7 +110,6 @@ class ICETransformer(override val uid: String) extends Transformer .withColumn(idCol, monotonically_increasing_id()) .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) - // collect feature values for all features from original dataset - dfWithId val categoricalFeatures = this.getCategoricalFeatures val numericFeatures = this.getNumericFeatures @@ -119,7 +117,6 @@ class ICETransformer(override val uid: String) extends Transformer val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { feature => (feature.name, collectCategoricalValues(dfWithId, feature)) }.toMap - val collectedNumFeatureValues: Map[String, Array[_]] = numericFeatures.map { feature => (feature.name, collectSplits(dfWithId, feature)) }.toMap @@ -132,7 +129,6 @@ class ICETransformer(override val uid: String) extends Transformer f: ICECategoricalFeature => calcDependence(sampled, idCol, targetClasses, f.name, collectedCatFeatureValues(f.name)) } - val calcNumericFunc: ICENumericFeature => DataFrame = { f: ICENumericFeature => calcDependence(sampled, idCol, targetClasses, f.name, collectedNumFeatureValues(f.name)) @@ -144,16 +140,14 @@ class ICETransformer(override val uid: String) extends Transformer case this.individualKind => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) .map { - df => - (categoricalFeatures ++ numericFeatures).foldLeft(df) { - case (accDf, feature) => 
accDf.withColumnRenamed(feature.name, feature.name + "_dependence") - } + df => (categoricalFeatures ++ numericFeatures).foldLeft(df) { + case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dependence") + } } .map(sampled.join(_, idCol)).getOrElse( throw new Exception("No categorical features or numeric features are set to the explainer. " + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") ) - case this.averageKind => dependenceDfs.reduceOption(_ crossJoin _).getOrElse( throw new Exception("No categorical features or numeric features are set to the explainer. " + @@ -234,7 +228,7 @@ class ICETransformer(override val uid: String) extends Transformer override def transformSchema(schema: StructType): StructType = { // Check for duplicate feature specification val featureNames = getCategoricalFeatures.map(_.name) ++ getNumericFeatures.map(_.name) - val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 0).keys.toArray + val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 1).keys.toArray if (duplicateFeatureNames.nonEmpty) { throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index 61fd3feba6..b154201acf 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -7,8 +7,41 @@ private[explainers] abstract class ICEFeature(val name: String) { def validate: Boolean } +//case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) +// extends ICEFeature(name) { +// override def validate: Boolean = { +// numTopValues.forall(_ > 0) +// } +// +// private val defaultNumTopValue = 100 +// def getNumTopValue: Int = { +// this.numTopValues.getOrElse(defaultNumTopValue) +// } +//} +// +//object ICECategoricalFeature { +// implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = jsonFormat2(ICECategoricalFeature.apply) +//} +// +//case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, +// rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) +// extends ICEFeature(name) { +// override def validate: Boolean = { +// numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) +// } +// +// private val defaultNumSplits = 10 +// def getNumSplits: Int = { +// this.numSplits.getOrElse(defaultNumSplits) +// } +//} +// +//object ICENumericFeature { +// implicit val JsonFormat: JsonFormat[ICENumericFeature] = jsonFormat4(ICENumericFeature.apply) +//} + case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) - extends ICEFeature(name) { + extends ICEFeature(name) { override def validate: Boolean = { numTopValues.forall(_ > 0) } @@ -20,7 +53,27 @@ case class ICECategoricalFeature(override val name: String, numTopValues: Option } object ICECategoricalFeature { - implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = jsonFormat2(ICECategoricalFeature.apply) + implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = new JsonFormat[ICECategoricalFeature] { + override def read(json: JsValue): ICECategoricalFeature = { + val fields = json.asJsObject.fields + val name = fields("name") 
match { + case JsString(value) => value + case _ => throw new Exception("The name field must be a JsString.") + } + val numTopValues = fields.get("numTopValues") match { + case Some(JsNumber(value)) => Some(value.toInt) + case _ => None + } + + ICECategoricalFeature(name, numTopValues) + + } + override def write(obj: ICECategoricalFeature): JsValue = { + val map = Map("name" -> JsString(obj.name))++ + obj.numTopValues.map("numTopValues" -> JsNumber(_)) + JsObject(map) + } + } } case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, @@ -37,5 +90,37 @@ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = } object ICENumericFeature { - implicit val JsonFormat: JsonFormat[ICENumericFeature] = jsonFormat4(ICENumericFeature.apply) -} \ No newline at end of file + implicit val JsonFormat: JsonFormat[ICENumericFeature] = new JsonFormat[ICENumericFeature] { + override def read(json: JsValue): ICENumericFeature = { + val fields = json.asJsObject.fields + val name = fields("name") match { + case JsString(value) => value + case _ => throw new Exception("The name field must be a JsString.") + } + + val numSplits = fields.get("numSplits") match { + case Some(JsNumber(value)) => Some(value.toInt) + case _ => None + } + + val rangeMin = fields.get("rangeMin").map { + case JsNumber(value) => value.toDouble + } + + val rangeMax = fields.get("rangeMax").map { + case JsNumber(value) => value.toDouble + } + + ICENumericFeature(name, numSplits, rangeMin, rangeMax) + + } + + override def write(obj: ICENumericFeature): JsValue = { + val map = Map("name" -> JsString(obj.name))++ + obj.numSplits.map("numSplits" -> JsNumber(_))++ + obj.rangeMin.map("rangeMin" -> JsNumber(_))++ + obj.rangeMax.map("rangeMax" -> JsNumber(_)) + JsObject(map) + } + } +} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 5296700edf..341d5e61a5 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -3,10 +3,12 @@ package com.microsoft.azure.synapse.ml.explainers.split1 import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} -import com.microsoft.azure.synapse.ml.explainers.{ICETransformer, ICECategoricalFeature, ICENumericFeature} +import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENumericFeature, ICETransformer} +import org.apache.spark.ml.linalg.Vector + class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { @@ -21,7 +23,6 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val data: DataFrame = dataDF.withColumn("col4", rand()*100) - val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), @@ -35,22 +36,53 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor 
ice.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") - .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col4", Some(4)))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col4", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) - output.show(false) val iceAvg = new ICETransformer() iceAvg.setModel(model) .setOutputCol("iceValues") .setTargetCol("probability") .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"))) - .setNumericFeatures(Array(ICENumericFeature("col4"), ICENumericFeature("col4", Some(3), Some(0.0), Some(100.0)))) + .setNumericFeatures(Array(ICENumericFeature("col4", Some(5)))) .setTargetClasses(Array(1)) .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) - outputAvg.show(false) + test("col2 doesn't contribute to the prediction") { + + val outputCol2: Map[String, Vector] = outputAvg.select("col2").collect().map { + case Row(map: Map[String, Vector]) => + map + }.head + + val impA: Double = outputCol2.get("a").head.toArray.head + val impB: Double = outputCol2.get("b").head.toArray.head + + assert(0.4 < impA && impA < 0.6) + assert(0.4 < impB && impB < 0.6) + + } + + test("The length of explainer map for numeric feature is equal to it's numSplits") { + + val outputCol1: Map[Double, Vector] = outputAvg.select("col4").collect().map { + case Row(map: Map[Double, Vector]) => + map + }.head + + assert(outputCol1.size == iceAvg.getNumericFeatures.head.getNumSplits + 1) + + } + + test("The length of explainer map for categorical feature is equal to it's numTopValues") { + val outputCol: Map[Double, Vector] = output.select("col4_dependence").collect().map { + case Row(map: Map[Double, Vector]) => + map + }.head + assert(outputCol.size === ice.getCategoricalFeatures.last.getNumTopValue) + } } From 51e3d4ffba93aca9a3a96fa3b285db077b814e52 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 18 Nov 2021 14:06:19 -0800 Subject: [PATCH 12/32] added python code --- .../synapse/ml/explainers/ICEFeature.py | 23 +++++++++++++++++++ .../synapse/ml/explainers/ICETransformer.py | 23 +++++++++++++++++++ .../python/synapse/ml/explainers/__init__.py | 0 .../synapse/ml/explainers/ICEExplainer.scala | 10 ++++++-- .../synapse/ml/explainers/ICEFeature.scala | 2 +- .../explainers/split1/ICEExplainerSuite.scala | 10 ++++++++ .../ml/core/test/fuzzing/FuzzingTest.scala | 3 ++- 7 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 core/src/main/python/synapse/ml/explainers/ICEFeature.py create mode 100644 core/src/main/python/synapse/ml/explainers/ICETransformer.py create mode 100644 core/src/main/python/synapse/ml/explainers/__init__.py diff --git a/core/src/main/python/synapse/ml/explainers/ICEFeature.py b/core/src/main/python/synapse/ml/explainers/ICEFeature.py new file mode 100644 index 0000000000..6bc8f5ce0d --- /dev/null +++ b/core/src/main/python/synapse/ml/explainers/ICEFeature.py @@ -0,0 +1,23 @@ +from pyspark.ml.wrapper import JavaWrapper +from pyspark import SparkContext + +class ICECategoricalFeature(JavaWrapper): + def __init__(self, col: str, numTopValues: int = None): + sc = SparkContext._active_spark_context + numTopValues = sc._jvm.scala.Some(numTopValues) if numTopValues else sc._jvm.scala.Option.empty() + self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature", col, numTopValues) + + def getObject(self): + 
return self._java_obj + +class ICENumericFeature(JavaWrapper): + def __init__(self, col: str, numSplits: int = None, rangeMin: float = None, rangeMax: float = None): + sc = SparkContext._active_spark_context + numSplits = sc._jvm.scala.Some(numSplits) if numSplits else sc._jvm.scala.Option.empty() + rangeMin = sc._jvm.scala.Some(rangeMin) if rangeMin else sc._jvm.scala.Option.empty() + rangeMax = sc._jvm.scala.Some(rangeMax) if rangeMax else sc._jvm.scala.Option.empty() + self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICENumericFeature", col, numSplits, rangeMin, rangeMax) + + + def getObject(self): + return self._java_obj diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py new file mode 100644 index 0000000000..7559e59708 --- /dev/null +++ b/core/src/main/python/synapse/ml/explainers/ICETransformer.py @@ -0,0 +1,23 @@ +from synapse.ml.explainers._ICETransformer import _ICETransformer +from pyspark.ml.common import inherit_doc +from pyspark import SparkContext + +@inherit_doc +class ICETransformer(_ICETransformer): + def setCategoricalFeatures(self, value): + """ + Args: + categoricalFeatures: The list of categorical features to explain. + """ + sc = SparkContext._active_spark_context + feature_list = [v.getObject() for v in value] + return super().setCategoricalFeatures(sc._jvm.PythonUtils.toSeq(feature_list)) + + def setNumericFeatures(self, value): + """ + Args: + categoricalFeatures: The list of categorical features to explain. + """ + sc = SparkContext._active_spark_context + feature_list = [v.getObject() for v in value] + return super().setNumericFeatures(sc._jvm.PythonUtils.toSeq(feature_list)) diff --git a/core/src/main/python/synapse/ml/explainers/__init__.py b/core/src/main/python/synapse/ml/explainers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 878c1ad4a6..3b860e7f98 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -2,7 +2,7 @@ package com.microsoft.azure.synapse.ml.explainers import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions -import org.apache.spark.ml.Transformer +import org.apache.spark.ml.{ComplexParamsWritable, Transformer} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.param.{ParamMap, ParamValidators, Params, _} import org.apache.spark.ml.util.Identifiable @@ -10,6 +10,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer +import com.microsoft.azure.synapse.ml.codegen.Wrappable + trait ICEFeatureParams extends Params with HasNumSamples { @@ -56,7 +58,11 @@ class ICETransformer(override val uid: String) extends Transformer with HasExplainTarget with HasModel with ICEFeatureParams - with HasOutputCol { + with HasOutputCol + with Wrappable + with ComplexParamsWritable { + + override protected lazy val pyInternalWrapper = true def this() = { this(Identifiable.randomUID("ICETransformer")) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index b154201acf..ca9a4b31dd 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -123,4 +123,4 @@ object ICENumericFeature { JsObject(map) } } -} +} \ No newline at end of file diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 341d5e61a5..51be4cd64e 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -3,11 +3,13 @@ package com.microsoft.azure.synapse.ml.explainers.split1 import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENumericFeature, ICETransformer} import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.util.MLReadable @@ -23,6 +25,8 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val data: DataFrame = dataDF.withColumn("col4", rand()*100) + data.show() + val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), @@ -40,6 +44,8 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) + output.show() + val iceAvg = new ICETransformer() iceAvg.setModel(model) .setOutputCol("iceValues") @@ -85,4 +91,8 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor assert(outputCol.size === ice.getCategoricalFeatures.last.getNumTopValue) } + + //override def testObjects(): Seq[TestObject[ICETransformer]] = Seq(new TestObject(ice, data)) + + //override def reader: MLReadable[_] = ICETransformer } diff --git a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala index 0871c13aae..d1e61b73a1 100644 --- a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala +++ b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala @@ -240,7 +240,8 @@ class FuzzingTest extends TestBase { "com.microsoft.azure.synapse.ml.explainers.TextLIME", "com.microsoft.azure.synapse.ml.explainers.TextSHAP", "com.microsoft.azure.synapse.ml.explainers.VectorLIME", - "com.microsoft.azure.synapse.ml.explainers.VectorSHAP" + "com.microsoft.azure.synapse.ml.explainers.VectorSHAP", + "com.microsoft.azure.synapse.ml.explainers.ICETransformer" ) pipelineStages.foreach { stage => From bda78823a0da40f087e618ea1fa0b3b659ef8de1 Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 19 Nov 2021 16:45:15 -0800 Subject: [PATCH 13/32] Update 
core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala Co-authored-by: Jason Wang --- .../microsoft/azure/synapse/ml/explainers/ICEExplainer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 3b860e7f98..f1d3a859fa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -22,7 +22,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { this, "categoricalFeatures", "The list of categorical features to explain.", - {_.forall(_.validate)} + _.forall(_.validate) ) def setCategoricalFeatures(values: Seq[ICECategoricalFeature]): this.type = this.set(categoricalFeatures, values) From fa0aa6fbed379303f618d1f9e631119a6f426bc6 Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 19 Nov 2021 16:45:33 -0800 Subject: [PATCH 14/32] Update core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala Co-authored-by: Jason Wang --- .../microsoft/azure/synapse/ml/explainers/ICEExplainer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index f1d3a859fa..8121d87de7 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -32,7 +32,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { this, "numericFeatures", "The list of numeric features to explain.", - {_.forall(_.validate)} + _.forall(_.validate) ) def setNumericFeatures(values: Seq[ICENumericFeature]): this.type = this.set(numericFeatures, values) From adc4301bb82ab117422b8e3146804cf5d73dbb10 Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 19 Nov 2021 16:45:55 -0800 Subject: [PATCH 15/32] Update core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala Co-authored-by: Jason Wang --- .../microsoft/azure/synapse/ml/explainers/ICEExplainer.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 8121d87de7..492dcec782 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -117,8 +117,7 @@ class ICETransformer(override val uid: String) extends Transformer .withColumn(targetClasses, this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) // collect feature values for all features from original dataset - dfWithId - val categoricalFeatures = this.getCategoricalFeatures - val numericFeatures = this.getNumericFeatures + val (categoricalFeatures, numericFeatures) = (this.getCategoricalFeatures, this.getNumericFeatures) val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { feature => (feature.name, collectCategoricalValues(dfWithId, feature)) From 058f27bd9e87a3b3bcfb675ee5b2636c23bc9f5a Mon Sep 17 00:00:00 2001 From: 
Elena Zherdeva Date: Fri, 19 Nov 2021 16:46:10 -0800 Subject: [PATCH 16/32] fix1 --- .../synapse/ml/explainers/ICEExplainer.scala | 2 +- .../synapse/ml/explainers/ICEFeature.scala | 57 ++++++++----------- 2 files changed, 24 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 3b860e7f98..edb0ed2196 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -242,4 +242,4 @@ class ICETransformer(override val uid: String) extends Transformer this.validateSchema(schema) schema.add(getOutputCol, ArrayType(VectorType)) } -} +} \ No newline at end of file diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index ca9a4b31dd..2f8f7153c5 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -1,45 +1,17 @@ package com.microsoft.azure.synapse.ml.explainers import spray.json._ -import DefaultJsonProtocol._ private[explainers] abstract class ICEFeature(val name: String) { def validate: Boolean } -//case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) -// extends ICEFeature(name) { -// override def validate: Boolean = { -// numTopValues.forall(_ > 0) -// } -// -// private val defaultNumTopValue = 100 -// def getNumTopValue: Int = { -// this.numTopValues.getOrElse(defaultNumTopValue) -// } -//} -// -//object ICECategoricalFeature { -// implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = jsonFormat2(ICECategoricalFeature.apply) -//} -// -//case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, -// rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) -// extends ICEFeature(name) { -// override def validate: Boolean = { -// numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) -// } -// -// private val defaultNumSplits = 10 -// def getNumSplits: Int = { -// this.numSplits.getOrElse(defaultNumSplits) -// } -//} -// -//object ICENumericFeature { -// implicit val JsonFormat: JsonFormat[ICENumericFeature] = jsonFormat4(ICENumericFeature.apply) -//} - +/** + * Represents a single categorical feature to be explained by ICE explainer. + * @param name The name of the categorical feature. + * @param numTopValues The max number of top-occurring values to be included in the categorical feature. + * Default: 100. + */ case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) extends ICEFeature(name) { override def validate: Boolean = { @@ -52,6 +24,9 @@ case class ICECategoricalFeature(override val name: String, numTopValues: Option } } +/** + * Companion object to provide JSON serializer and deserializer for ICECategoricalFeature . + */ object ICECategoricalFeature { implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = new JsonFormat[ICECategoricalFeature] { override def read(json: JsValue): ICECategoricalFeature = { @@ -76,10 +51,21 @@ object ICECategoricalFeature { } } +/** + * Represents a single numeric feature to be explained by ICE explainer. + * @param name The name of the numeric feature. 
+ * @param numSplits The number of splits for the value range for the numeric feature. + * Default: 10.0 + * @param rangeMin Specifies the min value of the range for the numeric feature. If not specified, + * it will be computed from the background dataset. + * @param rangeMax Specifies the max value of the range for the numeric feature. If not specified, + * it will be computed from the background dataset. + */ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) extends ICEFeature(name) { override def validate: Boolean = { + // rangeMax and rangeMin may not be specified, but if specified: rangeMin <= rangeMax. numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) } @@ -89,6 +75,9 @@ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = } } +/** + * Companion object to provide JSON serializer and deserializer for ICENumericFeature. + */ object ICENumericFeature { implicit val JsonFormat: JsonFormat[ICENumericFeature] = new JsonFormat[ICENumericFeature] { override def read(json: JsValue): ICENumericFeature = { From 5d3d38ee203903a83cdbdcd563ee0bc9ae044b8c Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 19 Nov 2021 16:47:00 -0800 Subject: [PATCH 17/32] Update core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala Co-authored-by: Jason Wang --- .../microsoft/azure/synapse/ml/explainers/ICEExplainer.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 492dcec782..8ed713a31b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -126,9 +126,7 @@ class ICETransformer(override val uid: String) extends Transformer feature => (feature.name, collectSplits(dfWithId, feature)) }.toMap - val sampled = this.get(numSamples).map { - s => dfWithId.orderBy(rand()).limit(s) - }.getOrElse(dfWithId).cache() + val sampled = this.get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => From 172a050c4ee5853c5f2dcf91bbd711be6c924835 Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 19 Nov 2021 17:02:16 -0800 Subject: [PATCH 18/32] Update core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala Co-authored-by: Jason Wang --- .../azure/synapse/ml/explainers/ICEExplainer.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 8ed713a31b..30889203b3 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -130,11 +130,13 @@ class ICETransformer(override val uid: String) extends Transformer val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => - calcDependence(sampled, idCol, targetClasses, f.name, 
collectedCatFeatureValues(f.name)) + val values = collectCategoricalValues(dfWithId, f) + calcDependence(sampled, idCol, targetClasses, f.name, values) } val calcNumericFunc: ICENumericFeature => DataFrame = { f: ICENumericFeature => - calcDependence(sampled, idCol, targetClasses, f.name, collectedNumFeatureValues(f.name)) + val values = collectSplits(dfWithId, f) + calcDependence(sampled, idCol, targetClasses, f.name, values) } val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) From 69486ed81dd849c89978d82ef2e35255b74bbf02 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Fri, 19 Nov 2021 17:07:18 -0800 Subject: [PATCH 19/32] fix 2 --- .../synapse/ml/explainers/ICEExplainer.scala | 15 +++++++++++---- .../azure/synapse/ml/explainers/ICEFeature.scala | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index c1cb0950ed..f353790b52 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -54,6 +54,12 @@ trait ICEFeatureParams extends Params with HasNumSamples { categoricalFeatures -> Seq.empty[ICECategoricalFeature]) } +/** + * Transformer which displays the model dependence on specified features with the given dataframe + * as background dataset. It supports 2 types of plots: individual - dependence per instance and + * average - across all the samples in the dataset. + * Note: This transformer only supports one-way dependence plot. + */ class ICETransformer(override val uid: String) extends Transformer with HasExplainTarget with HasModel @@ -139,6 +145,9 @@ class ICETransformer(override val uid: String) extends Transformer val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) + val errorMessage = "No categorical features or numeric features are set to the explainer. " + + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained." + getKind.toLowerCase match { case this.individualKind => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) @@ -148,13 +157,11 @@ class ICETransformer(override val uid: String) extends Transformer } } .map(sampled.join(_, idCol)).getOrElse( - throw new Exception("No categorical features or numeric features are set to the explainer. " + - "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") + throw new Exception(errorMessage) ) case this.averageKind => dependenceDfs.reduceOption(_ crossJoin _).getOrElse( - throw new Exception("No categorical features or numeric features are set to the explainer. 
" + - "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") + throw new Exception(errorMessage) ) } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index 2f8f7153c5..2023bb489b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -25,7 +25,7 @@ case class ICECategoricalFeature(override val name: String, numTopValues: Option } /** - * Companion object to provide JSON serializer and deserializer for ICECategoricalFeature . + * Companion object to provide JSON serializer and deserializer for ICECategoricalFeature. */ object ICECategoricalFeature { implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = new JsonFormat[ICECategoricalFeature] { From 1d658d5b1be54235cb7eb4200829bcc418a1e2b1 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Sat, 20 Nov 2021 14:51:16 -0800 Subject: [PATCH 20/32] Fixed comments --- .../synapse/ml/explainers/ICEExplainer.scala | 50 ++++++++----------- .../explainers/split1/ICEExplainerSuite.scala | 50 ++++++++++++++----- 2 files changed, 60 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index dc42ac5100..d26d057896 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -2,7 +2,7 @@ package com.microsoft.azure.synapse.ml.explainers import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions -import org.apache.spark.ml.{ComplexParamsWritable, Transformer} +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.param.{ParamMap, ParamValidators, Params, _} import org.apache.spark.ml.util.Identifiable @@ -55,7 +55,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { } /** - * Transformer which displays the model dependence on specified features with the given dataframe + * ICETransformer displays the model dependence on specified features with the given dataframe * as background dataset. It supports 2 types of plots: individual - dependence per instance and * average - across all the samples in the dataset. * Note: This transformer only supports one-way dependence plot. 
@@ -124,15 +124,8 @@ class ICETransformer(override val uid: String) extends Transformer // collect feature values for all features from original dataset - dfWithId val (categoricalFeatures, numericFeatures) = (this.getCategoricalFeatures, this.getNumericFeatures) - - val collectedCatFeatureValues: Map[String, Array[_]] = categoricalFeatures.map { - feature => (feature.name, collectCategoricalValues(dfWithId, feature)) - }.toMap - val collectedNumFeatureValues: Map[String, Array[_]] = numericFeatures.map { - feature => (feature.name, collectSplits(dfWithId, feature)) - }.toMap - - val sampled = this.get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache + + val sampled: Dataset[Row] = this.get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => @@ -147,32 +140,27 @@ class ICETransformer(override val uid: String) extends Transformer val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) - val errorMessage = "No categorical features or numeric features are set to the explainer. " + - "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained." - getKind.toLowerCase match { case this.individualKind => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) .map { - df => (categoricalFeatures ++ numericFeatures).foldLeft(df) { - case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dependence") - } + df => + (categoricalFeatures ++ numericFeatures).foldLeft(df) { + case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dependence") + } } - .map(sampled.join(_, idCol)).getOrElse( - throw new Exception(errorMessage) - ) + .map(sampled.join(_, Seq(idCol), "inner").drop(idCol)).get case this.averageKind => - dependenceDfs.reduceOption(_ crossJoin _).getOrElse( - throw new Exception(errorMessage) - ) + dependenceDfs.reduceOption(_ crossJoin _).get } } private def collectCategoricalValues[_](df: DataFrame, feature: ICECategoricalFeature): Array[_] = { + val featureCountCol = DatasetExtensions.findUnusedColumnName("__feature__count__", df) val values = df .groupBy(col(feature.name)) - .agg(count("*").as("__feature__count__")) - .orderBy(col("__feature__count__").desc) + .agg(count("*").as(featureCountCol)) + .orderBy(col(featureCountCol).desc) .head(feature.getNumTopValue) .map(row => row.get(0)) values @@ -238,14 +226,20 @@ class ICETransformer(override val uid: String) extends Transformer override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) override def transformSchema(schema: StructType): StructType = { - // Check for duplicate feature specification + // Check if features are specified val featureNames = getCategoricalFeatures.map(_.name) ++ getNumericFeatures.map(_.name) + if (featureNames.isEmpty) { + throw new Exception("No categorical features or numeric features are set to the explainer. 
" + + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") + } + // Check for duplicate feature specification val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 1).keys.toArray if (duplicateFeatureNames.nonEmpty) { throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") } - this.validateSchema(schema) schema.add(getOutputCol, ArrayType(VectorType)) } -} \ No newline at end of file +} + +object ICETransformer extends ComplexParamsReadable[ICETransformer] \ No newline at end of file diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 51be4cd64e..d1f11d1ba1 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -12,8 +12,7 @@ import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable - -class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransformer] { +class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] { import spark.implicits._ val dataDF: DataFrame = (1 to 100).flatMap(_ => Seq( @@ -25,8 +24,6 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor val data: DataFrame = dataDF.withColumn("col4", rand()*100) - data.show() - val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), @@ -44,8 +41,6 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) - output.show() - val iceAvg = new ICETransformer() iceAvg.setModel(model) .setOutputCol("iceValues") @@ -56,7 +51,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) - test("col2 doesn't contribute to the prediction") { + test("col2 doesn't contribute to the prediction.") { val outputCol2: Map[String, Vector] = outputAvg.select("col2").collect().map { case Row(map: Map[String, Vector]) => @@ -71,7 +66,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor } - test("The length of explainer map for numeric feature is equal to it's numSplits") { + test("The length of explainer map for numeric feature is equal to it's numSplits.") { val outputCol1: Map[Double, Vector] = outputAvg.select("col4").collect().map { case Row(map: Map[Double, Vector]) => @@ -82,7 +77,7 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor } - test("The length of explainer map for categorical feature is equal to it's numTopValues") { + test("The length of explainer map for categorical feature is equal to it's numTopValues.") { val outputCol: Map[Double, Vector] = output.select("col4_dependence").collect().map { case Row(map: Map[Double, Vector]) => map @@ -92,7 +87,38 @@ class ICEExplainerSuite extends TestBase {// with TransformerFuzzing[ICETransfor } - //override def testObjects(): Seq[TestObject[ICETransformer]] = Seq(new TestObject(ice, data)) + test("No features specified.") { + val ice = new ICETransformer() + ice.setModel(model) + 
.setOutputCol("iceValues") + .setTargetCol("probability") + .setTargetClasses(Array(1)) + assertThrows[Exception](ice.transform(data)) + } + + test("Duplicate features specified.") { + val ice = new ICETransformer() + ice.setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), + ICECategoricalFeature("col2"), ICECategoricalFeature("col1"))) + .setTargetClasses(Array(1)) + assertThrows[Exception](ice.transform(data)) + } + + test("When setNumSamples is called, ICE returns correct number of rows.") { + val ice = new ICETransformer() + ice.setNumSamples(2) + .setModel(model) + .setOutputCol("iceValues") + .setTargetCol("probability") + .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col4", Some(4)))) + .setTargetClasses(Array(1)) + val output = ice.transform(data) + assert(output.count() == 2) + } - //override def reader: MLReadable[_] = ICETransformer -} + override def testObjects(): Seq[TestObject[ICETransformer]] = Seq(new TestObject(ice, data)) + override def reader: MLReadable[_] = ICETransformer +} \ No newline at end of file From 25ad8fa0814b4d2ecdac8f7da943b8ac524d453f Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Mon, 29 Nov 2021 13:35:13 -0800 Subject: [PATCH 21/32] fix comments --- .../synapse/ml/explainers/ICEExplainer.scala | 36 ++++++++++------ .../synapse/ml/explainers/ICEFeature.scala | 41 ++++++++++++++----- .../explainers/split1/ICEExplainerSuite.scala | 33 +++++++-------- 3 files changed, 70 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index d26d057896..121ce9a07a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -1,9 +1,10 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
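The tests above read each dependence column back as a map from feature value to the vector of explained target classes. A hedged sketch of that consumption pattern, mirroring the suite (the column name "col2" follows the fixtures at this point in the series; later patches suffix it with "_dependence" or a user-supplied output column name):

    import org.apache.spark.ml.linalg.Vector
    import org.apache.spark.sql.Row

    // `outputAvg` is the result of an average-kind ICETransformer, as in the suite.
    val col2Dependence: Map[String, Vector] = outputAvg.select("col2").collect().map {
      case Row(map: Map[String, Vector]) => map
    }.head
    col2Dependence.foreach { case (value, deps) =>
      // one entry per requested target class (here only class 1)
      println(s"col2 = $value -> ${deps.toArray.mkString(", ")}")
    }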
+ package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} -import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.param.{ParamMap, ParamValidators, Params, _} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.functions._ @@ -64,7 +65,6 @@ class ICETransformer(override val uid: String) extends Transformer with HasExplainTarget with HasModel with ICEFeatureParams - with HasOutputCol with Wrappable with ComplexParamsWritable { @@ -75,7 +75,7 @@ class ICETransformer(override val uid: String) extends Transformer } private def calcDependence(df: DataFrame, idCol: String, targetClassesColumn: String, - feature: String, values: Array[_]): DataFrame = { + feature: String, values: Array[_], outputColName: String): DataFrame = { val dataType = df.schema(feature).dataType val explodeFunc = explode(array(values.map(v => lit(v)): _*).cast(ArrayType(dataType))) @@ -97,7 +97,7 @@ class ICETransformer(override val uid: String) extends Transformer map_from_arrays( collect_list(feature), collect_list(dependenceCol) - ).alias(feature) + ).alias(outputColName) ) case this.individualKind => @@ -108,7 +108,7 @@ class ICETransformer(override val uid: String) extends Transformer map_from_arrays( collect_list(feature), collect_list(targetCol) - ).alias(feature) + ).alias(outputColName) ) } } @@ -124,18 +124,18 @@ class ICETransformer(override val uid: String) extends Transformer // collect feature values for all features from original dataset - dfWithId val (categoricalFeatures, numericFeatures) = (this.getCategoricalFeatures, this.getNumericFeatures) - + val sampled: Dataset[Row] = this.get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => val values = collectCategoricalValues(dfWithId, f) - calcDependence(sampled, idCol, targetClasses, f.name, values) + calcDependence(sampled, idCol, targetClasses, f.name, values, f.getOutputColName) } val calcNumericFunc: ICENumericFeature => DataFrame = { f: ICENumericFeature => val values = collectSplits(dfWithId, f) - calcDependence(sampled, idCol, targetClasses, f.name, values) + calcDependence(sampled, idCol, targetClasses, f.name, values, f.getOutputColName) } val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) @@ -146,12 +146,12 @@ class ICETransformer(override val uid: String) extends Transformer .map { df => (categoricalFeatures ++ numericFeatures).foldLeft(df) { - case (accDf, feature) => accDf.withColumnRenamed(feature.name, feature.name + "_dependence") + case (accDf, feature) => accDf//.withColumnRenamed(feature.name, feature.getOutputColName) } } .map(sampled.join(_, Seq(idCol), "inner").drop(idCol)).get case this.averageKind => - dependenceDfs.reduceOption(_ crossJoin _).get + dependenceDfs.reduce(_ crossJoin _) } } @@ -226,6 +226,18 @@ class ICETransformer(override val uid: String) extends Transformer override def copy(extra: ParamMap): Transformer = this.defaultCopy(extra) override def transformSchema(schema: StructType): StructType = { + // Check the data type for categorical features + val categoricalFeaturesTypes= getCategoricalFeatures.map(_.name).map(schema(_).dataType) + val allowedCategoricalTypes = Array(StringType, 
BooleanType, ByteType, ShortType, IntegerType, LongType) + require(categoricalFeaturesTypes.forall(allowedCategoricalTypes.contains(_)), + s"Data type for categorical features must be String, Boolean, Byte, Short, Integer or Long type.") + + // Check the data type for numeric features + val numericFeaturesTypes= getNumericFeatures.map(_.name).map(schema(_).dataType) + val allowedNumericTypes = Array(FloatType, DoubleType, DecimalType) + require(numericFeaturesTypes.forall(allowedNumericTypes.contains(_)), + s"Data type for numeric features must be Float, Double or Decimal type.") + // Check if features are specified val featureNames = getCategoricalFeatures.map(_.name) ++ getNumericFeatures.map(_.name) if (featureNames.isEmpty) { @@ -238,7 +250,7 @@ class ICETransformer(override val uid: String) extends Transformer throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") } this.validateSchema(schema) - schema.add(getOutputCol, ArrayType(VectorType)) + schema } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index 2023bb489b..b12293fe32 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -1,9 +1,14 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + package com.microsoft.azure.synapse.ml.explainers import spray.json._ -private[explainers] abstract class ICEFeature(val name: String) { +private[explainers] abstract class ICEFeature(val name: String, outputColName: Option[String] = None) { def validate: Boolean + private val defaultOutputColName = name + "_dependence" + def getOutputColName: String = this.outputColName.getOrElse(defaultOutputColName) } /** @@ -11,9 +16,12 @@ private[explainers] abstract class ICEFeature(val name: String) { * @param name The name of the categorical feature. * @param numTopValues The max number of top-occurring values to be included in the categorical feature. * Default: 100. + * @param outputColName The name for output column with explanations for the feature. + * Default: input name of the feature + _dependence. */ -case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None) - extends ICEFeature(name) { +case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None, + outputColName: Option[String] = None) + extends ICEFeature(name, outputColName) { override def validate: Boolean = { numTopValues.forall(_ > 0) } @@ -39,13 +47,17 @@ object ICECategoricalFeature { case Some(JsNumber(value)) => Some(value.toInt) case _ => None } - - ICECategoricalFeature(name, numTopValues) + val outputColName = fields.get("outputColName") match { + case Some(JsString(value)) => Some(value) + case _ => None + } + ICECategoricalFeature(name, numTopValues, outputColName) } override def write(obj: ICECategoricalFeature): JsValue = { val map = Map("name" -> JsString(obj.name))++ - obj.numTopValues.map("numTopValues" -> JsNumber(_)) + obj.numTopValues.map("numTopValues" -> JsNumber(_))++ + obj.outputColName.map("outputColName" -> JsString(_)) JsObject(map) } } @@ -60,10 +72,13 @@ object ICECategoricalFeature { * it will be computed from the background dataset. * @param rangeMax Specifies the max value of the range for the numeric feature. 
If not specified, * it will be computed from the background dataset. + * @param outputColName The name for output column with explanations for the feature. + * Default: input name of the feature + "_dependence" */ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, - rangeMin: Option[Double] = None, rangeMax: Option[Double] = None) - extends ICEFeature(name) { + rangeMin: Option[Double] = None, rangeMax: Option[Double] = None, + outputColName: Option[String] = None) + extends ICEFeature(name, outputColName) { override def validate: Boolean = { // rangeMax and rangeMin may not be specified, but if specified: rangeMin <= rangeMax. numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) @@ -100,7 +115,12 @@ object ICENumericFeature { case JsNumber(value) => value.toDouble } - ICENumericFeature(name, numSplits, rangeMin, rangeMax) + val outputColName = fields.get("outputColName") match { + case Some(JsString(value)) => Some(value) + case _ => None + } + + ICENumericFeature(name, numSplits, rangeMin, rangeMax, outputColName) } @@ -108,7 +128,8 @@ object ICENumericFeature { val map = Map("name" -> JsString(obj.name))++ obj.numSplits.map("numSplits" -> JsNumber(_))++ obj.rangeMin.map("rangeMin" -> JsNumber(_))++ - obj.rangeMax.map("rangeMax" -> JsNumber(_)) + obj.rangeMax.map("rangeMax" -> JsNumber(_))++ + obj.outputColName.map("outputColName" -> JsString(_)) JsObject(map) } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index d1f11d1ba1..1d81fdca53 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -1,3 +1,6 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. 
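The outputColName plumbing introduced above decides how each dependence column is named: when it is left out, the default is the feature name plus a "_dependence" suffix. A small sketch of both forms; the feature and column names are illustrative only:

    val byDefault = ICECategoricalFeature("col2")
    val renamed = ICECategoricalFeature("col2", numTopValues = Some(5), outputColName = Some("col2_pdp"))
    assert(byDefault.getOutputColName == "col2_dependence")
    assert(renamed.getOutputColName == "col2_pdp")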
+ package com.microsoft.azure.synapse.ml.explainers.split1 import org.apache.spark.ml.{Pipeline, PipelineModel} @@ -6,7 +9,7 @@ import com.microsoft.azure.synapse.ml.core.test.base.TestBase import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ -import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} +import org.apache.spark.ml.classification.LogisticRegression import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENumericFeature, ICETransformer} import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable @@ -16,10 +19,10 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] import spark.implicits._ val dataDF: DataFrame = (1 to 100).flatMap(_ => Seq( - (-5d, "a", -5d, 0), - (-5d, "b", -5d, 0), - (5d, "a", 5d, 1), - (5d, "b", 5d, 1) + (-5, "a", -5, 0), + (-5, "b", -5, 0), + (5, "a", 5, 1), + (5, "b", 5, 1) )).toDF("col1", "col2", "col3", "label") val data: DataFrame = dataDF.withColumn("col4", rand()*100) @@ -32,18 +35,15 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] )) val model: PipelineModel = pipeline.fit(data) - val ice = new ICETransformer() ice.setModel(model) - .setOutputCol("iceValues") .setTargetCol("probability") - .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col4", Some(4)))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col3", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) val iceAvg = new ICETransformer() iceAvg.setModel(model) - .setOutputCol("iceValues") .setTargetCol("probability") .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"))) .setNumericFeatures(Array(ICENumericFeature("col4", Some(5)))) @@ -53,7 +53,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] test("col2 doesn't contribute to the prediction.") { - val outputCol2: Map[String, Vector] = outputAvg.select("col2").collect().map { + val outputCol2: Map[String, Vector] = outputAvg.select("col2_dependence").collect().map { case Row(map: Map[String, Vector]) => map }.head @@ -68,7 +68,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] test("The length of explainer map for numeric feature is equal to it's numSplits.") { - val outputCol1: Map[Double, Vector] = outputAvg.select("col4").collect().map { + val outputCol1: Map[Double, Vector] = outputAvg.select("col4_dependence").collect().map { case Row(map: Map[Double, Vector]) => map }.head @@ -77,20 +77,19 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] } - test("The length of explainer map for categorical feature is equal to it's numTopValues.") { - val outputCol: Map[Double, Vector] = output.select("col4_dependence").collect().map { + test("The length of explainer map for categorical feature is less or equal to it's numTopValues.") { + val outputCol: Map[Double, Vector] = output.select("col3_dependence").collect().map { case Row(map: Map[Double, Vector]) => map }.head - assert(outputCol.size === ice.getCategoricalFeatures.last.getNumTopValue) + assert(outputCol.size <= ice.getCategoricalFeatures.last.getNumTopValue) } test("No features specified.") { val ice = new ICETransformer() ice.setModel(model) - 
.setOutputCol("iceValues") .setTargetCol("probability") .setTargetClasses(Array(1)) assertThrows[Exception](ice.transform(data)) @@ -99,7 +98,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] test("Duplicate features specified.") { val ice = new ICETransformer() ice.setModel(model) - .setOutputCol("iceValues") .setTargetCol("probability") .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"), ICECategoricalFeature("col1"))) @@ -111,9 +109,8 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] val ice = new ICETransformer() ice.setNumSamples(2) .setModel(model) - .setOutputCol("iceValues") .setTargetCol("probability") - .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col4", Some(4)))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col3", Some(4)))) .setTargetClasses(Array(1)) val output = ice.transform(data) assert(output.count() == 2) From 8045357a13eeafebed0a9af4e42458099bbfc1fc Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Mon, 29 Nov 2021 14:15:28 -0800 Subject: [PATCH 22/32] fix comments 2 --- .../azure/synapse/ml/explainers/ICEExplainer.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 121ce9a07a..235c1185c0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -143,12 +143,6 @@ class ICETransformer(override val uid: String) extends Transformer getKind.toLowerCase match { case this.individualKind => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) - .map { - df => - (categoricalFeatures ++ numericFeatures).foldLeft(df) { - case (accDf, feature) => accDf//.withColumnRenamed(feature.name, feature.getOutputColName) - } - } .map(sampled.join(_, Seq(idCol), "inner").drop(idCol)).get case this.averageKind => dependenceDfs.reduce(_ crossJoin _) From 2c207d30d735a5dddc7dd48f6f04c03dd75d1e72 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 2 Dec 2021 17:27:01 -0800 Subject: [PATCH 23/32] last fix --- .../python/synapse/ml/explainers/ICEFeature.py | 10 ++++++---- .../synapse/ml/explainers/ICEExplainer.scala | 15 ++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/src/main/python/synapse/ml/explainers/ICEFeature.py b/core/src/main/python/synapse/ml/explainers/ICEFeature.py index 6bc8f5ce0d..40438c4926 100644 --- a/core/src/main/python/synapse/ml/explainers/ICEFeature.py +++ b/core/src/main/python/synapse/ml/explainers/ICEFeature.py @@ -2,21 +2,23 @@ from pyspark import SparkContext class ICECategoricalFeature(JavaWrapper): - def __init__(self, col: str, numTopValues: int = None): + def __init__(self, col: str, numTopValues: int = None, outputColName: str = None): sc = SparkContext._active_spark_context numTopValues = sc._jvm.scala.Some(numTopValues) if numTopValues else sc._jvm.scala.Option.empty() - self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature", col, numTopValues) + outputColName = sc._jvm.scala.Some(outputColName) if outputColName else sc._jvm.scala.Option.empty() + self._java_obj = 
JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature", col, numTopValues, outputColName) def getObject(self): return self._java_obj class ICENumericFeature(JavaWrapper): - def __init__(self, col: str, numSplits: int = None, rangeMin: float = None, rangeMax: float = None): + def __init__(self, col: str, numSplits: int = None, rangeMin: float = None, rangeMax: float = None, outputColName: str = None): sc = SparkContext._active_spark_context numSplits = sc._jvm.scala.Some(numSplits) if numSplits else sc._jvm.scala.Option.empty() rangeMin = sc._jvm.scala.Some(rangeMin) if rangeMin else sc._jvm.scala.Option.empty() rangeMax = sc._jvm.scala.Some(rangeMax) if rangeMax else sc._jvm.scala.Option.empty() - self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICENumericFeature", col, numSplits, rangeMin, rangeMax) + outputColName = sc._jvm.scala.Some(outputColName) if outputColName else sc._jvm.scala.Option.empty() + self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICENumericFeature", col, numSplits, rangeMin, rangeMax, outputColName) def getObject(self): diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 235c1185c0..7f48903d14 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -61,6 +61,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { * average - across all the samples in the dataset. * Note: This transformer only supports one-way dependence plot. */ +@org.apache.spark.annotation.Experimental class ICETransformer(override val uid: String) extends Transformer with HasExplainTarget with HasModel @@ -221,19 +222,19 @@ class ICETransformer(override val uid: String) extends Transformer override def transformSchema(schema: StructType): StructType = { // Check the data type for categorical features - val categoricalFeaturesTypes= getCategoricalFeatures.map(_.name).map(schema(_).dataType) + val categoricalFeaturesTypes= getCategoricalFeatures.map(f => schema(f.name).dataType) val allowedCategoricalTypes = Array(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType) - require(categoricalFeaturesTypes.forall(allowedCategoricalTypes.contains(_)), - s"Data type for categorical features must be String, Boolean, Byte, Short, Integer or Long type.") + require(categoricalFeaturesTypes.forall(allowedCategoricalTypes.contains), + s"Data type for categorical features must be ${allowedCategoricalTypes.mkString("[", ",", "]")}.") // Check the data type for numeric features - val numericFeaturesTypes= getNumericFeatures.map(_.name).map(schema(_).dataType) + val numericFeaturesTypes= getNumericFeatures.map(f => schema(f.name).dataType) val allowedNumericTypes = Array(FloatType, DoubleType, DecimalType) - require(numericFeaturesTypes.forall(allowedNumericTypes.contains(_)), - s"Data type for numeric features must be Float, Double or Decimal type.") + require(numericFeaturesTypes.forall(allowedNumericTypes.contains), + s"Data type for numeric features must be ${allowedNumericTypes.mkString("[", ",", "]")}.") // Check if features are specified - val featureNames = getCategoricalFeatures.map(_.name) ++ getNumericFeatures.map(_.name) + val featureNames = (getCategoricalFeatures ++ getNumericFeatures).map(_.name) if 
(featureNames.isEmpty) { throw new Exception("No categorical features or numeric features are set to the explainer. " + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") From fa87e5cc699d226b4fa1b15267270d3196b9b82f Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 2 Dec 2021 18:01:00 -0800 Subject: [PATCH 24/32] added copyright to py files --- core/src/main/python/synapse/ml/explainers/ICEFeature.py | 3 +++ core/src/main/python/synapse/ml/explainers/ICETransformer.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/core/src/main/python/synapse/ml/explainers/ICEFeature.py b/core/src/main/python/synapse/ml/explainers/ICEFeature.py index 40438c4926..10196de191 100644 --- a/core/src/main/python/synapse/ml/explainers/ICEFeature.py +++ b/core/src/main/python/synapse/ml/explainers/ICEFeature.py @@ -1,3 +1,6 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + from pyspark.ml.wrapper import JavaWrapper from pyspark import SparkContext diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py index 7559e59708..c45f9dd87c 100644 --- a/core/src/main/python/synapse/ml/explainers/ICETransformer.py +++ b/core/src/main/python/synapse/ml/explainers/ICETransformer.py @@ -1,3 +1,6 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + from synapse.ml.explainers._ICETransformer import _ICETransformer from pyspark.ml.common import inherit_doc from pyspark import SparkContext From 77b62679e62c2ce01995820190d4e98b2364b532 Mon Sep 17 00:00:00 2001 From: ezherdeva <82470223+ezherdeva@users.noreply.github.com> Date: Fri, 3 Dec 2021 10:58:25 -0800 Subject: [PATCH 25/32] Update src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala Co-authored-by: Kashyap Patel <64443771+ms-kashyap@users.noreply.github.com> --- .../azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala index 303526f7fd..cbefa8665b 100644 --- a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala +++ b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala @@ -241,7 +241,7 @@ class FuzzingTest extends TestBase { "com.microsoft.azure.synapse.ml.explainers.TextSHAP", "com.microsoft.azure.synapse.ml.explainers.VectorLIME", "com.microsoft.azure.synapse.ml.explainers.VectorSHAP", - "com.microsoft.azure.synapse.ml.explainers.ICETransformer" + "com.microsoft.azure.synapse.ml.explainers.ICETransformer", "com.microsoft.azure.synapse.ml.exploratory.AggregateBalanceMeasure", "com.microsoft.azure.synapse.ml.exploratory.DistributionBalanceMeasure", "com.microsoft.azure.synapse.ml.exploratory.FeatureBalanceMeasure" From 7c25c577518d297eb43464750e5084a49f0657c7 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Mon, 6 Dec 2021 22:26:01 -0800 Subject: [PATCH 26/32] fix 2 --- .../synapse/ml/explainers/ICEFeature.py | 28 -------- .../synapse/ml/explainers/ICETransformer.py | 26 -------- .../python/synapse/ml/explainers/__init__.py | 0 .../synapse/ml/explainers/ICEExplainer.scala | 65 +++++++++++++------ .../synapse/ml/explainers/ICEFeature.scala | 36 
+++++++++- .../explainers/split1/ICEExplainerSuite.scala | 47 ++++++++++++-- 6 files changed, 120 insertions(+), 82 deletions(-) delete mode 100644 core/src/main/python/synapse/ml/explainers/ICEFeature.py delete mode 100644 core/src/main/python/synapse/ml/explainers/ICETransformer.py delete mode 100644 core/src/main/python/synapse/ml/explainers/__init__.py diff --git a/core/src/main/python/synapse/ml/explainers/ICEFeature.py b/core/src/main/python/synapse/ml/explainers/ICEFeature.py deleted file mode 100644 index 10196de191..0000000000 --- a/core/src/main/python/synapse/ml/explainers/ICEFeature.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (C) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE in project root for information. - -from pyspark.ml.wrapper import JavaWrapper -from pyspark import SparkContext - -class ICECategoricalFeature(JavaWrapper): - def __init__(self, col: str, numTopValues: int = None, outputColName: str = None): - sc = SparkContext._active_spark_context - numTopValues = sc._jvm.scala.Some(numTopValues) if numTopValues else sc._jvm.scala.Option.empty() - outputColName = sc._jvm.scala.Some(outputColName) if outputColName else sc._jvm.scala.Option.empty() - self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature", col, numTopValues, outputColName) - - def getObject(self): - return self._java_obj - -class ICENumericFeature(JavaWrapper): - def __init__(self, col: str, numSplits: int = None, rangeMin: float = None, rangeMax: float = None, outputColName: str = None): - sc = SparkContext._active_spark_context - numSplits = sc._jvm.scala.Some(numSplits) if numSplits else sc._jvm.scala.Option.empty() - rangeMin = sc._jvm.scala.Some(rangeMin) if rangeMin else sc._jvm.scala.Option.empty() - rangeMax = sc._jvm.scala.Some(rangeMax) if rangeMax else sc._jvm.scala.Option.empty() - outputColName = sc._jvm.scala.Some(outputColName) if outputColName else sc._jvm.scala.Option.empty() - self._java_obj = JavaWrapper._new_java_obj("com.microsoft.azure.synapse.ml.explainers.ICENumericFeature", col, numSplits, rangeMin, rangeMax, outputColName) - - - def getObject(self): - return self._java_obj diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py deleted file mode 100644 index c45f9dd87c..0000000000 --- a/core/src/main/python/synapse/ml/explainers/ICETransformer.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE in project root for information. - -from synapse.ml.explainers._ICETransformer import _ICETransformer -from pyspark.ml.common import inherit_doc -from pyspark import SparkContext - -@inherit_doc -class ICETransformer(_ICETransformer): - def setCategoricalFeatures(self, value): - """ - Args: - categoricalFeatures: The list of categorical features to explain. - """ - sc = SparkContext._active_spark_context - feature_list = [v.getObject() for v in value] - return super().setCategoricalFeatures(sc._jvm.PythonUtils.toSeq(feature_list)) - - def setNumericFeatures(self, value): - """ - Args: - categoricalFeatures: The list of categorical features to explain. 
- """ - sc = SparkContext._active_spark_context - feature_list = [v.getObject() for v in value] - return super().setNumericFeatures(sc._jvm.PythonUtils.toSeq(feature_list)) diff --git a/core/src/main/python/synapse/ml/explainers/__init__.py b/core/src/main/python/synapse/ml/explainers/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 7f48903d14..274927c618 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -13,6 +13,8 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import com.microsoft.azure.synapse.ml.codegen.Wrappable +//import scala.collection.JavaConverters +import scala.jdk.CollectionConverters.asScalaBufferConverter trait ICEFeatureParams extends Params with HasNumSamples { @@ -26,9 +28,17 @@ trait ICEFeatureParams extends Params with HasNumSamples { _.forall(_.validate) ) + def setCategoricalFeatures(values: Seq[ICECategoricalFeature]): this.type = this.set(categoricalFeatures, values) def getCategoricalFeatures: Seq[ICECategoricalFeature] = $(categoricalFeatures) + def setCategoricalFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { + val features: Seq[ICECategoricalFeature] = values.asScala.toSeq.map(ICECategoricalFeature.fromMap) + + this.setCategoricalFeatures(features) + //this.set(categoricalFeatures, features) + } + val numericFeatures = new TypedArrayParam[ICENumericFeature] ( this, "numericFeatures", @@ -39,6 +49,14 @@ trait ICEFeatureParams extends Params with HasNumSamples { def setNumericFeatures(values: Seq[ICENumericFeature]): this.type = this.set(numericFeatures, values) def getNumericFeatures: Seq[ICENumericFeature] = $(numericFeatures) + def setNumericFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { + val features: Seq[ICENumericFeature] = values.asScala.toSeq.map(ICENumericFeature.fromMap) + + //this.set(numericFeatures, features) + + this.setNumericFeatures(features) + } + val kind = new Param[String] ( this, "kind", @@ -51,7 +69,8 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getKind: String = $(kind) def setKind(value: String): this.type = set(kind, value) - setDefault(kind -> "individual", numericFeatures -> Seq.empty[ICENumericFeature], + setDefault(kind -> "individual", + numericFeatures -> Seq.empty[ICENumericFeature], categoricalFeatures -> Seq.empty[ICECategoricalFeature]) } @@ -69,11 +88,9 @@ class ICETransformer(override val uid: String) extends Transformer with Wrappable with ComplexParamsWritable { - override protected lazy val pyInternalWrapper = true + //override protected lazy val pyInternalWrapper = true - def this() = { - this(Identifiable.randomUID("ICETransformer")) - } + def this() = this(Identifiable.randomUID("ICETransformer")) private def calcDependence(df: DataFrame, idCol: String, targetClassesColumn: String, feature: String, values: Array[_], outputColName: String): DataFrame = { @@ -121,12 +138,12 @@ class ICETransformer(override val uid: String) extends Transformer val targetClasses = DatasetExtensions.findUnusedColumnName("targetClasses", df) val dfWithId = df .withColumn(idCol, monotonically_increasing_id()) - .withColumn(targetClasses, 
this.get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) + .withColumn(targetClasses, get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) // collect feature values for all features from original dataset - dfWithId - val (categoricalFeatures, numericFeatures) = (this.getCategoricalFeatures, this.getNumericFeatures) + val (categoricalFeatures, numericFeatures) = (getCategoricalFeatures, getNumericFeatures) - val sampled: Dataset[Row] = this.get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache + val sampled: Dataset[Row] = get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => @@ -152,13 +169,11 @@ class ICETransformer(override val uid: String) extends Transformer private def collectCategoricalValues[_](df: DataFrame, feature: ICECategoricalFeature): Array[_] = { val featureCountCol = DatasetExtensions.findUnusedColumnName("__feature__count__", df) - val values = df - .groupBy(col(feature.name)) + df.groupBy(col(feature.name)) .agg(count("*").as(featureCountCol)) .orderBy(col(featureCountCol).desc) .head(feature.getNumTopValue) .map(row => row.get(0)) - values } private def createNSplits(n: Int)(from: Double, to: Double): Seq[Double] = { @@ -222,17 +237,25 @@ class ICETransformer(override val uid: String) extends Transformer override def transformSchema(schema: StructType): StructType = { // Check the data type for categorical features - val categoricalFeaturesTypes= getCategoricalFeatures.map(f => schema(f.name).dataType) val allowedCategoricalTypes = Array(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType) - require(categoricalFeaturesTypes.forall(allowedCategoricalTypes.contains), - s"Data type for categorical features must be ${allowedCategoricalTypes.mkString("[", ",", "]")}.") - - // Check the data type for numeric features - val numericFeaturesTypes= getNumericFeatures.map(f => schema(f.name).dataType) - val allowedNumericTypes = Array(FloatType, DoubleType, DecimalType) - require(numericFeaturesTypes.forall(allowedNumericTypes.contains), - s"Data type for numeric features must be ${allowedNumericTypes.mkString("[", ",", "]")}.") - + getCategoricalFeatures.foreach { + f => + schema(f.name).dataType match { + case StringType| BooleanType | ByteType | ShortType | IntegerType | LongType => + case _ => throw new + Exception(s"Data type for categorical features" + + s" must be ${allowedCategoricalTypes.mkString("[", ",", "]")}.") + } + } + val allowedNumericTypes = Array(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType) + getNumericFeatures.foreach { + f => + schema(f.name).dataType match { + case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | _: DecimalType => + case _ => throw new + Exception(s"Data type for numeric features must be ${allowedNumericTypes.mkString("[", ",", "]")}.") + } + } // Check if features are specified val featureNames = (getCategoricalFeatures ++ getNumericFeatures).map(_.name) if (featureNames.isEmpty) { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index b12293fe32..10b05a2f1d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -52,7 +52,6 @@ object ICECategoricalFeature { case 
_ => None } ICECategoricalFeature(name, numTopValues, outputColName) - } override def write(obj: ICECategoricalFeature): JsValue = { val map = Map("name" -> JsString(obj.name))++ @@ -61,6 +60,19 @@ object ICECategoricalFeature { JsObject(map) } } + def fromMap(inputMap: java.util.HashMap[String, Any]): ICECategoricalFeature = { + val name: String = inputMap.get("name").toString + val numTopValues: Option[Int] = inputMap.get("numTopValues") match { + case value: Integer => Some(Integer2int(value)) + case _ => None + } + val outputColName: Option[String] = inputMap.get("outputColName") match { + case value: String => Some(value) + case _ => None + } + + ICECategoricalFeature(name, numTopValues, outputColName) + } } /** @@ -121,7 +133,6 @@ object ICENumericFeature { } ICENumericFeature(name, numSplits, rangeMin, rangeMax, outputColName) - } override def write(obj: ICENumericFeature): JsValue = { @@ -133,4 +144,25 @@ object ICENumericFeature { JsObject(map) } } + def fromMap(inputMap: java.util.HashMap[String, Any]): ICENumericFeature = { + val name: String = inputMap.get("name").toString + val numSplits: Option[Int] = inputMap.get("numSplits") match { + case value: Integer => Some(Integer2int(value)) + case _ => None + } + val rangeMin: Option[Double] = inputMap.get("rangeMin") match { + case value: java.lang.Double => Some(value.doubleValue()) + case _ => None + } + val rangeMax: Option[Double] = inputMap.get("rangeMax") match { + case value: java.lang.Double => Some(value.doubleValue()) + case _ => None + } + val outputColName = inputMap.get("outputColName") match { + case value: String => Some(value) + case _ => None + } + + ICENumericFeature(name, numSplits, rangeMin, rangeMax, outputColName) + } } \ No newline at end of file diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 1d81fdca53..19f77a3b85 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -14,6 +14,8 @@ import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENume import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable +import scala.jdk.CollectionConverters.mapAsJavaMapConverter + class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] { @@ -27,6 +29,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] val data: DataFrame = dataDF.withColumn("col4", rand()*100) + data.show() val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), @@ -41,15 +44,18 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col3", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) + output.show(truncate = false) val iceAvg = new ICETransformer() iceAvg.setModel(model) .setTargetCol("probability") - .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"))) + .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"), + ICECategoricalFeature("col3"))) 
.setNumericFeatures(Array(ICENumericFeature("col4", Some(5)))) .setTargetClasses(Array(1)) .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) + outputAvg.show(truncate = false) test("col2 doesn't contribute to the prediction.") { @@ -61,9 +67,21 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] val impA: Double = outputCol2.get("a").head.toArray.head val impB: Double = outputCol2.get("b").head.toArray.head - assert(0.4 < impA && impA < 0.6) - assert(0.4 < impB && impB < 0.6) + val eps = 0.01 + assert((impA - impB).abs < eps) + } + + test("col3 contribute to the prediction.") { + + val outputCol3: Map[Int, Vector] = outputAvg.select("col3_dependence").collect().map { + case Row(map: Map[Int, Vector]) => + map + }.head + + val impFirst: Double = outputCol3.get(-5).head.toArray.head + val impSec: Double = outputCol3.get(5).head.toArray.head + assert((impFirst - impSec).abs > 0.4) } test("The length of explainer map for numeric feature is equal to it's numSplits.") { @@ -74,7 +92,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] }.head assert(outputCol1.size == iceAvg.getNumericFeatures.head.getNumSplits + 1) - } test("The length of explainer map for categorical feature is less or equal to it's numTopValues.") { @@ -84,7 +101,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] }.head assert(outputCol.size <= ice.getCategoricalFeatures.last.getNumTopValue) - } test("No features specified.") { @@ -116,6 +132,27 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] assert(output.count() == 2) } + test("ICECategoricalFeature is successfully created from java.util.Map") { + //val map = Map("name" -> "my_name", "numTopValues" -> 100).asJava + val map = new java.util.HashMap[String, Any]() + map.put("name", "my_name") + map.put("numTopValues", 100) + val feature = ICECategoricalFeature.fromMap(map) + println(feature) + assert(feature.name == map.get("name")) + assert(feature.numTopValues.contains(map.get("numTopValues"))) + assert(feature.outputColName.isEmpty) + } + + test("Set categorical") { + val map = new java.util.HashMap[String, Any]() + map.put("name", "col2") + map.put("numTopValues", 2) + val feature = ICECategoricalFeature.fromMap(map) + ice.setCategoricalFeatures(Array(feature)) + assert(ice.getCategoricalFeatures.head == feature) + } + override def testObjects(): Seq[TestObject[ICETransformer]] = Seq(new TestObject(ice, data)) override def reader: MLReadable[_] = ICETransformer } \ No newline at end of file From a11c718c772b80e257a58f2e9182ee0e775ba3c3 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Wed, 8 Dec 2021 17:39:56 -0800 Subject: [PATCH 27/32] fix python issue --- .../synapse/ml/explainers/ICETransformer.py | 24 +++++++++++++++++++ .../synapse/ml/explainers/ICEExplainer.scala | 19 +++++++-------- .../explainers/split1/ICEExplainerSuite.scala | 4 ++-- 3 files changed, 34 insertions(+), 13 deletions(-) create mode 100644 core/src/main/python/synapse/ml/explainers/ICETransformer.py diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py new file mode 100644 index 0000000000..7b5ccd929c --- /dev/null +++ b/core/src/main/python/synapse/ml/explainers/ICETransformer.py @@ -0,0 +1,24 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. 
+ +from synapse.ml.explainers._ICETransformer import _ICETransformer +from pyspark.ml.common import inherit_doc +from typing import List, Dict + +@inherit_doc +class ICETransformer(_ICETransformer): + def setCategoricalFeatures(self, value: List[Dict]): + """ + Args: + value: The list of dicts with parameters for categorical features to explain. + """ + self._java_obj.setCategoricalFeatures(value) + return self + + def setNumericFeatures(self, value: List[Dict]): + """ + Args: + value: The list of dicts with parameters for numeric features to explain. + """ + self._java_obj.setNumericFeatures(value) + return self diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 274927c618..dcbb0f0641 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -13,6 +13,8 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import com.microsoft.azure.synapse.ml.codegen.Wrappable +import scala.collection.mutable + //import scala.collection.JavaConverters import scala.jdk.CollectionConverters.asScalaBufferConverter @@ -28,15 +30,12 @@ trait ICEFeatureParams extends Params with HasNumSamples { _.forall(_.validate) ) - def setCategoricalFeatures(values: Seq[ICECategoricalFeature]): this.type = this.set(categoricalFeatures, values) def getCategoricalFeatures: Seq[ICECategoricalFeature] = $(categoricalFeatures) def setCategoricalFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { - val features: Seq[ICECategoricalFeature] = values.asScala.toSeq.map(ICECategoricalFeature.fromMap) - + val features: Seq[ICECategoricalFeature] = values.asScala.toSeq.map(f => ICECategoricalFeature.fromMap(f)) this.setCategoricalFeatures(features) - //this.set(categoricalFeatures, features) } val numericFeatures = new TypedArrayParam[ICENumericFeature] ( @@ -51,9 +50,6 @@ trait ICEFeatureParams extends Params with HasNumSamples { def setNumericFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { val features: Seq[ICENumericFeature] = values.asScala.toSeq.map(ICENumericFeature.fromMap) - - //this.set(numericFeatures, features) - this.setNumericFeatures(features) } @@ -88,7 +84,7 @@ class ICETransformer(override val uid: String) extends Transformer with Wrappable with ComplexParamsWritable { - //override protected lazy val pyInternalWrapper = true + override protected lazy val pyInternalWrapper = true def this() = this(Identifiable.randomUID("ICETransformer")) @@ -237,8 +233,9 @@ class ICETransformer(override val uid: String) extends Transformer override def transformSchema(schema: StructType): StructType = { // Check the data type for categorical features + val (categoricalFeatures, numericFeatures) = (getCategoricalFeatures, getNumericFeatures) val allowedCategoricalTypes = Array(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType) - getCategoricalFeatures.foreach { + categoricalFeatures.foreach { f => schema(f.name).dataType match { case StringType| BooleanType | ByteType | ShortType | IntegerType | LongType => @@ -248,7 +245,7 @@ class ICETransformer(override val uid: String) extends Transformer } } val allowedNumericTypes = Array(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType) - getNumericFeatures.foreach { + numericFeatures.foreach 
{ f => schema(f.name).dataType match { case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | _: DecimalType => @@ -257,7 +254,7 @@ class ICETransformer(override val uid: String) extends Transformer } } // Check if features are specified - val featureNames = (getCategoricalFeatures ++ getNumericFeatures).map(_.name) + val featureNames = (categoricalFeatures ++ numericFeatures).map(_.name) if (featureNames.isEmpty) { throw new Exception("No categorical features or numeric features are set to the explainer. " + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 19f77a3b85..777f343f67 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -14,7 +14,7 @@ import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENume import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable -import scala.jdk.CollectionConverters.mapAsJavaMapConverter +import scala.jdk.CollectionConverters._ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] { @@ -149,7 +149,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] map.put("name", "col2") map.put("numTopValues", 2) val feature = ICECategoricalFeature.fromMap(map) - ice.setCategoricalFeatures(Array(feature)) + ice.setCategoricalFeatures(List(map).asJava) assert(ice.getCategoricalFeatures.head == feature) } From 6483daf37e286cb3929806327072c49027c9940f Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Wed, 8 Dec 2021 17:48:35 -0800 Subject: [PATCH 28/32] fix python issue (small fix) --- .../microsoft/azure/synapse/ml/explainers/ICEExplainer.scala | 4 ---- .../synapse/ml/explainers/split1/ICEExplainerSuite.scala | 4 ---- 2 files changed, 8 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index dcbb0f0641..7de1100a95 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -12,10 +12,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import com.microsoft.azure.synapse.ml.codegen.Wrappable - -import scala.collection.mutable - -//import scala.collection.JavaConverters import scala.jdk.CollectionConverters.asScalaBufferConverter trait ICEFeatureParams extends Params with HasNumSamples { diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index 777f343f67..c11668f176 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -13,7 +13,6 @@ import org.apache.spark.ml.classification.LogisticRegression import com.microsoft.azure.synapse.ml.explainers.{ICECategoricalFeature, ICENumericFeature, ICETransformer} import 
org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable - import scala.jdk.CollectionConverters._ @@ -29,7 +28,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] val data: DataFrame = dataDF.withColumn("col4", rand()*100) - data.show() val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), @@ -44,7 +42,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col3", Some(4)))) .setTargetClasses(Array(1)) val output: DataFrame = ice.transform(data) - output.show(truncate = false) val iceAvg = new ICETransformer() iceAvg.setModel(model) @@ -55,7 +52,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] .setTargetClasses(Array(1)) .setKind("average") val outputAvg: DataFrame = iceAvg.transform(data) - outputAvg.show(truncate = false) test("col2 doesn't contribute to the prediction.") { From ef2c35e30628b1ec6464e0334e26728aee4539d0 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Thu, 9 Dec 2021 19:29:12 -0800 Subject: [PATCH 29/32] fixed python issue --- .../synapse/ml/explainers/ICETransformer.py | 40 +++++++++++++++---- .../synapse/ml/explainers/ICEExplainer.scala | 10 ++++- .../explainers/split1/ICEExplainerSuite.scala | 2 +- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py index 7b5ccd929c..d860fbf13e 100644 --- a/core/src/main/python/synapse/ml/explainers/ICETransformer.py +++ b/core/src/main/python/synapse/ml/explainers/ICETransformer.py @@ -3,22 +3,46 @@ from synapse.ml.explainers._ICETransformer import _ICETransformer from pyspark.ml.common import inherit_doc -from typing import List, Dict +from typing import List, Dict, Union @inherit_doc class ICETransformer(_ICETransformer): - def setCategoricalFeatures(self, value: List[Dict]): + def setCategoricalFeatures(self, values: Union[List[str], List[Dict]]): """ Args: - value: The list of dicts with parameters for categorical features to explain. + values: The list of values that represent categorical features to explain. + Values are list of dicts with parameters or just a list of names of categorical features """ - self._java_obj.setCategoricalFeatures(value) + if len(values) == 0: + pass + else: + list_values = [] + for value in values: + if isinstance(value, str): + list_values.append({"name": value}) + elif isinstance(value, dict): + list_values.append(value) + else: + pass + self._java_obj.setCategoricalFeaturesPy(list_values) return self - def setNumericFeatures(self, value: List[Dict]): + def setNumericFeatures(self, values: List[Dict]): """ Args: - value: The list of dicts with parameters for numeric features to explain. + values: The list of values that represent numeric features to explain. 
+ Values are list of dicts with parameters or just a list of names of numeric features """ - self._java_obj.setNumericFeatures(value) - return self + if len(values) == 0: + pass + else: + list_values = [] + for value in values: + if isinstance(value, str): + list_values.append({"name": value}) + elif isinstance(value, dict): + list_values.append(value) + else: + pass + self._java_obj.setNumericFeaturesPy(list_values) + return self \ No newline at end of file diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 7de1100a95..1cfa0eddaa 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -12,7 +12,12 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import com.microsoft.azure.synapse.ml.codegen.Wrappable +import org.apache.spark.sql.execution.r.MapPartitionsRWrapper + +import scala.collection.generic.SeqForwarder +import scala.collection.{AbstractSeq, LinearSeq, SeqProxy, SeqViewLike, immutable, mutable} import scala.jdk.CollectionConverters.asScalaBufferConverter +import scala.reflect.ClassTag.AnyVal trait ICEFeatureParams extends Params with HasNumSamples { @@ -29,7 +34,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { def setCategoricalFeatures(values: Seq[ICECategoricalFeature]): this.type = this.set(categoricalFeatures, values) def getCategoricalFeatures: Seq[ICECategoricalFeature] = $(categoricalFeatures) - def setCategoricalFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { + def setCategoricalFeaturesPy(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { val features: Seq[ICECategoricalFeature] = values.asScala.toSeq.map(f => ICECategoricalFeature.fromMap(f)) this.setCategoricalFeatures(features) } @@ -44,11 +49,12 @@ trait ICEFeatureParams extends Params with HasNumSamples { def setNumericFeatures(values: Seq[ICENumericFeature]): this.type = this.set(numericFeatures, values) def getNumericFeatures: Seq[ICENumericFeature] = $(numericFeatures) - def setNumericFeatures(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { + def setNumericFeaturesPy(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { val features: Seq[ICENumericFeature] = values.asScala.toSeq.map(ICENumericFeature.fromMap) this.setNumericFeatures(features) } + val kind = new Param[String] ( this, "kind", diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index c11668f176..e4d4d0ea6d 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -145,7 +145,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] map.put("name", "col2") map.put("numTopValues", 2) val feature = ICECategoricalFeature.fromMap(map) - ice.setCategoricalFeatures(List(map).asJava) + ice.setCategoricalFeaturesPy(List(map).asJava) assert(ice.getCategoricalFeatures.head == feature) } From 8c3a6dcd5d1d9949d4a283d81f1309a50ac443bb Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Fri, 10 Dec 
2021 16:55:56 -0800 Subject: [PATCH 30/32] fixed comments and add more docs --- .../synapse/ml/explainers/ICEExplainer.scala | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 1cfa0eddaa..d2ddd85478 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -12,12 +12,8 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.stat.Summarizer import com.microsoft.azure.synapse.ml.codegen.Wrappable -import org.apache.spark.sql.execution.r.MapPartitionsRWrapper - -import scala.collection.generic.SeqForwarder -import scala.collection.{AbstractSeq, LinearSeq, SeqProxy, SeqViewLike, immutable, mutable} import scala.jdk.CollectionConverters.asScalaBufferConverter -import scala.reflect.ClassTag.AnyVal + trait ICEFeatureParams extends Params with HasNumSamples { @@ -104,7 +100,7 @@ class ICETransformer(override val uid: String) extends Transformer val result = predicted.withColumn(targetCol, explainTarget) getKind.toLowerCase match { - case this.averageKind => + case `averageKind` => // PDP output schema: 1 row * 1 col (pdp for the given feature: feature_value -> explanations) result .groupBy(feature) @@ -116,7 +112,7 @@ class ICETransformer(override val uid: String) extends Transformer ).alias(outputColName) ) - case this.individualKind => + case `individualKind` => // ICE output schema: n rows * 2 cols (idCol + ice for the given feature: map(feature_value -> explanations)) result .groupBy(idCol) @@ -138,11 +134,13 @@ class ICETransformer(override val uid: String) extends Transformer .withColumn(idCol, monotonically_increasing_id()) .withColumn(targetClasses, get(targetClassesCol).map(col).getOrElse(lit(getTargetClasses))) - // collect feature values for all features from original dataset - dfWithId + // Collect feature values for all features from original dataset - dfWithId val (categoricalFeatures, numericFeatures) = (getCategoricalFeatures, getNumericFeatures) + // If numSamples is specified, randomly pick numSamples instances from the input dataset val sampled: Dataset[Row] = get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache + // Collect values from the input dataframe and create dependenceDF from them val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { f: ICECategoricalFeature => val values = collectCategoricalValues(dfWithId, f) @@ -156,11 +154,14 @@ class ICETransformer(override val uid: String) extends Transformer val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) + // In the case of ICE, the function will return the initial df with columns corresponding to each feature to explain + // In the case of PDP the function will return df with a shape (1 row * number of features to explain) + getKind.toLowerCase match { - case this.individualKind => + case `individualKind` => dependenceDfs.reduceOption(_.join(_, Seq(idCol), "inner")) .map(sampled.join(_, Seq(idCol), "inner").drop(idCol)).get - case this.averageKind => + case `averageKind` => dependenceDfs.reduce(_ crossJoin _) } } From e49201442aa2c99d2b976909dd603233c2dab857 Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Wed, 15 Dec 2021 15:11:01 -0800 
Subject: [PATCH 31/32] fix comments --- .../synapse/ml/explainers/ICETransformer.py | 4 +- .../synapse/ml/explainers/ICEExplainer.scala | 34 ++++---- .../synapse/ml/explainers/ICEFeature.scala | 84 ++++--------------- .../explainers/split1/ICEExplainerSuite.scala | 65 +++++++------- 4 files changed, 64 insertions(+), 123 deletions(-) diff --git a/core/src/main/python/synapse/ml/explainers/ICETransformer.py b/core/src/main/python/synapse/ml/explainers/ICETransformer.py index d860fbf13e..24e947af65 100644 --- a/core/src/main/python/synapse/ml/explainers/ICETransformer.py +++ b/core/src/main/python/synapse/ml/explainers/ICETransformer.py @@ -7,7 +7,7 @@ @inherit_doc class ICETransformer(_ICETransformer): - def setCategoricalFeatures(self, values: Union[List[str], List[Dict]]): + def setCategoricalFeatures(self, values: List[Union[str, Dict]]): """ Args: values: The list of values that represent categorical features to explain. @@ -27,7 +27,7 @@ def setCategoricalFeatures(self, values: Union[List[str], List[Dict]]): self._java_obj.setCategoricalFeaturesPy(list_values) return self - def setNumericFeatures(self, values: List[Dict]): + def setNumericFeatures(self, values: List[Union[str, Dict]]): """ Args: values: The list of values that represent numeric features to explain. diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index d2ddd85478..9f7664f71d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -31,7 +31,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getCategoricalFeatures: Seq[ICECategoricalFeature] = $(categoricalFeatures) def setCategoricalFeaturesPy(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { - val features: Seq[ICECategoricalFeature] = values.asScala.toSeq.map(f => ICECategoricalFeature.fromMap(f)) + val features: Seq[ICECategoricalFeature] = values.asScala.map(f => ICECategoricalFeature.fromMap(f)) this.setCategoricalFeatures(features) } @@ -46,7 +46,7 @@ trait ICEFeatureParams extends Params with HasNumSamples { def getNumericFeatures: Seq[ICENumericFeature] = $(numericFeatures) def setNumericFeaturesPy(values: java.util.List[java.util.HashMap[String, Any]]): this.type = { - val features: Seq[ICENumericFeature] = values.asScala.toSeq.map(ICENumericFeature.fromMap) + val features: Seq[ICENumericFeature] = values.asScala.map(ICENumericFeature.fromMap) this.setNumericFeatures(features) } @@ -141,18 +141,16 @@ class ICETransformer(override val uid: String) extends Transformer val sampled: Dataset[Row] = get(numSamples).map(dfWithId.orderBy(rand()).limit).getOrElse(dfWithId).cache // Collect values from the input dataframe and create dependenceDF from them - val calcCategoricalFunc: ICECategoricalFeature => DataFrame = { - f: ICECategoricalFeature => - val values = collectCategoricalValues(dfWithId, f) - calcDependence(sampled, idCol, targetClasses, f.name, values, f.getOutputColName) + val features = categoricalFeatures ++ numericFeatures + val dependenceDfs= features.map { + case f: ICECategoricalFeature => + (f, collectCategoricalValues(dfWithId, f)) + case f: ICENumericFeature => + (f, collectSplits(dfWithId, f)) + }.map { + case (f, values) => + calcDependence(sampled, idCol, targetClasses, f.getName, values, f.getOutputColName) } - val calcNumericFunc: ICENumericFeature => 
DataFrame = { - f: ICENumericFeature => - val values = collectSplits(dfWithId, f) - calcDependence(sampled, idCol, targetClasses, f.name, values, f.getOutputColName) - } - - val dependenceDfs = (categoricalFeatures map calcCategoricalFunc) ++ (numericFeatures map calcNumericFunc) // In the case of ICE, the function will return the initial df with columns corresponding to each feature to explain // In the case of PDP the function will return df with a shape (1 row * number of features to explain) @@ -167,10 +165,10 @@ class ICETransformer(override val uid: String) extends Transformer } private def collectCategoricalValues[_](df: DataFrame, feature: ICECategoricalFeature): Array[_] = { - val featureCountCol = DatasetExtensions.findUnusedColumnName("__feature__count__", df) + val featureCount = DatasetExtensions.findUnusedColumnName("__feature__count__", df) df.groupBy(col(feature.name)) - .agg(count("*").as(featureCountCol)) - .orderBy(col(featureCountCol).desc) + .agg(count("*").as(featureCount)) + .orderBy(col(featureCount).desc) .head(feature.getNumTopValue) .map(row => row.get(0)) } @@ -257,7 +255,7 @@ class ICETransformer(override val uid: String) extends Transformer } } // Check if features are specified - val featureNames = (categoricalFeatures ++ numericFeatures).map(_.name) + val featureNames = (categoricalFeatures ++ numericFeatures).map(_.getName) if (featureNames.isEmpty) { throw new Exception("No categorical features or numeric features are set to the explainer. " + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") @@ -267,7 +265,7 @@ class ICETransformer(override val uid: String) extends Transformer if (duplicateFeatureNames.nonEmpty) { throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") } - this.validateSchema(schema) + validateSchema(schema) schema } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index 10b05a2f1d..afb32cfd35 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -3,12 +3,14 @@ package com.microsoft.azure.synapse.ml.explainers +import spray.json.DefaultJsonProtocol._ import spray.json._ -private[explainers] abstract class ICEFeature(val name: String, outputColName: Option[String] = None) { +private[explainers] abstract class ICEFeature(name: String, outputColName: Option[String] = None) { def validate: Boolean private val defaultOutputColName = name + "_dependence" def getOutputColName: String = this.outputColName.getOrElse(defaultOutputColName) + def getName: String = name } /** @@ -19,16 +21,15 @@ private[explainers] abstract class ICEFeature(val name: String, outputColName: O * @param outputColName The name for output column with explanations for the feature. * Default: input name of the feature + _dependence. 
*/ -case class ICECategoricalFeature(override val name: String, numTopValues: Option[Int] = None, +case class ICECategoricalFeature(name: String, numTopValues: Option[Int] = None, outputColName: Option[String] = None) extends ICEFeature(name, outputColName) { override def validate: Boolean = { numTopValues.forall(_ > 0) } - private val defaultNumTopValue = 100 def getNumTopValue: Int = { - this.numTopValues.getOrElse(defaultNumTopValue) + this.numTopValues.getOrElse(ICECategoricalFeature.DefaultNumTopValue) } } @@ -36,36 +37,15 @@ case class ICECategoricalFeature(override val name: String, numTopValues: Option * Companion object to provide JSON serializer and deserializer for ICECategoricalFeature. */ object ICECategoricalFeature { - implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = new JsonFormat[ICECategoricalFeature] { - override def read(json: JsValue): ICECategoricalFeature = { - val fields = json.asJsObject.fields - val name = fields("name") match { - case JsString(value) => value - case _ => throw new Exception("The name field must be a JsString.") - } - val numTopValues = fields.get("numTopValues") match { - case Some(JsNumber(value)) => Some(value.toInt) - case _ => None - } - val outputColName = fields.get("outputColName") match { - case Some(JsString(value)) => Some(value) - case _ => None - } - ICECategoricalFeature(name, numTopValues, outputColName) - } - override def write(obj: ICECategoricalFeature): JsValue = { - val map = Map("name" -> JsString(obj.name))++ - obj.numTopValues.map("numTopValues" -> JsNumber(_))++ - obj.outputColName.map("outputColName" -> JsString(_)) - JsObject(map) - } - } + val DefaultNumTopValue: Int = 100 + implicit val JsonFormat: JsonFormat[ICECategoricalFeature] = jsonFormat3(ICECategoricalFeature.apply) def fromMap(inputMap: java.util.HashMap[String, Any]): ICECategoricalFeature = { val name: String = inputMap.get("name").toString val numTopValues: Option[Int] = inputMap.get("numTopValues") match { case value: Integer => Some(Integer2int(value)) case _ => None } + val outputColName: Option[String] = inputMap.get("outputColName") match { case value: String => Some(value) case _ => None @@ -87,7 +67,7 @@ object ICECategoricalFeature { * @param outputColName The name for output column with explanations for the feature. * Default: input name of the feature + "_dependence" */ -case class ICENumericFeature(override val name: String, numSplits: Option[Int] = None, +case class ICENumericFeature(name: String, numSplits: Option[Int] = None, rangeMin: Option[Double] = None, rangeMax: Option[Double] = None, outputColName: Option[String] = None) extends ICEFeature(name, outputColName) { @@ -96,9 +76,8 @@ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = numSplits.forall(_ > 0) && (rangeMax.isEmpty || rangeMin.isEmpty || rangeMin.get <= rangeMax.get) } - private val defaultNumSplits = 10 def getNumSplits: Int = { - this.numSplits.getOrElse(defaultNumSplits) + this.numSplits.getOrElse(ICENumericFeature.DefaultNumSplits) } } @@ -106,58 +85,25 @@ case class ICENumericFeature(override val name: String, numSplits: Option[Int] = * Companion object to provide JSON serializer and deserializer for ICENumericFeature. 
*/ object ICENumericFeature { - implicit val JsonFormat: JsonFormat[ICENumericFeature] = new JsonFormat[ICENumericFeature] { - override def read(json: JsValue): ICENumericFeature = { - val fields = json.asJsObject.fields - val name = fields("name") match { - case JsString(value) => value - case _ => throw new Exception("The name field must be a JsString.") - } - - val numSplits = fields.get("numSplits") match { - case Some(JsNumber(value)) => Some(value.toInt) - case _ => None - } - - val rangeMin = fields.get("rangeMin").map { - case JsNumber(value) => value.toDouble - } - - val rangeMax = fields.get("rangeMax").map { - case JsNumber(value) => value.toDouble - } - - val outputColName = fields.get("outputColName") match { - case Some(JsString(value)) => Some(value) - case _ => None - } - - ICENumericFeature(name, numSplits, rangeMin, rangeMax, outputColName) - } - - override def write(obj: ICENumericFeature): JsValue = { - val map = Map("name" -> JsString(obj.name))++ - obj.numSplits.map("numSplits" -> JsNumber(_))++ - obj.rangeMin.map("rangeMin" -> JsNumber(_))++ - obj.rangeMax.map("rangeMax" -> JsNumber(_))++ - obj.outputColName.map("outputColName" -> JsString(_)) - JsObject(map) - } - } + val DefaultNumSplits: Int = 10 + implicit val JsonFormat: JsonFormat[ICENumericFeature] = jsonFormat5(ICENumericFeature.apply) def fromMap(inputMap: java.util.HashMap[String, Any]): ICENumericFeature = { val name: String = inputMap.get("name").toString val numSplits: Option[Int] = inputMap.get("numSplits") match { case value: Integer => Some(Integer2int(value)) case _ => None } + val rangeMin: Option[Double] = inputMap.get("rangeMin") match { case value: java.lang.Double => Some(value.doubleValue()) case _ => None } + val rangeMax: Option[Double] = inputMap.get("rangeMax") match { case value: java.lang.Double => Some(value.doubleValue()) case _ => None } + val outputColName = inputMap.get("outputColName") match { case value: String => Some(value) case _ => None diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala index e4d4d0ea6d..08376dc3f0 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/ICEExplainerSuite.scala @@ -19,88 +19,86 @@ import scala.jdk.CollectionConverters._ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] { import spark.implicits._ - val dataDF: DataFrame = (1 to 100).flatMap(_ => Seq( + lazy val dataDF: DataFrame = (1 to 100).flatMap(_ => Seq( (-5, "a", -5, 0), (-5, "b", -5, 0), (5, "a", 5, 1), (5, "b", 5, 1) )).toDF("col1", "col2", "col3", "label") - val data: DataFrame = dataDF.withColumn("col4", rand()*100) + lazy val data: DataFrame = dataDF.withColumn("col4", rand()*100) - val pipeline: Pipeline = new Pipeline().setStages(Array( + lazy val pipeline: Pipeline = new Pipeline().setStages(Array( new StringIndexer().setInputCol("col2").setOutputCol("col2_ind"), new OneHotEncoder().setInputCol("col2_ind").setOutputCol("col2_enc"), new VectorAssembler().setInputCols(Array("col1", "col2_enc", "col3", "col4")).setOutputCol("features"), new LogisticRegression().setLabelCol("label").setFeaturesCol("features") )) - val model: PipelineModel = pipeline.fit(data) + lazy val model: PipelineModel = pipeline.fit(data) - val ice = new ICETransformer() - ice.setModel(model) + lazy val ice: 
ICETransformer = new ICETransformer() + .setModel(model) .setTargetCol("probability") .setCategoricalFeatures(Array(ICECategoricalFeature("col2", Some(2)), ICECategoricalFeature("col3", Some(4)))) .setTargetClasses(Array(1)) - val output: DataFrame = ice.transform(data) + lazy val output: DataFrame = ice.transform(data).cache() - val iceAvg = new ICETransformer() - iceAvg.setModel(model) + lazy val iceAvg: ICETransformer = new ICETransformer() + .setModel(model) .setTargetCol("probability") .setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), ICECategoricalFeature("col2"), ICECategoricalFeature("col3"))) .setNumericFeatures(Array(ICENumericFeature("col4", Some(5)))) .setTargetClasses(Array(1)) .setKind("average") - val outputAvg: DataFrame = iceAvg.transform(data) + lazy val outputAvg: DataFrame = iceAvg.transform(data).cache() + + // Helper function which returns value from first row in a column specified by "colName". + def getFirstValueFromOutput(output: DataFrame, colName: String): Map[_, Vector] = { + output.select(colName).collect().map { + case Row(map: Map[String, Vector]) => map + case Row(map: Map[Int, Vector]) => map + case Row(map: Map[Double, Vector]) => map + }.head + } test("col2 doesn't contribute to the prediction.") { - - val outputCol2: Map[String, Vector] = outputAvg.select("col2_dependence").collect().map { - case Row(map: Map[String, Vector]) => - map - }.head + val outputCol2: Map[String, Vector] = + getFirstValueFromOutput(outputAvg, "col2_dependence").asInstanceOf[Map[String, Vector]] val impA: Double = outputCol2.get("a").head.toArray.head val impB: Double = outputCol2.get("b").head.toArray.head - val eps = 0.01 assert((impA - impB).abs < eps) } test("col3 contribute to the prediction.") { - val outputCol3: Map[Int, Vector] = outputAvg.select("col3_dependence").collect().map { - case Row(map: Map[Int, Vector]) => - map - }.head + val outputCol3: Map[Int, Vector] = + getFirstValueFromOutput(outputAvg, "col3_dependence").asInstanceOf[Map[Int, Vector]] val impFirst: Double = outputCol3.get(-5).head.toArray.head val impSec: Double = outputCol3.get(5).head.toArray.head - assert((impFirst - impSec).abs > 0.4) } test("The length of explainer map for numeric feature is equal to it's numSplits.") { - val outputCol1: Map[Double, Vector] = outputAvg.select("col4_dependence").collect().map { - case Row(map: Map[Double, Vector]) => - map - }.head + val outputCol1: Map[Double, Vector] = + getFirstValueFromOutput(outputAvg, "col4_dependence").asInstanceOf[Map[Double, Vector]] assert(outputCol1.size == iceAvg.getNumericFeatures.head.getNumSplits + 1) } test("The length of explainer map for categorical feature is less or equal to it's numTopValues.") { - val outputCol: Map[Double, Vector] = output.select("col3_dependence").collect().map { - case Row(map: Map[Double, Vector]) => - map - }.head + val outputCol: Map[Double, Vector] = + getFirstValueFromOutput(output, "col3_dependence").asInstanceOf[Map[Double, Vector]] assert(outputCol.size <= ice.getCategoricalFeatures.last.getNumTopValue) } test("No features specified.") { - val ice = new ICETransformer() + val ice: ICETransformer = new ICETransformer() ice.setModel(model) .setTargetCol("probability") .setTargetClasses(Array(1)) @@ -108,7 +106,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] } test("Duplicate features specified.") { - val ice = new ICETransformer() + val ice: ICETransformer = new ICETransformer() ice.setModel(model) .setTargetCol("probability") 
.setCategoricalFeatures(Array(ICECategoricalFeature("col1", Some(100)), @@ -118,7 +116,7 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] } test("When setNumSamples is called, ICE returns correct number of rows.") { - val ice = new ICETransformer() + val ice: ICETransformer = new ICETransformer() ice.setNumSamples(2) .setModel(model) .setTargetCol("probability") @@ -129,7 +127,6 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] } test("ICECategoricalFeature is successfully created from java.util.Map") { - //val map = Map("name" -> "my_name", "numTopValues" -> 100).asJava val map = new java.util.HashMap[String, Any]() map.put("name", "my_name") map.put("numTopValues", 100) @@ -151,4 +148,4 @@ class ICEExplainerSuite extends TestBase with TransformerFuzzing[ICETransformer] override def testObjects(): Seq[TestObject[ICETransformer]] = Seq(new TestObject(ice, data)) override def reader: MLReadable[_] = ICETransformer -} \ No newline at end of file +} From 61624ded4ee447f0225ce44e2161a31508523e7f Mon Sep 17 00:00:00 2001 From: Elena Zherdeva Date: Wed, 15 Dec 2021 15:54:54 -0800 Subject: [PATCH 32/32] fix code style --- .../azure/synapse/ml/explainers/ICEExplainer.scala | 11 ++++++++--- .../azure/synapse/ml/explainers/ICEFeature.scala | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index 9f7664f71d..9bf5f2650c 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -50,7 +50,6 @@ trait ICEFeatureParams extends Params with HasNumSamples { this.setNumericFeatures(features) } - val kind = new Param[String] ( this, "kind", @@ -197,6 +196,7 @@ class ICETransformer(override val uid: String) extends Transformer } else { createSplits(mi, ma) } + case _ => createSplits(mi, ma) } @@ -227,6 +227,7 @@ class ICETransformer(override val uid: String) extends Transformer createSplits(mi, ma) } } + values.toArray } @@ -239,12 +240,13 @@ class ICETransformer(override val uid: String) extends Transformer categoricalFeatures.foreach { f => schema(f.name).dataType match { - case StringType| BooleanType | ByteType | ShortType | IntegerType | LongType => + case StringType | BooleanType | ByteType | ShortType | IntegerType | LongType => case _ => throw new Exception(s"Data type for categorical features" + s" must be ${allowedCategoricalTypes.mkString("[", ",", "]")}.") } } + val allowedNumericTypes = Array(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType) numericFeatures.foreach { f => @@ -254,20 +256,23 @@ class ICETransformer(override val uid: String) extends Transformer Exception(s"Data type for numeric features must be ${allowedNumericTypes.mkString("[", ",", "]")}.") } } + // Check if features are specified val featureNames = (categoricalFeatures ++ numericFeatures).map(_.getName) if (featureNames.isEmpty) { throw new Exception("No categorical features or numeric features are set to the explainer. 
" + "Call setCategoricalFeatures or setNumericFeatures to set the features to be explained.") } + // Check for duplicate feature specification val duplicateFeatureNames = featureNames.groupBy(identity).mapValues(_.length).filter(_._2 > 1).keys.toArray if (duplicateFeatureNames.nonEmpty) { throw new Exception(s"Duplicate features specified: ${duplicateFeatureNames.mkString(", ")}") } + validateSchema(schema) schema } } -object ICETransformer extends ComplexParamsReadable[ICETransformer] \ No newline at end of file +object ICETransformer extends ComplexParamsReadable[ICETransformer] diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala index afb32cfd35..7cd1a2ebab 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEFeature.scala @@ -111,4 +111,4 @@ object ICENumericFeature { ICENumericFeature(name, numSplits, rangeMin, rangeMax, outputColName) } -} \ No newline at end of file +}