salesforce · tovbinm · Apr 9, 2019 · Apr 8, 2019 · Apr 8, 2019 · Apr 9, 2019
@@ -465,7 +465,7 @@ class OpWorkflow(val uid: String = UID[OpWorkflow]) extends OpWorkflowCore {
    * @param path to the trained workflow model
    * @return workflow model
    */
-  def loadModel(path: String): OpWorkflowModel = new OpWorkflowModelReader(this).load(path)
+  def loadModel(path: String): OpWorkflowModel = new OpWorkflowModelReader(workflowOpt = Some(this)).load(path)
 
   /**
    * Returns a dataframe containing all the columns generated up to and including the feature input

@@ -428,4 +428,12 @@ case object OpWorkflowModel {
   val PersistEveryKStages = 5
   val PersistScores = true
 
+  /**
+   * Load a previously trained workflow model from path, without the workflow that produced it.
+   * NOTE: currently unsupported - the reader rejects a missing workflow with a [[NotImplementedError]].
+   * @param path to the trained workflow model
+   * @return workflow model
+   */
+  def load(path: String): OpWorkflowModel = new OpWorkflowModelReader(None).load(path)
+
 }
@@ -47,9 +47,10 @@ import scala.util.{Failure, Success, Try}
  * This will only work if the features were serialized in topological order.
  * NOTE: The FeatureGeneratorStages will not be recovered into the Model object, because they are part of each feature.
  *
- * @param workflow the workflow that produced the trained model
+ * @param workflowOpt optional workflow that produced the trained model (loading without it is not yet supported)
  */
-class OpWorkflowModelReader(val workflow: OpWorkflow) extends MLReader[OpWorkflowModel] {
+class OpWorkflowModelReader(val workflowOpt: Option[OpWorkflow]) extends MLReader[OpWorkflowModel] {
+
 
   /**
    * Load a previously trained workflow model from path
@@ -81,24 +82,27 @@ class OpWorkflowModelReader(val workflow: OpWorkflow) extends MLReader[OpWorkflo
    * @param path to the trained workflow model
    * @return workflow model instance
    */
-  def loadJson(json: JValue, path: String): Try[OpWorkflowModel] = {
-
-    for {
-      trainParams <- OpParams.fromString((json \ TrainParameters.entryName).extract[String])
-      params <- OpParams.fromString((json \ Parameters.entryName).extract[String])
-      model <- Try(new OpWorkflowModel(uid = (json \ Uid.entryName).extract[String], trainParams))
-      (stages, resultFeatures) <- Try(resolveFeaturesAndStages(json, path))
-      blacklist <- Try(resolveBlacklist(json))
-      results <- resolveRawFeatureFilterResults(json)
-    } yield model
-      .setStages(stages.filterNot(_.isInstanceOf[FeatureGeneratorStage[_, _]]))
-      .setFeatures(resultFeatures)
-      .setParameters(params)
-      .setBlacklist(blacklist)
-      .setRawFeatureFilterResults(results)
+  def loadJson(json: JValue, path: String): Try[OpWorkflowModel] = workflowOpt match {
+    case None => // unsupported: reported through the returned Try rather than thrown past it
+      Failure(new NotImplementedError("Loading models without the original workflow is currently not supported"))
+
+    case Some(workflow) => // every step below is a Try; the first failure short-circuits the load
+      for {
+        trainParams <- OpParams.fromString((json \ TrainParameters.entryName).extract[String])
+        params <- OpParams.fromString((json \ Parameters.entryName).extract[String])
+        model <- Try(new OpWorkflowModel(uid = (json \ Uid.entryName).extract[String], trainParams))
+        (stages, resultFeatures) <- Try(resolveFeaturesAndStages(workflow, json, path))
+        blacklist <- Try(resolveBlacklist(workflow, json))
+        results <- resolveRawFeatureFilterResults(json)
+      } yield model
+        .setStages(stages.filterNot(_.isInstanceOf[FeatureGeneratorStage[_, _]]))
+        .setFeatures(resultFeatures)
+        .setParameters(params)
+        .setBlacklist(blacklist)
+        .setRawFeatureFilterResults(results)
   }
 
-  private def resolveBlacklist(json: JValue): Array[OPFeature] = {
+  private def resolveBlacklist(workflow: OpWorkflow, json: JValue): Array[OPFeature] = {
     if ((json \ BlacklistedFeaturesUids.entryName) != JNothing) { // for backwards compatibility
       val blacklistIds = (json \ BlacklistedFeaturesUids.entryName).extract[JArray].arr
       val allFeatures = workflow.rawFeatures ++ workflow.blacklistedFeatures ++
@@ -110,8 +114,13 @@ class OpWorkflowModelReader(val workflow: OpWorkflow) extends MLReader[OpWorkflo
     }
   }
 
-  private def resolveFeaturesAndStages(json: JValue, path: String): (Array[OPStage], Array[OPFeature]) = {
-    val stages = loadStages(json, path)
+  private def resolveFeaturesAndStages
+  (
+    workflow: OpWorkflow,
+    json: JValue,
+    path: String
+  ): (Array[OPStage], Array[OPFeature]) = {
+    val stages = loadStages(workflow, json, path)
     val stagesMap = stages.map(stage => stage.uid -> stage).toMap[String, OPStage]
     val featuresMap = resolveFeatures(json, stagesMap)
     resolveStages(stages, featuresMap)
@@ -122,7 +131,7 @@ class OpWorkflowModelReader(val workflow: OpWorkflow) extends MLReader[OpWorkflo
     stages.toArray -> resultFeatures.toArray
   }
 
-  private def loadStages(json: JValue, path: String): Seq[OPStage] = {
+  private def loadStages(workflow: OpWorkflow, json: JValue, path: String): Seq[OPStage] = {
     val stagesJs = (json \ Stages.entryName).extract[JArray].arr
     val recoveredStages = stagesJs.map(j => {
       val stageUid = (j \ FieldNames.Uid.entryName).extract[String]

@@ -45,7 +45,8 @@ import scala.reflect.runtime.universe.TypeTag
  * @tparam I1 first input feature type
  * @tparam I2 second input feature type
  */
-class SubstringTransformer[I1 <: Text, I2 <: Text](
+class SubstringTransformer[I1 <: Text, I2 <: Text]
+(
   uid: String = UID[SubstringTransformer[_, _]]
 )(
   implicit override val tti1: TypeTag[I1],

@@ -209,7 +209,7 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest {
   it should "find the sanity checker metadata even if the model has been serialized" in {
     val path = tempDir.toString + "/model-insights-test-" + System.currentTimeMillis()
     val json = OpWorkflowModelWriter.toJson(workflowModel, path)
-    val loadedModel = new OpWorkflowModelReader(workflow).loadJson(json, path)
+    val loadedModel = new OpWorkflowModelReader(Some(workflow)).loadJson(json, path)
     val insights = loadedModel.get.modelInsights(checked)
     val ageInsights = insights.features.filter(_.featureName == age.name).head
     val genderInsights = insights.features.filter(_.featureName == genderPL.name).head

@@ -291,6 +291,13 @@ class OpWorkflowModelReaderWriterTest
     wfM.getBlacklist().isEmpty shouldBe true
   }
 
+  it should "error on loading a model without workflow" in {
+    val error = intercept[RuntimeException](OpWorkflowModel.load(saveFlowPathStable))
+    error.getMessage should startWith("Failed to load Workflow from path")
+    error.getCause shouldBe a[NotImplementedError] // type matcher reports the actual cause on failure
+    error.getCause.getMessage shouldBe "Loading models without the original workflow is currently not supported"
+  }
+
   def compareFeatures(f1: Array[OPFeature], f2: Array[OPFeature]): Unit = {
     f1.length shouldBe f2.length
     f1.sortBy(_.uid) should contain theSameElementsAs f2.sortBy(_.uid)