fix: [Workaround] CNTKModel does not output correct result (#1076)

* fix: [Workaround] CNTKModel does not output correct result for ResNet50 model when running on Databricks
* Add code comment to reference issue 1075
* Only cache for non-streaming df
microsoft · Jun 9, 2021 · 0632f1b · 0632f1b
1 parent 36ee274
commit 0632f1b
Showing 1 changed file with 4 additions and 1 deletion.
diff --git a/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala b/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala
@@ -532,7 +532,10 @@ class CNTKModel(override val uid: String) extends Model[CNTKModel] with ComplexP
       val droppedDF = outputDF.drop(outputDF.columns.filter(_.startsWith(coercionPrefix)): _*)
 
       val unbatchedDF = if (getBatchInput) {
-        new FlattenBatch().transform(droppedDF)
+        // TODO: The cache call is a workaround for issue 1075:
+        //  https://github.com/Azure/mmlspark/issues/1075
+        val cacheAttempted = if (droppedDF.isStreaming) droppedDF else droppedDF.cache()
+        new FlattenBatch().transform(cacheAttempted)
       } else {
         droppedDF
       }