sync code to latest #27

Merged: 5 commits, Apr 19, 2019
Changes from all commits
2 changes: 1 addition & 1 deletion 3rdparty/googletest
Submodule googletest updated 355 files
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
@@ -236,6 +236,7 @@ List of Contributors
* [Zhennan Qin](https://github.com/ZhennanQin)
* [Zhiyuan Huang](https://github.com/huangzhiyuan)
* [Zak Jost](https://github.com/zjost)
* [Zach Kimberg](https://github.com/zachgk)

Label Bot
---------
42 changes: 41 additions & 1 deletion LICENSE
@@ -207,7 +207,7 @@
The Apache MXNET (incubating) project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for the these
subcomponents is subject to the terms and conditions of the following
licenses.
licenses (full text in Appendix).

=======================================================================================
Apache-2.0 licenses
@@ -706,3 +706,43 @@
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.



=======================================================================================
Appendix
=======================================================================================

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

=======================================================================================

3-Clause BSD License

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=======================================================================================

2-Clause BSD License

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 2 additions & 0 deletions python/mxnet/model.py
@@ -884,6 +884,8 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc',
rescale_grad=(1.0/batch_size),
**(self.kwargs))
elif isinstance(self.optimizer, opt.Optimizer):
if not optimizer.idx2name:
optimizer.idx2name = param_idx2name.copy()
optimizer = self.optimizer

# do training
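The two lines added to `model.py` make sure a user-supplied `Optimizer` instance receives the index-to-name mapping before training starts; MXNet optimizers consult `idx2name` when resolving per-parameter settings by name. A minimal Python sketch of the guard, using an illustrative stub in place of the real `mxnet.optimizer.Optimizer`:

```python
class Optimizer:
    """Illustrative stand-in for mxnet.optimizer.Optimizer: the real class
    also carries an idx2name mapping from parameter index to name."""
    def __init__(self):
        self.idx2name = {}

def attach_idx2name(optimizer, param_idx2name):
    # Mirror of the added guard: only fill in the mapping when the
    # caller has not configured one already.
    if not optimizer.idx2name:
        optimizer.idx2name = param_idx2name.copy()
    return optimizer

opt = Optimizer()
attach_idx2name(opt, {0: "fc1_weight", 1: "fc1_bias"})
print(opt.idx2name)  # {0: 'fc1_weight', 1: 'fc1_bias'}
```

The `if not` check means a mapping the caller has already set is never overwritten.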
16 changes: 9 additions & 7 deletions python/mxnet/module/module.py
@@ -505,14 +505,14 @@ def init_optimizer(self, kvstore='local', optimizer='sgd',
batch_size *= kvstore.num_workers
rescale_grad = 1.0/batch_size

idx2name = {}
if update_on_kvstore:
idx2name.update(enumerate(self._exec_group.param_names))
else:
for k in range(len(self._context)):
idx2name.update({i*len(self._context)+k: n
for i, n in enumerate(self._exec_group.param_names)})
if isinstance(optimizer, str):
idx2name = {}
if update_on_kvstore:
idx2name.update(enumerate(self._exec_group.param_names))
else:
for k in range(len(self._context)):
idx2name.update({i*len(self._context)+k: n
for i, n in enumerate(self._exec_group.param_names)})
optimizer_params = dict(optimizer_params)
if 'rescale_grad' not in optimizer_params:
optimizer_params['rescale_grad'] = rescale_grad
@@ -528,6 +528,8 @@ def init_optimizer(self, kvstore='local', optimizer='sgd',
"is not normalized to 1.0/batch_size/num_workers (%s vs. %s). "%(
optimizer.rescale_grad, rescale_grad) +
"Is this intended?", stacklevel=2)
if not optimizer.idx2name:
optimizer.idx2name = idx2name.copy()

self._optimizer = optimizer
self._kvstore = kvstore
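The `idx2name` construction moved in this hunk builds the same mapping in both branches: with `update_on_kvstore`, there is one shared copy of each parameter, so the index is simply the position in `param_names`; otherwise each device keeps its own copy, and index `i * num_devices + k` addresses parameter `i` on device `k`. A standalone sketch of that logic (parameter names and device count are illustrative):

```python
def build_idx2name(param_names, num_devices, update_on_kvstore):
    """Mirror of the idx2name logic in Module.init_optimizer."""
    idx2name = {}
    if update_on_kvstore:
        # One shared copy of each parameter: index == position in param_names.
        idx2name.update(enumerate(param_names))
    else:
        # Each device holds its own copy; interleave the indices per device.
        for k in range(num_devices):
            idx2name.update({i * num_devices + k: n
                             for i, n in enumerate(param_names)})
    return idx2name

names = ["fc1_weight", "fc1_bias"]
print(build_idx2name(names, 2, False))
# {0: 'fc1_weight', 2: 'fc1_bias', 1: 'fc1_weight', 3: 'fc1_bias'}
```

Moving this block out of the `isinstance(optimizer, str)` branch is what lets the later `optimizer.idx2name = idx2name.copy()` line also cover `Optimizer` instances passed in directly.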
7 changes: 4 additions & 3 deletions scala-package/assembly/src/main/assembly/assembly.xml
@@ -66,10 +66,11 @@
<outputDirectory>lib/native</outputDirectory>
</fileSet>
<fileSet>
<directory>${MXNET_DIR}/3rdparty</directory>
<directory>${MXNET_DIR}/licenses</directory>
<includes>
<include>cub/LICENSE.TXT</include>
<include>mkldnn/external/mklml_mac_2019.0.1.20180928/license.txt</include>
<include>LICENSE.binary.dependencies</include>
<include>NOTICE</include>
<include>LICENSE</include>
</includes>
<outputDirectory>.</outputDirectory>
</fileSet>
@@ -395,17 +395,17 @@ private[mxnet] object ExecutorManager {
* @param paramNames Names of all trainable parameters.
* @param ctx List of devices for training (data parallel)
* @param slices Describes how the data parallel splits data into different devices.
* @param providedData training data shapes
* @param providedLabel training label shapes
* @param providedDataDesc training data descriptions
* @param providedLabelDesc training label descriptions
* @param sharedGroup: DataParallelExecutorGroup
* An existing executor group, if to share parameters with it.
*
*/
private class DataParallelExecutorGroup private(sym: Symbol,
argNames: IndexedSeq[String], paramNames: Set[String],
ctx: Array[Context], private val slices: Array[(Int, Int)],
providedData: Map[String, Shape],
providedLabel: Map[String, Shape],
providedDataDesc: IndexedSeq[DataDesc],
providedLabelDesc: IndexedSeq[DataDesc],
sharedGroup: DataParallelExecutorGroup) {
// make sure the architecture is valid
ExecutorManager.checkArguments(sym)
@@ -417,8 +417,8 @@ private class DataParallelExecutorGroup private(sym: Symbol,
sharedGroup.sharedDataArrays
}

private[mxnet] val dataNames = providedData.map { case (k, _) => k }.toList
private[mxnet] val labelNames = providedLabel.map { case (k, _) => k }.toList
private[mxnet] val dataNames = providedDataDesc.map(_.name).toList
private[mxnet] val labelNames = providedLabelDesc.map(_.name).toList
private[mxnet] val auxNames = sym.listAuxiliaryStates()
private[mxnet] val paramIdx = argNames.zipWithIndex
.filter { case (name, i) => paramNames.contains(name) }
@@ -428,9 +428,10 @@
private[mxnet] val trainExecs: Array[Executor] =
ctx.zipWithIndex.map { case (ctxi, i) =>
val dataShapes =
(providedData ++ providedLabel) map { case (name, shape) =>
name -> (Shape(slices(i)._2 - slices(i)._1) ++ shape.slice(1, shape.length))
}
(providedDataDesc ++ providedLabelDesc).map( desc => {
desc.name ->
(Shape(slices(i)._2 - slices(i)._1) ++ desc.shape.slice(1, desc.shape.length))
}).toMap
val sharedExec: Executor = if (sharedGroup == null) null else sharedGroup.trainExecs(i)
ExecutorManager.bindExec(sym, ctxi, dataShapes, paramNamesComb,
needGrad = true, baseExec = sharedExec,
@@ -479,15 +480,15 @@ private class DataParallelExecutorGroup private(sym: Symbol,
trainData: DataIter,
sharedGroup: DataParallelExecutorGroup) {
this(sym, argNames, paramNames, ctx, slices,
trainData.provideData, trainData.provideLabel, sharedGroup)
trainData.provideDataDesc, trainData.provideLabelDesc, sharedGroup)
}

def this(sym: Symbol,
argNames: IndexedSeq[String], paramNames: Set[String],
ctx: Array[Context], slices: Array[(Int, Int)],
trainData: DataIter) {
this(sym, argNames, paramNames, ctx, slices,
trainData.provideData, trainData.provideLabel, null)
trainData.provideDataDesc, trainData.provideLabelDesc, null)
}

/**
@@ -509,15 +510,15 @@ private class DataParallelExecutorGroup private(sym: Symbol,
trainData: DataBatch,
sharedGroup: DataParallelExecutorGroup) {
this(sym, argNames, paramNames, ctx, slices,
trainData.provideData, trainData.provideLabel, sharedGroup)
trainData.provideDataDesc, trainData.provideLabelDesc, sharedGroup)
}

def this(sym: Symbol,
argNames: IndexedSeq[String], paramNames: Set[String],
ctx: Array[Context], slices: Array[(Int, Int)],
trainData: DataBatch) {
this(sym, argNames, paramNames, ctx, slices,
trainData.provideData, trainData.provideLabel, null)
trainData.provideDataDesc, trainData.provideLabelDesc, null)
}

// load data and labels into arrays
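The `dataShapes` computation in the hunk above gives each training executor a batch whose leading (batch) dimension equals the length of its data-parallel slice, while all remaining dimensions are carried over unchanged. The same shape arithmetic, sketched in Python with made-up input shapes:

```python
def per_device_shapes(provided, slices):
    """For each device slice (start, end), replace the batch dimension of
    every input shape with the slice length (end - start), keeping the rest."""
    return [
        {name: (end - start,) + tuple(shape[1:])
         for name, shape in provided.items()}
        for (start, end) in slices
    ]

# Two devices, 64 examples each out of a 128-example batch.
provided = {"data": (128, 3, 224, 224), "softmax_label": (128,)}
slices = [(0, 64), (64, 128)]
print(per_device_shapes(provided, slices))
```

The switch from `Map[String, Shape]` to `IndexedSeq[DataDesc]` in the surrounding diff doesn't change this arithmetic; it only carries the name and shape inside a descriptor instead of a map entry.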
@@ -129,11 +129,11 @@ class FeedForward private(
// Initialize weight parameters and auxiliary states
// The NDArrays associated with the _argParms and _auxParams are not disposed instead
// they are passed a outer scope if available.
private def initParams(inputShapes: Map[String, Shape], overwrite: Boolean = false)
private def initParams(inputShapes: IndexedSeq[DataDesc], overwrite: Boolean = false)
: (IndexedSeq[String], IndexedSeq[String], IndexedSeq[String]) = {
val (argShapes, _, auxShapes) = symbol.inferShape(inputShapes)
val argNames = symbol.listArguments()
val inputNames = inputShapes.keys.toSet
val inputNames = inputShapes.map(_.name).toSet
val paramNames = argNames.filter(!inputNames.contains(_))
val auxNames = symbol.listAuxiliaryStates()

@@ -179,7 +179,7 @@ }
}

// Initialize the predictor module for running prediction.
private def initPredictor(inputShapes: Map[String, Shape]): Unit = {
private def initPredictor(inputShapes: IndexedSeq[DataDesc]): Unit = {
var shouldInit = true
if (this.predExec != null) {
val (argShapes, _, _) = symbol.inferShape(inputShapes)
@@ -193,7 +193,7 @@ }
}
if(shouldInit) {
// for now only use the first device
val predExec = symbol.simpleBind(ctx(0), gradReq = "null", shapeDict = inputShapes)
val predExec = symbol.simpleBind(ctx(0), gradReq = "null", inputShapes)
predExec.copyParamsFrom(_argParams, _auxParams)
ExecutorManager.checkArguments(symbol)
this.predExec = predExec
@@ -233,8 +233,8 @@
*/
def predict(data: DataIter, numBatch: Int = -1): Array[NDArray] = {
data.reset()
val dataShapes = data.provideData
val dataNames = dataShapes.map(_._1).toArray
val dataShapes = data.provideDataDesc
val dataNames = dataShapes.map(_.name).toArray
initPredictor(dataShapes)
val batchSize = data.batchSize
val dataArrays = dataNames.map(predExec.argDict(_))
@@ -363,7 +363,7 @@ class FeedForward private(
this.symbol = symGen.generate(trainData.defaultBucketKey)
checkArguments()
}
initParams(trainData.provideData ++ trainData.provideLabel)
initParams(trainData.provideDataDesc ++ trainData.provideLabelDesc)
}

private def fit(trainData: DataIter, evalData: DataIter, evalMetric: EvalMetric = new Accuracy(),
35 changes: 27 additions & 8 deletions scala-package/core/src/main/scala/org/apache/mxnet/IO.scala
@@ -141,28 +141,46 @@ class DataBatch(val data: IndexedSeq[NDArray],
val pad: Int,
// the key for the bucket that should be used for this batch,
// for bucketing io only
val bucketKey: AnyRef,
val bucketKey: AnyRef = null,
// use DataDesc to indicate the order of data/label loading
// (must match the order of input data/label)
private val providedDataDesc: IndexedSeq[DataDesc],
private val providedLabelDesc: IndexedSeq[DataDesc]) {
private val providedDataDesc: IndexedSeq[DataDesc] = null,
private val providedLabelDesc: IndexedSeq[DataDesc] = null) {
// TODO: change the data/label type into IndexedSeq[(NDArray, DataDesc)]
// However, since the data and label can be accessed publicly (no getter and setter)
// the change on this will break BC

@deprecated("Use provideDataDesc and provideDataLabel instead", "1.3.0")
def this(data: IndexedSeq[NDArray],
label: IndexedSeq[NDArray],
index: IndexedSeq[Long],
pad: Int,
// the key for the bucket that should be used for this batch,
// for bucketing io only
bucketKey: AnyRef,
// use ListMap to indicate the order of data/label loading
// (must match the order of input data/label)
providedData: ListMap[String, Shape]) {
this(data, label, index, pad, bucketKey,
DataDesc.ListMap2Descs(providedData))
}

@deprecated("Use provideDataDesc and provideDataLabel instead", "1.3.0")
def this(data: IndexedSeq[NDArray],
label: IndexedSeq[NDArray],
index: IndexedSeq[Long],
pad: Int,
// the key for the bucket that should be used for this batch,
// for bucketing io only
bucketKey: AnyRef = null,
bucketKey: AnyRef,
// use ListMap to indicate the order of data/label loading
// (must match the order of input data/label)
providedData: ListMap[String, Shape] = null,
providedLabel: ListMap[String, Shape] = null) {
providedData: ListMap[String, Shape],
providedLabel: ListMap[String, Shape]) {
this(data, label, index, pad, bucketKey,
DataDesc.ListMap2Descs(providedData), DataDesc.ListMap2Descs(providedLabel))
}

/**
* Dispose its data and labels
* The object shall never be used after it is disposed.
@@ -177,6 +195,7 @@ }
}

// The name and shape of data
@deprecated("Use provideDataDesc instead", "1.3.0")
def provideData: ListMap[String, Shape] = {
var temp = ListMap[String, Shape]()
if (providedDataDesc == null) null
@@ -187,6 +206,7 @@ }
}

// The name and shape of label
@deprecated("Use provideLabelDesc instead", "1.3.0")
def provideLabel: ListMap[String, Shape] = {
var temp = ListMap[String, Shape]()
if (providedLabelDesc == null) null
@@ -311,8 +331,7 @@ abstract class DataIter extends Iterator[DataBatch] {
*/
@throws(classOf[NoSuchElementException])
def next(): DataBatch = {
new DataBatch(getData(), getLabel(), getIndex(), getPad(),
null, null, null)
new DataBatch(getData(), getLabel(), getIndex(), getPad())
}

/**
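The deprecated `DataBatch` constructors above bridge the old `ListMap[String, Shape]` layout to the new `DataDesc`-based one via `DataDesc.ListMap2Descs`, which turns each (name, shape) entry into a descriptor while preserving order. A rough Python analogue of that conversion (the `DataDesc` tuple and its dtype/layout defaults are illustrative, not MXNet's exact definitions):

```python
from collections import OrderedDict, namedtuple

# Hypothetical descriptor mirroring the Scala DataDesc's main fields.
DataDesc = namedtuple("DataDesc", ["name", "shape", "dtype", "layout"])

def listmap_to_descs(provided, dtype="float32", layout="NCHW"):
    """Analogue of DataDesc.ListMap2Descs: None passes through, otherwise
    each ordered (name, shape) entry becomes a descriptor."""
    if provided is None:
        return None
    return [DataDesc(name, shape, dtype, layout)
            for name, shape in provided.items()]

provided = OrderedDict([("data", (32, 3, 28, 28)), ("label", (32,))])
for desc in listmap_to_descs(provided):
    print(desc.name, desc.shape)
```

Because the order of data/label loading must match the order of input data/label, an order-preserving container (`ListMap` in Scala, `OrderedDict` here) is essential on the old side of the bridge.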
@@ -38,6 +38,7 @@ object Image {
* @param flag Convert decoded image to grayscale (0) or color (1).
* @param to_rgb Whether to convert decoded image
* to mxnet's default RGB format (instead of opencv's default BGR).
* @param out NDArray to store the output
* @return NDArray in HWC format with DType [[DType.UInt8]]
*/
def imDecode(buf: Array[Byte], flag: Int,
@@ -57,6 +58,10 @@
/**
* Same imageDecode with InputStream
* @param inputStream the inputStream of the image
* @param flag Convert decoded image to grayscale (0) or color (1).
* @param to_rgb Whether to convert decoded image
* to mxnet's default RGB format (instead of opencv's default BGR).
* @param out NDArray to store the output
* @return NDArray in HWC format with DType [[DType.UInt8]]
*/
def imDecode(inputStream: InputStream, flag: Int = 1,
@@ -79,6 +84,7 @@
* @param flag Convert decoded image to grayscale (0) or color (1).
* @param to_rgb Whether to convert decoded image to mxnet's default RGB format
* (instead of opencv's default BGR).
* @param out NDArray to store the output
* @return org.apache.mxnet.NDArray in HWC format with DType [[DType.UInt8]]
*/
def imRead(filename: String, flag: Option[Int] = None,
@@ -99,6 +105,7 @@
* @param w Width of resized image.
* @param h Height of resized image.
* @param interp Interpolation method (default=cv2.INTER_LINEAR).
* @param out NDArray to store the output
* @return org.apache.mxnet.NDArray
*/
def imResize(src: org.apache.mxnet.NDArray, w: Int, h: Int,
@@ -124,6 +131,7 @@
* @param typeOf Filling type (default=cv2.BORDER_CONSTANT).
* @param value (Deprecated! Use ``values`` instead.) Fill with single value.
* @param values Fill with value(RGB[A] or gray), up to 4 channels.
* @param out NDArray to store the output
* @return org.apache.mxnet.NDArray
*/
def copyMakeBorder(src: org.apache.mxnet.NDArray, top: Int, bot: Int,
@@ -17,6 +17,8 @@

package org.apache.mxnet

import scala.language.implicitConversions

object MX_PRIMITIVES {

/**