Adding copy constructor in convolution layer (#3067)
* Adding copy constructor in convolution layer

* Adding space at the end

* Adding spaces
geekypathak21 committed Oct 5, 2021
1 parent 1ed5987 commit 5d73046
Showing 3 changed files with 307 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/mlpack/methods/ann/layer/convolution.hpp
@@ -140,6 +140,18 @@ class Convolution
const size_t inputHeight = 0,
const std::string& paddingType = "None");

//! Copy constructor.
Convolution(const Convolution& layer);

//! Move constructor.
Convolution(Convolution&&);

//! Copy assignment operator.
Convolution& operator=(const Convolution& layer);

//! Move assignment operator.
Convolution& operator=(Convolution&& layer);

/*
* Set the weight and bias term.
*/
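As a rough, untested sketch (not part of this commit), the special members declared above are meant to enable usage like the following; the constructor arguments are illustrative only and mirror the ones used in the test further below:

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/convolution.hpp>

#include <utility>

using namespace mlpack::ann;

int main()
{
  // 1 input map, 8 output maps, 5x5 kernel, stride 1, no padding, 28x28 input.
  Convolution<> original(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);

  Convolution<> copied(original);            // Copy constructor.
  Convolution<> moved(std::move(original));  // Move constructor.

  copied = moved;                            // Copy assignment operator.
  moved = Convolution<>(1, 8, 3, 3);         // Move assignment operator.

  return 0;
}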
161 changes: 161 additions & 0 deletions src/mlpack/methods/ann/layer/convolution_impl.hpp
@@ -137,6 +137,167 @@ Convolution<
padding = ann::Padding<>(padWLeft, padWRight, padHTop, padHBottom);
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
const Convolution& layer) :
inSize(layer.inSize),
outSize(layer.outSize),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(layer.padding),
weights(layer.weights)
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
Convolution&& layer) :
inSize(layer.inSize),
outSize(layer.outSize),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(std::move(layer.padding)),
weights(std::move(layer.weights))
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(const Convolution& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = layer.padding;
weights = layer.weights;
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(Convolution&& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = std::move(layer.padding);
weights = std::move(layer.weights);
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
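The assignment operators above follow a plain member-wise pattern with a self-assignment guard. Here is a self-contained illustration of that idiom (not mlpack code; the Grid class and its members are invented for this example):

#include <cstddef>
#include <utility>
#include <vector>

// Stand-in for a class holding both cheap scalar members and a heavy buffer.
class Grid
{
 public:
  Grid(const size_t width, const size_t height) :
      width(width), height(height), cells(width * height, 0.0) { }

  Grid(const Grid& other) = default;
  Grid(Grid&& other) = default;

  // Copy assignment: skip the work on self-assignment, then copy every member.
  Grid& operator=(const Grid& other)
  {
    if (this != &other)
    {
      width = other.width;
      height = other.height;
      cells = other.cells;  // Deep copy of the buffer.
    }
    return *this;
  }

  // Move assignment: copy the cheap members, steal the heavy buffer.
  Grid& operator=(Grid&& other)
  {
    if (this != &other)
    {
      width = other.width;
      height = other.height;
      cells = std::move(other.cells);
    }
    return *this;
  }

 private:
  size_t width;
  size_t height;
  std::vector<double> cells;
};

int main()
{
  Grid a(3, 4);
  Grid b(1, 1);
  b = a;             // Copy assignment: a and b now hold independent buffers.
  Grid c(2, 2);
  c = std::move(a);  // Move assignment: c takes over a's buffer.
  return 0;
}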
134 changes: 134 additions & 0 deletions src/mlpack/tests/convolutional_network_test.cpp
@@ -25,6 +25,51 @@
using namespace mlpack;
using namespace mlpack::ann;

// network1 should be allocated with `new`; this function trains it, copies it
// into a second network, deletes it, and checks that the copy still produces
// the same predictions.
template<typename MatType = arma::mat, typename ModelType>
void CheckCopyFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2;
network2 = *network1;
// Deallocate all of network1's memory, so that if network2 still referenced
// any of it, the predictions below would differ or crash.
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}

// network1 should be allocated with `new`; this function trains it, moves it
// into a second network, deletes it, and checks that the moved-to network
// still produces the same predictions.
template<typename MatType = arma::mat, typename ModelType>
void CheckMoveFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2(std::move(*network1));
// Deallocate all of network1's memory, so that if network2 still referenced
// any of it, the predictions below would differ or crash.
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}
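A hypothetical sketch (not part of this commit) of driving these helpers with a much smaller model and synthetic data; the layer sizes and data here are invented for illustration, while the real test below uses the MNIST subset:

TEST_CASE("TinyCopyMoveSketch", "[ConvolutionalNetworkTest]")
{
  // Tiny synthetic problem: 10-dimensional points, all labeled with class 1.
  arma::mat data = arma::randu<arma::mat>(10, 50);
  arma::mat labels = arma::ones<arma::mat>(1, 50);

  FFN<NegativeLogLikelihood<>, RandomInitialization>* tiny =
      new FFN<NegativeLogLikelihood<>, RandomInitialization>();
  tiny->Add<Linear<> >(10, 2);
  tiny->Add<LogSoftMax<> >();

  // CheckCopyFunction trains the network, copies it, deletes the original, and
  // verifies that the copy still produces identical predictions.
  CheckCopyFunction<>(tiny, data, labels, 1);
}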

/**
* Train the vanilla network on a larger dataset.
*/
@@ -124,3 +169,92 @@ TEST_CASE("VanillaNetworkTest", "[ConvolutionalNetworkTest]")

REQUIRE(success == true);
}

/**
 * Check that copying and moving a trained convolutional network preserves its
 * predictions.
 */
TEST_CASE("CheckCopyVanillaNetworkTest", "[ConvolutionalNetworkTest]")
{
arma::mat X;
X.load("mnist_first250_training_4s_and_9s.arm");

// Normalize each point since these are images.
arma::uword nPoints = X.n_cols;
for (arma::uword i = 0; i < nPoints; ++i)
{
X.col(i) /= norm(X.col(i), 2);
}

// Build the target matrix.
arma::mat Y = arma::zeros<arma::mat>(1, nPoints);
for (size_t i = 0; i < nPoints; ++i)
{
if (i < nPoints / 2)
{
// Assign label "0" to all samples with digit = 4.
Y(i) = 0;
}
else
{
// Assign label "1" to all samples with digit = 9.
Y(i) = 1;
}
}

/*
* Construct a convolutional neural network with a 28x28x1 input layer,
* 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer
* and a 4x4x12 pooling layer which is fully connected with the output layer.
* The network structure looks like:
*
* Input Convolution Pooling Convolution Pooling Output
* Layer Layer Layer Layer Layer Layer
*
* +---+ +---+ +---+ +---+
* | +---+ | +---+ | +---+ | +---+
* +---+ | | +---+ | | +---+ | | +---+ | | +---+ +---+
* | | | | | | | | | | | | | | | | | | | |
* | +--> +-+ | +--> +-+ | +--> +-+ | +--> +-+ | +--> | |
* | | +-+ | +-+ | +-+ | +-+ | | |
* +---+ +---+ +---+ +---+ +---+ +---+
*/
// Convergence is not required here: the test below only checks that the
// copied and the moved networks produce exactly the same predictions as the
// networks they were created from.
FFN<NegativeLogLikelihood<>, RandomInitialization>* model =
    new FFN<NegativeLogLikelihood<>, RandomInitialization>();

model->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(8, 8, 2, 2);
model->Add<Convolution<> >(8, 12, 2, 2);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(2, 2, 2, 2);
model->Add<Linear<> >(192, 20);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(20, 10);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(10, 2);
model->Add<LogSoftMax<> >();

FFN<NegativeLogLikelihood<>, RandomInitialization>* model1 =
    new FFN<NegativeLogLikelihood<>, RandomInitialization>();

model1->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(8, 8, 2, 2);
model1->Add<Convolution<> >(8, 12, 2, 2);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(2, 2, 2, 2);
model1->Add<Linear<> >(192, 20);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(20, 10);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(10, 2);
model1->Add<LogSoftMax<> >();

// Check whether copying the network works correctly.
CheckCopyFunction<>(model, X, Y, 8);

// Check whether moving the network works correctly.
CheckMoveFunction<>(model1, X, Y, 8);
}
