Adding copy constructor in convolution layer (#3067)
* Adding copy constructor in convolution layer

* Adding space at the end

* Adding spaces
geekypathak21 committed Oct 5, 2021
1 parent 1ed5987 commit 5d73046
Showing 3 changed files with 307 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/mlpack/methods/ann/layer/convolution.hpp
@@ -140,6 +140,18 @@ class Convolution
const size_t inputHeight = 0,
const std::string& paddingType = "None");

//! Copy constructor.
Convolution(const Convolution& layer);

//! Move constructor.
Convolution(Convolution&&);

//! Copy assignment operator.
Convolution& operator=(const Convolution& layer);

//! Move assignment operator.
Convolution& operator=(Convolution&& layer);

/*
* Set the weight and bias term.
*/
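As a rough, untested sketch (not part of this commit), the special members declared above are meant to enable usage like the following; the constructor arguments are illustrative only and mirror the ones used in the test further below:

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/convolution.hpp>

#include <utility>

using namespace mlpack::ann;

int main()
{
  // 1 input map, 8 output maps, 5x5 kernel, stride 1, no padding, 28x28 input.
  Convolution<> original(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);

  Convolution<> copied(original);            // Copy constructor.
  Convolution<> moved(std::move(original));  // Move constructor.

  copied = moved;                            // Copy assignment operator.
  moved = Convolution<>(1, 8, 3, 3);         // Move assignment operator.

  return 0;
}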
161 changes: 161 additions & 0 deletions src/mlpack/methods/ann/layer/convolution_impl.hpp
@@ -137,6 +137,167 @@ Convolution<
padding = ann::Padding<>(padWLeft, padWRight, padHTop, padHBottom);
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
const Convolution& layer) :
inSize(layer.inSize),
outSize(layer.outSize),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(layer.padding),
weights(layer.weights)
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
Convolution&& layer) :
inSize(layer.inSize),
outSize(layer.outSize),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(std::move(layer.padding)),
weights(std::move(layer.weights))
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(const Convolution& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = layer.padding;
weights = layer.weights;
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(Convolution&& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = std::move(layer.padding);
weights = std::move(layer.weights);
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
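The assignment operators above follow a plain member-wise pattern with a self-assignment guard. Here is a self-contained illustration of that idiom (not mlpack code; the Grid class and its members are invented for this example):

#include <cstddef>
#include <utility>
#include <vector>

// Stand-in for a class holding both cheap scalar members and a heavy buffer.
class Grid
{
 public:
  Grid(const size_t width, const size_t height) :
      width(width), height(height), cells(width * height, 0.0) { }

  Grid(const Grid& other) = default;
  Grid(Grid&& other) = default;

  // Copy assignment: skip the work on self-assignment, then copy every member.
  Grid& operator=(const Grid& other)
  {
    if (this != &other)
    {
      width = other.width;
      height = other.height;
      cells = other.cells;  // Deep copy of the buffer.
    }
    return *this;
  }

  // Move assignment: copy the cheap members, steal the heavy buffer.
  Grid& operator=(Grid&& other)
  {
    if (this != &other)
    {
      width = other.width;
      height = other.height;
      cells = std::move(other.cells);
    }
    return *this;
  }

 private:
  size_t width;
  size_t height;
  std::vector<double> cells;
};

int main()
{
  Grid a(3, 4);
  Grid b(1, 1);
  b = a;             // Copy assignment: a and b now hold independent buffers.
  Grid c(2, 2);
  c = std::move(a);  // Move assignment: c takes over a's buffer.
  return 0;
}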
134 changes: 134 additions & 0 deletions src/mlpack/tests/convolutional_network_test.cpp
@@ -25,6 +25,51 @@
using namespace mlpack;
using namespace mlpack::ann;

// network1 should be allocated with `new`; this function trains it, copies it
// into a second network, deletes it, and checks that the copy still produces
// the same predictions.
template<typename MatType = arma::mat, typename ModelType>
void CheckCopyFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2;
network2 = *network1;
// Deallocate all of network1's memory, so that if network2 still referenced
// any of it, the predictions below would differ or crash.
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}

// network1 should be allocated with `new`; this function trains it, moves it
// into a second network, deletes it, and checks that the moved-to network
// still produces the same predictions.
template<typename MatType = arma::mat, typename ModelType>
void CheckMoveFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2(std::move(*network1));
// Deallocate all of network1's memory, so that if network2 still referenced
// any of it, the predictions below would differ or crash.
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}
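A hypothetical sketch (not part of this commit) of driving these helpers with a much smaller model and synthetic data; the layer sizes and data here are invented for illustration, while the real test below uses the MNIST subset:

TEST_CASE("TinyCopyMoveSketch", "[ConvolutionalNetworkTest]")
{
  // Tiny synthetic problem: 10-dimensional points, all labeled with class 1.
  arma::mat data = arma::randu<arma::mat>(10, 50);
  arma::mat labels = arma::ones<arma::mat>(1, 50);

  FFN<NegativeLogLikelihood<>, RandomInitialization>* tiny =
      new FFN<NegativeLogLikelihood<>, RandomInitialization>();
  tiny->Add<Linear<> >(10, 2);
  tiny->Add<LogSoftMax<> >();

  // CheckCopyFunction trains the network, copies it, deletes the original, and
  // verifies that the copy still produces identical predictions.
  CheckCopyFunction<>(tiny, data, labels, 1);
}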

/**
* Train the vanilla network on a larger dataset.
*/
@@ -124,3 +169,92 @@ TEST_CASE("VanillaNetworkTest", "[ConvolutionalNetworkTest]")

REQUIRE(success == true);
}

/**
 * Check that copying and moving a trained convolutional network preserves its
 * predictions.
 */
TEST_CASE("CheckCopyVanillaNetworkTest", "[ConvolutionalNetworkTest]")
{
arma::mat X;
X.load("mnist_first250_training_4s_and_9s.arm");

// Normalize each point since these are images.
arma::uword nPoints = X.n_cols;
for (arma::uword i = 0; i < nPoints; ++i)
{
X.col(i) /= norm(X.col(i), 2);
}

// Build the target matrix.
arma::mat Y = arma::zeros<arma::mat>(1, nPoints);
for (size_t i = 0; i < nPoints; ++i)
{
if (i < nPoints / 2)
{
// Assign label "0" to all samples with digit = 4.
Y(i) = 0;
}
else
{
// Assign label "1" to all samples with digit = 9.
Y(i) = 1;
}
}

/*
* Construct a convolutional neural network with a 28x28x1 input layer,
* 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer
* and a 4x4x12 pooling layer which is fully connected with the output layer.
* The network structure looks like:
*
* Input Convolution Pooling Convolution Pooling Output
* Layer Layer Layer Layer Layer Layer
*
* +---+ +---+ +---+ +---+
* | +---+ | +---+ | +---+ | +---+
* +---+ | | +---+ | | +---+ | | +---+ | | +---+ +---+
* | | | | | | | | | | | | | | | | | | | |
* | +--> +-+ | +--> +-+ | +--> +-+ | +--> +-+ | +--> | |
* | | +-+ | +-+ | +-+ | +-+ | | |
* +---+ +---+ +---+ +---+ +---+ +---+
*/
// Convergence is not required here: the test below only checks that the
// copied and the moved networks produce exactly the same predictions as the
// networks they were created from.
FFN<NegativeLogLikelihood<>, RandomInitialization>* model =
    new FFN<NegativeLogLikelihood<>, RandomInitialization>();

model->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(8, 8, 2, 2);
model->Add<Convolution<> >(8, 12, 2, 2);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(2, 2, 2, 2);
model->Add<Linear<> >(192, 20);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(20, 10);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(10, 2);
model->Add<LogSoftMax<> >();

FFN<NegativeLogLikelihood<>, RandomInitialization>* model1 =
    new FFN<NegativeLogLikelihood<>, RandomInitialization>();

model1->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(8, 8, 2, 2);
model1->Add<Convolution<> >(8, 12, 2, 2);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(2, 2, 2, 2);
model1->Add<Linear<> >(192, 20);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(20, 10);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(10, 2);
model1->Add<LogSoftMax<> >();

// Check whether copying the network works correctly.
CheckCopyFunction<>(model, X, Y, 8);

// Check whether moving the network works correctly.
CheckMoveFunction<>(model1, X, Y, 8);
}
