Adding copy constructor in convolution layer #3067

Merged 3 commits on Oct 5, 2021
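For context, here is a minimal sketch of what the added special member functions enable, mirroring how the tests in this PR exercise them. The headers, the FFN<> copy assignment and move construction, and the layer sizes (4608 = 24 * 24 * 8 convolution outputs) are modeled on the test file below; this is an illustration, not code from the diff.

#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <utility>

using namespace mlpack::ann;

int main()
{
  // A small network containing the Convolution<> layer touched by this PR.
  FFN<NegativeLogLikelihood<>, RandomInitialization> original;
  original.Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
  original.Add<ReLULayer<> >();
  original.Add<Linear<> >(4608, 2);  // 24 * 24 * 8 = 4608 convolution outputs.
  original.Add<LogSoftMax<> >();

  // Deep copy via copy assignment: the copy owns its own Convolution weights
  // and hyperparameters (this is what CheckCopyFunction verifies below).
  FFN<> copied;
  copied = original;

  // Move construction: the new network takes over the layers, so 'original'
  // must not be used afterwards (this is what CheckMoveFunction verifies).
  FFN<> moved(std::move(original));

  return 0;
}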
12 changes: 12 additions & 0 deletions src/mlpack/methods/ann/layer/convolution.hpp
@@ -140,6 +140,18 @@ class Convolution
const size_t inputHeight = 0,
const std::string& paddingType = "None");

//! Copy constructor.
Convolution(const Convolution& layer);

//! Move constructor.
Convolution(Convolution&& layer);

//! Copy assignment operator.
Convolution& operator=(const Convolution& layer);

//! Move assignment operator.
Convolution& operator=(Convolution&& layer);

/*
* Set the weight and bias term.
*/
161 changes: 161 additions & 0 deletions src/mlpack/methods/ann/layer/convolution_impl.hpp
@@ -137,6 +137,167 @@ Convolution<
padding = ann::Padding<>(padWLeft, padWRight, padHTop, padHBottom);
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
const Convolution& layer) :
inSize(layer.inSize),
outSize(layer.outSize),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(layer.padding),
weights(layer.weights)
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::Convolution(
Convolution&& layer) :
inSize(0),
outSize(0),
kernelWidth(layer.kernelWidth),
kernelHeight(layer.kernelHeight),
strideWidth(layer.strideWidth),
strideHeight(layer.strideHeight),
padWLeft(layer.padWLeft),
padWRight(layer.padWRight),
padHBottom(layer.padHBottom),
padHTop(layer.padHTop),
inputWidth(layer.inputWidth),
inputHeight(layer.inputHeight),
outputWidth(layer.outputWidth),
outputHeight(layer.outputHeight),
padding(std::move(layer.padding)),
weights(std::move(layer.weights))
{
// Nothing to do here.
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(const Convolution& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = layer.padding;
weights = layer.weights;
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
typename GradientConvolutionRule,
typename InputDataType,
typename OutputDataType
>
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>&
Convolution<
ForwardConvolutionRule,
BackwardConvolutionRule,
GradientConvolutionRule,
InputDataType,
OutputDataType
>::
operator=(Convolution&& layer)
{
if (this != &layer)
{
inSize = layer.inSize;
outSize = layer.outSize;
kernelWidth = layer.kernelWidth;
kernelHeight = layer.kernelHeight;
strideWidth = layer.strideWidth;
strideHeight = layer.strideHeight;
padWLeft = layer.padWLeft;
padWRight = layer.padWRight;
padHBottom = layer.padHBottom;
padHTop = layer.padHTop;
inputWidth = layer.inputWidth;
inputHeight = layer.inputHeight;
outputWidth = layer.outputWidth;
outputHeight = layer.outputHeight;
padding = std::move(layer.padding);
weights = std::move(layer.weights);
}

return *this;
}

template<
typename ForwardConvolutionRule,
typename BackwardConvolutionRule,
134 changes: 134 additions & 0 deletions src/mlpack/tests/convolutional_network_test.cpp
@@ -25,6 +25,51 @@
using namespace mlpack;
using namespace mlpack::ann;

// network1 should be allocated with `new`; this function trains it on the
// given data, copies it, deletes it, and checks that the copy's predictions
// match the original's.
template<typename MatType = arma::mat, typename ModelType>
void CheckCopyFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2;
network2 = *network1;
// Deallocate all of network1's memory, so that if network2 still shared any
// of it, the predictions computed below would differ (or crash).
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}

// network1 should be allocated with `new`; this function trains it on the
// given data, moves it into a new network, deletes it, and checks that the
// predictions still match.
template<typename MatType = arma::mat, typename ModelType>
void CheckMoveFunction(ModelType* network1,
MatType& trainData,
MatType& trainLabels,
const size_t maxEpochs)
{
ens::RMSProp opt(0.01, 1, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1);
network1->Train(trainData, trainLabels, opt);

arma::mat predictions1;
network1->Predict(trainData, predictions1);
FFN<> network2(std::move(*network1));
// Deallocate all of network1's memory, so that if network2 still referred to
// any of it, the predictions computed below would differ (or crash).
delete network1;

arma::mat predictions2;
network2.Predict(trainData, predictions2);
CheckMatrices(predictions1, predictions2);
}

/**
* Train the vanilla network on a larger dataset.
*/
@@ -124,3 +169,92 @@ TEST_CASE("VanillaNetworkTest", "[ConvolutionalNetworkTest]")

REQUIRE(success == true);
}

/**
* Check that copying and moving a convolutional network (and its Convolution
* layers) yields networks with identical predictions.
*/
TEST_CASE("CheckCopyVanillaNetworkTest", "[ConvolutionalNetworkTest]")
{
arma::mat X;
X.load("mnist_first250_training_4s_and_9s.arm");

// Normalize each point since these are images.
arma::uword nPoints = X.n_cols;
for (arma::uword i = 0; i < nPoints; ++i)
{
X.col(i) /= norm(X.col(i), 2);
}

// Build the target matrix.
arma::mat Y = arma::zeros<arma::mat>(1, nPoints);
for (size_t i = 0; i < nPoints; ++i)
{
if (i < nPoints / 2)
{
// Assign label "0" to all samples with digit = 4
Y(i) = 0;
}
else
{
// Assign label "1" to all samples with digit = 9
Y(i) = 1;
}
}

/*
* Construct a convolutional neural network with a 28x28x1 input layer,
* 24x24x8 convolution layer, 9x9x8 pooling layer, 8x8x12 convolution layer
* and a 4x4x12 pooling layer which is fully connected with the output layer.
* The network structure looks like:
*
* Input Convolution Pooling Convolution Pooling Output
* Layer Layer Layer Layer Layer Layer
*
* +---+ +---+ +---+ +---+
* | +---+ | +---+ | +---+ | +---+
* +---+ | | +---+ | | +---+ | | +---+ | | +---+ +---+
* | | | | | | | | | | | | | | | | | | | |
* | +--> +-+ | +--> +-+ | +--> +-+ | +--> +-+ | +--> | |
* | | +-+ | +-+ | +-+ | +-+ | | |
* +---+ +---+ +---+ +---+ +---+ +---+
*/
// The networks are only trained briefly; exact accuracy is not important
// here. Training just ensures the layers hold nontrivial weights, so that the
// copied and moved networks can be compared against the originals'
// predictions.
FFN<NegativeLogLikelihood<>, RandomInitialization>* model =
new FFN<NegativeLogLikelihood<>, RandomInitialization>;

model->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(8, 8, 2, 2);
model->Add<Convolution<> >(8, 12, 2, 2);
model->Add<ReLULayer<> >();
model->Add<MaxPooling<> >(2, 2, 2, 2);
model->Add<Linear<> >(192, 20);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(20, 10);
model->Add<ReLULayer<> >();
model->Add<Linear<> >(10, 2);
model->Add<LogSoftMax<> >();

FFN<NegativeLogLikelihood<>, RandomInitialization>* model1 =
new FFN<NegativeLogLikelihood<>, RandomInitialization>;

model1->Add<Convolution<> >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(8, 8, 2, 2);
model1->Add<Convolution<> >(8, 12, 2, 2);
model1->Add<ReLULayer<> >();
model1->Add<MaxPooling<> >(2, 2, 2, 2);
model1->Add<Linear<> >(192, 20);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(20, 10);
model1->Add<ReLULayer<> >();
model1->Add<Linear<> >(10, 2);
model1->Add<LogSoftMax<> >();

// Check that the copy semantics work correctly.
CheckCopyFunction<>(model, X, Y, 8);

// Check that the move semantics work correctly.
CheckMoveFunction<>(model1, X, Y, 8);
}
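As a closing note, the layer sizes in the architecture comment above can be checked with a small standalone sketch. The floor-based output formula is an assumption matching mlpack's default pooling behaviour (floor rounding), and the helper name OutSize is hypothetical, not part of mlpack or this PR.

#include <cstddef>
#include <iostream>

// Output width of a no-padding convolution or pooling with the given stride.
std::size_t OutSize(const std::size_t in,
                    const std::size_t kernel,
                    const std::size_t stride = 1)
{
  return (in - kernel) / stride + 1;  // Integer division acts as floor.
}

int main()
{
  std::size_t w = 28;     // 28x28x1 input image.
  w = OutSize(w, 5);      // 24: Convolution<>(1, 8, 5, 5)   -> 24x24x8
  w = OutSize(w, 8, 2);   // 9:  MaxPooling<>(8, 8, 2, 2)    -> 9x9x8
  w = OutSize(w, 2);      // 8:  Convolution<>(8, 12, 2, 2)  -> 8x8x12
  w = OutSize(w, 2, 2);   // 4:  MaxPooling<>(2, 2, 2, 2)    -> 4x4x12
  std::cout << "Linear<> input size: " << w * w * 12 << std::endl;  // 192
  return 0;
}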