Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pixel Shuffle layer #2563

Merged
merged 13 commits into from
Feb 26, 2021
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
### mlpack ?.?.?
###### ????-??-??
* Added Pixel Shuffle layer (#2563).

* Added Spatial Dropout layer (#2564).

* Force CMake to show error when it didn't find Python/modules (#2568).
Expand Down
2 changes: 2 additions & 0 deletions src/mlpack/methods/ann/layer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ set(SOURCES
noisylinear_impl.hpp
parametric_relu.hpp
parametric_relu_impl.hpp
pixel_shuffle.hpp
pixel_shuffle_impl.hpp
positional_encoding.hpp
positional_encoding_impl.hpp
recurrent.hpp
Expand Down
1 change: 1 addition & 0 deletions src/mlpack/methods/ann/layer/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "noisylinear.hpp"
#include "padding.hpp"
#include "parametric_relu.hpp"
#include "pixel_shuffle.hpp"
#include "positional_encoding.hpp"
#include "recurrent_attention.hpp"
#include "recurrent.hpp"
Expand Down
2 changes: 2 additions & 0 deletions src/mlpack/methods/ann/layer/layer_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <mlpack/methods/ann/layer/adaptive_max_pooling.hpp>
#include <mlpack/methods/ann/layer/adaptive_mean_pooling.hpp>
#include <mlpack/methods/ann/layer/parametric_relu.hpp>
#include <mlpack/methods/ann/layer/pixel_shuffle.hpp>
#include <mlpack/methods/ann/layer/positional_encoding.hpp>
#include <mlpack/methods/ann/layer/reinforce_normal.hpp>
#include <mlpack/methods/ann/layer/reparametrization.hpp>
Expand Down Expand Up @@ -289,6 +290,7 @@ using LayerTypes = boost::variant<
NoisyLinear<arma::mat, arma::mat>*,
Padding<arma::mat, arma::mat>*,
PReLU<arma::mat, arma::mat>*,
PixelShuffle<arma::mat, arma::mat>*,
Softmax<arma::mat, arma::mat>*,
SpatialDropout<arma::mat, arma::mat>*,
TransposedConvolution<NaiveConvolution<ValidConvolution>,
Expand Down
179 changes: 179 additions & 0 deletions src/mlpack/methods/ann/layer/pixel_shuffle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/**
* @file methods/ann/layer/pixel_shuffle.hpp
* @author Anjishnu Mukherjee
*
* Definition of the PixelShuffle class.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_ANN_LAYER_PIXEL_SHUFFLE_HPP
#define MLPACK_METHODS_ANN_LAYER_PIXEL_SHUFFLE_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * The PixelShuffle layer.
 *
 * The layer is configured with an upscale factor together with the height,
 * width and channel count of each input image.  Forward() derives the output
 * geometry from those values: height and width are multiplied by the upscale
 * factor and the channel count is divided by the square of the upscale factor.
 *
 * The layer is described in the following paper:
 *
 * @code
 * @article{Shi16,
 *   author  = {Wenzhe Shi, Jose Caballero, Ferenc Huszár, Johannes Totz,
 *              Andrew P. Aitken, Rob Bishop, Daniel Rueckert, Zehan Wang},
 *   title   = {Real-Time Single Image and Video Super-Resolution Using an
 *              Efficient Sub-Pixel Convolutional Neural Network},
 *   journal = {CoRR},
 *   volume  = {abs/1609.05158},
 *   year    = {2016},
 *   url     = {https://arxiv.org/abs/1609.05158},
 *   eprint  = {1609.05158},
 * }
 * @endcode
 *
 * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
 *     arma::sp_mat or arma::cube).
 * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
 *     arma::sp_mat or arma::cube).
 */
template<typename InputDataType = arma::mat,
         typename OutputDataType = arma::mat>
class PixelShuffle
{
 public:
  /** Build a PixelShuffle layer whose dimensions are all zero. */
  PixelShuffle();

  /**
   * Build a PixelShuffle layer from an explicit configuration.
   *
   * The number of input channels should be an integral multiple of the square
   * of the upscale factor.
   *
   * @param upscaleFactor The scaling factor for Pixel Shuffle.
   * @param height The height of each input image.
   * @param width The width of each input image.
   * @param size The number of channels of each input image.
   */
  PixelShuffle(const size_t upscaleFactor,
               const size_t height,
               const size_t width,
               const size_t size);

  /**
   * Feed-forward pass of the layer.
   *
   * @param input Input data used for evaluating the specified function.
   * @param output Resulting output activation.
   */
  template<typename eT>
  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output);

  /**
   * Feed-backward pass of the layer.
   *
   * @param input The propagated input activation.
   * @param gy The backpropagated error.
   * @param g The calculated gradient.
   */
  template<typename eT>
  void Backward(const arma::Mat<eT>& input,
                const arma::Mat<eT>& gy,
                arma::Mat<eT>& g);

  /** Read-only access to the output parameter. */
  const OutputDataType& OutputParameter() const { return outputParameter; }
  /** Mutable access to the output parameter. */
  OutputDataType& OutputParameter() { return outputParameter; }

  /** Read-only access to the delta. */
  const OutputDataType& Delta() const { return delta; }
  /** Mutable access to the delta. */
  OutputDataType& Delta() { return delta; }

  /** The upscale factor. */
  size_t UpscaleFactor() const { return upscaleFactor; }
  /** Mutable access to the upscale factor. */
  size_t& UpscaleFactor() { return upscaleFactor; }

  /** The height of each input image. */
  size_t InputHeight() const { return height; }
  /** Mutable access to the input image height. */
  size_t& InputHeight() { return height; }

  /** The width of each input image. */
  size_t InputWidth() const { return width; }
  /** Mutable access to the input image width. */
  size_t& InputWidth() { return width; }

  /** The number of channels of each input image. */
  size_t InputChannels() const { return size; }
  /** Mutable access to the number of input channels. */
  size_t& InputChannels() { return size; }

  /** The height of each output image, as computed by Forward(). */
  size_t OutputHeight() const { return outputHeight; }

  /** The width of each output image, as computed by Forward(). */
  size_t OutputWidth() const { return outputWidth; }

  /** The number of channels of each output image, as computed by Forward(). */
  size_t OutputChannels() const { return sizeOut; }

  /**
   * Serialize the layer.
   */
  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */);

 private:
  /** Locally-stored delta object. */
  OutputDataType delta;

  /** Locally-stored output parameter object. */
  OutputDataType outputParameter;

  /** The scaling factor for Pixel Shuffle. */
  size_t upscaleFactor;

  /** The height of each input image. */
  size_t height;

  /** The width of each input image. */
  size_t width;

  /** The number of channels of each input image. */
  size_t size;

  /** The number of images in the batch. */
  size_t batchSize;

  /** The height of each output image. */
  size_t outputHeight;

  /** The width of each output image. */
  size_t outputWidth;

  /** The number of channels of each output image. */
  size_t sizeOut;

  /** Set to true once Forward() has computed the output dimensions. */
  bool reset;
}; // class PixelShuffle

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "pixel_shuffle_impl.hpp"

#endif
151 changes: 151 additions & 0 deletions src/mlpack/methods/ann/layer/pixel_shuffle_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/**
* @file methods/ann/layer/pixel_shuffle_impl.hpp
* @author Anjishnu Mukherjee
*
* Implementation of the PixelShuffle class.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_ANN_LAYER_PIXEL_SHUFFLE_IMPL_HPP
#define MLPACK_METHODS_ANN_LAYER_PIXEL_SHUFFLE_IMPL_HPP

// In case it hasn't yet been included.
#include "pixel_shuffle.hpp"

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

// Default construction: every dimension is zero and the setup flag is cleared,
// which is exactly the state produced by PixelShuffle(0, 0, 0, 0).
template<typename InputDataType, typename OutputDataType>
PixelShuffle<InputDataType, OutputDataType>::PixelShuffle() :
    PixelShuffle(0, 0, 0, 0)
{
  // All members are initialized by the delegated-to constructor.
}

// Store the caller-supplied configuration.  The batch size and the output
// dimensions are not known yet and start at zero; Forward() fills them in.
template<typename InputDataType, typename OutputDataType>
PixelShuffle<InputDataType, OutputDataType>::PixelShuffle(
    const size_t upscaleFactor,
    const size_t height,
    const size_t width,
    const size_t size) :
    // Configuration copied from the arguments.
    upscaleFactor(upscaleFactor), height(height), width(width), size(size),
    // Derived state, computed later.
    batchSize(0), outputHeight(0), outputWidth(0), sizeOut(0),
    // No forward pass has run yet.
    reset(false)
{
}

/**
 * Forward pass of the PixelShuffle layer.
 *
 * Every column of `input` is treated as one image of `width` x `height` x
 * `size` elements and is rearranged into one column of `output` holding
 * (width * upscaleFactor) x (height * upscaleFactor) x
 * (size / upscaleFactor^2) elements.
 *
 * The batch size and the output dimensions are recomputed on every call, so
 * the layer keeps working when the number of columns changes between calls or
 * when the configuration is changed through the modifier accessors.
 *
 * @param input Input data; each column is expected to hold
 *     height * width * size elements.
 * @param output Resulting output activation; resized by this function.
 */
template<typename InputDataType, typename OutputDataType>
template<typename eT>
void PixelShuffle<InputDataType, OutputDataType>::Forward(
    const arma::Mat<eT>& input, arma::Mat<eT>& output)
{
  // Square of the upscale factor, kept in integer arithmetic so that no index
  // ever goes through a floating-point round trip.
  const size_t factorSq = upscaleFactor * upscaleFactor;

  batchSize = input.n_cols;
  // An upscale factor of zero (default-constructed layer) yields no output
  // channels instead of a division by zero.
  sizeOut = (factorSq == 0) ? 0 : size / factorSq;
  outputHeight = height * upscaleFactor;
  outputWidth = width * upscaleFactor;
  reset = true;

  output.zeros(outputHeight * outputWidth * sizeOut, batchSize);

  // Nothing to rearrange for an empty output.
  if (output.is_empty())
    return;

  for (size_t n = 0; n < batchSize; ++n)
  {
    // Alias the n-th column of each matrix as a cube, without copying.  The
    // cubes are indexed as (w, h, c), so their dimensions are given in that
    // same order; for square images this is the same memory layout as
    // (height, width, channels).
    const arma::Cube<eT> inputTemp(const_cast<eT*>(input.colptr(n)), width,
        height, size, false, false);
    arma::Cube<eT> outputTemp(output.colptr(n), outputWidth, outputHeight,
        sizeOut, false, false);

    for (size_t c = 0; c < sizeOut; ++c)
    {
      // First input channel that feeds output channel c.
      const size_t channelBase = c * factorSq;
      for (size_t h = 0; h < outputHeight; ++h)
      {
        const size_t heightIndex = h / upscaleFactor;
        const size_t rowOffset = upscaleFactor * (h % upscaleFactor);
        for (size_t w = 0; w < outputWidth; ++w)
        {
          const size_t widthIndex = w / upscaleFactor;
          const size_t channelIndex = channelBase + rowOffset +
              (w % upscaleFactor);
          outputTemp(w, h, c) = inputTemp(widthIndex, heightIndex,
              channelIndex);
        }
      }
    }
  }
}

/**
 * Backward pass of the PixelShuffle layer.
 *
 * Applies the inverse of the rearrangement done by Forward(): every element
 * of the backpropagated error is written to the input position it was read
 * from.  The output dimensions used here are the ones stored by the most
 * recent Forward() call (or restored by serialize()).
 *
 * @param input The propagated input activation (not read; `g` is sized from
 *     the layer's own input dimensions so that it always matches the cube
 *     view written below).
 * @param gy The backpropagated error; each column is expected to hold
 *     outputHeight * outputWidth * sizeOut elements.
 * @param g The calculated gradient; resized to height * width * size rows and
 *     one column per column of `gy`.
 */
template<typename InputDataType, typename OutputDataType>
template<typename eT>
void PixelShuffle<InputDataType, OutputDataType>::Backward(
    const arma::Mat<eT>& /* input */,
    const arma::Mat<eT>& gy,
    arma::Mat<eT>& g)
{
  // Use the batch size of the error actually passed in rather than the value
  // remembered from an earlier Forward() call.
  const size_t batch = gy.n_cols;
  const size_t factorSq = upscaleFactor * upscaleFactor;

  g.zeros(height * width * size, batch);

  // Nothing to scatter when either side is empty.
  if (g.is_empty() || gy.is_empty())
    return;

  for (size_t n = 0; n < batch; ++n)
  {
    // Alias the n-th column of each matrix as a cube, without copying.  The
    // cubes are indexed as (w, h, c), so their dimensions are given in that
    // same order; for square images this is the same memory layout as
    // (height, width, channels).
    const arma::Cube<eT> gyTemp(const_cast<eT*>(gy.colptr(n)), outputWidth,
        outputHeight, sizeOut, false, false);
    arma::Cube<eT> gTemp(g.colptr(n), width, height, size, false, false);

    for (size_t c = 0; c < sizeOut; ++c)
    {
      // First input channel that fed output channel c.
      const size_t channelBase = c * factorSq;
      for (size_t h = 0; h < outputHeight; ++h)
      {
        const size_t heightIndex = h / upscaleFactor;
        const size_t rowOffset = upscaleFactor * (h % upscaleFactor);
        for (size_t w = 0; w < outputWidth; ++w)
        {
          const size_t widthIndex = w / upscaleFactor;
          const size_t channelIndex = channelBase + rowOffset +
              (w % upscaleFactor);
          gTemp(widthIndex, heightIndex, channelIndex) = gyTemp(w, h, c);
        }
      }
    }
  }
}

/**
 * Serialize the layer.
 *
 * Archives the delta, the output parameter, the configuration (upscale
 * factor, input height, width and channels), the batch size and the output
 * dimensions, in that order.  The `reset` flag is not part of the archive.
 *
 * @param ar Archive to read from or write to.
 */
template<typename InputDataType, typename OutputDataType>
template<typename Archive>
void PixelShuffle<InputDataType, OutputDataType>::serialize(
    Archive& ar,
    const unsigned int /* version */)
{
  // The order of the fields defines the archive layout; keep it stable.
  ar & BOOST_SERIALIZATION_NVP(delta);
  ar & BOOST_SERIALIZATION_NVP(outputParameter);
  ar & BOOST_SERIALIZATION_NVP(upscaleFactor);
  ar & BOOST_SERIALIZATION_NVP(height);
  ar & BOOST_SERIALIZATION_NVP(width);
  ar & BOOST_SERIALIZATION_NVP(size);
  ar & BOOST_SERIALIZATION_NVP(batchSize);
  ar & BOOST_SERIALIZATION_NVP(outputHeight);
  ar & BOOST_SERIALIZATION_NVP(outputWidth);
  ar & BOOST_SERIALIZATION_NVP(sizeOut);
}

} // namespace ann
} // namespace mlpack

#endif