ResizeLayer implementation #1098

Closed. Wants to merge 23 commits; showing changes from 11 commits.
1 change: 1 addition & 0 deletions src/mlpack/methods/ann/CMakeLists.txt
@@ -22,3 +22,4 @@ add_subdirectory(init_rules)
add_subdirectory(layer)
add_subdirectory(convolution_rules)
add_subdirectory(augmented)
add_subdirectory(image_functions)
14 changes: 14 additions & 0 deletions src/mlpack/methods/ann/image_functions/CMakeLists.txt
@@ -0,0 +1,14 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into mlpack.
set(SOURCES
bilinear_function.hpp
)

# Add directory name to sources.
set(DIR_SRCS)
foreach(file ${SOURCES})
  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
endforeach()
# Append sources (with directory name) to list of all mlpack sources (used at
# the parent scope).
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
170 changes: 170 additions & 0 deletions src/mlpack/methods/ann/image_functions/bilinear_function.hpp
@@ -0,0 +1,170 @@
/**
* @file bilinear_function.hpp
* @author Kris Singh
*
* Definition and implementation of the bilinear interpolation function.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_ANN_IMAGE_FUNCTIONS_BILINEAR_FUNCTION_HPP
#define MLPACK_METHODS_ANN_IMAGE_FUNCTIONS_BILINEAR_FUNCTION_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * The bilinear interpolation function, used by the Resize layer to up-sample
 * and down-sample images.
*/
class BiLinearFunction
{
  /*
   * The constructor for the bilinear interpolation.
   *
   * @param inRowSize Number of input rows.
   * @param inColSize Number of input columns.
   * @param outRowSize Number of output rows.
   * @param outColSize Number of output columns.
   */

Contributor:
You forgot to describe the depth argument.
public:
  BiLinearFunction(const size_t inRowSize,
                   const size_t inColSize,
                   const size_t outRowSize,
                   const size_t outColSize) :
      inRowSize(inRowSize),
      inColSize(inColSize),
      outRowSize(outRowSize),
      outColSize(outColSize)
  { }

Contributor:
Could you describe the arguments?
/**
* UpSample the given input.
*
* @param input The input matrix.
* @param output The resulting interpolated output image.
*/
template<typename eT>
void UpSample(const arma::Mat<eT>& input, arma::Mat<eT>& output)
  {
    // Make sure the output has the requested size; its contents are
    // completely overwritten below, so set_size() is sufficient.  (The
    // original check `output.is_empty() && ...` never resized a non-empty
    // output of the wrong size.)
    if (output.n_rows != outRowSize || output.n_cols != outColSize)
      output.set_size(outRowSize, outColSize);

    scaleRow = (double) input.n_rows / (double) output.n_rows;
    scaleCol = (double) input.n_cols / (double) output.n_cols;

    for (size_t i = 0; i < output.n_rows; i++)
    {
      for (size_t j = 0; j < output.n_cols; j++)
      {
        rOrigin = std::floor(i * scaleRow);
        cOrigin = std::floor(j * scaleCol);

        // Clamp so that the 2 x 2 neighbourhood stays inside the input.
        if (rOrigin > input.n_rows - 2)
          rOrigin = input.n_rows - 2;
        if (cOrigin > input.n_cols - 2)
          cOrigin = input.n_cols - 2;

        // Fractional offsets of the output pixel inside that neighbourhood.
        double deltaR = i * scaleRow - rOrigin;
        double deltaC = j * scaleCol - cOrigin;
        coeff1 = (1 - deltaR) * (1 - deltaC);
        coeff2 = deltaR * (1 - deltaC);
        coeff3 = (1 - deltaR) * deltaC;
        coeff4 = deltaR * deltaC;

        // The input is addressed with column-major linear indexing:
        // element (r, c) lives at index c * n_rows + r.
        output(i, j) = input(cOrigin * input.n_rows + rOrigin) * coeff1 +
            input(cOrigin * input.n_rows + rOrigin + 1) * coeff2 +
            input((cOrigin + 1) * input.n_rows + rOrigin) * coeff3 +
            input((cOrigin + 1) * input.n_rows + rOrigin + 1) * coeff4;
      }
    }
  }

/**
* DownSample the given input.
*
* @param input The input matrix.
* @param output The resulting down-sampled output image.
*/
template<typename eT>
void DownSample(const arma::Mat<eT>& input, arma::Mat<eT>& output)
Contributor:
Could you describe the arguments?

Contributor Author:
When upsampling, the input represents the image and the output represents the zoomed image. Here it is the opposite: the input represents the image and the output the un-zoomed (smaller) image. This function is required for backpropagation through the layer: if we get the image gradient from layer l + 1, we have to downsample it before applying it to layer l - 1.

  {
    // Make sure the output has the requested size; its contents are
    // completely overwritten below, so set_size() is sufficient.
    if (output.n_rows != inRowSize || output.n_cols != inColSize)
      output.set_size(inRowSize, inColSize);

Contributor:
I am not sure that the layer should reshape the output. Maybe it is better to represent the output as a column of shape (inRowSize * inColSize, 1). In that case the layer would work with the linear layer.
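A minimal sketch of that suggestion (hypothetical, not part of this PR): interpolate into a 2-D buffer internally and hand back a flattened column, so the layer composes with the linear layer:

// Hypothetical variant: interpolate into a 2-D buffer, then flatten it
// column-major so the result has shape (inRowSize * inColSize, 1).
arma::mat buffer(inRowSize, inColSize);
// ... bilinear down-sampling writes into buffer ...
output = arma::vectorise(buffer);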


    if (input.n_rows == output.n_rows && input.n_cols == output.n_cols)
    {
      // Nothing to do: the gradient already has the input size.
      output = input;
    }
    else
    {
      scaleRow = (double) (input.n_rows - 1) / output.n_rows;
      scaleCol = (double) (input.n_cols - 1) / output.n_cols;

      for (size_t i = 0; i < output.n_rows; i++)
      {
        for (size_t j = 0; j < output.n_cols; j++)
        {
          rOrigin = std::floor(i * scaleRow);
          cOrigin = std::floor(j * scaleCol);

          // Clamp so that the 2 x 2 neighbourhood stays inside the input.
          if (rOrigin > input.n_rows - 2)
            rOrigin = input.n_rows - 2;
          if (cOrigin > input.n_cols - 2)
            cOrigin = input.n_cols - 2;

          double deltaR = i * scaleRow - rOrigin;
          double deltaC = j * scaleCol - cOrigin;
          coeff1 = (1 - deltaR) * (1 - deltaC);
          coeff2 = deltaR * (1 - deltaC);
          coeff3 = (1 - deltaR) * deltaC;
          coeff4 = deltaR * deltaC;

          // Column-major linear indexing, as in UpSample().
          output(i, j) = input(cOrigin * input.n_rows + rOrigin) * coeff1 +
              input(cOrigin * input.n_rows + rOrigin + 1) * coeff2 +
              input((cOrigin + 1) * input.n_rows + rOrigin) * coeff3 +
              input((cOrigin + 1) * input.n_rows + rOrigin + 1) * coeff4;
        }
      }
    }
  }

 private:
  //! Locally stored row size of the input.
  const size_t inRowSize;
  //! Locally stored column size of the input.
  const size_t inColSize;
  //! Locally stored row size of the output.
  const size_t outRowSize;
  //! Locally stored column size of the output.
  const size_t outColSize;

  //! Locally stored scaling factor along the rows.
  double scaleRow;
  //! Locally stored scaling factor along the columns.
  double scaleCol;
  //! Locally stored integer part of the current row index in the input.
  double rOrigin;
  //! Locally stored integer part of the current column index in the input.
  double cOrigin;

  //! Locally stored interpolation coefficient around the given index.
  double coeff1;
  //! Locally stored interpolation coefficient around the given index.
  double coeff2;
  //! Locally stored interpolation coefficient around the given index.
  double coeff3;
  //! Locally stored interpolation coefficient around the given index.
  double coeff4;

lozhnikov (Contributor), Aug 23, 2017:
I guess all of the variables from scaleRow downwards should be defined locally inside the functions. What do you think?

Contributor Author:
Well, the variables are used by both the UpSample and the DownSample functions, so rather than creating stack variables on every call, it seemed better to keep them as class members. Could you explain the benefits of using stack variables here?

Contributor:
As for me, it improves readability and makes it clear that these variables are not needed outside of the functions.

Contributor:
We discussed this before, but the discussion stalled: I think it is better to declare these variables inside the functions, since we don't need their values between calls. As for me, that improves readability. What do you think?
}; // class BiLinearFunction

} // namespace ann
} // namespace mlpack

#endif
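To make the up-sampling and down-sampling contract concrete, here is a minimal usage sketch (not part of the PR; the 2 x 2 values and sizes are illustrative only):

#include <mlpack/methods/ann/image_functions/bilinear_function.hpp>

int main()
{
  // Up-sample a 2 x 2 image to 4 x 4, then down-sample the result back.
  arma::mat image = {{1.0, 3.0}, {2.0, 4.0}};
  arma::mat zoomed, restored;

  mlpack::ann::BiLinearFunction interpolation(2, 2, 4, 4);
  interpolation.UpSample(image, zoomed);      // zoomed is now 4 x 4.
  interpolation.DownSample(zoomed, restored); // restored is now 2 x 2.
}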
2 changes: 2 additions & 0 deletions src/mlpack/methods/ann/layer/CMakeLists.txt
@@ -61,6 +61,8 @@ set(SOURCES
recurrent_attention_impl.hpp
reinforce_normal.hpp
reinforce_normal_impl.hpp
resize.hpp
resize_impl.hpp
select.hpp
select_impl.hpp
sequential.hpp
4 changes: 3 additions & 1 deletion src/mlpack/methods/ann/layer/layer_types.hpp
@@ -34,6 +34,7 @@
#include <mlpack/methods/ann/layer/parametric_relu.hpp>
#include <mlpack/methods/ann/layer/reinforce_normal.hpp>
#include <mlpack/methods/ann/layer/select.hpp>
#include <mlpack/methods/ann/layer/resize.hpp>

// Convolution modules.
#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>
@@ -114,7 +115,8 @@ using LayerTypes = boost::variant<
ReinforceNormal<arma::mat, arma::mat>*,
Select<arma::mat, arma::mat>*,
Sequential<arma::mat, arma::mat>*,
VRClassReward<arma::mat, arma::mat>*
VRClassReward<arma::mat, arma::mat>*,
Resize<BiLinearFunction, arma::mat, arma::mat>*
Contributor:
All layers except Resize are defined alphabetically.

>;
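A sketch of the suggested placement (illustrative; only the ordering changes):

ReinforceNormal<arma::mat, arma::mat>*,
Resize<BiLinearFunction, arma::mat, arma::mat>*,
Select<arma::mat, arma::mat>*,
Sequential<arma::mat, arma::mat>*,
VRClassReward<arma::mat, arma::mat>*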

} // namespace ann
112 changes: 112 additions & 0 deletions src/mlpack/methods/ann/layer/resize.hpp
@@ -0,0 +1,112 @@
/**
* @file resize.hpp
* @author Kris Singh
*
* Definition of the Resize layer, which resizes its input using an
* interpolation function such as BiLinearFunction.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_ANN_LAYER_RESIZE_HPP
#define MLPACK_METHODS_ANN_LAYER_RESIZE_HPP

#include <mlpack/prereqs.hpp>

#include <mlpack/methods/ann/image_functions/bilinear_function.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
* Implementation of the Resize layer class. The Resize class represents a
* single layer of a neural network.
*
* @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
* arma::sp_mat or arma::cube).
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
* arma::sp_mat or arma::cube).
*
* @tparam InterpolationType Type of the interpolation applied to the input
*     (e.g. BiLinearFunction).
*
*/
template <
class InterpolationType = BiLinearFunction,
typename InputDataType = arma::mat,
typename OutputDataType = arma::mat
>
class Resize
{
public:
//! Create the Resize object.
Resize(InterpolationType policy);

  /**
   * Forward pass through the Resize layer. The Resize layer interpolates
   * the matrix using the given interpolation method. If the input and the
   * output have the same size, the forward pass does nothing.
   *
   * @param input The input matrix to interpolate.
   * @param output The interpolated output matrix.
   */

Contributor:
I disagree with this statement:

"If the input and the output have the same size, the forward pass does nothing."

Actually, the layer performs the interpolation.
template<typename eT>
void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output);

/**
* Ordinary feed backward pass of a neural network, calculating the function
* f(x) by propagating x backwards through f, using the results from the feed
* forward pass.
*
* @param input The propagated input activation.
* @param gy The backpropagated error.
* @param g The calculated gradient.
*/
template<typename eT>
void Backward(const arma::Mat<eT>&& /* input */,
arma::Mat<eT>&& gy,
arma::Mat<eT>&& g);

//! Get the input parameter.
InputDataType const& InputParameter() const { return inputParameter; }
//! Modify the input parameter.
InputDataType& InputParameter() { return inputParameter; }

//! Get the output parameter.
OutputDataType const& OutputParameter() const { return outputParameter; }
//! Modify the output parameter.
OutputDataType& OutputParameter() { return outputParameter; }

//! Get the delta.
OutputDataType const& Delta() const { return delta; }
//! Modify the delta.
OutputDataType& Delta() { return delta; }

/**
* Serialize the layer.
*/
template<typename Archive>
void Serialize(Archive& ar, const unsigned int /* version */);

private:
//! Locally-stored interpolation method
InterpolationType policy;
//! Locally-stored delta object.
OutputDataType delta;

//! Locally-stored input parameter object.
InputDataType inputParameter;

//! Locally-stored output parameter object.
OutputDataType outputParameter;
}; // class Resize

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "resize_impl.hpp"

#endif
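For reference, a minimal sketch (not part of the PR) of how the layer could be driven directly, assuming resize_impl.hpp forwards Forward() to the policy's UpSample() and Backward() to its DownSample():

#include <mlpack/methods/ann/layer/resize.hpp>

#include <utility>

int main()
{
  using namespace mlpack::ann;

  // Illustrative sizes: up-sample a 4 x 4 input to 8 x 8.
  BiLinearFunction interpolation(4, 4, 8, 8);
  Resize<> layer(interpolation);

  arma::mat input(4, 4, arma::fill::randu);
  arma::mat output;
  layer.Forward(std::move(input), std::move(output));

  // The backward pass down-samples the gradient from the next layer.
  arma::mat gy(8, 8, arma::fill::randu), g;
  layer.Backward(std::move(input), std::move(gy), std::move(g));
}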