From 4727298ad033e589ae3ef9ece550470dd9151daf Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 2 Jun 2016 20:50:05 +0200 Subject: [PATCH 01/82] Fix explicitly specialized template issue. --- src/mlpack/core/data/serialization_template_version.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/core/data/serialization_template_version.hpp b/src/mlpack/core/data/serialization_template_version.hpp index 8cf67dc121c..6b617a8e1d1 100644 --- a/src/mlpack/core/data/serialization_template_version.hpp +++ b/src/mlpack/core/data/serialization_template_version.hpp @@ -18,7 +18,6 @@ #define BOOST_TEMPLATE_CLASS_VERSION(SIGNATURE, T, N) \ namespace boost { \ namespace serialization { \ -template<> \ SIGNATURE \ struct version> \ { \ From 00e867f1abc85371db59f985f67b06ad13f47eff Mon Sep 17 00:00:00 2001 From: nilayjain Date: Sun, 5 Jun 2016 12:30:02 +0000 Subject: [PATCH 02/82] edge_boxes: feature extraction --- src/mlpack/methods/CMakeLists.txt | 1 + src/mlpack/methods/edge_boxes/CMakeLists.txt | 20 + .../methods/edge_boxes/edge_boxes_main.cpp | 90 ++ .../methods/edge_boxes/feature_extraction.hpp | 85 ++ .../edge_boxes/feature_extraction_impl.hpp | 903 ++++++++++++++++++ 5 files changed, 1099 insertions(+) create mode 100644 src/mlpack/methods/edge_boxes/CMakeLists.txt create mode 100644 src/mlpack/methods/edge_boxes/edge_boxes_main.cpp create mode 100644 src/mlpack/methods/edge_boxes/feature_extraction.hpp create mode 100644 src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index 5734d5c9d8a..adb67489b67 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -23,6 +23,7 @@ set(DIRS decision_stump det emst + edge_boxes fastmks gmm hmm diff --git a/src/mlpack/methods/edge_boxes/CMakeLists.txt b/src/mlpack/methods/edge_boxes/CMakeLists.txt new file mode 100644 index 00000000000..e64722c2de8 --- /dev/null +++ b/src/mlpack/methods/edge_boxes/CMakeLists.txt @@ -0,0 +1,20 @@ + +cmake_minimum_required(VERSION 2.8) + +# Define the files we need to compile. +# Anything not in this list will not be compiled into mlpack. +set(SOURCES + feature_extraction.hpp + feature_extraction_impl.hpp +) + +# Add directory name to sources. +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() +# Append sources (with directory name) to list of all mlpack sources (used at +# the parent scope). +set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) + +add_cli_executable(edge_boxes) diff --git a/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp b/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp new file mode 100644 index 00000000000..3be7692ff97 --- /dev/null +++ b/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp @@ -0,0 +1,90 @@ +/** + * @file decision_stump.hpp + * @author + * + * Definition of decision stumps. + */ +#include +#include "feature_extraction.hpp" + +using namespace mlpack; +using namespace mlpack::structured_tree; +using namespace std; + +int main() +{ + /* + :param options: + num_images: number of images in the dataset. 
+ rgbd: 0 for RGB, 1 for RGB + depth + shrink: amount to shrink channels + n_orient: number of orientations per gradient scale + grd_smooth_rad: radius for image gradient smoothing + grd_norm_rad: radius for gradient normalization + reg_smooth_rad: radius for reg channel smoothing + ss_smooth_rad: radius for sim channel smoothing + p_size: size of image patches + g_size: size of ground truth patches + n_cell: number of self similarity cells + + n_pos: number of positive patches per tree + n_neg: number of negative patches per tree + fraction: fraction of features to use to train each tree + n_tree: number of trees in forest to train + n_class: number of classes (clusters) for binary splits + min_count: minimum number of data points to allow split + min_child: minimum number of data points allowed at child nodes + max_depth: maximum depth of tree + split: options include 'gini', 'entropy' and 'twoing' + discretize: optional function mapping structured to class labels + + stride: stride at which to compute edges + sharpen: sharpening amount (can only decrease after training) + n_tree_eval: number of trees to evaluate per location + nms: if true apply non-maximum suppression to edges + */ + + map options; + options["num_images"] = 2; + options["row_size"] = 321; + options["col_size"] = 481; + options["rgbd"] = 0; + options["shrink"] = 2; + options["n_orient"] = 4; + options["grd_smooth_rad"] = 0; + options["grd_norm_rad"] = 4; + options["reg_smooth_rad"] = 2; + options["ss_smooth_rad"] = 8; + options["p_size"] = 32; + options["g_size"] = 16; + options["n_cell"] = 5; + + options["n_pos"] = 10000; + options["n_neg"] = 10000; + options["fraction"] = 0.25; + options["n_tree"] = 8; + options["n_class"] = 2; + options["min_count"] = 1; + options["min_child"] = 8; + options["max_depth"] = 64; + options["split"] = 0; // we use 0 for gini, 1 for entropy, 2 for other + options["stride"] = 2; + options["sharpen"] = 2; + options["n_tree_eval"] = 4; + options["nms"] = 1; // 1 for true, 0 for false + + StructuredForests SF(options); +// arma::uvec x(2); + //SF.GetFeatureDimension(x); + + arma::mat segmentations, boundaries, images; + data::Load("/home/nilay/example/small_images.csv", images); + data::Load("/home/nilay/example/small_boundary_1.csv", boundaries); + data::Load("/home/nilay/example/small_segmentation_1.csv", segmentations); + + arma::mat input_data = SF.LoadData(images, boundaries, segmentations); + cout << input_data.n_rows << " " << input_data.n_cols << endl; + SF.PrepareData(input_data); + return 0; +} + diff --git a/src/mlpack/methods/edge_boxes/feature_extraction.hpp b/src/mlpack/methods/edge_boxes/feature_extraction.hpp new file mode 100644 index 00000000000..ba14e23b5fd --- /dev/null +++ b/src/mlpack/methods/edge_boxes/feature_extraction.hpp @@ -0,0 +1,85 @@ +/** + * @file feature_extraction.hpp + * @author Nilay Jain + * + * Feature Extraction for the edge_boxes algorithm. 
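+ *
+ * StructuredForests loads images together with their boundary and
+ * segmentation maps and extracts the regular ("reg") and self-similarity
+ * ("ss") channel features used to train the structured forest.
+ *
+ * A rough usage sketch, mirroring edge_boxes_main.cpp (the template
+ * arguments and the std::map<std::string, double> option type are
+ * assumptions here, not confirmed by this header):
+ *
+ *   std::map<std::string, double> options;  // filled as in edge_boxes_main.cpp
+ *   StructuredForests<arma::mat, arma::cube> sf(options);
+ *   arma::mat data = sf.LoadData(images, boundaries, segmentations);
+ *   sf.PrepareData(data);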
+ */ +#ifndef MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_HPP +#define MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_HPP +#define INF 999999.9999 +#define EPS 1E-20 +#include + +namespace mlpack { +namespace structured_tree { + +template +class StructuredForests +{ + + public: + + std::map options; + + StructuredForests(const std::map& inMap); + + MatType LoadData(MatType& images, MatType& boundaries, + MatType& segmentations); + + void PrepareData(MatType& InputData); + + private: + + arma::vec GetFeatureDimension(); + + arma::vec dt_1d(arma::vec& f, int n); + + void dt_2d(MatType& im); + + MatType dt_image(MatType& im, double on); + + arma::field GetFeatures(MatType& img,arma::umat& loc); + + CubeType CopyMakeBorder(CubeType& InImage, + int top, int left, int bottom, int right); + + void GetShrunkChannels(CubeType& InImage, CubeType& reg_ch, CubeType& ss_ch); + + CubeType RGB2LUV(CubeType& InImage); + + MatType bilinearInterpolation(MatType const &src, + size_t height, size_t width); + + CubeType sepFilter2D(CubeType& InImage, + arma::vec& kernel, int radius); + + CubeType ConvTriangle(CubeType& InImage, int radius); + + void Gradient(CubeType& InImage, + MatType& Magnitude, + MatType& Orientation); + + MatType MaxAndLoc(CubeType& mag, arma::umat& Location); + + CubeType Histogram(MatType& Magnitude, + MatType& Orientation, + int downscale, int interp); + + CubeType ViewAsWindows(CubeType& channels, arma::umat& loc); + + CubeType GetRegFtr(CubeType& channels, arma::umat& loc); + + CubeType GetSSFtr(CubeType& channels, arma::umat& loc); + + CubeType Rearrange(CubeType& channels); + + CubeType PDist(CubeType& features, arma::uvec& grid_pos); + +}; + + +} //namespace structured_tree +} // namespace mlpack +#include "feature_extraction_impl.hpp" +#endif + diff --git a/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp b/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp new file mode 100644 index 00000000000..9680faa2142 --- /dev/null +++ b/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp @@ -0,0 +1,903 @@ +/** + * @file feature_extraction_impl.hpp + * @author Nilay Jain + * + * Implementation of feature extraction methods. + */ +#ifndef MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_IMPL_HPP +#define MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_IMPL_HPP + + +#include "feature_extraction.hpp" +#include + +namespace mlpack { +namespace structured_tree { + +template +StructuredForests:: +StructuredForests(const std::map& inMap) +{ + this->options = inMap; +} + +template +MatType StructuredForests:: +LoadData(MatType& images, MatType& boundaries, MatType& segmentations) +{ + int num_images = this->options["num_images"]; + int row_size = this->options["row_size"]; + int col_size = this->options["col_size"]; + MatType input_data(num_images * row_size * 5, col_size); + // we store the input data as follows: + // images (3), boundaries (1), segmentations (1). 
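+  // That is, each image occupies 5 consecutive blocks of row_size rows in
+  // input_data: its three color channels first, then its boundary map, then
+  // its segmentation map, so input_data has num_images * row_size * 5 rows
+  // and col_size columns.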
+ int loop_iter = num_images * 5; + size_t row_idx = 0; + int col_i = 0, col_s = 0, col_b = 0; + for(size_t i = 0; i < loop_iter; ++i) + { + if (i % 5 == 4) + { + input_data.submat(row_idx, 0, row_idx + row_size - 1,\ + col_size - 1) = MatType(segmentations.colptr(col_s),\ + col_size, row_size).t(); + ++col_s; + } + else if (i % 5 == 3) + { + input_data.submat(row_idx, 0, row_idx + row_size - 1,\ + col_size - 1) = MatType(boundaries.colptr(col_b),\ + col_size, row_size).t(); + ++col_b; + } + else + { + input_data.submat(row_idx, 0, row_idx + row_size - 1,\ + col_size - 1) = MatType(images.colptr(col_i), + col_size, row_size).t(); + ++col_i; + } + row_idx += row_size; + } + return input_data; +} + +template +arma::vec StructuredForests:: +GetFeatureDimension() +{ + /* + shrink: amount to shrink channels + p_size: size of image patches + n_cell: number of self similarity cells + n_orient: number of orientations per gradient scale + */ + arma::vec P(2); + int shrink, p_size, n_cell; + shrink = this->options["shrink"]; + p_size = this->options["p_size"]; + n_cell = this->options["n_cell"]; + + /* + n_color_ch: number of color channels + n_grad_ch: number of gradient channels + n_ch: total number of channels + */ + int n_color_ch, n_grad_ch, n_ch; + if (this->options["rgbd"] == 0) + n_color_ch = 3; + else + n_color_ch = 4; + + n_grad_ch = 2 * (1 + this->options["n_orient"]); + + n_ch = n_color_ch + n_grad_ch; + P[0] = pow((p_size / shrink) , 2) * n_ch; + P[1] = pow(n_cell , 2) * (pow (n_cell, 2) - 1) / 2 * n_ch; + return P; +} + +template +arma::vec StructuredForests:: +dt_1d(arma::vec& f, int n) +{ + arma::vec d(n), v(n), z(n + 1); + int k = 0; + v[0] = 0.0; + z[0] = -INF; + z[1] = +INF; + for (size_t q = 1; q <= n - 1; ++q) + { + float s = ( (f[q] + q * q)-( f[v[k]] + v[k] * v[k]) ) / (2 * q - 2 * v[k]); + while (s <= z[k]) + { + --k; + s = ( (f[q] + q * q) - (f[v[k]] + v[k] * v[k]) ) / (2 * q - 2 * v[k]); + } + + k++; + v[k] = (double)q; + z[k] = s; + z[k+1] = +INF; + } + + k = 0; + for (int q = 0; q <= n-1; q++) + { + while (z[k+1] < q) + k++; + d[q] = (q - v[k]) * (q - v[k]) + f[v[k]]; + } + return d; +} + +template +void StructuredForests:: +dt_2d(MatType& im) +{ + arma::vec f(std::max(im.n_rows, im.n_cols)); + // transform along columns + for (size_t x = 0; x < im.n_cols; ++x) + { + f.subvec(0, im.n_rows - 1) = im.col(x); + arma::vec d = this->dt_1d(f, im.n_rows); + im.col(x) = d; + } + + // transform along rows + for (int y = 0; y < im.n_rows; y++) + { + f.subvec(0, im.n_cols - 1) = im.row(y).t(); + arma::vec d = this->dt_1d(f, im.n_cols); + im.row(y) = d.t(); + } +} + +/* euclidean distance transform of binary image using squared distance */ +template +MatType StructuredForests:: +dt_image(MatType& im, double on) +{ + MatType out = MatType(im.n_rows, im.n_cols); + out.fill(0.0); + out.elem( find(im != on) ).fill(INF); + this->dt_2d(out); + return out; +} + +template +CubeType StructuredForests:: +CopyMakeBorder(CubeType& InImage, int top, + int left, int bottom, int right) +{ + CubeType OutImage(InImage.n_rows + top + bottom, InImage.n_cols + left + right, InImage.n_slices); + + for(size_t i = 0; i < InImage.n_slices; ++i) + { + OutImage.slice(i).submat(top, left, InImage.n_rows + top - 1, InImage.n_cols + left - 1) + = InImage.slice(i); + + for(size_t j = 0; j < right; ++j) + { + OutImage.slice(i).col(InImage.n_cols + left + j).subvec(top, InImage.n_rows + top - 1) + = InImage.slice(i).col(InImage.n_cols - j - 1); + } + + for(int j = 0; j < left; ++j) + { + 
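+      // Left padding: output column j (rows top .. top + InImage.n_rows - 1)
+      // is a mirror copy of input column (left - 1 - j), matching the
+      // reflection used for the right border above.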
OutImage.slice(i).col(j).subvec(top, InImage.n_rows + top - 1) + = InImage.slice(i).col(left - 1 - j); + } + + for(int j = 0; j < top; j++) + { + + OutImage.slice(i).row(j) + = OutImage.slice(i).row(2 * top - 1 - j); + } + + for(int j = 0; j < bottom; j++) + { + OutImage.slice(i).row(InImage.n_rows + top + j) + = OutImage.slice(i).row(InImage.n_rows + top - j - 1); + } + + } + return OutImage; +} + +template +CubeType StructuredForests:: +RGB2LUV(CubeType& InImage) +{ + //assert type is double or float. + double a, y0, maxi; + a = pow(29.0, 3) / 27.0; + y0 = 8.0 / a; + maxi = 1.0 / 270.0; + + arma::vec table(1025); + for (size_t i = 0; i < 1025; ++i) + { + table(i) = i / 1024.0; + + if (table(i) > y0) + table(i) = 116 * pow(table(i), 1.0/3.0) - 16.0; + else + table(i) = table(i) * a; + + table(i) = table(i) * maxi; + } + + MatType rgb2xyz(3,3); + rgb2xyz(0,0) = 0.430574; rgb2xyz(0,1) = 0.430574; rgb2xyz(0,2) = 0.430574; + rgb2xyz(1,0) = 0.430574; rgb2xyz(1,1) = 0.430574; rgb2xyz(1,2) = 0.430574; + rgb2xyz(2,0) = 0.430574; rgb2xyz(2,1) = 0.430574; rgb2xyz(2,2) = 0.430574; + + //see how to calculate this efficiently. numpy.dot does this. + CubeType xyz(InImage.n_rows, InImage.n_cols, rgb2xyz.n_cols); + for(size_t i = 0; i < InImage.n_rows; ++i) + { + for(size_t j = 0; j < InImage.n_cols; ++j) + { + for(size_t k = 0; k < rgb2xyz.n_cols; ++k) + { + double s = 0.0; + for(size_t l = 0; l < InImage.n_slices; ++l) + s += InImage(i, j, l) * rgb2xyz(l, k); + xyz(i, j, k) = s; + } + } + } + + MatType nz(InImage.n_rows, InImage.n_cols); + + nz = 1.0 / ( xyz.slice(0) + (15 * xyz.slice(1) ) + + (3 * xyz.slice(2) + EPS)); + + MatType L = arma::reshape(L, xyz.n_rows, xyz.n_cols); + + MatType U, V; + U = L % (13 * 4 * (xyz.slice(0) % nz) - 13 * 0.197833) + 88 * maxi; + V = L % (13 * 9 * (xyz.slice(1) % nz) - 13 * 0.468331) + 134 * maxi; + + CubeType OutImage(InImage.n_rows, InImage.n_cols, InImage.n_slices); + OutImage.slice(0) = L; + OutImage.slice(1) = U; + OutImage.slice(2) = V; + //OutImage = arma::join_slices(L,U); + //OutImage = arma::join_slices(OutImage, V); + return OutImage; +} + +template +MatType StructuredForests:: +bilinearInterpolation(MatType const &src, + size_t height, size_t width) +{ + MatType dst(height, width); + double const x_ratio = static_cast((src.n_cols - 1)) / width; + double const y_ratio = static_cast((src.n_rows - 1)) / height; + for(size_t row = 0; row != dst.n_rows; ++row) + { + size_t y = static_cast(row * y_ratio); + double const y_diff = (row * y_ratio) - y; //distance of the nearest pixel(y axis) + double const y_diff_2 = 1 - y_diff; + for(size_t col = 0; col != dst.n_cols; ++col) + { + size_t x = static_cast(col * x_ratio); + double const x_diff = (col * x_ratio) - x; //distance of the nearet pixel(x axis) + double const x_diff_2 = 1 - x_diff; + double const y2_cross_x2 = y_diff_2 * x_diff_2; + double const y2_cross_x = y_diff_2 * x_diff; + double const y_cross_x2 = y_diff * x_diff_2; + double const y_cross_x = y_diff * x_diff; + dst(row, col) = y2_cross_x2 * src(y, x) + + y2_cross_x * src(y, x + 1) + + y_cross_x2 * src(y + 1, x) + + y_cross_x * src(y + 1, x + 1); + } + } + + return dst; +} + +template +CubeType StructuredForests:: +sepFilter2D(CubeType& InImage, arma::vec& kernel, int radius) +{ + CubeType OutImage = this->CopyMakeBorder(InImage, radius, radius, radius, radius); + + arma::vec row_res(1), col_res(1); + // reverse InImage and OutImage to avoid making an extra matrix. 
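+  // Each output pixel is intended to be the average of two 1-D kernel
+  // responses: the kernel applied across the pixel's horizontal neighborhood
+  // and down its vertical neighborhood (both of width 2 * radius + 1),
+  // written back into InImage so no extra buffer is needed.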
+ for(size_t k = 0; k < OutImage.n_slices; ++k) + { + for(size_t j = radius; j < OutImage.n_cols - radius; ++j) + { + for(size_t i = radius; i < OutImage.n_rows - radius; ++i) + { + row_res = OutImage.slice(k).row(i).subvec(j - radius, j + radius) * kernel; + col_res = OutImage.slice(k).col(i).subvec(i - radius, i + radius).t() * kernel; + // divide by 2: avg of row_res and col_res, divide by 3: avg over 3 locations. + InImage(i - radius, j - radius, k) = (row_res(0) + col_res(0)) / 2 / 3; + } + } + } + + return InImage; +} + +template +CubeType StructuredForests:: +ConvTriangle(CubeType& InImage, int radius) +{ + if (radius == 0) + { + return InImage; + } + else if (radius <= 1) + { + double p = 12.0 / radius / (radius + 2) - 2; + arma::vec kernel = {1 , p, 1}; + kernel = kernel / (p + 2); + + return this->sepFilter2D(InImage, kernel, radius); + } + else + { + int len = 2 * radius + 1; + arma::vec kernel(len); + for( size_t i = 0; i < radius; ++i) + kernel(i) = i + 1; + + kernel(radius) = radius + 1; + + for( size_t i = radius + 1; i < len; ++i) + kernel(i) = i - 1; + return this->sepFilter2D(InImage, kernel, radius); + } +} + +//just a helper function, can't use it for anything else +//finds max numbers on cube axis and returns max values, +// also stores the locations of max values in Location +template +MatType StructuredForests:: +MaxAndLoc(CubeType& mag, arma::umat& Location) +{ + MatType MaxVal(Location.n_rows, Location.n_cols); + for(size_t i = 0; i < mag.n_rows; ++i) + { + for(size_t j = 0; j < mag.n_cols; ++j) + { + double max = -9999999999.0; int max_loc = 0; + for(size_t k = 0; k < mag.n_slices; ++k) + { + if(mag(i, j, k) > max) + { + max = mag(i, j, k); + MaxVal(i, j) = max; + Location(i, j) = k; + } + } + } + } + return MaxVal; +} + +template +void StructuredForests:: +Gradient(CubeType& InImage, + MatType& Magnitude, + MatType& Orientation) +{ + int grd_norm_rad = this->options["grd_norm_rad"]; + CubeType dx(InImage.n_rows, InImage.n_cols, InImage.n_slices), + dy(InImage.n_rows, InImage.n_cols, InImage.n_slices); + + dx.zeros(); + dy.zeros(); + + /* + From MATLAB documentation: + [FX,FY] = gradient(F), where F is a matrix, returns the + x and y components of the two-dimensional numerical gradient. + FX corresponds to ∂F/∂x, the differences in x (horizontal) direction. + FY corresponds to ∂F/∂y, the differences in the y (vertical) direction. + */ + + + /* + gradient calculates the central difference for interior data points. + For example, consider a matrix with unit-spaced data, A, that has + horizontal gradient G = gradient(A). The interior gradient values, G(:,j), are: + + G(:,j) = 0.5*(A(:,j+1) - A(:,j-1)); + where j varies between 2 and N-1, where N is size(A,2). + + The gradient values along the edges of the matrix are calculated with single-sided differences, so that + + G(:,1) = A(:,2) - A(:,1); + G(:,N) = A(:,N) - A(:,N-1); + + The spacing between points in each direction is assumed to be one. + */ + for (size_t i = 0; i < InImage.n_slices; ++i) + { + dx.slice(i).col(0) = InImage.slice(i).col(1) - InImage.slice(i).col(0); + dx.slice(i).col(InImage.n_cols - 1) = InImage.slice(i).col(InImage.n_cols - 1) + - InImage.slice(i).col(InImage.n_cols - 2); + + for (int j = 1; j < InImage.n_cols-1; j++) + dx.slice(i).col(j) = 0.5 * ( InImage.slice(i).col(j+1) - InImage.slice(i).col(j) ); + + // do same for dy. 
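+    // (First and last rows use single-sided differences; the loop below
+    // fills the interior rows, cf. the central-difference formula quoted
+    // above.)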
+ dy.slice(i).row(0) = InImage.slice(i).row(1) - InImage.slice(i).row(0); + dy.slice(i).row(InImage.n_rows - 1) = InImage.slice(i).row(InImage.n_rows - 1) + - InImage.slice(i).row(InImage.n_rows - 2); + + for (int j = 1; j < InImage.n_rows-1; j++) + dy.slice(i).row(j) = 0.5 * ( InImage.slice(i).row(j+1) - InImage.slice(i).row(j) ); + } + + CubeType mag(InImage.n_rows, InImage.n_cols, InImage.n_slices); + for (size_t i = 0; i < InImage.n_slices; ++i) + { + mag.slice(i) = arma::sqrt( arma::square \ + ( dx.slice(i) + arma::square( dy.slice(i) ) ) ); + } + + arma::umat Location(InImage.n_rows, InImage.n_cols); + Magnitude = this->MaxAndLoc(mag, Location); + if(grd_norm_rad != 0) + { + //we have to do this ugly thing, or override ConvTriangle + // and sepFilter2D methods. + CubeType mag2(InImage.n_rows, InImage.n_cols, 1); + mag2.slice(0) = Magnitude; + mag2 = this->ConvTriangle(mag2, grd_norm_rad); + Magnitude = Magnitude / (mag2.slice(0) + 0.01); + } + MatType dx_mat(dx.n_rows, dx.n_cols),\ + dy_mat(dy.n_rows, dy.n_cols); + + for(size_t j = 0; j < InImage.n_cols; ++j) + { + for(size_t i = 0; i < InImage.n_rows; ++i) + { + dx_mat(i, j) = dx(i, j, Location(i, j)); + dy_mat(i, j) = dy(i, j, Location(i, j)); + } + } + Orientation = arma::atan(dy_mat / dx_mat); + Orientation.transform( [](double val) { if(val < 0) return (val + arma::datum::pi); else return (val);} ); + + for(size_t j = 0; j < InImage.n_cols; ++j) + { + for(size_t i = 0; i < InImage.n_rows; ++i) + { + if( abs(dx_mat(i, j)) + abs(dy_mat(i, j)) < 1E-5) + Orientation(i, j) = 0.5 * arma::datum::pi; + } + } +} + +template +CubeType StructuredForests:: +Histogram(MatType& Magnitude, + MatType& Orientation, + int downscale, int interp) +{ + //i don't think this function can be vectorized. + + //n_orient: number of orientations per gradient scale + int n_orient = this->options["n_orient"]; + //size of HistArr: n_rbin * n_cbin * n_orient . . . (create in caller...) 
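+  // Each pixel votes its gradient magnitude into a histogram with
+  // n_rbin x n_cbin spatial cells (one per downscale x downscale block of
+  // pixels) and n_orient orientation bins covering [0, pi). With interp != 0
+  // the vote is split linearly between the two nearest orientation bins;
+  // otherwise it all goes to the single nearest bin.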
+ int n_rbin = (Magnitude.n_rows + downscale - 1) / downscale; + int n_cbin = (Magnitude.n_cols + downscale - 1) / downscale; + double o_range, o; + o_range = arma::datum::pi / n_orient; + + CubeType HistArr(n_rbin, n_cbin, n_orient); + HistArr.zeros(); + + int r, c, o1, o2; + for(size_t i = 0; i < Magnitude.n_rows; ++i) + { + for(size_t j = 0; j < Magnitude.n_cols; ++j) + { + r = i / downscale; + c = j / downscale; + + if( interp != 0) + { + o = Orientation(i, j) / o_range; + o1 = ((int) o) % n_orient; + o2 = (o1 + 1) % n_orient; + HistArr(r, c, o1) += Magnitude(i, j) * (1 + (int)o - o); + HistArr(r, c, o2) += Magnitude(i, j) * (o - (int) o); + } + else + { + o1 = (int) (Orientation(i, j) / o_range + 0.5) % n_orient; + HistArr(r, c, o1) += Magnitude(i, j); + } + } + } + + HistArr = HistArr / downscale; + + for (size_t i = 0; i < HistArr.n_slices; ++i) + HistArr.slice(i) = arma::square(HistArr.slice(i)); + + return HistArr; +} + +template +void StructuredForests:: +GetShrunkChannels(CubeType& InImage, CubeType& reg_ch, CubeType& ss_ch) +{ + CubeType luv = this->RGB2LUV(InImage); + + int shrink = this->options["shrink"]; + int n_orient = this->options["n_orient"]; + int grd_smooth_rad = this->options["grd_smooth_rad"]; + int grd_norm_rad = this->options["grd_norm_rad"]; + int num_channels = 13; + int rsize = luv.n_rows / shrink; + int csize = luv.n_cols / shrink; + CubeType channels(rsize, csize, num_channels); + + + int slice_idx = 0; + + for( slice_idx = 0; slice_idx < luv.n_slices; ++slice_idx) + channels.slice(slice_idx) + = this->bilinearInterpolation(luv.slice(slice_idx), (size_t)rsize, (size_t)csize); + + double scale = 0.5; + + while(scale <= 1.0) + { + CubeType img( (luv.n_rows * scale), + (luv.n_cols * scale), + luv.n_slices ); + + for( slice_idx = 0; slice_idx < luv.n_slices; ++slice_idx) + { + img.slice(slice_idx) = + this->bilinearInterpolation(luv.slice(slice_idx), + (luv.n_rows * scale), + (luv.n_cols * scale) ); + } + + CubeType OutImage = this->ConvTriangle(img, grd_smooth_rad); + + MatType Magnitude(InImage.n_rows, InImage.n_cols), + Orientation(InImage.n_rows, InImage.n_cols); + + this->Gradient(OutImage, Magnitude, Orientation); + + int downscale = std::max(1, (int) (shrink * scale)); + + CubeType Hist = this->Histogram(Magnitude, Orientation, + downscale, 0); + + channels.slice(slice_idx) = + bilinearInterpolation( Magnitude, rsize, csize); + slice_idx++; + for(size_t i = 0; i < InImage.n_slices; ++i) + channels.slice(i + slice_idx) = + bilinearInterpolation( Magnitude, rsize, csize); + slice_idx += 3; + scale += 0.5; + } + + //cout << "size of channels: " << arma::size(channels) << endl; + double reg_smooth_rad, ss_smooth_rad; + reg_smooth_rad = this->options["reg_smooth_rad"] / (double) shrink; + ss_smooth_rad = this->options["ss_smooth_rad"] / (double) shrink; + + + + + if (reg_smooth_rad > 1.0) + reg_ch = this->ConvTriangle(channels, (int) (std::round(reg_smooth_rad)) ); + else + reg_ch = this->ConvTriangle(channels, reg_smooth_rad); + + if (ss_smooth_rad > 1.0) + ss_ch = this->ConvTriangle(channels, (int) (std::round(ss_smooth_rad)) ); + else + ss_ch = this->ConvTriangle(channels, ss_smooth_rad); + +} + +template +CubeType StructuredForests:: +ViewAsWindows(CubeType& channels, arma::umat& loc) +{ + // 500 for pos_loc, and 500 for neg_loc. + // channels = 160, 240, 13. + CubeType features = CubeType(16, 16, 1000 * 13); + int patchSize = 16; + int p = patchSize / 2; + //increase the channel boundary to protect error against image boundaries. 
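+  // Padding by p = patchSize / 2 on every side means the 16 x 16 window
+  // centered at any sampled location (x, y) always lies inside inc_ch; each
+  // of the 1000 sampled locations contributes its 13 channel slices, giving
+  // the 16 x 16 x (1000 * 13) feature cube.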
+ CubeType inc_ch = this->CopyMakeBorder(channels, p, p, p, p); + for (size_t i = 0, channel = 0; i < loc.n_rows; ++i) + { + int x = loc(i, 0); + int y = loc(i, 1); + + /*(x,y) in channels, is ((x+p), (y+p)) in inc_ch*/ + //cout << "(x,y) = " << x << " " << y << endl; + CubeType patch = inc_ch.tube((x + p) - p, (y + p) - p,\ + (x + p) + p - 1, (y + p) + p - 1); + // since each patch has 13 channel we have to increase the index by 13 + + //cout <<"patch size = " << arma::size(patch) << endl; + + features.slices(channel, channel + 12) = patch; + //cout << "sahi hai " << endl; + channel += 13; + + } + //cout << "successfully returned. . ." << endl; + return features; +} + +template +CubeType StructuredForests:: +Rearrange(CubeType& channels) +{ + //we do (16,16,13*1000) to 256, 1000, 13, in vectorized code. + CubeType ch = CubeType(256, 1000, 13); + for(size_t i = 0; i < 1000; i++) + { + //MatType m(256, 13); + for(size_t j = 0; j < 13; ++j) + { + int sl = (i * j) / 1000; + //cout << "(i,j) = " << i << ", " << j << endl; + ch.slice(sl).col(i) = arma::vectorise(channels.slice(i * j)); + } + } + return ch; +} + +// returns 256 * 1000 * 13 dimension features. +template +CubeType StructuredForests:: +GetRegFtr(CubeType& channels, arma::umat& loc) +{ + int shrink = this->options["shrink"]; + int p_size = this->options["p_size"] / shrink; + CubeType wind = this->ViewAsWindows(channels, loc); + return this->Rearrange(wind); +} + +template +CubeType StructuredForests:: +PDist(CubeType& features, arma::uvec& grid_pos) +{ + // size of DestArr: + // InImage.n_rows * (InImage.n_rows - 1)/2 * InImage.n_slices + //find nC2 differences, for locations in the grid_pos. + //python: input: (716, 256, 13) --->(716, 25, 13) ; output: (716, 300, 13). + //input features : 256,1000,13; output: 300, 1000, 13 + + CubeType output(300, 1000, 13); + for(size_t k = 0; k < features.n_slices; ++k) + { + size_t r_idx = 0; + for(size_t i = 0; i < grid_pos.n_elem; ++i) //loop length : 25 + { + for(size_t j = i + 1; j < grid_pos.n_elem; ++j) //loop length : 25 + { + output.slice(k).row(r_idx) = features.slice(k).row(grid_pos(i)) + - features.slice(k).row(grid_pos(j)); + ++r_idx; + } + } + } + return output; +} + +//returns 300,1000,13 dimension features. 
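+// Self-similarity features: for every patch and channel, PDist takes the
+// pairwise differences between the grid cells sampled inside the patch.
+// With n_cell = 5 (as set in edge_boxes_main.cpp) that is 25 cells and
+// 25 * 24 / 2 = 300 differences per channel, hence the 300 x 1000 x 13
+// output.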
+template +CubeType StructuredForests:: +GetSSFtr(CubeType& channels, arma::umat& loc) +{ + int shrink = this->options["shrink"]; + int p_size = this->options["p_size"] / shrink; + + //n_cell: number of self similarity cells + int n_cell = this->options["n_cell"]; + int half_cell_size = (int) round(p_size / (2.0 * n_cell)); + + arma::uvec g_pos(n_cell); + for(size_t i = 0; i < n_cell; ++i) + { + g_pos(i) = (int)round( (i + 1) * (p_size + 2 * half_cell_size \ + - 1) / (n_cell + 1.0) - half_cell_size); + } + arma::uvec grid_pos(n_cell * n_cell); + size_t k = 0; + for(size_t i = 0; i < n_cell; ++i) + { + for(size_t j = 0; j < n_cell; ++j) + { + grid_pos(k) = g_pos(i) * p_size + g_pos(j); + ++k; + } + } + + CubeType wind = this->ViewAsWindows(channels, loc); + CubeType re_wind = this->Rearrange(wind); + + return this->PDist(re_wind, grid_pos); +} + +template +arma::field StructuredForests:: +GetFeatures(MatType& image, arma::umat& loc) +{ + int row_size = this->options["row_size"]; + int col_size = this->options["col_size"]; + int bottom, right; + bottom = (4 - (image.n_rows / 3) % 4) % 4; + right = (4 - image.n_cols % 4) % 4; + //cout << "Botttom = " << bottom << " right = " << right << endl; + + CubeType InImage(image.n_rows / 3, image.n_cols, 3); + + for(size_t i = 0; i < 3; ++i) + { + InImage.slice(i) = image.submat(i * row_size, 0, \ + (i + 1) * row_size - 1, col_size - 1); + } + + CubeType OutImage = this->CopyMakeBorder(InImage, 0, 0, bottom, right); + + int num_channels = 13; + int shrink = this->options["shrink"]; + int rsize = OutImage.n_rows / shrink; + int csize = OutImage.n_cols / shrink; + + /* this part gives double free or corruption out error + when executed for a second time */ + CubeType reg_ch = CubeType(rsize, csize, num_channels); + CubeType ss_ch = CubeType(rsize, csize, num_channels); + this->GetShrunkChannels(InImage, reg_ch, ss_ch); + + loc = loc / shrink; + + CubeType reg_ftr = this->GetRegFtr(reg_ch, loc); + CubeType ss_ftr = this->GetSSFtr(ss_ch, loc); + arma::field F(2,1); + F(0,0) = reg_ftr; + F(1,0) = ss_ftr; + return F; + //delete reg_ch; + //free(reg_ch); + //free(ss_ch); +} + +template +void StructuredForests:: +PrepareData(MatType& InputData) +{ + int num_images = this->options["num_images"]; + int n_tree = this->options["n_tree"]; + int n_pos = this->options["n_pos"]; + int n_neg = this->options["n_neg"]; + double fraction = 0.25; + int p_size = this->options["p_size"]; + int g_size = this->options["g_size"]; + int shrink = this->options["shrink"]; + int row_size = this->options["row_size"]; + int col_size = this->options["col_size"]; + // p_rad = radius of image patches. + // g_rad = radius of ground truth patches. + int p_rad = p_size / 2, g_rad = g_size / 2; + + arma::vec FtrDim = this->GetFeatureDimension(); + int n_ftr_dim = FtrDim(0) + FtrDim(1); + int n_smp_ftr_dim = int(n_ftr_dim * fraction); + + for(size_t i = 0; i < n_tree; ++i) + { + //implement the logic for if data already exists. + MatType ftrs = arma::zeros(n_pos + n_neg, n_smp_ftr_dim); + + //effectively a 3d array. . . 
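+    // lbls flattens the (n_pos + n_neg) ground-truth patches, each of size
+    // g_size x g_size, by stacking them vertically into a single
+    // ((n_pos + n_neg) * g_size) x g_size matrix.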
+ MatType lbls = arma::zeros( (n_pos + n_neg ) * g_size, g_size); + + + int loop_iter = num_images * 5; + for(size_t j = 0; j < loop_iter; j += 5) + { + MatType img, bnds, segs; + img = InputData.submat(j * row_size, 0, (j + 3) * row_size - 1, col_size - 1); + bnds = InputData.submat( (j + 3) * row_size, 0, \ + (j + 4) * row_size - 1, col_size - 1 ); + segs = InputData.submat( (j + 4) * row_size, 0, \ + (j + 5) * row_size - 1, col_size - 1 ); + + MatType mask = arma::zeros(row_size, col_size); + for(size_t b = 0; b < mask.n_cols; b = b + shrink) + for(size_t a = 0; a < mask.n_rows; a = a + shrink) + mask(a, b) = 1; + mask.col(p_rad - 1).fill(0); + mask.row( (mask.n_rows - 1) - (p_rad - 1) ).fill(0); + mask.submat(0, 0, mask.n_rows - 1, p_rad - 1).fill(0); + mask.submat(0, mask.n_cols - p_rad, mask.n_rows - 1, + mask.n_cols - 1).fill(0); + + // number of positive or negative patches per ground truth. + //int n_patches_per_gt = (int) (ceil( (float)n_pos / num_images )); + int n_patches_per_gt = 500; + //cout << "n_patches_per_gt = " << n_patches_per_gt << endl; + MatType dis = arma::sqrt( this->dt_image(bnds, 1) ); + MatType dis2 = dis; + //dis.transform( [](double val, const int& g_rad) { return (double)(val < g_rad); } ); + //dis2.transform( [](double val, const int& g_rad) { return (double)(val >= g_rad); } ); + //dis.elem( arma::find(dis >= g_rad) ).zeros(); + //dis2.elem( arma::find(dis < g_rad) ).zeros(); + + + arma::uvec pos_loc = arma::find( (dis < g_rad) % mask ); + arma::uvec neg_loc = arma::find( (dis >= g_rad) % mask ); + + pos_loc = arma::shuffle(pos_loc); + neg_loc = arma::shuffle(neg_loc); + + arma::umat loc(n_patches_per_gt * 2, 2); + //cout << "pos_loc size: " << arma::size(pos_loc) << " neg_loc size: " << arma::size(neg_loc) << endl; + //cout << "n_patches_per_gt = " << n_patches_per_gt << endl; + for(size_t i = 0; i < n_patches_per_gt; ++i) + { + loc.row(i) = arma::ind2sub(arma::size(dis.n_rows, dis.n_cols), pos_loc(i) ).t(); + //cout << "pos_loc: " << loc(i, 0) << ", " << loc(i, 1) << endl; + } + + for(size_t i = n_patches_per_gt; i < 2 * n_patches_per_gt; ++i) + { + loc.row(i) = arma::ind2sub(arma::size(dis.n_rows, dis.n_cols), neg_loc(i) ).t(); + //cout << "neg_loc: " << loc(i, 0) << ", " << loc(i, 1) << endl; + } + + // cout << "num patches = " << n_patches_per_gt << " num elements + = " << pos_loc.n_elem\ + // << " num elements - = " << neg_loc.n_elem << " dis.size " << dis.n_elem << endl; + + //Field F contains reg_ftr and ss_ftr. + arma::field F = this->GetFeatures(img, loc); + //randomly sample 70 values each from reg_ftr and ss_ftr. + /* + CubeType ftr(140, 1000, 13); + arma::uvec r = (0, 255, 256); + arma::uvec s = (0, 299, 300); + arma::uvec rs = r.shuffle(); + arma::uvec ss = s.shuffle(); + */ + CubeType lbl(g_size, g_size, 1000); + CubeType s(segs.n_rows, segs.n_cols, 1); + s.slice(0) = segs; + CubeType in_segs = this->CopyMakeBorder(s, g_rad, + g_rad, g_rad, g_rad); + for(size_t i = 0; i < loc.n_rows; ++i) + { + int x = loc(i, 0); int y = loc(i, 1); + //cout << "x, y = " << x << " " << y << endl; + lbl.slice(i) = in_segs.slice(0).submat((x + g_rad) - g_rad, (y + g_rad) - g_rad, + (x + g_rad) + g_rad - 1, (y + g_rad) + g_rad - 1); + } + } + } +} + + +} // namespace structured_tree +} // namespace mlpack +#endif + From c8d5766a01008ad85a6c0ffaa2486e9244ac0f53 Mon Sep 17 00:00:00 2001 From: MarcosPividori Date: Fri, 3 Jun 2016 09:43:33 -0300 Subject: [PATCH 03/82] Properly resetting auxBound. Start using a Reset() method, to avoid futures errors like this. 
--- .../methods/neighbor_search/neighbor_search_impl.hpp | 4 +--- .../methods/neighbor_search/neighbor_search_stat.hpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp index e092766ebbb..d86f5146e1a 100644 --- a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp +++ b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp @@ -638,9 +638,7 @@ Search(const size_t k, nodes.pop(); // Reset bounds of this node. - node->Stat().FirstBound() = SortPolicy::WorstDistance(); - node->Stat().SecondBound() = SortPolicy::WorstDistance(); - node->Stat().LastDistance() = 0.0; + node->Stat().Reset(); // Then add the children. for (size_t i = 0; i < node->NumChildren(); ++i) diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp index dfcc5ad743b..433ea6486a7 100644 --- a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp +++ b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp @@ -57,6 +57,17 @@ class NeighborSearchStat auxBound(SortPolicy::WorstDistance()), lastDistance(0.0) { } + /** + * Reset statistic parameters to initial values. + */ + void Reset() + { + firstBound = SortPolicy::WorstDistance(); + secondBound = SortPolicy::WorstDistance(); + auxBound = SortPolicy::WorstDistance(); + lastDistance = 0.0; + } + //! Get the first bound. double FirstBound() const { return firstBound; } //! Modify the first bound. From 5149efd589ec96631267382a3149f711bae96e32 Mon Sep 17 00:00:00 2001 From: nilayjain Date: Mon, 6 Jun 2016 20:45:26 +0000 Subject: [PATCH 04/82] backported ind2sub and sub2ind --- src/mlpack/core/arma_extend/CMakeLists.txt | 1 + src/mlpack/core/arma_extend/arma_extend.hpp | 2 + src/mlpack/core/arma_extend/fn_ind2sub.hpp | 69 +++++++++++++++++++++ src/mlpack/methods/CMakeLists.txt | 2 +- src/mlpack/tests/CMakeLists.txt | 5 +- src/mlpack/tests/ind2sub_test.cpp | 19 ++++++ 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 src/mlpack/core/arma_extend/fn_ind2sub.hpp create mode 100644 src/mlpack/tests/ind2sub_test.cpp diff --git a/src/mlpack/core/arma_extend/CMakeLists.txt b/src/mlpack/core/arma_extend/CMakeLists.txt index 4307b266521..db0c2212c38 100644 --- a/src/mlpack/core/arma_extend/CMakeLists.txt +++ b/src/mlpack/core/arma_extend/CMakeLists.txt @@ -3,6 +3,7 @@ set(SOURCES arma_extend.hpp fn_ccov.hpp + fn_ind2sub.hpp glue_ccov_meat.hpp glue_ccov_proto.hpp hdf5_misc.hpp diff --git a/src/mlpack/core/arma_extend/arma_extend.hpp b/src/mlpack/core/arma_extend/arma_extend.hpp index b8346e55c8a..12765c775f2 100644 --- a/src/mlpack/core/arma_extend/arma_extend.hpp +++ b/src/mlpack/core/arma_extend/arma_extend.hpp @@ -66,6 +66,8 @@ namespace arma { #include "glue_ccov_meat.hpp" #include "fn_ccov.hpp" + // index to subscript and vice versa + #include "fn_ind2sub.hpp" // inplace_reshape() #include "fn_inplace_reshape.hpp" diff --git a/src/mlpack/core/arma_extend/fn_ind2sub.hpp b/src/mlpack/core/arma_extend/fn_ind2sub.hpp new file mode 100644 index 00000000000..b4bbfe70777 --- /dev/null +++ b/src/mlpack/core/arma_extend/fn_ind2sub.hpp @@ -0,0 +1,69 @@ + + #if (ARMA_VERSION_MAJOR < 6 && ARMA_VERSION_MINOR < 399) + inline + uvec + ind2sub(const SizeMat& s, const uword i) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (i >= (s.n_rows * s.n_cols) ), "ind2sub(): index out of range" ); + + uvec out(2); + + out[0] = i % 
s.n_rows; + out[1] = i / s.n_rows; + + return out; + } + + + inline + uvec + ind2sub(const SizeCube& s, const uword i) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (i >= (s.n_rows * s.n_cols * s.n_slices) ), "ind2sub(): index out of range" ); + + const uword n_elem_slice = s.n_rows * s.n_cols; + + const uword slice = i / n_elem_slice; + const uword j = i - (slice * n_elem_slice); + const uword row = j % s.n_rows; + const uword col = j / s.n_rows; + + uvec out(3); + + out[0] = row; + out[1] = col; + out[2] = slice; + + return out; + } + + + arma_inline + uword + sub2ind(const SizeMat& s, const uword row, const uword col) + { + arma_extra_debug_sigprint(); + + arma_debug_check( ((row >= s.n_rows) || (col >= s.n_cols)), "sub2ind(): subscript out of range" ); + + return uword(row + col*s.n_rows); + } + + + arma_inline + uword + sub2ind(const SizeCube& s, const uword row, const uword col, const uword slice) + { + arma_extra_debug_sigprint(); + + arma_debug_check( ((row >= s.n_rows) || (col >= s.n_cols) || (slice >= s.n_slices)), "sub2ind(): subscript out of range" ); + + return uword( (slice * s.n_rows * s.n_cols) + (col * s.n_rows) + row ); + } +#endif + + diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index adb67489b67..d0ba0ee3a66 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -24,7 +24,7 @@ set(DIRS det emst edge_boxes - fastmks +# fastmks gmm hmm hoeffding_trees diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index e1f255aa723..fab4151a574 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -17,11 +17,12 @@ add_executable(mlpack_test det_test.cpp distribution_test.cpp emst_test.cpp - fastmks_test.cpp +# fastmks_test.cpp feedforward_network_test.cpp gmm_test.cpp hmm_test.cpp hoeffding_tree_test.cpp + ind2sub_test.cpp init_rules_test.cpp kernel_test.cpp kernel_pca_test.cpp @@ -62,7 +63,7 @@ add_executable(mlpack_test sgd_test.cpp serialization.hpp serialization.cpp - serialization_test.cpp + # serialization_test.cpp softmax_regression_test.cpp sort_policy_test.cpp sparse_autoencoder_test.cpp diff --git a/src/mlpack/tests/ind2sub_test.cpp b/src/mlpack/tests/ind2sub_test.cpp new file mode 100644 index 00000000000..14baeba9bff --- /dev/null +++ b/src/mlpack/tests/ind2sub_test.cpp @@ -0,0 +1,19 @@ +#include +//#include + +#include +#include "old_boost_test_definitions.hpp" +BOOST_AUTO_TEST_SUITE(ind2sub_test); + +/** + * This tests handles the case wherein only one class exists in the input + * labels. It checks whether the only class supplied was the only class + * predicted. 
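+ * (In its current form the test simply checks that the backported
+ * arma::ind2sub() converts the linear index 3 into (row, column) subscripts
+ * for a 5 x 5 matrix and prints them; with Armadillo's column-major layout,
+ * index 3 maps to row 3, column 0.)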
+ */ +BOOST_AUTO_TEST_CASE(ind2sub_test) +{ + arma::mat A = arma::randu(5,5); + arma::uvec u = arma::ind2sub(arma::size(A), 3); + u.print(); +} +BOOST_AUTO_TEST_SUITE_END(); From 61e63b98f8f266d1782a949f12ec66c281b290c2 Mon Sep 17 00:00:00 2001 From: nilayjain Date: Mon, 6 Jun 2016 20:48:43 +0000 Subject: [PATCH 05/82] backported ind2sub and sub2ind --- src/mlpack/methods/CMakeLists.txt | 2 +- src/mlpack/tests/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index d0ba0ee3a66..adb67489b67 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -24,7 +24,7 @@ set(DIRS det emst edge_boxes -# fastmks + fastmks gmm hmm hoeffding_trees diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index fab4151a574..8b36a941c97 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -17,7 +17,7 @@ add_executable(mlpack_test det_test.cpp distribution_test.cpp emst_test.cpp -# fastmks_test.cpp + fastmks_test.cpp feedforward_network_test.cpp gmm_test.cpp hmm_test.cpp @@ -63,7 +63,7 @@ add_executable(mlpack_test sgd_test.cpp serialization.hpp serialization.cpp - # serialization_test.cpp + serialization_test.cpp softmax_regression_test.cpp sort_policy_test.cpp sparse_autoencoder_test.cpp From 5f01b84a027c6c952feee34def26234f1aa12af7 Mon Sep 17 00:00:00 2001 From: nilayjain Date: Mon, 6 Jun 2016 21:02:17 +0000 Subject: [PATCH 06/82] Revert "edge_boxes: feature extraction" This reverts commit 9d85b64c6c6bdff608331195351d09abf56cfc96. --- src/mlpack/methods/CMakeLists.txt | 1 - src/mlpack/methods/edge_boxes/CMakeLists.txt | 20 - .../methods/edge_boxes/edge_boxes_main.cpp | 90 -- .../methods/edge_boxes/feature_extraction.hpp | 85 -- .../edge_boxes/feature_extraction_impl.hpp | 903 ------------------ 5 files changed, 1099 deletions(-) delete mode 100644 src/mlpack/methods/edge_boxes/CMakeLists.txt delete mode 100644 src/mlpack/methods/edge_boxes/edge_boxes_main.cpp delete mode 100644 src/mlpack/methods/edge_boxes/feature_extraction.hpp delete mode 100644 src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index adb67489b67..5734d5c9d8a 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -23,7 +23,6 @@ set(DIRS decision_stump det emst - edge_boxes fastmks gmm hmm diff --git a/src/mlpack/methods/edge_boxes/CMakeLists.txt b/src/mlpack/methods/edge_boxes/CMakeLists.txt deleted file mode 100644 index e64722c2de8..00000000000 --- a/src/mlpack/methods/edge_boxes/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ - -cmake_minimum_required(VERSION 2.8) - -# Define the files we need to compile. -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - feature_extraction.hpp - feature_extraction_impl.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). 
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) - -add_cli_executable(edge_boxes) diff --git a/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp b/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp deleted file mode 100644 index 3be7692ff97..00000000000 --- a/src/mlpack/methods/edge_boxes/edge_boxes_main.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/** - * @file decision_stump.hpp - * @author - * - * Definition of decision stumps. - */ -#include -#include "feature_extraction.hpp" - -using namespace mlpack; -using namespace mlpack::structured_tree; -using namespace std; - -int main() -{ - /* - :param options: - num_images: number of images in the dataset. - rgbd: 0 for RGB, 1 for RGB + depth - shrink: amount to shrink channels - n_orient: number of orientations per gradient scale - grd_smooth_rad: radius for image gradient smoothing - grd_norm_rad: radius for gradient normalization - reg_smooth_rad: radius for reg channel smoothing - ss_smooth_rad: radius for sim channel smoothing - p_size: size of image patches - g_size: size of ground truth patches - n_cell: number of self similarity cells - - n_pos: number of positive patches per tree - n_neg: number of negative patches per tree - fraction: fraction of features to use to train each tree - n_tree: number of trees in forest to train - n_class: number of classes (clusters) for binary splits - min_count: minimum number of data points to allow split - min_child: minimum number of data points allowed at child nodes - max_depth: maximum depth of tree - split: options include 'gini', 'entropy' and 'twoing' - discretize: optional function mapping structured to class labels - - stride: stride at which to compute edges - sharpen: sharpening amount (can only decrease after training) - n_tree_eval: number of trees to evaluate per location - nms: if true apply non-maximum suppression to edges - */ - - map options; - options["num_images"] = 2; - options["row_size"] = 321; - options["col_size"] = 481; - options["rgbd"] = 0; - options["shrink"] = 2; - options["n_orient"] = 4; - options["grd_smooth_rad"] = 0; - options["grd_norm_rad"] = 4; - options["reg_smooth_rad"] = 2; - options["ss_smooth_rad"] = 8; - options["p_size"] = 32; - options["g_size"] = 16; - options["n_cell"] = 5; - - options["n_pos"] = 10000; - options["n_neg"] = 10000; - options["fraction"] = 0.25; - options["n_tree"] = 8; - options["n_class"] = 2; - options["min_count"] = 1; - options["min_child"] = 8; - options["max_depth"] = 64; - options["split"] = 0; // we use 0 for gini, 1 for entropy, 2 for other - options["stride"] = 2; - options["sharpen"] = 2; - options["n_tree_eval"] = 4; - options["nms"] = 1; // 1 for true, 0 for false - - StructuredForests SF(options); -// arma::uvec x(2); - //SF.GetFeatureDimension(x); - - arma::mat segmentations, boundaries, images; - data::Load("/home/nilay/example/small_images.csv", images); - data::Load("/home/nilay/example/small_boundary_1.csv", boundaries); - data::Load("/home/nilay/example/small_segmentation_1.csv", segmentations); - - arma::mat input_data = SF.LoadData(images, boundaries, segmentations); - cout << input_data.n_rows << " " << input_data.n_cols << endl; - SF.PrepareData(input_data); - return 0; -} - diff --git a/src/mlpack/methods/edge_boxes/feature_extraction.hpp b/src/mlpack/methods/edge_boxes/feature_extraction.hpp deleted file mode 100644 index ba14e23b5fd..00000000000 --- a/src/mlpack/methods/edge_boxes/feature_extraction.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file feature_extraction.hpp - * @author Nilay Jain - * - * 
Feature Extraction for the edge_boxes algorithm. - */ -#ifndef MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_HPP -#define MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_HPP -#define INF 999999.9999 -#define EPS 1E-20 -#include - -namespace mlpack { -namespace structured_tree { - -template -class StructuredForests -{ - - public: - - std::map options; - - StructuredForests(const std::map& inMap); - - MatType LoadData(MatType& images, MatType& boundaries, - MatType& segmentations); - - void PrepareData(MatType& InputData); - - private: - - arma::vec GetFeatureDimension(); - - arma::vec dt_1d(arma::vec& f, int n); - - void dt_2d(MatType& im); - - MatType dt_image(MatType& im, double on); - - arma::field GetFeatures(MatType& img,arma::umat& loc); - - CubeType CopyMakeBorder(CubeType& InImage, - int top, int left, int bottom, int right); - - void GetShrunkChannels(CubeType& InImage, CubeType& reg_ch, CubeType& ss_ch); - - CubeType RGB2LUV(CubeType& InImage); - - MatType bilinearInterpolation(MatType const &src, - size_t height, size_t width); - - CubeType sepFilter2D(CubeType& InImage, - arma::vec& kernel, int radius); - - CubeType ConvTriangle(CubeType& InImage, int radius); - - void Gradient(CubeType& InImage, - MatType& Magnitude, - MatType& Orientation); - - MatType MaxAndLoc(CubeType& mag, arma::umat& Location); - - CubeType Histogram(MatType& Magnitude, - MatType& Orientation, - int downscale, int interp); - - CubeType ViewAsWindows(CubeType& channels, arma::umat& loc); - - CubeType GetRegFtr(CubeType& channels, arma::umat& loc); - - CubeType GetSSFtr(CubeType& channels, arma::umat& loc); - - CubeType Rearrange(CubeType& channels); - - CubeType PDist(CubeType& features, arma::uvec& grid_pos); - -}; - - -} //namespace structured_tree -} // namespace mlpack -#include "feature_extraction_impl.hpp" -#endif - diff --git a/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp b/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp deleted file mode 100644 index 9680faa2142..00000000000 --- a/src/mlpack/methods/edge_boxes/feature_extraction_impl.hpp +++ /dev/null @@ -1,903 +0,0 @@ -/** - * @file feature_extraction_impl.hpp - * @author Nilay Jain - * - * Implementation of feature extraction methods. - */ -#ifndef MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_IMPL_HPP -#define MLPACK_METHODS_EDGE_BOXES_STRUCTURED_TREE_IMPL_HPP - - -#include "feature_extraction.hpp" -#include - -namespace mlpack { -namespace structured_tree { - -template -StructuredForests:: -StructuredForests(const std::map& inMap) -{ - this->options = inMap; -} - -template -MatType StructuredForests:: -LoadData(MatType& images, MatType& boundaries, MatType& segmentations) -{ - int num_images = this->options["num_images"]; - int row_size = this->options["row_size"]; - int col_size = this->options["col_size"]; - MatType input_data(num_images * row_size * 5, col_size); - // we store the input data as follows: - // images (3), boundaries (1), segmentations (1). 
- int loop_iter = num_images * 5; - size_t row_idx = 0; - int col_i = 0, col_s = 0, col_b = 0; - for(size_t i = 0; i < loop_iter; ++i) - { - if (i % 5 == 4) - { - input_data.submat(row_idx, 0, row_idx + row_size - 1,\ - col_size - 1) = MatType(segmentations.colptr(col_s),\ - col_size, row_size).t(); - ++col_s; - } - else if (i % 5 == 3) - { - input_data.submat(row_idx, 0, row_idx + row_size - 1,\ - col_size - 1) = MatType(boundaries.colptr(col_b),\ - col_size, row_size).t(); - ++col_b; - } - else - { - input_data.submat(row_idx, 0, row_idx + row_size - 1,\ - col_size - 1) = MatType(images.colptr(col_i), - col_size, row_size).t(); - ++col_i; - } - row_idx += row_size; - } - return input_data; -} - -template -arma::vec StructuredForests:: -GetFeatureDimension() -{ - /* - shrink: amount to shrink channels - p_size: size of image patches - n_cell: number of self similarity cells - n_orient: number of orientations per gradient scale - */ - arma::vec P(2); - int shrink, p_size, n_cell; - shrink = this->options["shrink"]; - p_size = this->options["p_size"]; - n_cell = this->options["n_cell"]; - - /* - n_color_ch: number of color channels - n_grad_ch: number of gradient channels - n_ch: total number of channels - */ - int n_color_ch, n_grad_ch, n_ch; - if (this->options["rgbd"] == 0) - n_color_ch = 3; - else - n_color_ch = 4; - - n_grad_ch = 2 * (1 + this->options["n_orient"]); - - n_ch = n_color_ch + n_grad_ch; - P[0] = pow((p_size / shrink) , 2) * n_ch; - P[1] = pow(n_cell , 2) * (pow (n_cell, 2) - 1) / 2 * n_ch; - return P; -} - -template -arma::vec StructuredForests:: -dt_1d(arma::vec& f, int n) -{ - arma::vec d(n), v(n), z(n + 1); - int k = 0; - v[0] = 0.0; - z[0] = -INF; - z[1] = +INF; - for (size_t q = 1; q <= n - 1; ++q) - { - float s = ( (f[q] + q * q)-( f[v[k]] + v[k] * v[k]) ) / (2 * q - 2 * v[k]); - while (s <= z[k]) - { - --k; - s = ( (f[q] + q * q) - (f[v[k]] + v[k] * v[k]) ) / (2 * q - 2 * v[k]); - } - - k++; - v[k] = (double)q; - z[k] = s; - z[k+1] = +INF; - } - - k = 0; - for (int q = 0; q <= n-1; q++) - { - while (z[k+1] < q) - k++; - d[q] = (q - v[k]) * (q - v[k]) + f[v[k]]; - } - return d; -} - -template -void StructuredForests:: -dt_2d(MatType& im) -{ - arma::vec f(std::max(im.n_rows, im.n_cols)); - // transform along columns - for (size_t x = 0; x < im.n_cols; ++x) - { - f.subvec(0, im.n_rows - 1) = im.col(x); - arma::vec d = this->dt_1d(f, im.n_rows); - im.col(x) = d; - } - - // transform along rows - for (int y = 0; y < im.n_rows; y++) - { - f.subvec(0, im.n_cols - 1) = im.row(y).t(); - arma::vec d = this->dt_1d(f, im.n_cols); - im.row(y) = d.t(); - } -} - -/* euclidean distance transform of binary image using squared distance */ -template -MatType StructuredForests:: -dt_image(MatType& im, double on) -{ - MatType out = MatType(im.n_rows, im.n_cols); - out.fill(0.0); - out.elem( find(im != on) ).fill(INF); - this->dt_2d(out); - return out; -} - -template -CubeType StructuredForests:: -CopyMakeBorder(CubeType& InImage, int top, - int left, int bottom, int right) -{ - CubeType OutImage(InImage.n_rows + top + bottom, InImage.n_cols + left + right, InImage.n_slices); - - for(size_t i = 0; i < InImage.n_slices; ++i) - { - OutImage.slice(i).submat(top, left, InImage.n_rows + top - 1, InImage.n_cols + left - 1) - = InImage.slice(i); - - for(size_t j = 0; j < right; ++j) - { - OutImage.slice(i).col(InImage.n_cols + left + j).subvec(top, InImage.n_rows + top - 1) - = InImage.slice(i).col(InImage.n_cols - j - 1); - } - - for(int j = 0; j < left; ++j) - { - 
OutImage.slice(i).col(j).subvec(top, InImage.n_rows + top - 1) - = InImage.slice(i).col(left - 1 - j); - } - - for(int j = 0; j < top; j++) - { - - OutImage.slice(i).row(j) - = OutImage.slice(i).row(2 * top - 1 - j); - } - - for(int j = 0; j < bottom; j++) - { - OutImage.slice(i).row(InImage.n_rows + top + j) - = OutImage.slice(i).row(InImage.n_rows + top - j - 1); - } - - } - return OutImage; -} - -template -CubeType StructuredForests:: -RGB2LUV(CubeType& InImage) -{ - //assert type is double or float. - double a, y0, maxi; - a = pow(29.0, 3) / 27.0; - y0 = 8.0 / a; - maxi = 1.0 / 270.0; - - arma::vec table(1025); - for (size_t i = 0; i < 1025; ++i) - { - table(i) = i / 1024.0; - - if (table(i) > y0) - table(i) = 116 * pow(table(i), 1.0/3.0) - 16.0; - else - table(i) = table(i) * a; - - table(i) = table(i) * maxi; - } - - MatType rgb2xyz(3,3); - rgb2xyz(0,0) = 0.430574; rgb2xyz(0,1) = 0.430574; rgb2xyz(0,2) = 0.430574; - rgb2xyz(1,0) = 0.430574; rgb2xyz(1,1) = 0.430574; rgb2xyz(1,2) = 0.430574; - rgb2xyz(2,0) = 0.430574; rgb2xyz(2,1) = 0.430574; rgb2xyz(2,2) = 0.430574; - - //see how to calculate this efficiently. numpy.dot does this. - CubeType xyz(InImage.n_rows, InImage.n_cols, rgb2xyz.n_cols); - for(size_t i = 0; i < InImage.n_rows; ++i) - { - for(size_t j = 0; j < InImage.n_cols; ++j) - { - for(size_t k = 0; k < rgb2xyz.n_cols; ++k) - { - double s = 0.0; - for(size_t l = 0; l < InImage.n_slices; ++l) - s += InImage(i, j, l) * rgb2xyz(l, k); - xyz(i, j, k) = s; - } - } - } - - MatType nz(InImage.n_rows, InImage.n_cols); - - nz = 1.0 / ( xyz.slice(0) + (15 * xyz.slice(1) ) + - (3 * xyz.slice(2) + EPS)); - - MatType L = arma::reshape(L, xyz.n_rows, xyz.n_cols); - - MatType U, V; - U = L % (13 * 4 * (xyz.slice(0) % nz) - 13 * 0.197833) + 88 * maxi; - V = L % (13 * 9 * (xyz.slice(1) % nz) - 13 * 0.468331) + 134 * maxi; - - CubeType OutImage(InImage.n_rows, InImage.n_cols, InImage.n_slices); - OutImage.slice(0) = L; - OutImage.slice(1) = U; - OutImage.slice(2) = V; - //OutImage = arma::join_slices(L,U); - //OutImage = arma::join_slices(OutImage, V); - return OutImage; -} - -template -MatType StructuredForests:: -bilinearInterpolation(MatType const &src, - size_t height, size_t width) -{ - MatType dst(height, width); - double const x_ratio = static_cast((src.n_cols - 1)) / width; - double const y_ratio = static_cast((src.n_rows - 1)) / height; - for(size_t row = 0; row != dst.n_rows; ++row) - { - size_t y = static_cast(row * y_ratio); - double const y_diff = (row * y_ratio) - y; //distance of the nearest pixel(y axis) - double const y_diff_2 = 1 - y_diff; - for(size_t col = 0; col != dst.n_cols; ++col) - { - size_t x = static_cast(col * x_ratio); - double const x_diff = (col * x_ratio) - x; //distance of the nearet pixel(x axis) - double const x_diff_2 = 1 - x_diff; - double const y2_cross_x2 = y_diff_2 * x_diff_2; - double const y2_cross_x = y_diff_2 * x_diff; - double const y_cross_x2 = y_diff * x_diff_2; - double const y_cross_x = y_diff * x_diff; - dst(row, col) = y2_cross_x2 * src(y, x) + - y2_cross_x * src(y, x + 1) + - y_cross_x2 * src(y + 1, x) + - y_cross_x * src(y + 1, x + 1); - } - } - - return dst; -} - -template -CubeType StructuredForests:: -sepFilter2D(CubeType& InImage, arma::vec& kernel, int radius) -{ - CubeType OutImage = this->CopyMakeBorder(InImage, radius, radius, radius, radius); - - arma::vec row_res(1), col_res(1); - // reverse InImage and OutImage to avoid making an extra matrix. 
- for(size_t k = 0; k < OutImage.n_slices; ++k) - { - for(size_t j = radius; j < OutImage.n_cols - radius; ++j) - { - for(size_t i = radius; i < OutImage.n_rows - radius; ++i) - { - row_res = OutImage.slice(k).row(i).subvec(j - radius, j + radius) * kernel; - col_res = OutImage.slice(k).col(i).subvec(i - radius, i + radius).t() * kernel; - // divide by 2: avg of row_res and col_res, divide by 3: avg over 3 locations. - InImage(i - radius, j - radius, k) = (row_res(0) + col_res(0)) / 2 / 3; - } - } - } - - return InImage; -} - -template -CubeType StructuredForests:: -ConvTriangle(CubeType& InImage, int radius) -{ - if (radius == 0) - { - return InImage; - } - else if (radius <= 1) - { - double p = 12.0 / radius / (radius + 2) - 2; - arma::vec kernel = {1 , p, 1}; - kernel = kernel / (p + 2); - - return this->sepFilter2D(InImage, kernel, radius); - } - else - { - int len = 2 * radius + 1; - arma::vec kernel(len); - for( size_t i = 0; i < radius; ++i) - kernel(i) = i + 1; - - kernel(radius) = radius + 1; - - for( size_t i = radius + 1; i < len; ++i) - kernel(i) = i - 1; - return this->sepFilter2D(InImage, kernel, radius); - } -} - -//just a helper function, can't use it for anything else -//finds max numbers on cube axis and returns max values, -// also stores the locations of max values in Location -template -MatType StructuredForests:: -MaxAndLoc(CubeType& mag, arma::umat& Location) -{ - MatType MaxVal(Location.n_rows, Location.n_cols); - for(size_t i = 0; i < mag.n_rows; ++i) - { - for(size_t j = 0; j < mag.n_cols; ++j) - { - double max = -9999999999.0; int max_loc = 0; - for(size_t k = 0; k < mag.n_slices; ++k) - { - if(mag(i, j, k) > max) - { - max = mag(i, j, k); - MaxVal(i, j) = max; - Location(i, j) = k; - } - } - } - } - return MaxVal; -} - -template -void StructuredForests:: -Gradient(CubeType& InImage, - MatType& Magnitude, - MatType& Orientation) -{ - int grd_norm_rad = this->options["grd_norm_rad"]; - CubeType dx(InImage.n_rows, InImage.n_cols, InImage.n_slices), - dy(InImage.n_rows, InImage.n_cols, InImage.n_slices); - - dx.zeros(); - dy.zeros(); - - /* - From MATLAB documentation: - [FX,FY] = gradient(F), where F is a matrix, returns the - x and y components of the two-dimensional numerical gradient. - FX corresponds to ∂F/∂x, the differences in x (horizontal) direction. - FY corresponds to ∂F/∂y, the differences in the y (vertical) direction. - */ - - - /* - gradient calculates the central difference for interior data points. - For example, consider a matrix with unit-spaced data, A, that has - horizontal gradient G = gradient(A). The interior gradient values, G(:,j), are: - - G(:,j) = 0.5*(A(:,j+1) - A(:,j-1)); - where j varies between 2 and N-1, where N is size(A,2). - - The gradient values along the edges of the matrix are calculated with single-sided differences, so that - - G(:,1) = A(:,2) - A(:,1); - G(:,N) = A(:,N) - A(:,N-1); - - The spacing between points in each direction is assumed to be one. - */ - for (size_t i = 0; i < InImage.n_slices; ++i) - { - dx.slice(i).col(0) = InImage.slice(i).col(1) - InImage.slice(i).col(0); - dx.slice(i).col(InImage.n_cols - 1) = InImage.slice(i).col(InImage.n_cols - 1) - - InImage.slice(i).col(InImage.n_cols - 2); - - for (int j = 1; j < InImage.n_cols-1; j++) - dx.slice(i).col(j) = 0.5 * ( InImage.slice(i).col(j+1) - InImage.slice(i).col(j) ); - - // do same for dy. 
- dy.slice(i).row(0) = InImage.slice(i).row(1) - InImage.slice(i).row(0); - dy.slice(i).row(InImage.n_rows - 1) = InImage.slice(i).row(InImage.n_rows - 1) - - InImage.slice(i).row(InImage.n_rows - 2); - - for (int j = 1; j < InImage.n_rows-1; j++) - dy.slice(i).row(j) = 0.5 * ( InImage.slice(i).row(j+1) - InImage.slice(i).row(j) ); - } - - CubeType mag(InImage.n_rows, InImage.n_cols, InImage.n_slices); - for (size_t i = 0; i < InImage.n_slices; ++i) - { - mag.slice(i) = arma::sqrt( arma::square \ - ( dx.slice(i) + arma::square( dy.slice(i) ) ) ); - } - - arma::umat Location(InImage.n_rows, InImage.n_cols); - Magnitude = this->MaxAndLoc(mag, Location); - if(grd_norm_rad != 0) - { - //we have to do this ugly thing, or override ConvTriangle - // and sepFilter2D methods. - CubeType mag2(InImage.n_rows, InImage.n_cols, 1); - mag2.slice(0) = Magnitude; - mag2 = this->ConvTriangle(mag2, grd_norm_rad); - Magnitude = Magnitude / (mag2.slice(0) + 0.01); - } - MatType dx_mat(dx.n_rows, dx.n_cols),\ - dy_mat(dy.n_rows, dy.n_cols); - - for(size_t j = 0; j < InImage.n_cols; ++j) - { - for(size_t i = 0; i < InImage.n_rows; ++i) - { - dx_mat(i, j) = dx(i, j, Location(i, j)); - dy_mat(i, j) = dy(i, j, Location(i, j)); - } - } - Orientation = arma::atan(dy_mat / dx_mat); - Orientation.transform( [](double val) { if(val < 0) return (val + arma::datum::pi); else return (val);} ); - - for(size_t j = 0; j < InImage.n_cols; ++j) - { - for(size_t i = 0; i < InImage.n_rows; ++i) - { - if( abs(dx_mat(i, j)) + abs(dy_mat(i, j)) < 1E-5) - Orientation(i, j) = 0.5 * arma::datum::pi; - } - } -} - -template -CubeType StructuredForests:: -Histogram(MatType& Magnitude, - MatType& Orientation, - int downscale, int interp) -{ - //i don't think this function can be vectorized. - - //n_orient: number of orientations per gradient scale - int n_orient = this->options["n_orient"]; - //size of HistArr: n_rbin * n_cbin * n_orient . . . (create in caller...) 
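Histogram() below spreads each pixel's gradient magnitude over a set of orientation bins. The following standalone sketch shows the soft assignment used when interpolation is switched on: the orientation selects two adjacent bins (wrapping at pi) and the magnitude is split linearly between them. Plain C++/Armadillo; the bin count and sample values are arbitrary.

#include <armadillo>
#include <cmath>

int main()
{
  const size_t nOrient = 4;                         // Orientation bins over [0, pi).
  const double oRange = arma::datum::pi / nOrient;  // Width of one bin.

  const double orientation = 0.6;                   // Example pixel orientation.
  const double magnitude = 2.0;                     // Example gradient magnitude.

  arma::vec hist(nOrient, arma::fill::zeros);

  const double o = orientation / oRange;            // Position in bin units.
  const size_t o1 = ((size_t) o) % nOrient;         // Lower of the two bins.
  const size_t o2 = (o1 + 1) % nOrient;             // Upper bin, wrapping around.
  const double w = o - std::floor(o);               // Fraction given to the upper bin.

  hist(o1) += magnitude * (1.0 - w);
  hist(o2) += magnitude * w;

  hist.print("orientation histogram:");
  return 0;
}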
- int n_rbin = (Magnitude.n_rows + downscale - 1) / downscale; - int n_cbin = (Magnitude.n_cols + downscale - 1) / downscale; - double o_range, o; - o_range = arma::datum::pi / n_orient; - - CubeType HistArr(n_rbin, n_cbin, n_orient); - HistArr.zeros(); - - int r, c, o1, o2; - for(size_t i = 0; i < Magnitude.n_rows; ++i) - { - for(size_t j = 0; j < Magnitude.n_cols; ++j) - { - r = i / downscale; - c = j / downscale; - - if( interp != 0) - { - o = Orientation(i, j) / o_range; - o1 = ((int) o) % n_orient; - o2 = (o1 + 1) % n_orient; - HistArr(r, c, o1) += Magnitude(i, j) * (1 + (int)o - o); - HistArr(r, c, o2) += Magnitude(i, j) * (o - (int) o); - } - else - { - o1 = (int) (Orientation(i, j) / o_range + 0.5) % n_orient; - HistArr(r, c, o1) += Magnitude(i, j); - } - } - } - - HistArr = HistArr / downscale; - - for (size_t i = 0; i < HistArr.n_slices; ++i) - HistArr.slice(i) = arma::square(HistArr.slice(i)); - - return HistArr; -} - -template -void StructuredForests:: -GetShrunkChannels(CubeType& InImage, CubeType& reg_ch, CubeType& ss_ch) -{ - CubeType luv = this->RGB2LUV(InImage); - - int shrink = this->options["shrink"]; - int n_orient = this->options["n_orient"]; - int grd_smooth_rad = this->options["grd_smooth_rad"]; - int grd_norm_rad = this->options["grd_norm_rad"]; - int num_channels = 13; - int rsize = luv.n_rows / shrink; - int csize = luv.n_cols / shrink; - CubeType channels(rsize, csize, num_channels); - - - int slice_idx = 0; - - for( slice_idx = 0; slice_idx < luv.n_slices; ++slice_idx) - channels.slice(slice_idx) - = this->bilinearInterpolation(luv.slice(slice_idx), (size_t)rsize, (size_t)csize); - - double scale = 0.5; - - while(scale <= 1.0) - { - CubeType img( (luv.n_rows * scale), - (luv.n_cols * scale), - luv.n_slices ); - - for( slice_idx = 0; slice_idx < luv.n_slices; ++slice_idx) - { - img.slice(slice_idx) = - this->bilinearInterpolation(luv.slice(slice_idx), - (luv.n_rows * scale), - (luv.n_cols * scale) ); - } - - CubeType OutImage = this->ConvTriangle(img, grd_smooth_rad); - - MatType Magnitude(InImage.n_rows, InImage.n_cols), - Orientation(InImage.n_rows, InImage.n_cols); - - this->Gradient(OutImage, Magnitude, Orientation); - - int downscale = std::max(1, (int) (shrink * scale)); - - CubeType Hist = this->Histogram(Magnitude, Orientation, - downscale, 0); - - channels.slice(slice_idx) = - bilinearInterpolation( Magnitude, rsize, csize); - slice_idx++; - for(size_t i = 0; i < InImage.n_slices; ++i) - channels.slice(i + slice_idx) = - bilinearInterpolation( Magnitude, rsize, csize); - slice_idx += 3; - scale += 0.5; - } - - //cout << "size of channels: " << arma::size(channels) << endl; - double reg_smooth_rad, ss_smooth_rad; - reg_smooth_rad = this->options["reg_smooth_rad"] / (double) shrink; - ss_smooth_rad = this->options["ss_smooth_rad"] / (double) shrink; - - - - - if (reg_smooth_rad > 1.0) - reg_ch = this->ConvTriangle(channels, (int) (std::round(reg_smooth_rad)) ); - else - reg_ch = this->ConvTriangle(channels, reg_smooth_rad); - - if (ss_smooth_rad > 1.0) - ss_ch = this->ConvTriangle(channels, (int) (std::round(ss_smooth_rad)) ); - else - ss_ch = this->ConvTriangle(channels, ss_smooth_rad); - -} - -template -CubeType StructuredForests:: -ViewAsWindows(CubeType& channels, arma::umat& loc) -{ - // 500 for pos_loc, and 500 for neg_loc. - // channels = 160, 240, 13. - CubeType features = CubeType(16, 16, 1000 * 13); - int patchSize = 16; - int p = patchSize / 2; - //increase the channel boundary to protect error against image boundaries. 
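ViewAsWindows() pads the channel cube so that every patch lookup stays in bounds. A standalone sketch of that padding trick, assuming plain Armadillo and simple zero padding (the method below uses reflective padding via CopyMakeBorder): after padding by the patch radius r, the p x p patch centred at an original location (x, y) becomes a single tube() call.

#include <armadillo>

int main()
{
  const size_t p = 8;                        // Patch size.
  const size_t r = p / 2;                    // Patch radius, also the pad width.
  arma::cube channels = arma::randu<arma::cube>(20, 30, 13);

  // Zero-pad every slice by r pixels on each side.
  arma::cube padded(channels.n_rows + 2 * r, channels.n_cols + 2 * r,
      channels.n_slices, arma::fill::zeros);
  padded.tube(r, r, r + channels.n_rows - 1, r + channels.n_cols - 1) = channels;

  // Location (x, y) in the original cube sits at (x + r, y + r) in the padded
  // cube, so the patch centred there spans padded rows x .. x + p - 1 and
  // padded columns y .. y + p - 1, across all channels at once.
  const size_t x = 5, y = 7;
  arma::cube patch = padded.tube(x, y, x + p - 1, y + p - 1);

  patch.slice(0).print("first channel of the patch:");
  return 0;
}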
- CubeType inc_ch = this->CopyMakeBorder(channels, p, p, p, p); - for (size_t i = 0, channel = 0; i < loc.n_rows; ++i) - { - int x = loc(i, 0); - int y = loc(i, 1); - - /*(x,y) in channels, is ((x+p), (y+p)) in inc_ch*/ - //cout << "(x,y) = " << x << " " << y << endl; - CubeType patch = inc_ch.tube((x + p) - p, (y + p) - p,\ - (x + p) + p - 1, (y + p) + p - 1); - // since each patch has 13 channel we have to increase the index by 13 - - //cout <<"patch size = " << arma::size(patch) << endl; - - features.slices(channel, channel + 12) = patch; - //cout << "sahi hai " << endl; - channel += 13; - - } - //cout << "successfully returned. . ." << endl; - return features; -} - -template -CubeType StructuredForests:: -Rearrange(CubeType& channels) -{ - //we do (16,16,13*1000) to 256, 1000, 13, in vectorized code. - CubeType ch = CubeType(256, 1000, 13); - for(size_t i = 0; i < 1000; i++) - { - //MatType m(256, 13); - for(size_t j = 0; j < 13; ++j) - { - int sl = (i * j) / 1000; - //cout << "(i,j) = " << i << ", " << j << endl; - ch.slice(sl).col(i) = arma::vectorise(channels.slice(i * j)); - } - } - return ch; -} - -// returns 256 * 1000 * 13 dimension features. -template -CubeType StructuredForests:: -GetRegFtr(CubeType& channels, arma::umat& loc) -{ - int shrink = this->options["shrink"]; - int p_size = this->options["p_size"] / shrink; - CubeType wind = this->ViewAsWindows(channels, loc); - return this->Rearrange(wind); -} - -template -CubeType StructuredForests:: -PDist(CubeType& features, arma::uvec& grid_pos) -{ - // size of DestArr: - // InImage.n_rows * (InImage.n_rows - 1)/2 * InImage.n_slices - //find nC2 differences, for locations in the grid_pos. - //python: input: (716, 256, 13) --->(716, 25, 13) ; output: (716, 300, 13). - //input features : 256,1000,13; output: 300, 1000, 13 - - CubeType output(300, 1000, 13); - for(size_t k = 0; k < features.n_slices; ++k) - { - size_t r_idx = 0; - for(size_t i = 0; i < grid_pos.n_elem; ++i) //loop length : 25 - { - for(size_t j = i + 1; j < grid_pos.n_elem; ++j) //loop length : 25 - { - output.slice(k).row(r_idx) = features.slice(k).row(grid_pos(i)) - - features.slice(k).row(grid_pos(j)); - ++r_idx; - } - } - } - return output; -} - -//returns 300,1000,13 dimension features. 
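PDist() above turns per-cell features into pairwise differences. A standalone sketch of the same idea on one small matrix, assuming plain Armadillo: n grid positions give n * (n - 1) / 2 unordered pairs, so the 25 self-similarity cells used elsewhere in this file produce the 300 rows mentioned in the comments.

#include <armadillo>

int main()
{
  const size_t n = 5;                            // Grid positions.
  arma::mat features = arma::randu<arma::mat>(n, 3);

  // One output row per unordered pair (i, j) with i < j.
  arma::mat diffs(n * (n - 1) / 2, features.n_cols);
  size_t row = 0;
  for (size_t i = 0; i < n; ++i)
    for (size_t j = i + 1; j < n; ++j)
      diffs.row(row++) = features.row(i) - features.row(j);

  diffs.print("pairwise differences:");
  return 0;
}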
-template -CubeType StructuredForests:: -GetSSFtr(CubeType& channels, arma::umat& loc) -{ - int shrink = this->options["shrink"]; - int p_size = this->options["p_size"] / shrink; - - //n_cell: number of self similarity cells - int n_cell = this->options["n_cell"]; - int half_cell_size = (int) round(p_size / (2.0 * n_cell)); - - arma::uvec g_pos(n_cell); - for(size_t i = 0; i < n_cell; ++i) - { - g_pos(i) = (int)round( (i + 1) * (p_size + 2 * half_cell_size \ - - 1) / (n_cell + 1.0) - half_cell_size); - } - arma::uvec grid_pos(n_cell * n_cell); - size_t k = 0; - for(size_t i = 0; i < n_cell; ++i) - { - for(size_t j = 0; j < n_cell; ++j) - { - grid_pos(k) = g_pos(i) * p_size + g_pos(j); - ++k; - } - } - - CubeType wind = this->ViewAsWindows(channels, loc); - CubeType re_wind = this->Rearrange(wind); - - return this->PDist(re_wind, grid_pos); -} - -template -arma::field StructuredForests:: -GetFeatures(MatType& image, arma::umat& loc) -{ - int row_size = this->options["row_size"]; - int col_size = this->options["col_size"]; - int bottom, right; - bottom = (4 - (image.n_rows / 3) % 4) % 4; - right = (4 - image.n_cols % 4) % 4; - //cout << "Botttom = " << bottom << " right = " << right << endl; - - CubeType InImage(image.n_rows / 3, image.n_cols, 3); - - for(size_t i = 0; i < 3; ++i) - { - InImage.slice(i) = image.submat(i * row_size, 0, \ - (i + 1) * row_size - 1, col_size - 1); - } - - CubeType OutImage = this->CopyMakeBorder(InImage, 0, 0, bottom, right); - - int num_channels = 13; - int shrink = this->options["shrink"]; - int rsize = OutImage.n_rows / shrink; - int csize = OutImage.n_cols / shrink; - - /* this part gives double free or corruption out error - when executed for a second time */ - CubeType reg_ch = CubeType(rsize, csize, num_channels); - CubeType ss_ch = CubeType(rsize, csize, num_channels); - this->GetShrunkChannels(InImage, reg_ch, ss_ch); - - loc = loc / shrink; - - CubeType reg_ftr = this->GetRegFtr(reg_ch, loc); - CubeType ss_ftr = this->GetSSFtr(ss_ch, loc); - arma::field F(2,1); - F(0,0) = reg_ftr; - F(1,0) = ss_ftr; - return F; - //delete reg_ch; - //free(reg_ch); - //free(ss_ch); -} - -template -void StructuredForests:: -PrepareData(MatType& InputData) -{ - int num_images = this->options["num_images"]; - int n_tree = this->options["n_tree"]; - int n_pos = this->options["n_pos"]; - int n_neg = this->options["n_neg"]; - double fraction = 0.25; - int p_size = this->options["p_size"]; - int g_size = this->options["g_size"]; - int shrink = this->options["shrink"]; - int row_size = this->options["row_size"]; - int col_size = this->options["col_size"]; - // p_rad = radius of image patches. - // g_rad = radius of ground truth patches. - int p_rad = p_size / 2, g_rad = g_size / 2; - - arma::vec FtrDim = this->GetFeatureDimension(); - int n_ftr_dim = FtrDim(0) + FtrDim(1); - int n_smp_ftr_dim = int(n_ftr_dim * fraction); - - for(size_t i = 0; i < n_tree; ++i) - { - //implement the logic for if data already exists. - MatType ftrs = arma::zeros(n_pos + n_neg, n_smp_ftr_dim); - - //effectively a 3d array. . . 
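The "effectively a 3d array" remark above refers to keeping a stack of g x g ground-truth patches inside an ordinary 2-D matrix, as the lbls matrix defined next does. A standalone sketch of that layout, assuming plain Armadillo and illustrative sizes: patch k occupies the row block [k * g, (k + 1) * g).

#include <armadillo>

int main()
{
  const size_t g = 4;                // Ground-truth patch size.
  const size_t numPatches = 3;

  // (numPatches * g) x g matrix holding numPatches stacked g x g patches.
  arma::mat labels(numPatches * g, g, arma::fill::zeros);

  // Write patch k into its row block.
  for (size_t k = 0; k < numPatches; ++k)
    labels.rows(k * g, (k + 1) * g - 1).fill((double) k);

  // Read one patch back out.
  arma::mat patch1 = labels.rows(1 * g, 2 * g - 1);
  patch1.print("patch 1:");
  return 0;
}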
- MatType lbls = arma::zeros( (n_pos + n_neg ) * g_size, g_size); - - - int loop_iter = num_images * 5; - for(size_t j = 0; j < loop_iter; j += 5) - { - MatType img, bnds, segs; - img = InputData.submat(j * row_size, 0, (j + 3) * row_size - 1, col_size - 1); - bnds = InputData.submat( (j + 3) * row_size, 0, \ - (j + 4) * row_size - 1, col_size - 1 ); - segs = InputData.submat( (j + 4) * row_size, 0, \ - (j + 5) * row_size - 1, col_size - 1 ); - - MatType mask = arma::zeros(row_size, col_size); - for(size_t b = 0; b < mask.n_cols; b = b + shrink) - for(size_t a = 0; a < mask.n_rows; a = a + shrink) - mask(a, b) = 1; - mask.col(p_rad - 1).fill(0); - mask.row( (mask.n_rows - 1) - (p_rad - 1) ).fill(0); - mask.submat(0, 0, mask.n_rows - 1, p_rad - 1).fill(0); - mask.submat(0, mask.n_cols - p_rad, mask.n_rows - 1, - mask.n_cols - 1).fill(0); - - // number of positive or negative patches per ground truth. - //int n_patches_per_gt = (int) (ceil( (float)n_pos / num_images )); - int n_patches_per_gt = 500; - //cout << "n_patches_per_gt = " << n_patches_per_gt << endl; - MatType dis = arma::sqrt( this->dt_image(bnds, 1) ); - MatType dis2 = dis; - //dis.transform( [](double val, const int& g_rad) { return (double)(val < g_rad); } ); - //dis2.transform( [](double val, const int& g_rad) { return (double)(val >= g_rad); } ); - //dis.elem( arma::find(dis >= g_rad) ).zeros(); - //dis2.elem( arma::find(dis < g_rad) ).zeros(); - - - arma::uvec pos_loc = arma::find( (dis < g_rad) % mask ); - arma::uvec neg_loc = arma::find( (dis >= g_rad) % mask ); - - pos_loc = arma::shuffle(pos_loc); - neg_loc = arma::shuffle(neg_loc); - - arma::umat loc(n_patches_per_gt * 2, 2); - //cout << "pos_loc size: " << arma::size(pos_loc) << " neg_loc size: " << arma::size(neg_loc) << endl; - //cout << "n_patches_per_gt = " << n_patches_per_gt << endl; - for(size_t i = 0; i < n_patches_per_gt; ++i) - { - loc.row(i) = arma::ind2sub(arma::size(dis.n_rows, dis.n_cols), pos_loc(i) ).t(); - //cout << "pos_loc: " << loc(i, 0) << ", " << loc(i, 1) << endl; - } - - for(size_t i = n_patches_per_gt; i < 2 * n_patches_per_gt; ++i) - { - loc.row(i) = arma::ind2sub(arma::size(dis.n_rows, dis.n_cols), neg_loc(i) ).t(); - //cout << "neg_loc: " << loc(i, 0) << ", " << loc(i, 1) << endl; - } - - // cout << "num patches = " << n_patches_per_gt << " num elements + = " << pos_loc.n_elem\ - // << " num elements - = " << neg_loc.n_elem << " dis.size " << dis.n_elem << endl; - - //Field F contains reg_ftr and ss_ftr. - arma::field F = this->GetFeatures(img, loc); - //randomly sample 70 values each from reg_ftr and ss_ftr. 
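A standalone sketch of the random feature subsampling mentioned in the comment above, assuming plain Armadillo: shuffle the index range once and keep the first k entries, which selects a fixed fraction of the feature dimensions without replacement.

#include <armadillo>

int main()
{
  const size_t numFeatures = 256;
  const size_t numSampled = 64;            // e.g. a 0.25 fraction.

  // 0, 1, ..., numFeatures - 1 in random order.
  arma::uvec indices(numFeatures);
  for (size_t i = 0; i < numFeatures; ++i)
    indices(i) = i;
  indices = arma::shuffle(indices);

  // Keep the first numSampled indices and pull out those feature rows.
  arma::uvec selected = indices.head(numSampled);
  arma::mat features = arma::randu<arma::mat>(numFeatures, 10);
  arma::mat subset = features.rows(selected);

  subset.print("sampled feature subset:");
  return 0;
}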
- /* - CubeType ftr(140, 1000, 13); - arma::uvec r = (0, 255, 256); - arma::uvec s = (0, 299, 300); - arma::uvec rs = r.shuffle(); - arma::uvec ss = s.shuffle(); - */ - CubeType lbl(g_size, g_size, 1000); - CubeType s(segs.n_rows, segs.n_cols, 1); - s.slice(0) = segs; - CubeType in_segs = this->CopyMakeBorder(s, g_rad, - g_rad, g_rad, g_rad); - for(size_t i = 0; i < loc.n_rows; ++i) - { - int x = loc(i, 0); int y = loc(i, 1); - //cout << "x, y = " << x << " " << y << endl; - lbl.slice(i) = in_segs.slice(0).submat((x + g_rad) - g_rad, (y + g_rad) - g_rad, - (x + g_rad) + g_rad - 1, (y + g_rad) + g_rad - 1); - } - } - } -} - - -} // namespace structured_tree -} // namespace mlpack -#endif - From 8907d5aa79a526f51292e23e07f9ce349048a662 Mon Sep 17 00:00:00 2001 From: nilayjain Date: Mon, 6 Jun 2016 21:09:53 +0000 Subject: [PATCH 07/82] backported sub2ind & ind2sub --- src/mlpack/core/arma_extend/fn_ind2sub.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mlpack/core/arma_extend/fn_ind2sub.hpp b/src/mlpack/core/arma_extend/fn_ind2sub.hpp index b4bbfe70777..7991b9dc584 100644 --- a/src/mlpack/core/arma_extend/fn_ind2sub.hpp +++ b/src/mlpack/core/arma_extend/fn_ind2sub.hpp @@ -1,5 +1,6 @@ - #if (ARMA_VERSION_MAJOR < 6 && ARMA_VERSION_MINOR < 399) + #if (ARMA_VERSION_MAJOR < 6 || \ + (ARMA_VERSION_MAJOR == 6 && ARMA_VERSION_MINOR < 399)) inline uvec ind2sub(const SizeMat& s, const uword i) From b8da5c67b34e2d133a59fc5b8781412d9d8f7187 Mon Sep 17 00:00:00 2001 From: Keon Kim Date: Wed, 8 Jun 2016 02:27:45 +0900 Subject: [PATCH 08/82] fix doc tutorial --- doc/tutorials/README.md | 48 +++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/doc/tutorials/README.md b/doc/tutorials/README.md index f4114ac40f1..c75f5ab10b4 100644 --- a/doc/tutorials/README.md +++ b/doc/tutorials/README.md @@ -1,20 +1,40 @@ ## Tutorials -Tutorials for mlpack can be found [here : mlpack tutorials](http://www.mlpack.org/tutorial.html). +Tutorials for mlpack can be found [here : mlpack tutorials](http://www.mlpack.org/tutorials.html). -### Method-specific tutorials -* [NeighborSearch tutorial (mlpack_knn / mlpack_kfn)](http://www.mlpack.org/doxygen.php?doc=nstutorial.html) -* [RangeSearch tutorial (mlpack_range_search)](http://www.mlpack.org/doxygen.php?doc=rstutorial.html) -* [LinearRegression tutorial (mlpack_linear_regression)](http://www.mlpack.org/doxygen.php?doc=lrtutorial.html) -* [Density Estimation Trees tutorial (mlpack_det)](http://www.mlpack.org/doxygen.php?doc=dettutorial.html) -* [Euclidean Minimum Spanning Trees tutorial (mlpack_emst)](http://www.mlpack.org/doxygen.php?doc=emst_tutorial.html) -* [K-Means tutorial (mlpack_kmeans)](http://www.mlpack.org/doxygen.php?doc=kmtutorial.html) -* [FastMKS tutorial (mlpack_fastmks)](http://www.mlpack.org/doxygen.php?doc=fmkstutorial.html) ### General mlpack tutorials -* [Building mlpack from source](http://www.mlpack.org/doxygen.php?doc=build.html) -* [mlpack input and output](http://www.mlpack.org/doxygen.php?doc=iodoc.html) -* [Matrices in mlpack](http://www.mlpack.org/doxygen.php?doc=matrices.html) -* [Simple sample mlpack programs](http://www.mlpack.org/doxygen.php?doc=sample.html) -* [mlpack timers](http://www.mlpack.org/doxygen.php?doc=timer.html) + +These tutorials introduce the basic concepts of working with mlpack, aimed at developers who want to use and contribute to mlpack but are not sure where to start. 
+ +* [Building mlpack from source](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=build.html) +* [File Formats in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=formatdoc.html) +* [Matrices in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=matrices.html) +* [mlpack input and output](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=iodoc.html) +* [mlpack timers](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=timer.html) +* [Simple sample mlpack programs](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=sample.html) + + +### Method-specific tutorials + +These tutorials introduce the various methods mlpack offers, aimed at users who want to get started quickly. These tutorials start with simple examples and progress to complex, extensible uses. + +* [NeighborSearch tutorial (mlpack_knn / mlpack_kfn)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=nstutorial.html) +* [LinearRegression tutorial (mlpack_linear_regression)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=lrtutorial.html) +* [RangeSearch tutorial (mlpack_range_search)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=rstutorial.html) +* [Density Estimation Trees tutorial (mlpack_det)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=dettutorial.html) +* [K-Means tutorial (mlpack_kmeans)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=kmtutorial.html) +* [FastMKS tutorial (mlpack_fastmks)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=fmkstutorial.html) +* [Euclidean Minimum Spanning Trees tutorial (mlpack_emst)](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=emst_tutorial.html) +* [Alternating Matrix Factorization Tutorial](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=amftutorial.html) +* [Collaborative Filtering Tutorial](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=cftutorial.html) + + +### Policy Class Documentation + +mlpack uses templates to achieve its genericity and flexibility. Some of the template types used by mlpack are common across multiple machine learning algorithms. The links below provide documentation for some of these common types. + +[The MetricType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=metrics.html) +[The KernelType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=kernels.html) +[The TreeType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=trees.html) \ No newline at end of file From 0d6d3af18dcabc5c5546726d3140795644a0c7eb Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 7 Jun 2016 19:32:35 +0200 Subject: [PATCH 09/82] Use appveyor cache (nuget and armadillo). 
--- .appveyor.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 20610299e37..3dff67faae6 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -18,8 +18,8 @@ build_script: - ps: cp C:\projects\mlpack\boost_random-vc140.1.60.0.0\lib\native\address-model-64\lib\*.* C:\projects\mlpack\boost_libs\ - ps: cp C:\projects\mlpack\boost_serialization-vc140.1.60.0.0\lib\native\address-model-64\lib\*.* C:\projects\mlpack\boost_libs\ - ps: cp C:\projects\mlpack\boost_unit_test_framework-vc140.1.60.0.0\lib\native\address-model-64\lib\*.* C:\projects\mlpack\boost_libs\ - - appveyor DownloadFile http://sourceforge.net/projects/arma/files/armadillo-6.500.5.tar.gz - - 7z x armadillo-6.500.5.tar.gz -so | 7z x -si -ttar > nul + - if not exist armadillo.tar.gz appveyor DownloadFile "http://sourceforge.net/projects/arma/files/armadillo-6.500.5.tar.gz" -FileName armadillo.tar.gz + - 7z x armadillo.tar.gz -so | 7z x -si -ttar > nul - cd armadillo-6.500.5 && mkdir build && cd build - cmake -G "Visual Studio 14 2015 Win64" -DBLAS_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DCMAKE_PREFIX:FILEPATH="%APPVEYOR_BUILD_FOLDER%/armadillo" -DBUILD_SHARED_LIBS=OFF .. - '"C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" "C:\projects\mlpack\armadillo-6.500.5\build\armadillo.sln" /m /verbosity:quiet /p:Configuration=Release;Platform=x64' @@ -40,6 +40,10 @@ notifications: on_build_failure: true on_build_status_changed: true +cache: + - packages -> **\packages.config + - armadillo.tar.gz -> appveyor.yaml + # All plans have maximum build job execution time of 60 minutes. But right, now # the machine takes 30 minutes to build the code and at least 50 minutes to run # all tests. From 45e8cd652c65974cb57d8bae54aea75496af1a4a Mon Sep 17 00:00:00 2001 From: Keon Kim Date: Wed, 8 Jun 2016 03:03:14 +0900 Subject: [PATCH 10/82] fix typo --- doc/tutorials/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/tutorials/README.md b/doc/tutorials/README.md index c75f5ab10b4..81a382b619b 100644 --- a/doc/tutorials/README.md +++ b/doc/tutorials/README.md @@ -35,6 +35,6 @@ These tutorials introduce the various methods mlpack offers, aimed at users who mlpack uses templates to achieve its genericity and flexibility. Some of the template types used by mlpack are common across multiple machine learning algorithms. The links below provide documentation for some of these common types. 
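The paragraph above is the crux of mlpack's policy-based design: any class that satisfies a policy's interface can be dropped into a template parameter. A minimal sketch of the pattern, assuming the mlpack::metric::EuclideanDistance and ManhattanDistance classes and their static Evaluate() method; the PairwiseSum function itself is only an illustration, not part of the library.

#include <mlpack/core.hpp>
#include <iostream>

// Any class providing a static Evaluate(a, b) can serve as the MetricType
// policy; the generic code below never names a concrete metric.
template<typename MetricType>
double PairwiseSum(const arma::mat& points)
{
  double sum = 0.0;
  for (size_t i = 0; i < points.n_cols; ++i)
    for (size_t j = i + 1; j < points.n_cols; ++j)
      sum += MetricType::Evaluate(points.col(i), points.col(j));
  return sum;
}

int main()
{
  arma::mat points = arma::randu<arma::mat>(3, 10);

  // The same generic function runs with different metric policies.
  std::cout << "Euclidean: "
      << PairwiseSum<mlpack::metric::EuclideanDistance>(points) << ", "
      << "Manhattan: "
      << PairwiseSum<mlpack::metric::ManhattanDistance>(points) << std::endl;
  return 0;
}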
-[The MetricType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=metrics.html) -[The KernelType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=kernels.html) -[The TreeType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=trees.html) \ No newline at end of file +* [The MetricType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=metrics.html) +* [The KernelType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=kernels.html) +* [The TreeType policy in mlpack](http://www.mlpack.org/docs/mlpack-git/doxygen.php?doc=trees.html) From 01e699cc1d142a6d5344e60465c4ba6ef2e4e95b Mon Sep 17 00:00:00 2001 From: nilayjain Date: Tue, 7 Jun 2016 20:00:53 +0000 Subject: [PATCH 11/82] added test for ind2sub and sub2ind --- src/mlpack/tests/ind2sub_test.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mlpack/tests/ind2sub_test.cpp b/src/mlpack/tests/ind2sub_test.cpp index 14baeba9bff..aaf1e78abd7 100644 --- a/src/mlpack/tests/ind2sub_test.cpp +++ b/src/mlpack/tests/ind2sub_test.cpp @@ -1,19 +1,23 @@ #include -//#include - #include #include "old_boost_test_definitions.hpp" BOOST_AUTO_TEST_SUITE(ind2sub_test); /** - * This tests handles the case wherein only one class exists in the input - * labels. It checks whether the only class supplied was the only class - * predicted. + * This test checks whether ind2sub and sub2ind are + * compiled successfully and that they function properly. */ BOOST_AUTO_TEST_CASE(ind2sub_test) { - arma::mat A = arma::randu(5,5); - arma::uvec u = arma::ind2sub(arma::size(A), 3); - u.print(); + arma::mat A = arma::randu(4,5); + size_t index = 13; + arma::uvec u = arma::ind2sub(arma::size(A), index); + + BOOST_REQUIRE_EQUAL(u(0), index % A.n_rows); + BOOST_REQUIRE_EQUAL(u(1), index / A.n_rows); + + index = arma::sub2ind(arma::size(A), u(0), u(1)); + BOOST_REQUIRE_EQUAL(index, u(0) + u(1) * A.n_rows); } BOOST_AUTO_TEST_SUITE_END(); + From 7e8abed7ba480e8b516502e4ef905b6422888433 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Tue, 7 Jun 2016 16:52:17 -0400 Subject: [PATCH 12/82] Minor style fixes for ind2sub() test. --- src/mlpack/tests/ind2sub_test.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mlpack/tests/ind2sub_test.cpp b/src/mlpack/tests/ind2sub_test.cpp index aaf1e78abd7..ef1014be0ce 100644 --- a/src/mlpack/tests/ind2sub_test.cpp +++ b/src/mlpack/tests/ind2sub_test.cpp @@ -1,7 +1,14 @@ +/** + * @file ind2sub_test.cpp + * @author Nilay Jain + * + * Test the backported Armadillo ind2sub() and sub2ind() functions. + */ #include #include #include "old_boost_test_definitions.hpp" -BOOST_AUTO_TEST_SUITE(ind2sub_test); + +BOOST_AUTO_TEST_SUITE(ind2subTest); /** * This test checks whether ind2sub and sub2ind are @@ -12,12 +19,12 @@ BOOST_AUTO_TEST_CASE(ind2sub_test) arma::mat A = arma::randu(4,5); size_t index = 13; arma::uvec u = arma::ind2sub(arma::size(A), index); - + BOOST_REQUIRE_EQUAL(u(0), index % A.n_rows); BOOST_REQUIRE_EQUAL(u(1), index / A.n_rows); index = arma::sub2ind(arma::size(A), u(0), u(1)); BOOST_REQUIRE_EQUAL(index, u(0) + u(1) * A.n_rows); } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); From 7bbd897de0f3fe6f35b6750209f7a75912393fd1 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Tue, 7 Jun 2016 17:01:20 -0400 Subject: [PATCH 13/82] Add new contributors. 
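The ind2sub test added above exercises the column-major index arithmetic that the backported functions rely on. A standalone sketch of that relationship, assuming plain Armadillo and the same 4 x 5 matrix the test uses: linear index i maps to row i % n_rows and column i / n_rows, and sub2ind is the inverse map.

#include <armadillo>
#include <cassert>

int main()
{
  arma::mat A = arma::randu<arma::mat>(4, 5);
  const arma::uword index = 13;

  // Column-major layout: linear index -> (row, column).
  const arma::uword row = index % A.n_rows;   // 13 % 4 = 1.
  const arma::uword col = index / A.n_rows;   // 13 / 4 = 3.

  // Both access paths hit the same element, and converting the subscripts
  // back recovers the linear index.
  assert(A(index) == A(row, col));
  assert(row + col * A.n_rows == index);

  return 0;
}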
--- COPYRIGHT.txt | 4 ++++ src/mlpack/core.hpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index d04af2c29cc..10a3ee2c6ef 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -55,6 +55,10 @@ Copyright: Copyright 2016, Palash Ahuja Copyright 2016, Yannis Mentekidis Copyright 2016, Ranjan Mondal + Copyright 2016, Mikhail Lozhnikov + Copyright 2016, Marcos Pividori + Copyright 2016, Keon Kim + Copyright 2016, Nilay Jain License: BSD-3-clause All rights reserved. . diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp index 9df29476922..c0cbeea1abe 100644 --- a/src/mlpack/core.hpp +++ b/src/mlpack/core.hpp @@ -189,6 +189,10 @@ * - Palash Ahuja * - Yannis Mentekidis * - Ranjan Mondal + * - Mikhail Lozhnikov + * - Marcos Pividori + * - Keon Kim + * - Nilay Jain */ // First, include all of the prerequisites. From 29fcf0af4219ecd706dbfa0c796447f01e390af3 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Tue, 7 Jun 2016 18:49:42 -0400 Subject: [PATCH 14/82] Try debugging symbols for AppVeyor build to see if it is faster. --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 3dff67faae6..9c9fcbbea79 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -24,7 +24,7 @@ build_script: - cmake -G "Visual Studio 14 2015 Win64" -DBLAS_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DCMAKE_PREFIX:FILEPATH="%APPVEYOR_BUILD_FOLDER%/armadillo" -DBUILD_SHARED_LIBS=OFF .. - '"C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" "C:\projects\mlpack\armadillo-6.500.5\build\armadillo.sln" /m /verbosity:quiet /p:Configuration=Release;Platform=x64' - cd C:\projects\mlpack && mkdir build && cd build - - cmake -G "Visual Studio 14 2015 Win64" -DBLAS_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DARMADILLO_INCLUDE_DIR="C:/projects/mlpack/armadillo-6.500.5/include" -DARMADILLO_LIBRARY:FILEPATH="C:\projects\mlpack\armadillo-6.500.5\build\Debug\armadillo.lib" -DBOOST_INCLUDEDIR:PATH="C:\projects\mlpack\boost.1.60.0.0\lib\native\include" -DBOOST_LIBRARYDIR:PATH="C:\projects\mlpack\boost_libs" -DDEBUG=OFF -DPROFILE=OFF .. + - cmake -G "Visual Studio 14 2015 Win64" -DBLAS_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DARMADILLO_INCLUDE_DIR="C:/projects/mlpack/armadillo-6.500.5/include" -DARMADILLO_LIBRARY:FILEPATH="C:\projects\mlpack\armadillo-6.500.5\build\Debug\armadillo.lib" -DBOOST_INCLUDEDIR:PATH="C:\projects\mlpack\boost.1.60.0.0\lib\native\include" -DBOOST_LIBRARYDIR:PATH="C:\projects\mlpack\boost_libs" -DDEBUG=ON -DPROFILE=ON .. 
- '"C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" "C:\projects\mlpack\build\mlpack.sln" /m /verbosity:normal /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" /nologo /p:BuildInParallel=true /p:Configuration=Release;Platform=x64' - 7z a mlpack-windows-no-libs.zip "%APPVEYOR_BUILD_FOLDER%\build\Release\*.exe" - 7z a mlpack-windows.zip "%APPVEYOR_BUILD_FOLDER%\build\Release\*.*" "%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/*.*" From 055775d2be8a6516434de02dfac91affd2f54947 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 6 Nov 2016 23:20:43 +0100 Subject: [PATCH 15/82] Remove the RMVA model. --- src/mlpack/methods/rmva/CMakeLists.txt | 17 - src/mlpack/methods/rmva/rmva.hpp | 963 ------------------------- src/mlpack/methods/rmva/rmva_impl.hpp | 740 ------------------- src/mlpack/methods/rmva/rmva_main.cpp | 285 -------- 4 files changed, 2005 deletions(-) delete mode 100644 src/mlpack/methods/rmva/CMakeLists.txt delete mode 100644 src/mlpack/methods/rmva/rmva.hpp delete mode 100644 src/mlpack/methods/rmva/rmva_impl.hpp delete mode 100644 src/mlpack/methods/rmva/rmva_main.cpp diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt deleted file mode 100644 index ced53a30ea7..00000000000 --- a/src/mlpack/methods/rmva/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - rmva.hpp - rmva_impl.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) - -add_cli_executable(rmva) diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp deleted file mode 100644 index 5f4f03112c8..00000000000 --- a/src/mlpack/methods/rmva/rmva.hpp +++ /dev/null @@ -1,963 +0,0 @@ -/** - * @file rmva.hpp - * @author Marcus Edel - * - * Definition of the RecurrentNeuralAttention class, which implements the - * Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP -#define __MLPACK_METHODS_RMVA_RMVA_HPP - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * This class implements the Recurrent Model for Visual Attention, using a - * variety of possible layer implementations. - * - * For more information, see the following paper. - * - * @code - * @article{MnihHGK14, - * title={Recurrent Models of Visual Attention}, - * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * journal={CoRR}, - * volume={abs/1406.6247}, - * year={2014} - * } - * @endcode - * - * @tparam LocatorType Type of locator network. - * @tparam LocationSensorType Type of location sensor network. - * @tparam GlimpseSensorType Type of glimpse sensor network. - * @tparam GlimpseType Type of glimpse network. - * @tparam StartType Type of start network. - * @tparam FeedbackType Type of feedback network. 
- * @tparam TransferType Type of transfer network. - * @tparam ClassifierType Type of classifier network. - * @tparam RewardPredictorType Type of reward predictor network. - * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam MatType Matrix type (arma::mat or arma::sp_mat). - */ -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType = RandomInitialization, - typename MatType = arma::mat -> -class RecurrentNeuralAttention -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType>; - - /** - * Construct the RecurrentNeuralAttention object, which will construct the - * recurrent model for visual attentionh using the specified networks. - * - * @param locator The locator network. - * @param locationSensor The location sensor network. - * @param glimpseSensor The glimpse sensor network. - * @param glimpse The glimpse network. - * @param start The start network. - * @param feedback The feedback network. - * @param transfer The transfer network. - * @param classifier The classifier network. - * @param rewardPredictor The reward predictor network. - * @param nStep Number of steps for the back-propagate through time. - * @param initializeRule Rule used to initialize the weight matrix. - */ - template - RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule = - InitializationRuleType()); - /** - * Train the network on the given input data using the given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::mat& predictors, arma::mat& responses); - - /** - * Evaluate the network with the given parameters. This function is usually - * called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. 
- * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the network with the given parameters, and with - * respect to only one point in the dataset. This is useful for - * optimizers such as SGD, which require a separable objective function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - //! Return the number of steps to back-propagate through time. - const size_t& Rho() const { return nStep; } - //! Modify the number of steps to back-propagate through time. - size_t& Rho() { return nStep; } - - //! Return the current location. - const arma::mat& Location(); - - //! Serialize the model. - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const InputType& input, OutputType& output) - { - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Sample an initial starting actions by forwarding zeros through the - // locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - - // Location sensor forward pass. - Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - } - else - { - // Feedback forward pass. 
- Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - output = std::get::value - 1>( - classifier).OutputParameter(); - } - - /** - * Update the layer reward for all layer that implement the Rewards function. - */ - template - typename std::enable_if::type - ResetReward(const double reward, std::tuple& network) - { - SetReward(reward, std::get(network)); - ResetReward(reward, network); - } - - template - typename std::enable_if::type - ResetReward(const double /* reward */, std::tuple& /* network */) - { - } - - template - typename std::enable_if< - HasRewardCheck::value, void>::type - SetReward(const double reward, T& layer) - { - layer.Reward() = reward; - } - - template - typename std::enable_if< - !HasRewardCheck::value, void>::type - SetReward(const double /* reward */, T& /* layer */) - { - /* Nothing to do here */ - } - - /** - * Reset the network by clearing the delta and by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* layer */) { /* Nothing to do here */ } - - /** - * Reset the location by updating the location for all layer that implement - * the Location function. - */ - template - typename std::enable_if::type - ResetLocation(const arma::mat& /* location */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - typename std::enable_if::type - ResetLocation(const arma::mat& location, std::tuple& network) - { - SetLocation(std::get(network), location); - ResetLocation(location, network); - } - - template - typename std::enable_if< - HasLocationCheck::value, void>::type - SetLocation(T& layer, const arma::mat& location) - { - layer.Location(location); - } - - template - typename std::enable_if< - !HasLocationCheck::value, void>::type - SetLocation(T& /* layer */, const arma::mat& /* location */) - { - // Nothing to do here. - } - - /** - * Save the network layer activations. - */ - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activations, - std::tuple& network, - size_t& activationCounter) - { - Save(I, activations, std::get(network), - std::get(network).InputParameter()); - - activationCounter++; - SaveActivations(activations, network, activationCounter); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activations */, - std::tuple& /* network */, - size_t& /* activationCounter */) - { - // Nothing to do here. 
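SetReward(), ResetDeterministic(), and the helpers that follow all use the same compile-time dispatch: a detection trait (HasRewardCheck, HasDeterministicCheck, HasLocationCheck) combined with std::enable_if selects an overload that either touches the member in question or silently does nothing. Below is a standalone sketch of that idiom with a hand-rolled HasReward trait; the trait is a simplified stand-in, not mlpack's actual implementation.

#include <iostream>
#include <type_traits>
#include <utility>

// Detection trait: does T have a member function Reward()?
template<typename T>
class HasReward
{
 private:
  template<typename U>
  static auto Check(int)
      -> decltype(std::declval<U&>().Reward(), std::true_type());

  template<typename U>
  static std::false_type Check(...);

 public:
  static const bool value = decltype(Check<T>(0))::value;
};

struct RewardLayer
{
  double reward = 0.0;
  double& Reward() { return reward; }
};

struct PlainLayer { };

// Overload chosen when the layer exposes Reward().
template<typename T>
typename std::enable_if<HasReward<T>::value, void>::type
SetReward(const double reward, T& layer)
{
  layer.Reward() = reward;
}

// Overload chosen otherwise; it intentionally does nothing.
template<typename T>
typename std::enable_if<!HasReward<T>::value, void>::type
SetReward(const double /* reward */, T& /* layer */)
{
}

int main()
{
  RewardLayer a;
  PlainLayer b;
  SetReward(1.0, a);   // Sets a.reward.
  SetReward(1.0, b);   // Compiles and is a no-op.
  std::cout << a.Reward() << std::endl;
  return 0;
}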
- } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.RecurrentParameter())); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.OutputParameter())); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - std::get(network), std::get(network).OutputParameter()); - - SaveActivations( - activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - void Save(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* unused */) - { - activationsA.push_back(new DataTypeA(layer.OutputParameter())); - dataTypeACounter++; - } - - template - void Save(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* unused */) - { - activationsB.push_back(new DataTypeB(layer.OutputParameter())); - dataTypeBCounter++; - } - - /** - * Load the network layer activations. - */ - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activations */, - size_t& /* activationCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activations, - size_t& activationCounter, - std::tuple& network) - { - Load(--activationCounter, activations, - std::get(network), - std::get(network).InputParameter()); - - LoadActivations(input, activations, - activationCounter, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. 
- */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.RecurrentParameter() = activations[layerNumber]; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.OutputParameter() = activations[layerNumber]; - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Load(activationsA, - activationsB, - dataTypeACounter, - dataTypeBCounter, - std::get(network), - std::get(network).OutputParameter()); - - LoadActivations( - input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - void Load(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* output */) - { - layer.OutputParameter() = activationsA[--dataTypeACounter]; - } - - template - void Load(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* output */) - { - layer.OutputParameter() = activationsB[--dataTypeBCounter]; - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& t) - { - std::get(t).InputParameter() = input; - std::get(t).Forward(std::get(t).InputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& t) - { - std::get(t).Forward(std::get(t).OutputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - /** - * Run a single iteration of the backward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. 
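Forward()/ForwardTail() above, and the Backward()/BackwardTail() pair that follows, walk the std::tuple of layers by compile-time recursion: the layer index is a template parameter, the recursive case handles layer I and recurses on I + 1, and an enable_if-selected base case stops at sizeof...(Tp). A standalone sketch of that idiom with trivial stand-in layers (not mlpack types).

#include <cstddef>
#include <iostream>
#include <tuple>
#include <type_traits>

struct Scale  { double Forward(const double x) { return 2.0 * x; } };
struct Shift  { double Forward(const double x) { return x + 1.0; } };
struct Square { double Forward(const double x) { return x * x; } };

// Base case: past the last layer, return the accumulated value.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<I == sizeof...(Tp), double>::type
ForwardAll(std::tuple<Tp...>& /* layers */, const double input)
{
  return input;
}

// Recursive case: apply layer I, then recurse on the remaining layers.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<(I < sizeof...(Tp)), double>::type
ForwardAll(std::tuple<Tp...>& layers, const double input)
{
  return ForwardAll<I + 1, Tp...>(layers, std::get<I>(layers).Forward(input));
}

int main()
{
  std::tuple<Scale, Shift, Square> network;
  // ((3 * 2) + 1)^2 = 49.
  std::cout << ForwardAll(network, 3.0) << std::endl;
  return 0;
}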
- */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - } - - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - - BackwardTail(error, t); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* error */, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - - BackwardTail(error, t); - } - - /** - * Link the calculated activation with the correct layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template - void UpdateGradients(const InputType& input, - const ErrorType& error, - std::tuple& network) - { - Update(std::get<0>(network), - input, - std::get<1>(network).Delta(), - std::get<1>(network).OutputParameter()); - - UpdateGradients<1, ErrorType, Tp...>(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta(), - std::get(network).OutputParameter()); - - UpdateGradients(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - error, - std::get(network).OutputParameter()); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(LayerType& layer, - const InputType& input, - const ErrorType& error, - GradientType& /* gradient */) - { - layer.Gradient(input, error, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(LayerType& /* layer */, - const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - // Nothing to do here - } - - //! The locator network. - LocatorType locator; - - //! The location sensor network. - LocationSensorType locationSensor; - - //! The glimpse sensor network. - GlimpseSensorType glimpseSensor; - - //! The glimpse network. - GlimpseType glimpse; - - //! The start network. - StartType start; - - //! The feedback network. - FeedbackType feedback; - - //! The transfer network. - TransferType transfer; - - //! The classifier network. - ClassifierType classifier; - - //! The reward predictor network. - RewardPredictorType rewardPredictor; - - //! The number of steps for the back-propagate through time. - size_t nStep; - - //! Locally stored network input size. - size_t inputSize; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! The index of the current step. - size_t step; - - //! 
The activation storage we are using to perform the feed backward pass for - //! the glimpse network. - boost::ptr_vector glimpseActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator network. - boost::ptr_vector locatorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the feedback network. - boost::ptr_vector feedbackActivations; - - //! The activation storage we are using to save the feedback network input. - boost::ptr_vector feedbackActivationsInput; - - //! The activation storage we are using to perform the feed backward pass for - //! the transfer network. - boost::ptr_vector transferActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the location sensor network. - boost::ptr_vector locationSensorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the glimpse sensor network. - boost::ptr_vector glimpseSensorMatActivations; - boost::ptr_vector glimpseSensorCubeActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator input. - boost::ptr_vector locatorInput; - - //! The storage we are using to save the location. - boost::ptr_vector location; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatCounter; - size_t glimpseSensorCubeCounter; - - //! The current number of activations in the glimpse network. - size_t glimpseActivationsCounter; - - //! The current number of activations in the glimpse start network. - size_t startActivationsCounter; - - //! The current number of activations in the feedback network. - size_t feedbackActivationsCounter; - - //! The current number of activations in the transfer network. - size_t transferActivationsCounter; - - //! The current number of activations in the locator network. - size_t locatorActivationsCounter; - - //! The current number of activations in the location sensor network. - size_t locationSensorActivationsCounter; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatActivationsCounter; - size_t glimpseSensorCubeActivationsCounter; - - //! The current number of location for the location storage. - size_t locationCounter; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::mat predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Storage the merge the reward input. - arma::field rewardInput; - - //! The current input. - arma::cube input; - - //! The current target. - arma::mat target; - - //! Locally stored performance functions. - NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction; - VRClassRewardLayer<> vRClassRewardFunction; - - //! Locally stored size of the locator network. - size_t locatorSize; - - //! Locally stored size of the location sensor network. - size_t locationSensorSize; - - //! Locally stored size of the glimpse sensor network. - size_t glimpseSensorSize; - - //! Locally stored size of the glimpse network. - size_t glimpseSize; - - //! Locally stored size of the start network. - size_t startSize; - - //! Locally stored size of the feedback network. - size_t feedbackSize; - - //! Locally stored size of the transfer network. - size_t transferSize; - - //! 
Locally stored size of the classifier network. - size_t classifierSize; - - //! Locally stored size of the reward predictor network. - size_t rewardPredictorSize; - - //! Locally stored recurrent gradient. - arma::mat recurrentGradient; - - //! Locally stored action error. - arma::mat actionError; - - //! Locally stored current location. - arma::mat evaluationLocation; -}; // class RecurrentNeuralAttention - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "rmva_impl.hpp" - -#endif diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp deleted file mode 100644 index cfb310b3993..00000000000 --- a/src/mlpack/methods/rmva/rmva_impl.hpp +++ /dev/null @@ -1,740 +0,0 @@ -/** - * @file rmva_impl.hpp - * @author Marcus Edel - * - * Implementation of the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP -#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP - -// In case it hasn't been included yet. -#include "rmva.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template< - typename TypeLocator, - typename TypeLocationSensor, - typename TypeGlimpseSensor, - typename TypeGlimpse, - typename TypeStart, - typename TypeFeedback, - typename TypeTransfer, - typename TypeClassifier, - typename TypeRewardPredictor -> -RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule) : - locator(std::forward(locator)), - locationSensor(std::forward(locationSensor)), - glimpseSensor(std::forward(glimpseSensor)), - glimpse(std::forward(glimpse)), - start(std::forward(start)), - feedback(std::forward(feedback)), - transfer(std::forward(transfer)), - classifier(std::forward(classifier)), - rewardPredictor(std::forward(rewardPredictor)), - nStep(nStep), - inputSize(0) -{ - // Set the network size. 
- locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - initializeRule.Initialize(parameter, locatorSize + locationSensorSize + glimpseSensorSize + - glimpseSize + feedbackSize + transferSize + classifierSize + rewardPredictorSize + startSize, 1); - - // Set the network weights. - NetworkWeights(initializeRule, parameter, this->locator); - NetworkWeights(initializeRule, parameter, this->locationSensor, locatorSize); - NetworkWeights(initializeRule, parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(initializeRule, parameter, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkWeights(initializeRule, parameter, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkWeights(initializeRule, parameter, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(initializeRule, parameter, this->classifier, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize); - NetworkWeights(initializeRule, parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(initializeRule, parameter, this->start, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize + rewardPredictorSize); - - rewardInput = arma::field(2, 1); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template class OptimizerType> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Predict(arma::mat& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - SinglePredict(arma::cube(predictors.colptr(0), 28, 28, 1), responsesTemp); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < predictors.n_cols; i++) - { - SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp); - responses.col(i) = responsesTemp.col(0); - } -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -double RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - input = arma::cube(predictors.colptr(i), 28, 28, 1); - target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - glimpseSensorMatCounter = 0; - glimpseSensorCubeCounter = 0; - glimpseActivationsCounter = 0; - locatorActivationsCounter = 0; - locationSensorActivationsCounter = 0; - glimpseSensorMatActivationsCounter = 0; - glimpseSensorCubeActivationsCounter = 0; - locationCounter = 0; - feedbackActivationsCounter = 0; - transferActivationsCounter = 0; - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Reset activation storage. - glimpseActivations.clear(); - locatorActivations.clear(); - locationSensorActivations.clear(); - glimpseSensorMatActivations.clear(); - glimpseSensorCubeActivations.clear(); - feedbackActivations.clear(); - transferActivations.clear(); - locatorInput.clear(); - location.clear(); - feedbackActivationsInput.clear(); - - // Sample an initial starting actions by forwarding zeros through the locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - SaveActivations(locatorActivations, locator, locatorActivationsCounter); - - // Location sensor forward pass. 
- Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - SaveActivations(locationSensorActivations, locationSensor, - locationSensorActivationsCounter); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Save the location for the backward path. - location.push_back(new arma::mat(std::get::value - 1>(locator).OutputParameter())); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations, - glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - SaveActivations(feedbackActivations, feedback, - feedbackActivationsCounter); - - feedbackActivationsInput.push_back(new arma::mat( - std::get::value - 1>( - transfer).OutputParameter().memptr(), - std::get::value - 1>( - transfer).OutputParameter().n_rows, - std::get::value - 1>( - transfer).OutputParameter().n_cols)); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - // Reward predictor forward pass. - Forward(std::get::value - 1>( - classifier).OutputParameter(), rewardPredictor); - - double performanceError = negativeLogLikelihoodFunction.Forward( - std::get::value - 1>( - classifier).OutputParameter(), target); - - // Create the input for the vRClassRewardFunction function. - // For which we use the output from the classifier and the rewardPredictor. 
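The forward loop above can be read independently of the layer-tuple machinery: at every step the location-sensor and glimpse-sensor activations are concatenated, pushed through the glimpse network, and added to the feedback of the previous hidden state before the transfer nonlinearity produces the new hidden state. A minimal Armadillo sketch of that update, assuming the sub-networks have been collapsed into single weight matrices Wg and Wf (hypothetical simplifications) and a ReLU transfer, as configured in rmva_main.cpp:

#include <armadillo>

// One step of the recurrence, written with plain matrices instead of the
// layer tuples; Wg stands in for the glimpse network and Wf for the feedback
// network (both hypothetical stand-ins for the real sub-networks).
arma::vec RecurrentStep(const arma::vec& locationFeature,
                        const arma::vec& glimpseFeature,
                        const arma::vec& previousHidden,
                        const arma::mat& Wg,
                        const arma::mat& Wf)
{
  // Concatenate the two sensor activations (arma::join_cols above).
  arma::vec concat = arma::join_cols(locationFeature, glimpseFeature);

  // Glimpse output plus the feedback of the previous hidden state.
  arma::vec hidden = Wg * concat + Wf * previousHidden;

  // Transfer network: a ReLU in the accompanying rmva_main.cpp setup.
  hidden.transform([](double v) { return v > 0.0 ? v : 0.0; });
  return hidden;
}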
- rewardInput(0, 0) = std::get::value - 1>( - classifier).OutputParameter(); - rewardInput(1, 0) = std::get::value - 1>( - rewardPredictor).OutputParameter(); - - performanceError += vRClassRewardFunction.Forward(rewardInput, target); - - return performanceError; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - // Reset the gradient. - if (gradient.is_empty()) - { - gradient = arma::zeros(parameter.n_rows, parameter.n_cols); - } - else - { - gradient.zeros(); - } - - // Reset the recurrent gradient. - if (recurrentGradient.is_empty()) - { - recurrentGradient = arma::zeros(parameter.n_rows, - parameter.n_cols); - - actionError = arma::zeros( - std::get::value - 1>( - locator).OutputParameter().n_rows, - std::get::value - 1>( - locator).OutputParameter().n_cols); - } - else - { - recurrentGradient.zeros(); - } - - // Set the recurrent gradient. - NetworkGradients(recurrentGradient, this->locator); - NetworkGradients(recurrentGradient, this->locationSensor, locatorSize); - NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkGradients(recurrentGradient, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkGradients(recurrentGradient, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkGradients(recurrentGradient, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - - // Set the gradient. - NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkGradients(gradient, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkGradients(gradient, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - - // Negative log likelihood backward pass. - negativeLogLikelihoodFunction.Backward(std::get::value - 1>(classifier).OutputParameter(), target, - negativeLogLikelihoodFunction.OutputParameter()); - - const double reward = vRClassRewardFunction.Backward(rewardInput, target, - vRClassRewardFunction.OutputParameter()); - - // Propogate reward through all modules. - ResetReward(reward, locator); - ResetReward(reward, locationSensor); - ResetReward(reward, glimpseSensor); - ResetReward(reward, glimpse); - ResetReward(reward, classifier); - - // RewardPredictor backward pass. - Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor); - - arma::mat classifierError = - negativeLogLikelihoodFunction.OutputParameter() + - vRClassRewardFunction.OutputParameter()(0, 0) + - std::get<0>(rewardPredictor).Delta(); - - // Classifier backward pass. 
- Backward(classifierError, classifier); - - // Set the initial recurrent error for the first backward step. - arma::mat recurrentError = std::get<0>(classifier).Delta(); - - for (step = nStep - 1; nStep >= 0; step--) - { - // Load the locator activations. - LoadActivations(locatorInput[step], locatorActivations, - locatorActivationsCounter, locator); - - // Load the location sensor activations. - LoadActivations(std::get::value - 1>( - locator).OutputParameter(), locationSensorActivations, - locationSensorActivationsCounter, locationSensor); - - // Load the glimpse sensor activations. - LoadActivations(input, glimpseSensorMatActivations, - glimpseSensorCubeActivations, glimpseSensorMatCounter, - glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location and glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Load the glimpse activations. - LoadActivations(concatLayerOutput, glimpseActivations, - glimpseActivationsCounter, glimpse); - - - if (step == 0) - { - // Load the transfer activations. - LoadActivations(std::get::value - 1>( - start).OutputParameter(), transferActivations, - transferActivationsCounter, transfer); - } - else - { - // Load the feedback activations. - LoadActivations(std::get::value - 1>( - transfer).OutputParameter(), feedbackActivations, - feedbackActivationsCounter, feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Load the transfer activations. - LoadActivations(feedbackLayerOutput, transferActivations, - transferActivationsCounter, transfer); - } - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(location[step], glimpseSensor); - - // Locator backward pass. - Backward(actionError, locator); - - // Transfer backward pass. - Backward(recurrentError, transfer); - - // glimpse network - Backward(std::get<0>(transfer).Delta(), glimpse); - - // Split up the error of the concat layer. - arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat( - 0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0); - arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat( - std::get<0>(glimpse).Delta().n_elem / 2, 0, - std::get<0>(glimpse).Delta().n_elem - 1, 0); - - // Location sensor backward pass. - Backward(locationSensorError, locationSensor); - - // Glimpse sensor backward pass. - Backward(glimpseSensorError, glimpseSensor); - - if (step != 0) - { - // Feedback backward pass. - Backward(std::get<0>(transfer).Delta(), feedback); - } - - // Update the recurrent network gradients. - UpdateGradients(std::get<0>(locationSensor).Delta(), locator); - UpdateGradients(std::get<0>(transfer).Delta(), glimpse); - UpdateGradients(std::get<0>(transfer).Delta(), locationSensor); - UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor); - - // Feedback module. - if (step != 0) - { - UpdateGradients(feedbackActivationsInput[step - 1], - std::get<0>(transfer).Delta(), feedback); - } - else - { - // Set the feedback gradient to zero. 
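The split of the glimpse network's input delta above simply undoes the forward concatenation: the first half of the error belongs to the location sensor, the second half to the glimpse sensor. A self-contained sketch of that bookkeeping with a plain column vector:

#include <armadillo>

// Divide the delta of the concatenated input back into the two sensor
// errors; the ordering follows the forward arma::join_cols call.
void SplitConcatDelta(const arma::vec& concatDelta,
                      arma::vec& locationSensorError,
                      arma::vec& glimpseSensorError)
{
  const arma::uword half = concatDelta.n_elem / 2;

  // First half: error of the location sensor output.
  locationSensorError = concatDelta.subvec(0, half - 1);

  // Second half: error of the glimpse sensor output.
  glimpseSensorError = concatDelta.subvec(half, concatDelta.n_elem - 1);
}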
- recurrentGradient.submat(locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros(); - - UpdateGradients(std::get<0>(transfer).Delta(), start); - } - - // Update the overall recurrent gradient. - gradient += recurrentGradient; - - if (step != 0) - { - // Update the recurrent error for the next backward step. - recurrentError = std::get<0>(locator).Delta() + - std::get<0>(feedback).Delta(); - } - else - { - break; - } - } - - // Reward predictor gradient update. - UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0), - rewardPredictor); - - // Classifier gradient update. - UpdateGradients(std::get<1>(classifier).Delta(), classifier); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -const arma::mat& RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Location() -{ - if (!location.empty()) - { - evaluationLocation = arma::mat(location[0].n_elem, location.size()); - - for (size_t i = 0; i < location.size(); i++) - { - evaluationLocation.col(i) = arma::vectorise(location[i]); - } - } - - return evaluationLocation; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(inputSize, "inputSize"); - ar & data::CreateNVP(nStep, "nStep"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - // Set the netork size. - locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - // Set the network weights. 
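Loading a model only restores the flat parameter matrix; every sub-network is then re-bound to its slice of that matrix by cumulative offset, exactly as in the constructor. A compact sketch of the binding pattern for two plain weight matrices of known shape, following the LayerWeights convention used in this file (the real helpers generalize it over the layer tuples):

#include <armadillo>
#include <cstddef>

// Bind two weight matrices to consecutive blocks of one flat parameter
// column, using cumulative offsets as the NetworkWeights helpers do.
void BindWeights(arma::mat& parameter, arma::mat& w1, arma::mat& w2)
{
  size_t offset = 0;

  // First block starts at offset 0.
  w1 = arma::mat(parameter.memptr() + offset,
                 w1.n_rows, w1.n_cols, false, false);
  offset += w1.n_elem;

  // Second block starts right after the first.
  w2 = arma::mat(parameter.memptr() + offset,
                 w2.n_rows, w2.n_cols, false, false);
}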
- NetworkWeights(parameter, this->locator); - NetworkWeights(parameter, this->locationSensor, locatorSize); - NetworkWeights(parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(parameter, this->glimpse, locatorSize + locationSensorSize + - glimpseSensorSize); - NetworkWeights(parameter, this->feedback, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize); - NetworkWeights(parameter, this->transfer, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(parameter, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkWeights(parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(parameter, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp deleted file mode 100644 index a3483d63de6..00000000000 --- a/src/mlpack/methods/rmva/rmva_main.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * @file rmva_main.cpp - * @author Marcus Edel - * - * Main executable for the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include "rmva.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace mlpack; -using namespace mlpack::ann; -using namespace mlpack::optimization; -using namespace std; - -PROGRAM_INFO("Recurrent Model for Visual Attention", - "This program trains the Recurrent Model for Visual Attention on the given " - "labeled training set, or loads a model from the given model file, and then" - " may use that trained model to classify the points in a given test set." - "\n\n" - "Labels are expected to be passed in separately as their own file " - "(--labels_file). If training is not desired, a pre-existing model can be " - "loaded with the --input_model_file (-m) option." - "\n\n" - "If classifying a test set is desired, the test set should be in the file " - "specified with the --test_file (-T) option, and the classifications will " - "be saved to the file specified with the --output_file (-o) option. If " - "saving a trained model is desired, the --output_model_file (-M) option " - "should be given."); - -// Model loading/saving. -PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for " - "Visual Attention.", "m", ""); -PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for" - " Visual Attention to.", "M"); - -// Training parameters. 
-PARAM_MATRIX_IN("training", "Matrix containing the training set.", "t"); -PARAM_MATRIX_IN("labels", "Matrix containing labels for the training set.", - "l"); - -PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or " - "'lbfgs'.", "O", "minibatch-sgd"); - -PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp" - " (0 indicates no limit).", "n", 500000); -PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or " - "RMSProp.", "e", 1e-7); - -PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent " - "(alpha),", "a", 0.01); -PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are " - "visited for SGD or mini-batch SGD.", "L"); -PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20); - -PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r", - 7); - -PARAM_INT_IN("classes", "The number of classes.", "c", 10); - -PARAM_INT_IN("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0); - -// Test parameters. -PARAM_MATRIX_IN("test", "Matrix containing the test set.", "T"); -PARAM_MATRIX_OUT("output", "The matrix in which the predicted labels for the " - "test set will be written.", "o"); - -int main(int argc, char** argv) -{ - CLI::ParseCommandLine(argc, argv); - - // Check input parameters. - if (CLI::HasParam("training") && CLI::HasParam("input_model_file")) - Log::Fatal << "Cannot specify both --training_file (-t) and " - << "--input_model_file (-m)!" << endl; - - if (!CLI::HasParam("training") && !CLI::HasParam("input_model_file")) - Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are" - << " specified!" << endl; - - if (!CLI::HasParam("training") && CLI::HasParam("labels")) - Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is " - << "not specified." << endl; - - if (!CLI::HasParam("output") && !CLI::HasParam("output_model_file")) - Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) " - << "specified; no output will be saved!" << endl; - - if (CLI::HasParam("output") && !CLI::HasParam("test")) - Log::Warn << "--output_file (-o) ignored because no test file specified " - << "with --test_file (-T)." << endl; - - if (!CLI::HasParam("output") && CLI::HasParam("test")) - Log::Warn << "--test_file (-T) specified, but classification results will " - << "not be saved because --output_file (-o) is not specified." << endl; - - const string optimizerType = CLI::GetParam("optimizer"); - - if ((optimizerType != "sgd") && (optimizerType != "lbfgs") && - (optimizerType != "minibatch-sgd")) - { - Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be " - << "'sgd', 'minibatch-sgd', or 'lbfgs'!" 
<< endl; - } - - const double stepSize = CLI::GetParam("step_size"); - const size_t maxIterations = (size_t) CLI::GetParam("max_iterations"); - const double tolerance = CLI::GetParam("tolerance"); - const bool shuffle = !CLI::HasParam("linear_scan"); - const size_t batchSize = (size_t) CLI::GetParam("batch_size"); - const size_t rho = (size_t) CLI::GetParam("rho"); - const size_t numClasses = (size_t) CLI::GetParam("classes"); - - const size_t hiddenSize = 256; - const double unitPixels = 13; - const double locatorStd = 0.11; - const size_t imageSize = 28; - const size_t locatorHiddenSize = 128; - const size_t glimpsePatchSize = 8; - const size_t glimpseDepth = 1; - const size_t glimpseScale = 2; - const size_t glimpseHiddenSize = 128; - const size_t imageHiddenSize = 256; - - - // Locator network. - LinearMappingLayer<> linearLayer0(hiddenSize, 2); - BiasLayer<> biasLayer0(2, 1); - HardTanHLayer<> hardTanhLayer0; - ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd); - HardTanHLayer<> hardTanhLayer1; - MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize); - auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0, - reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0); - - // Location sensor network. - LinearLayer<> linearLayer1(2, locatorHiddenSize); - BiasLayer<> biasLayer1(locatorHiddenSize, 1); - ReLULayer<> rectifierLayer0; - auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0); - - // Glimpse sensor network. - GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale); - LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize); - BiasLayer<> biasLayer2(glimpseHiddenSize, 1); - ReLULayer<> rectifierLayer1; - auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2, - rectifierLayer1); - - // Glimpse network. - LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize, - imageHiddenSize); - BiasLayer<> biasLayer3(imageHiddenSize, 1); - ReLULayer<> rectifierLayer2; - LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize); - BiasLayer<> biasLayer4(hiddenSize, 1); - auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2, - linearLayer4, biasLayer4); - - // Feedback network. - LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize); - BiasLayer<> recurrentLayerBias0(hiddenSize, 1); - auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0); - - // Start network. - AdditionLayer<> startLayer0(hiddenSize, 1); - auto start = std::tie(startLayer0); - - // Transfer network. - ReLULayer<> rectifierLayer3; - auto transfer = std::tie(rectifierLayer3); - - // Classifier network. - LinearLayer<> linearLayer5(hiddenSize, numClasses); - BiasLayer<> biasLayer6(numClasses, 1); - LogSoftmaxLayer<> logSoftmaxLayer0; - auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0); - - // Reward predictor network. - ConstantLayer<> constantLayer0(1, 1); - AdditionLayer<> additionLayer0(1, 1); - auto rewardPredictor = std::tie(constantLayer0, additionLayer0); - - // Recurrent Model for Visual Attention. - RecurrentNeuralAttention - net(locator, locationSensor, glimpseSensor, glimpse, start, feedback, - transfer, classifier, rewardPredictor, rho); - - // Either we have to train a model, or load a model. - if (CLI::HasParam("training")) - { - arma::mat trainingData = std::move(CLI::GetParam("training")); - - arma::mat labels; - - // Did the user pass in labels? - if (CLI::HasParam("labels")) - { - // Load labels. 
- labels = std::move(CLI::GetParam("labels")); - - // Do the labels need to be transposed? - if (labels.n_cols == 1) - labels = labels.t(); - } - - // Now run the optimization. - if (optimizerType == "sgd") - { - SGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - else if (optimizerType == "minibatch-sgd") - { - MiniBatchSGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - opt.BatchSize() = batchSize; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - } - else - { - // Load the model from file. - data::Load(CLI::GetParam("input_model_file"), "rmva_model", net); - } - - // Do we need to do testing? - if (CLI::HasParam("test")) - { - arma::mat testingData = std::move(CLI::GetParam("test")); - - // Time the running of the Naive Bayes Classifier. - arma::mat results; - Timer::Start("rmva_testing"); - net.Predict(testingData, results); - Timer::Stop("rmva_testing"); - - if (CLI::HasParam("output")) - CLI::GetParam("output") = std::move(results); - } - - // Save the model, if requested. - if (CLI::HasParam("output_model_file")) - data::Save(CLI::GetParam("output_model_file"), "rmva_model", net); -} From 6d73df28fc8788c5254f2d267fde771f5ff4d54a Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 7 Nov 2016 22:13:00 +0100 Subject: [PATCH 16/82] Remove unused ann functions. --- src/mlpack/methods/ann/cnn.hpp | 448 ------------------ src/mlpack/methods/ann/cnn_impl.hpp | 289 ----------- src/mlpack/methods/ann/network_traits.hpp | 55 --- src/mlpack/methods/ann/network_util.hpp | 247 ---------- src/mlpack/methods/ann/network_util_impl.hpp | 286 ----------- .../ann/performance_functions/CMakeLists.txt | 17 - .../performance_functions/cee_function.hpp | 74 --- .../performance_functions/mse_function.hpp | 61 --- .../performance_functions/sparse_function.hpp | 141 ------ .../performance_functions/sse_function.hpp | 64 --- src/mlpack/tests/network_util_test.cpp | 149 ------ .../tests/performance_functions_test.cpp | 54 --- 12 files changed, 1885 deletions(-) delete mode 100644 src/mlpack/methods/ann/cnn.hpp delete mode 100644 src/mlpack/methods/ann/cnn_impl.hpp delete mode 100644 src/mlpack/methods/ann/network_traits.hpp delete mode 100644 src/mlpack/methods/ann/network_util.hpp delete mode 100644 src/mlpack/methods/ann/network_util_impl.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/CMakeLists.txt delete mode 100644 src/mlpack/methods/ann/performance_functions/cee_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/mse_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/sparse_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/sse_function.hpp delete mode 100644 src/mlpack/tests/network_util_test.cpp delete mode 100644 src/mlpack/tests/performance_functions_test.cpp diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp deleted file mode 100644 index 72e0803f179..00000000000 --- a/src/mlpack/methods/ann/cnn.hpp +++ /dev/null @@ -1,448 +0,0 @@ -/** - * @file cnn.hpp - * @author Shangtong Zhang - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. 
- * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_HPP -#define MLPACK_METHODS_ANN_CNN_HPP - -#include - -#include -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard convolutional network. - * - * @tparam LayerTypes Contains all layer modules used to construct the network. - * @tparam OutputLayerType The outputlayer type used to evaluate the network. - * @tparam PerformanceFunction Performance strategy used to calculate the error. - */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> -> -class CNN -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = CNN; - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network) and the given optimizer. - * Optionally, specify which initialize rule and performance function should - * be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template class OptimizerType> - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. 
- * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - /** - * Train the convolutional neural network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, const arma::mat& responses); - - /** - * Train the convolutional neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); - - /** - * Train the convolutional neural network on the given input data using the - * given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::cube& predictors, arma::mat& responses); - - /** - * Evaluate the convolutional neural network with the given parameters. This - * function is usually called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. 
- */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the convolutional neural network with the given - * parameters, and with respect to only one point in the dataset. This is - * useful for optimizers such as SGD, which require a separable objective - * function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - /** - * Serialize the convolutional neural network. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /** - * Reset the network by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. - */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. 
- outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } - - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated convolutional neural network. - LayerTypes network; - - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; - - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::cube predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Locally stored backward error. - arma::mat error; - - //! Locally stored sample size. - size_t sampleSize; -}; // class CNN - -} // namespace ann -} // namespace mlpack - -// Include implementation. 
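HasGradientCheck and HasWeightsCheck, used by the Update and weight helpers above, are member-function detectors: at compile time they select the overload that touches layer.Gradient() or layer.Weights() only when the layer actually provides such a member. A generic sketch of this kind of detector, assuming a no-argument Gradient() for simplicity (the traits used in this patch check specific signatures):

#include <iostream>
#include <type_traits>

// Detector in the spirit of HasGradientCheck: value is true when T has a
// member Gradient() callable with no arguments.
template<typename T>
class HasGradient
{
 private:
  template<typename U>
  static auto Check(U* u) -> decltype(u->Gradient(), std::true_type());

  template<typename>
  static std::false_type Check(...);

 public:
  static const bool value = decltype(Check<T>(nullptr))::value;
};

struct LinearLike { void Gradient() { } };
struct PoolingLike { };

int main()
{
  std::cout << HasGradient<LinearLike>::value << std::endl;   // 1
  std::cout << HasGradient<PoolingLike>::value << std::endl;  // 0
}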
-#include "cnn_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/cnn_impl.hpp b/src/mlpack/methods/ann/cnn_impl.hpp deleted file mode 100644 index ba774ba3097..00000000000 --- a/src/mlpack/methods/ann/cnn_impl.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file cnn_impl.hpp - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_IMPL_HPP -#define MLPACK_METHODS_ANN_CNN_IMPL_HPP - -// In case it hasn't been included yet. -#include "cnn.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - - -template -template class OptimizerType -> -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - Train(predictors, responses); -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = responses.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template< - template class OptimizerType -> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::cube& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - ResetParameter(network); - Forward(predictors.slices(0, sampleSize - 1), network); - OutputPrediction(responsesTemp, network); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_slices); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < (predictors.n_slices / sampleSize); i++) - { - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); - responses.col(i) = responsesTemp.col(0); - } -} - -template -double CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - ResetParameter(network); - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - NetworkGradients(gradient, network); - - Backward<>(error, network); - UpdateGradients<>(network); -} - -template -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(sampleSize, "sampleSize"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - NetworkWeights(parameter, network); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/network_traits.hpp b/src/mlpack/methods/ann/network_traits.hpp deleted file mode 100644 index 5aa91e8ae60..00000000000 --- a/src/mlpack/methods/ann/network_traits.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file network_traits.hpp - * @author Marcus Edel - * - * NetworkTraits class, a template class to get information about various - * networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP -#define MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP - -namespace mlpack { -namespace ann { - -/** - * This is a template class that can provide information about various - * networks. By default, this class will provide the weakest possible - * assumptions on networks, and each network should override values as - * necessary. If a network doesn't need to override a value, then there's no - * need to write a NetworkTraits specialization for that class. - */ -template -class NetworkTraits -{ - public: - /** - * This is true if the network is a feed forward neural network. - */ - static const bool IsFNN = false; - - /** - * This is true if the network is a recurrent neural network. - */ - static const bool IsRNN = false; - - /** - * This is true if the network is a convolutional neural network. 
- */ - static const bool IsCNN = false; - - /** - * This is true if the network is a sparse autoencoder. - */ - static const bool IsSAE = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp deleted file mode 100644 index 93bdf044355..00000000000 --- a/src/mlpack/methods/ann/network_util.hpp +++ /dev/null @@ -1,247 +0,0 @@ -/** - * @file network_util.hpp - * @author Marcus Edel - * - * Neural network utilities. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP - -#include - -#include - -/** - * Neural network utility functions. - */ -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Auxiliary function to get the number of weights of the specified network. - * - * @param network The network used for specifying the number of weights. - * @return The number of weights. - */ -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -/** - * Auxiliary function to get the number of weights of the specified layer. - * - * @param layer The layer used for specifying the number of weights. - * @param output The layer output parameter. - * @return The number of weights. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network. - * - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer. - * - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. - */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output); - -/** - * Auxiliary function to set the gradients of the specified network. - * - * @param gradients The gradients used to set the gradient of the network. - * @param network The network used to set the gradients. - * @param offset The memory offset of the gradients. - * return The number of gradients. 
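As the NetworkTraits comment above notes, a network advertises its kind by specializing the traits class for its own type and overriding the relevant flag. A minimal sketch, using a hypothetical MyConvNet network class:

// MyConvNet is a hypothetical stand-in for a real network type; only the
// specialization pattern matters here.
template<typename LayerTypes, typename OutputLayerType>
class MyConvNet { /* ... */ };

namespace mlpack {
namespace ann {

template<typename LayerTypes, typename OutputLayerType>
class NetworkTraits<MyConvNet<LayerTypes, OutputLayerType>>
{
 public:
  static const bool IsFNN = false;
  static const bool IsRNN = false;
  static const bool IsCNN = true;  // Mark the network as convolutional.
  static const bool IsSAE = false;
};

} // namespace ann
} // namespace mlpack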
- */ -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the gradients of the specified layer. - * - * @param layer The layer used to set the gradients. - * @param gradients The gradients used to set the gradient of the layer. - * @param offset The memory offset of the gradients. - * @param output The output parameter of the layer. - * @return The number of gradients. - */ -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output); - -/** - * Auxiliary function to get the input size of the specified network. - * - * @param network The network used for specifying the input size. - * @return The input size. - */ -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -/** - * Auxiliary function to get the input size of the specified layer. - * - * @param layer The layer used for specifying the input size. - * @param output The layer output parameter. - * @return The input size. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network using a given - * initialize rule. - * - * @param initializeRule The rule used to initialize the network weights. - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer using the given - * initialize rule. - * - * @param initializeRule The rule used to initialize the layer weights. - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. 
- */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - P& output); - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "network_util_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/network_util_impl.hpp b/src/mlpack/methods/ann/network_util_impl.hpp deleted file mode 100644 index 32034576290..00000000000 --- a/src/mlpack/methods/ann/network_util_impl.hpp +++ /dev/null @@ -1,286 +0,0 @@ -/** - * @file network_util_impl.hpp - * @author Marcus Edel - * - * Implementation of the network auxiliary functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP - -#include "network_util_impl.hpp" - -#include - -namespace mlpack { -namespace ann { - -template -typename std::enable_if::type -NetworkSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkSize(std::tuple& network) -{ - return LayerSize(std::get(network), std::get( - network).OutputParameter()) + NetworkSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(weights, network, - offset + LayerWeights(std::get(network), weights, - offset, std::get(network).OutputParameter())); - -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* unused */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* unused */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& 
gradients, - std::tuple& network, - size_t offset) -{ - NetworkGradients(gradients, network, - offset + LayerGradients(std::get(network), gradients, - offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& /* unused */) -{ - layer.Gradient() = arma::mat(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& /* unused */) -{ - layer.Gradient() = arma::cube(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network) -{ - const size_t inputSize = LayerInputSize(std::get(network), std::get( - network).OutputParameter()); - - if (inputSize) - { - return inputSize; - } - - return NetworkInputSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_cols; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(initializeRule, weights, - network, offset + LayerWeights(initializeRule, std::get(network), - weights, offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& /* initializeRule */, - arma::mat& /* weights */, - std::tuple& /* network */, - size_t /* offset */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* output */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* output */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - 
!HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& /* initializeRule */, - T& /* layer */, - arma::mat& /* weights */, - size_t /* offset */, - P& /* output */) -{ - return 0; -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt b/src/mlpack/methods/ann/performance_functions/CMakeLists.txt deleted file mode 100644 index c64f7263cea..00000000000 --- a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - mse_function.hpp - sse_function.hpp - cee_function.hpp - sparse_function.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp deleted file mode 100644 index 34244521dc8..00000000000 --- a/src/mlpack/methods/ann/performance_functions/cee_function.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @file cee_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the cross-entropy error performance - * function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cross-entropy error performance function measures the network's - * performance according to the cross entropy errors. The log in the cross- - * entropy take sinto account the closeness of a prediction and is a more - * granular way to calculate the error. - * - * @tparam Layer The layer that is connected with the output layer. - */ -template< - class Layer = LinearLayer< > -> -class CrossEntropyErrorFunction -{ - public: - /** - * Computes the cross-entropy error function.. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the cross-entropy error function. - * - * @param input Input data. - * @param target Target data. - * @return cross-entropy error. 
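For the non-binary branch implemented just below, the error is -dot(trunc_log(input), target). A small standalone Armadillo sketch of that computation, using the same values as the CrossEntropyErrorTest that appears later in this patch (editorial example, not part of the deleted header):

#include <armadillo>
#include <cmath>
#include <iostream>

int main()
{
  // Predictions and targets as column vectors.
  arma::colvec input = { std::exp(-2.0), std::exp(-1.0) };
  arma::colvec target = { 1.0, 1.0 };

  // Cross-entropy error: -sum(target % log(input)).
  const double error = -arma::dot(arma::trunc_log(input), target);

  std::cout << error << std::endl;  // The log terms give -2 and -1, so the error is 3.
  return 0;
}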
- */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - if (LayerTraits::IsBinary) - return -arma::dot(arma::trunc_log(arma::abs(target - input)), target); - - return -arma::dot(arma::trunc_log(input), target); - } - -}; // class CrossEntropyErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp deleted file mode 100644 index d2f19334d6f..00000000000 --- a/src/mlpack/methods/ann/performance_functions/mse_function.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file mse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the mean squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The mean squared error performance function measures the network's - * performance according to the mean of squared errors. - */ -class MeanSquaredErrorFunction -{ - public: - /** - * Computes the mean squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the mean squared error function. - * - * @param input Input data. - * @param target Target data. - * @return mean of squared errors. - */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - return arma::mean(arma::mean(arma::square(target - input))); - } - -}; // class MeanSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp deleted file mode 100644 index 145a0b64aff..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/** - * @file sparse_function.hpp - * @author Siddharth Agrawal - * @author Tham Ngap Wei - * - * Definition and implementation of the sparse performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ - -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cost function design for the sparse autoencoder. - */ -template -class SparseErrorFunction -{ - public: - /** - * Computes the cost of sparse autoencoder. - * - * @param lambda L2-regularization parameter. 
- * @param beta KL divergence parameter. - * @param rho Sparsity parameter. - */ - SparseErrorFunction(const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - lambda(lambda), beta(beta), rho(rho) - { - // Nothing to do here. - } - - SparseErrorFunction(SparseErrorFunction &&layer) noexcept - { - *this = std::move(layer); - } - - SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept - { - lambda = layer.lambda; - beta = layer.beta; - rho = layer.rho; - - return *this; - } - - //! Get the KL divergence parameter. - double Beta() const { return beta; } - //! Modify the KL divergence parameter. - void Beta(double value) { beta = value;} - - //! Get the L2-regularization parameter. - double Lambda() const { return lambda; } - //! Modify the L2-regularization parameter. - void Lambda(double value) { lambda = value;} - - //! Get the sparsity parameter. - double Rho() const { return rho; } - //! Modify the sparsity parameter. - void Rho(double value) { rho = value;} - - /** - * Computes the cost of sparse autoencoder. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const Tp& network, - const InType& target, const InType &error) - { - return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(), - std::get<3>(network).RhoCap(), target, error); - } - - /** - * Computes the cost of sparse autoencoder. - * - * @param w1 weights of hidden layer - * @param w2 weights of output layer - * @param rhoCap Average activations of the hidden layer - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const InType& w1, const InType& w2, - const InType& rhoCap, const InType& target, - const InType& error) - { - // Calculate squared L2-norms of w1 and w2. - const double wL2SquaredNorm = - arma::accu(w1 % w1) + arma::accu(w2 % w2); - - // Calculate the reconstruction error, the regularization cost and the KL - // divergence cost terms. 'sumOfSquaresError' is the average squared l2-norm - // of the reconstructed data difference. 'weightDecay' is the squared l2-norm - // of the weights w1 and w2. 'klDivergence' is the cost of the hidden layer - // activations not being low. It is given by the following formula: - // KL = sum_over_hSize(rho*log(rho/rhoCaq) + (1-rho)*log((1-rho)/(1-rhoCap))) - const double sumOfSquaresError = - 0.5 * arma::accu(error % error) / target.n_cols; - - const double weightDecay = 0.5 * lambda * wL2SquaredNorm; - const double klDivergence = - beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) * - arma::trunc_log((1 - rho) / (1 - rhoCap))); - - // The cost is the sum of the terms calculated above. - return sumOfSquaresError + weightDecay + klDivergence; - } - - private: - //! Locally stored L2-regularization parameter. - double lambda; - - //! Locally stored KL divergence parameter. - double beta; - - //! Locally stored sparsity parameter. 
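The cost returned by Error() above is the sum of three scalar terms: the mean reconstruction error, an L2 weight-decay penalty, and a KL-divergence sparsity penalty. A small standalone Armadillo sketch of the same arithmetic on toy values (illustrative only; the matrix sizes and fill values are assumptions, not mlpack code):

#include <armadillo>
#include <iostream>

int main()
{
  const double lambda = 0.0001, beta = 3.0, rho = 0.01;

  // Toy weights, average hidden activations, targets, and reconstruction error.
  arma::mat w1(4, 3, arma::fill::randu);
  arma::mat w2(3, 4, arma::fill::randu);
  arma::mat rhoCap(4, 1);
  rhoCap.fill(0.05);
  arma::mat target(3, 5, arma::fill::randu);
  arma::mat error(3, 5, arma::fill::randu);  // Stands in for (output - input).

  const double sumOfSquaresError =
      0.5 * arma::accu(error % error) / target.n_cols;
  const double weightDecay =
      0.5 * lambda * (arma::accu(w1 % w1) + arma::accu(w2 % w2));
  const double klDivergence = beta * arma::accu(
      rho * arma::trunc_log(rho / rhoCap) +
      (1 - rho) * arma::trunc_log((1 - rho) / (1 - rhoCap)));

  std::cout << sumOfSquaresError + weightDecay + klDivergence << std::endl;
  return 0;
}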
- double rho; - -}; // class SparseErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp deleted file mode 100644 index 34055fb74c0..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sse_function.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file sse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the sum squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The sum squared error performance function measures the network's performance - * according to the sum of squared errors. - */ -class SumSquaredErrorFunction -{ - public: - /** - * Computes the sum squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, - const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the sum squared error function. - * - * @param input Input data. - * @param target Target data. - * @return sum of squared errors. - */ - template - static double Error(const DataType& input, - const DataType& target, - const DataType&) - { - return arma::sum(arma::square(target - input)); - } - -}; // class SumSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/tests/network_util_test.cpp b/src/mlpack/tests/network_util_test.cpp deleted file mode 100644 index 4f0fcf105e0..00000000000 --- a/src/mlpack/tests/network_util_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/** - * @file network_util_test.cpp - * @author Marcus Edel - * - * Simple tests for things in the network_util file. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(NetworkUtilTest); - -/** - * Test the network size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. 
- auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(weightNetwork), 1100); -} - -/** - * Test the layer size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerSize(baseLayer, baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(10, 10); - BOOST_REQUIRE_EQUAL(LayerSize(linearLayer, - linearLayer.OutputParameter()), 100); -} - -/** - * Test the network input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkInputSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(5, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(weightNetwork), 5); -} - -/** - * Test the layer input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerInputSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerInputSize(baseLayer, - baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(5, 10); - BOOST_REQUIRE_EQUAL(LayerInputSize(linearLayer, - linearLayer.OutputParameter()), 5); -} - -/** - * Test the network weight auxiliary function using the given initialization - * rule. - */ -BOOST_AUTO_TEST_CASE(NetworkWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - arma::mat parameter = arma::zeros(1100, 1); - - // Create the network. - auto network = std::tie(linearLayer1, linearLayer2); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - NetworkWeights(constantInit, parameter, network); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer2.Weights()), 1000); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 1100); -} - -/** - * Test the layer weight auxiliary function using the given initialization rule. - */ -BOOST_AUTO_TEST_CASE(LayerWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - - arma::mat parameter = arma::zeros(100, 1); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - arma::mat output; - LayerWeights(constantInit, linearLayer1, parameter, 0, output); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 100); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/performance_functions_test.cpp b/src/mlpack/tests/performance_functions_test.cpp deleted file mode 100644 index 35902911422..00000000000 --- a/src/mlpack/tests/performance_functions_test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file performance_functions_test.cpp - * @author Marcus Edel - * - * Tests for the various performance functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. 
If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PerformanceFunctionsTest); - -// Test the mean squared error performance function. -BOOST_AUTO_TEST_CASE(MeanSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(MeanSquaredErrorFunction::Error(input, target), 0.5); -} - -// Test the cross entropy performance function. -BOOST_AUTO_TEST_CASE(CrossEntropyErrorTest) -{ - arma::colvec input; - input << std::exp(-2.0) << std::exp(-1.0); - arma::colvec target = arma::ones(2); - - BOOST_REQUIRE_EQUAL(CrossEntropyErrorFunction<>::Error(input, target), 3); -} - -// Test the sum squared error performance function. -BOOST_AUTO_TEST_CASE(SumSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(SumSquaredErrorFunction::Error(input, target), 4); -} - -BOOST_AUTO_TEST_SUITE_END(); From 5051083792b5c8c4a80911eebbece55d898129eb Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 8 Nov 2016 22:46:11 +0100 Subject: [PATCH 17/82] Remove unused ann layer. --- src/mlpack/methods/ann/layer/bias_layer.hpp | 208 -------- .../ann/layer/binary_classification_layer.hpp | 106 ---- .../methods/ann/layer/constant_layer.hpp | 121 ----- src/mlpack/methods/ann/layer/conv_layer.hpp | 324 ------------ .../methods/ann/layer/dropconnect_layer.hpp | 361 ------------- .../methods/ann/layer/dropout_layer.hpp | 252 --------- src/mlpack/methods/ann/layer/empty_layer.hpp | 133 ----- .../methods/ann/layer/glimpse_layer.hpp | 484 ------------------ .../{hard_tanh_layer.hpp => hard_tanh.hpp} | 0 src/mlpack/methods/ann/layer/linear_layer.hpp | 289 ----------- .../methods/ann/layer/log_softmax_layer.hpp | 131 ----- src/mlpack/methods/ann/layer/lstm_layer.hpp | 418 --------------- .../layer/multiclass_classification_layer.hpp | 98 ---- .../ann/layer/multiply_constant_layer.hpp | 113 ---- .../methods/ann/layer/one_hot_layer.hpp | 96 ---- .../methods/ann/layer/pooling_layer.hpp | 267 ---------- .../methods/ann/layer/recurrent_layer.hpp | 192 ------- .../ann/layer/reinforce_normal_layer.hpp | 139 ----- .../methods/ann/layer/softmax_layer.hpp | 114 ----- .../methods/ann/layer/sparse_bias_layer.hpp | 177 ------- .../methods/ann/layer/sparse_input_layer.hpp | 180 ------- .../methods/ann/layer/sparse_output_layer.hpp | 227 -------- .../ann/layer/vr_class_reward_layer.hpp | 171 ------- 23 files changed, 4601 deletions(-) delete mode 100644 src/mlpack/methods/ann/layer/bias_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/binary_classification_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/constant_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/conv_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/dropconnect_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/dropout_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/empty_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/glimpse_layer.hpp rename src/mlpack/methods/ann/layer/{hard_tanh_layer.hpp => hard_tanh.hpp} (100%) delete mode 100644 src/mlpack/methods/ann/layer/linear_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/log_softmax_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/lstm_layer.hpp delete mode 100644 
src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/multiply_constant_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/one_hot_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/pooling_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/recurrent_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/softmax_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_bias_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_input_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_output_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp deleted file mode 100644 index 0be535dec7b..00000000000 --- a/src/mlpack/methods/ann/layer/bias_layer.hpp +++ /dev/null @@ -1,208 +0,0 @@ -/** - * @file bias_layer.hpp - * @author Marcus Edel - * - * Definition of the BiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard bias layer. The BiasLayer class represents a - * single layer of a neural network. - * - * A convenient typedef is given: - * - * - 2DBiasLayer - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class BiasLayer -{ - public: - /** - * Create the BiasLayer object using the specified number of units and bias - * parameter. - * - * @param outSize The number of output units. - * @param bias The bias value. - */ - BiasLayer(const size_t outSize, const double bias = 1) : - outSize(outSize), - bias(bias) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + (weights * bias); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = input; - for (size_t s = 0; s < input.n_slices; s++) - { - output.slice(s) += weights(s) * bias; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. 
- * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const arma::Mat& /* input */, - const ErrorType& error, - GradientType& gradient) - { - gradient = error * bias; - } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(bias, "bias"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored bias value. - double bias; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class BiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::cube -> -using BiasLayer2D = BiasLayer; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -using AdditionLayer = BiasLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp deleted file mode 100644 index 1b3d6172a74..00000000000 --- a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file binary_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the BinaryClassificationLayer class, which implements a - * binary class classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. 
If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a binary classification layer that can be used as - * output layer. - */ -class BinaryClassificationLayer -{ - public: - /** - * Create the BinaryClassificationLayer object. - * - * @param confidence The confidence used for the output class transformation. - */ - BinaryClassificationLayer(const double confidence = 0.5) : - confidence(confidence) - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - - for (size_t i = 0; i < output.n_elem; i++) - output(i) = output(i) > confidence ? 1 : 0; - } - - //! Get the confidence parameter. - double const& Confidence() const { return confidence; } - //! Modify the confidence parameter. - double& Confidence() { return confidence; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(confidence, "confidence"); - } - - private: - double confidence; - -}; // class BinaryClassificationLayer - -//! Layer traits for the binary class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant_layer.hpp deleted file mode 100644 index 31da87e7d9d..00000000000 --- a/src/mlpack/methods/ann/layer/constant_layer.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/** - * @file constant_layer.hpp - * @author Marcus Edel - * - * Definition of the ConstantLayer class, which outputs a constant value given - * any input. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the constant layer. The constant layer outputs a given - * constant value given any input value. 
- * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class ConstantLayer -{ - public: - /** - * Create the ConstantLayer object that outputs a given constant scalar value - * given any input value. - * - * @param outSize The number of output units. - * @param scalar The constant value used to create the constant output. - */ - ConstantLayer(const size_t outSize, const double scalar) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } - - /** - * Ordinary feed forward pass of a neural network. The forward pass fills the - * output with the specified constant parameter. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& /* input */, arma::Mat& output) - { - output = constantOutput; - } - - /** - * Ordinary feed backward pass of a neural network. The backward pass of the - * constant layer is returns always a zero output error matrix. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Mat& /* input */, - const arma::Mat& /* gy */, - arma::Mat& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols); - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } - - private: - //! Locally-stored constant output matrix. - OutputDataType constantOutput; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConstantLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp deleted file mode 100644 index bbb918c8a49..00000000000 --- a/src/mlpack/methods/ann/layer/conv_layer.hpp +++ /dev/null @@ -1,324 +0,0 @@ -/** - * @file conv_layer.hpp - * @author Marcus Edel - * - * Definition of the ConvLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP - -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. 
*/ { - -/** - * Implementation of the ConvLayer class. The ConvLayer class represents a - * single layer of a neural network. - * - * @tparam ForwardConvolutionRule Convolution to perform forward process. - * @tparam BackwardConvolutionRule Convolution to perform backward process. - * @tparam GradientConvolutionRule Convolution to calculate gradient. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename ForwardConvolutionRule = NaiveConvolution, - typename BackwardConvolutionRule = NaiveConvolution, - typename GradientConvolutionRule = NaiveConvolution, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class ConvLayer -{ - public: - /** - * Create the ConvLayer object using the specified number of input maps, - * output maps, filter size, stride and padding parameter. - * - * @param inMaps The number of input maps. - * @param outMaps The number of output maps. - * @param wfilter Width of the filter/kernel. - * @param wfilter Height of the filter/kernel. - * @param xStride Stride of filter application in the x direction. - * @param yStride Stride of filter application in the y direction. - * @param wPad Spatial padding width of the input. - * @param hPad Spatial padding height of the input. - */ - ConvLayer(const size_t inMaps, - const size_t outMaps, - const size_t wfilter, - const size_t hfilter, - const size_t xStride = 1, - const size_t yStride = 1, - const size_t wPad = 0, - const size_t hPad = 0) : - wfilter(wfilter), - hfilter(hfilter), - inMaps(inMaps), - outMaps(outMaps), - xStride(xStride), - yStride(yStride), - wPad(wPad), - hPad(hPad) - { - weights.set_size(wfilter, hfilter, inMaps * outMaps); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad); - const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad); - - output = arma::zeros >(wConv, hConv, outMaps); - for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++) - { - arma::Mat convOutput; - ForwardConvolutionRule::Convolution(input.slice(inMap), - weights.slice(outMap), convOutput); - - output.slice(outMap) += convOutput; - } - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
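The output dimensions computed in Forward() follow the usual valid-convolution size formula, floor((size + 2 * pad - filter) / stride) + 1, via the ConvOutSize helper defined later in this class. A quick standalone check of that formula (illustrative only):

#include <cstddef>
#include <iostream>

// Standard convolution output size: floor((size + 2 * pad - filter) / stride) + 1.
// Integer division already floors for the non-negative values used here.
std::size_t ConvOutSize(std::size_t size, std::size_t k, std::size_t s, std::size_t p)
{
  return (size + 2 * p - k) / s + 1;
}

int main()
{
  // A 32x32 input map with a 5x5 filter, stride 1 and no padding gives 28x28.
  std::cout << ConvOutSize(32, 5, 1, 0) << "x" << ConvOutSize(32, 5, 1, 0) << std::endl;
  return 0;
}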
- */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++) - { - for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter, - output); - - g.slice(outMap) += output; - } - } - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The input parameter used for calculating the gradient. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Cube& d, - arma::Cube& g) - { - g = arma::zeros >(weights.n_rows, weights.n_cols, - weights.n_slices); - - for (size_t outMap = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps) - { - arma::Cube inputSlices = input.slices(inMap, inMap); - arma::Cube deltaSlices = d.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output); - - for (size_t i = 0; i < output.n_slices; i++) - g.slice(s) += output.slice(i); - } - } - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(wfilter, "wfilter"); - ar & data::CreateNVP(hfilter, "hfilter"); - ar & data::CreateNVP(inMaps, "inMaps"); - ar & data::CreateNVP(outMaps, "outMaps"); - ar & data::CreateNVP(xStride, "xStride"); - ar & data::CreateNVP(yStride, "yStride"); - ar & data::CreateNVP(wPad, "wPad"); - ar & data::CreateNVP(hPad, "hPad"); - } - - private: - /* - * Rotates a 3rd-order tesor counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); - - // * left-right flip, up-down flip */ - for (size_t s = 0; s < output.n_slices; s++) - output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); - } - - /* - * Rotates a dense matrix counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. 
- */ - template - void Rotate180(const arma::Mat& input, arma::Mat& output) - { - // * left-right flip, up-down flip */ - output = arma::fliplr(arma::flipud(input)); - } - - /* - * Return the convolution output size. - * - * @param size The size of the input (row or column). - * @param k The size of the filter (width or height). - * @param s The stride size (x or y direction). - * @param p The size of the padding (width or height). - * @return The convolution output size. - */ - size_t ConvOutSize(const size_t size, - const size_t k, - const size_t s, - const size_t p) - { - return std::floor(size + p * 2 - k) / s + 1; - } - - //! Locally-stored filter/kernel width. - size_t wfilter; - - //! Locally-stored filter/kernel height. - size_t hfilter; - - //! Locally-stored number of input maps. - size_t inMaps; - - //! Locally-stored number of output maps. - size_t outMaps; - - //! Locally-stored stride of the filter in x-direction. - size_t xStride; - - //! Locally-stored stride of the filter in y-direction. - size_t yStride; - - //! Locally-stored padding width. - size_t wPad; - - //! Locally-stored padding height. - size_t hPad; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConvLayer - -//! Layer traits for the convolution layer. -template< - typename ForwardConvolutionRule, - typename BackwardConvolutionRule, - typename GradientConvolutionRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp deleted file mode 100644 index fdb14cbf11c..00000000000 --- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/** - * @file dropconnect_layer.hpp - * @author Palash Ahuja - * - * Definition of the DropConnectLayer class, which implements a regularizer - * that randomly sets connections to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP - -#include - -#include "empty_layer.hpp" -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The DropConnect layer is a regularizer that randomly with probability - * ratio sets the connection values to zero and scales the remaining - * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) - * when deterministic is false. In the deterministic mode(during testing), - * the layer just computes the output. The output is computed according - * to the input layer. If no input layer is given, it will take a linear layer - * as default. 
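The DropConnect operation described above amounts to an element-wise Bernoulli mask on the weight matrix followed by rescaling with 1 / (1 - ratio). A minimal standalone Armadillo sketch of that training-time step (illustrative only, not the deleted layer's API):

#include <armadillo>
#include <iostream>

int main()
{
  const double ratio = 0.5;                  // Probability of dropping a connection.
  const double scale = 1.0 / (1.0 - ratio);  // Rescale the surviving connections.

  arma::mat weights(4, 3, arma::fill::randu);
  arma::mat input(3, 1, arma::fill::randu);

  // Keep each weight with probability (1 - ratio).
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (double) (val > ratio); });

  // Forward pass with the masked weights, scaled to preserve the expected output.
  arma::mat output = (weights % mask) * input * scale;

  std::cout << output << std::endl;
  return 0;
}

At test time (deterministic mode) the mask is skipped and, in the deleted layer, no extra scaling is applied, since the rescaling already happened during training.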
- * - * Note: - * During training you should set deterministic to false and during testing - * you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @inproceedings{WanICML2013, - * title={Regularization of Neural Networks using DropConnect}, - * booktitle = {Proceedings of the 30th International Conference on Machine - * Learning(ICML - 13)}, - * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and - * Rob Fergus}, - * year = {2013} - * } - * @endcode - * - * @tparam InputLayer Layer used instead of the internal linear layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template< - typename InputLayer = EmptyLayer, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropConnectLayer -{ - public: - /** - * Creates the DropConnect Layer as a Linear Object that takes input size, - * output size and ratio as parameter. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param ratio The probability of setting a value to zero. - */ - DropConnectLayer (const size_t inSize, - const size_t outSize, - const double ratio = 0.5) : - inSize(inSize), - outSize(outSize), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(false) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the DropConnectLayer object using the specified ratio and rescale - * parameter. This takes the - * - * @param ratio The probability of setting a connection to zero. - * @param inputLayer the layer object that the dropconnect connection would take. - */ - template - DropConnectLayer(InputLayerType &&inputLayer, - const double ratio = 0.5) : - baseLayer(std::forward(inputLayer)), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(true) - { - static_assert(std::is_same::type, - InputLayer>::value, - "The type of the inputLayer must be InputLayerType"); - } - /** - * Ordinary feed forward pass of the DropConnect layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat &input, arma::Mat &output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (uselayer) - { - baseLayer.Forward(input, output); - } - else - { - output = weights * input; - } - } - else - { - if (uselayer) - { - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(baseLayer.Weights().n_rows, - baseLayer.Weights().n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = baseLayer.Weights(); - - baseLayer.Weights() = baseLayer.Weights() % mask; - - baseLayer.Forward(input, output); - } - else - { - // Scale the input / ( 1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(weights.n_rows, weights.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = weights; - - weights = weights % mask; - output = weights * input; - } - - output = output * scale; - } - } - - /** - * Ordinary feed backward pass of the DropConnect layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. 
- * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, const DataType& gy, DataType& g) - { - if (uselayer) - { - baseLayer.Backward(input, gy, g); - } - else - { - g = weights.t() * gy; - } - } - - /** - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - if (uselayer) - { - baseLayer.Gradient(input, d, g); - - // Denoise the weights. - baseLayer.Weights() = denoise; - } - else - { - g = d * input.t(); - - // Denoise the weights. - weights = denoise; - } - } - - //! Get the weights. - OutputDataType const& Weights() const - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Modify the weights. - OutputDataType& Weights() - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Get the input parameter. - InputDataType &InputParameter() const - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Modify the input parameter. - InputDataType &InputParameter() - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Get the output parameter. - OutputDataType &OutputParameter() const - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Modify the output parameter. - OutputDataType &OutputParameter() - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Get the delta. - OutputDataType const& Delta() const - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Modify the delta. - OutputDataType& Delta() - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Get the gradient. - OutputDataType const& Gradient() const - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! Modify the gradient. - OutputDataType& Gradient() - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - - //! Modify the value of the deterministic parameter. - bool &Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - -private: - //! Locally-stored layer object. - InputLayer baseLayer; - - //! Locally stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true the default layer is used otherwise a new layer will be created. - bool uselayer; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! If true dropout and scaling is disabled, see notes above. 
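A self-contained sketch of the masking scheme that the DropConnect Forward()/Gradient() pair above implements, assuming only Armadillo; the sizes, the 0.3 ratio, and the standalone main() are illustrative and not part of the layer itself.

#include <armadillo>

int main()
{
  const double ratio = 0.3;
  const double scale = 1.0 / (1.0 - ratio);

  arma::mat weights = arma::randn<arma::mat>(4, 6);
  arma::vec input = arma::randn<arma::vec>(6);

  // Keep each connection with probability (1 - ratio), zero it otherwise.
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (val > ratio); });

  // Save the unmasked weights so they can be restored ("denoised") after the
  // gradient step, as the layer does above.
  arma::mat denoise = weights;

  // Masked affine map, rescaled so the expected pre-activation is unchanged.
  arma::vec output = (weights % mask) * input * scale;
  output.print("dropconnect output:");

  weights = denoise;  // restore the full weight matrix
  return 0;
}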
- bool deterministic; - - //! Denoise mask for the weights. - OutputDataType denoise; -}; // class DropConnectLayer. - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout_layer.hpp deleted file mode 100644 index 3ed0bd62a60..00000000000 --- a/src/mlpack/methods/ann/layer/dropout_layer.hpp +++ /dev/null @@ -1,252 +0,0 @@ -/** - * @file dropout_layer.hpp - * @author Marcus Edel - * - * Definition of the DropoutLayer class, which implements a regularizer that - * randomly sets units to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The dropout layer is a regularizer that randomly with probability ratio - * sets input values to zero and scales the remaining elements by factor 1 / - * (1 - ratio). If rescale is true the input is scaled with 1 / (1-p) when - * deterministic is false. In the deterministic mode (during testing), the layer - * just scales the output. - * - * Note: During training you should set deterministic to false and during - * testing you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @article{Hinton2012, - * author = {Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, - * Ilya Sutskever, Ruslan Salakhutdinov}, - * title = {Improving neural networks by preventing co-adaptation of feature - * detectors}, - * journal = {CoRR}, - * volume = {abs/1207.0580}, - * year = {2012}, - * } - * @endcode - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropoutLayer -{ - public: - - /** - * Create the DropoutLayer object using the specified ratio and rescale - * parameter. - * - * @param ratio The probability of setting a value to zero. - * @param rescale If true the input is rescaled when deterministic is False. - */ - DropoutLayer(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the dropout layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } - - /** - * Ordinary feed forward pass of the dropout layer. 
- * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols, - input.n_slices); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } - - /** - * Ordinary feed backward pass of the dropout layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const DataType& gy, - DataType& g) - { - g = gy % mask * scale; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the detla. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - - //! The value of the rescale parameter. - bool Rescale() const {return rescale; } - //! Modify the value of the rescale parameter. - bool& Rescale() {return rescale; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; - - //! If true the input is rescaled when deterministic is False. - bool rescale; -}; // class DropoutLayer - -//! Layer traits for the bias layer. -template < - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard Dropout-Layer2D. 
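A quick numerical sanity check of the inverted-dropout scaling used by the dropout Forward() above: with the mask and the 1 / (1 - ratio) factor, the training-mode activation matches the deterministic output in expectation. Armadillo is assumed and the ratio is an example value.

#include <armadillo>
#include <iostream>

int main()
{
  const double ratio = 0.5;
  const double scale = 1.0 / (1.0 - ratio);

  // A large constant input makes the expectation easy to eyeball.
  arma::mat input = arma::ones<arma::mat>(200, 200);

  arma::mat mask = arma::randu<arma::mat>(input.n_rows, input.n_cols);
  mask.transform([&](double val) { return (val > ratio); });

  const double trainMean = arma::accu(input % mask * scale) / input.n_elem;
  std::cout << "train-mode mean (close to 1): " << trainMean << std::endl;
  return 0;
}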
- */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using DropoutLayer2D = DropoutLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp deleted file mode 100644 index cf5a70e43ae..00000000000 --- a/src/mlpack/methods/ann/layer/empty_layer.hpp +++ /dev/null @@ -1,133 +0,0 @@ -/** - * @file empty_layer.hpp - * @author Palash Ahuja - * - * Definition of the EmptyLayer class, which is basically empty. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP - -namespace mlpack{ -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the EmptyLayer class. The EmptyLayer class represents a - * single layer which is mainly used as placeholder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class EmptyLayer -{ - public: - /** - * Creates the empty layer object. All the methods are - * empty as well. - */ - EmptyLayer() { /* Nothing to do here. */ } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& /* input */, OutputType& /* output */) - { - /* Nothing to do here. */ - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* input */, - const ErrorType& /* gy */, - GradientType& /* g */) - { - /* Nothing to do here. */ - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - /* Nothing to do here. */ - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! 
Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class EmptyLayer - -} //namespace ann -} //namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp deleted file mode 100644 index 3f1e9dffabb..00000000000 --- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp +++ /dev/null @@ -1,484 +0,0 @@ -/** - * @file glimpse_layer.hpp - * @author Marcus Edel - * - * Definition of the GlimpseLayer class, which takes an input image and a - * location to extract a retina-like representation of the input image at - * different increasing scales. - * - * For more information, see the following. - * - * @code - * @article{CoRR2014, - * author = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * title = {Recurrent Models of Visual Attention}, - * journal = {CoRR}, - * volume = {abs/1406.6247}, - * year = {2014}, - * } - * @endcode - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The glimpse layer returns a retina-like representation - * (down-scaled cropped images) of increasing scale around a given location in a - * given image. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class GlimpseLayer -{ - public: - - /** - * Create the GlimpseLayer object using the specified ratio and rescale - * parameter. - * - * @param inSize The size of the input units. - * @param size The used glimpse size (height = width). - * @param depth The number of patches to crop per glimpse. - * @param scale The scaling factor used to create the increasing retina-like - * representation. - */ - GlimpseLayer(const size_t inSize, - const size_t size, - const size_t depth = 3, - const size_t scale = 2) : - inSize(inSize), - size(size), - depth(depth), - scale(scale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the glimpse layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
- */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(size, size, depth * input.n_slices); - - inputDepth = input.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - input.n_rows + padSize * 2, input.n_cols + padSize * 2, - input.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1, - padSize + input.n_cols - 1) = input.subcube(0, 0, - inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - output.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, output.slice(j)); - } - else - { - ReSampling(poolingInput, output.slice(j)); - } - } - } - } - } - } - - /** - * Ordinary feed backward pass of the glimpse layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const ErrorType& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. 
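A reduced sketch of the location handling in the glimpse Forward() above: a location in [-1, 1]^2 is mapped onto the zero-padded image and a patch at the base glimpse scale is cropped around it. Armadillo is assumed; the 28x28 image and the centred location are illustrative only.

#include <armadillo>
#include <algorithm>
#include <cstddef>

int main()
{
  const std::size_t glimpseSize = 8;
  arma::mat image = arma::randu<arma::mat>(28, 28);

  // Zero-pad so that glimpses near the border stay inside the image.
  const std::size_t padSize = (glimpseSize - 1) / 2;
  arma::mat padded = arma::zeros<arma::mat>(image.n_rows + 2 * padSize,
                                            image.n_cols + 2 * padSize);
  padded(padSize, padSize, arma::size(image)) = image;

  // Location (0, 0) is the image centre; (-1, -1) the top-left corner.
  const double locX = 0.0, locY = 0.0;
  const std::size_t h = padded.n_rows - glimpseSize;
  const std::size_t w = padded.n_cols - glimpseSize;
  const std::size_t x = std::min(h, (std::size_t) std::max(0.0, (locX + 1) / 2.0 * h));
  const std::size_t y = std::min(w, (std::size_t) std::max(0.0, (locY + 1) / 2.0 * w));

  arma::mat glimpse = padded(x, y, arma::size(glimpseSize, glimpseSize));
  glimpse.print("central glimpse:");
  return 0;
}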
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - g = arma::zeros(inputParameter.n_rows, inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputParameter.n_rows + padSize * 2, inputParameter.n_cols + - padSize * 2, inputParameter.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputParameter.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - g += inputPadded.tube(padSize, padSize, padSize + - inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1); - } - } - - Transform(g); - } - - //! Get the input parameter. - InputDataType& InputParameter() const {return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const {return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the detla. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Set the locationthe x and y coordinate of the center of the output - //! glimpse. - void Location(const arma::mat& location) - { - this->location = location; - } - - private: - /* - * Transform the given input by changing rows to columns. - * - * @param w The input matrix used to perform the transformation. - */ - void Transform(arma::mat& w) - { - arma::mat t = w; - - for (size_t i = 0, k = 0; i < w.n_elem; k++) - { - for (size_t j = 0; j < w.n_cols; j++, i++) - { - w(k, j) = t(i); - } - } - } - - /* - * Transform the given input by changing rows to columns. - * - * @param w The input matrix used to perform the transformation. 
- */ - void Transform(arma::cube& w) - { - for (size_t i = 0; i < w.n_slices; i++) - { - arma::mat t = w.slice(i); - Transform(t); - w.slice(i) = t; - } - } - - /** - * Apply pooling to the input and store the results to the output parameter. - * - * @param kSize the kernel size used to perform the pooling operation. - * @param input The input to be apply the pooling rule. - * @param output The pooled result. - */ - template - void Pooling(const size_t kSize, - const arma::Mat& input, - arma::Mat& output) - { - - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - output(i / rStep, j / cStep) += pooling.Pooling( - input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param error The error used to perform the unpooling operation. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - /** - * Apply ReSampling to the input and store the results in the output - * parameter. - * - * @param input The input to be apply the ReSampling rule. - * @param output The pooled result. - */ - template - void ReSampling(const arma::Mat& input, arma::Mat& output) - { - double wRatio = (double) (input.n_rows - 1) / (size - 1); - double hRatio = (double) (input.n_cols - 1) / (size - 1); - - double iWidth = input.n_rows - 1; - double iHeight = input.n_cols - 1; - - for (size_t y = 0; y < size; y++) - { - for (size_t x = 0; x < size; x++) - { - double ix = wRatio * x; - double iy = hRatio * y; - - // Get the 4 nearest neighbors. - double ixNw = std::floor(ix); - double iyNw = std::floor(iy); - double ixNe = ixNw + 1; - double iySw = iyNw + 1; - - // Get surfaces to each neighbor. - double se = (ix - ixNw) * (iy - iyNw); - double sw = (ixNe - ix) * (iy - iyNw); - double ne = (ix - ixNw) * (iySw - iy); - double nw = (ixNe - ix) * (iySw - iy); - - // Calculate the weighted sum. - output(y, x) = input(iyNw, ixNw) * nw + - input(iyNw, std::min(ixNe, iWidth)) * ne + - input(std::min(iySw, iHeight), ixNw) * sw + - input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se; - } - } - } - - /** - * Apply DownwardReSampling to the input and store the results into the output - * parameter. - * - * @param input The input to be apply the DownwardReSampling rule. - * @param error The error used to perform the DownwardReSampling operation. - * @param output The DownwardReSampled result. 
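The ReSampling() routine above reduces to the standard bilinear weighting; here is a minimal, dependency-free sketch of those weights with illustrative coordinates.

#include <cassert>
#include <cmath>

int main()
{
  // A fractional source coordinate between columns 2/3 and rows 5/6.
  const double ix = 2.25, iy = 5.75;

  // The four nearest integer neighbours.
  const double ixNw = std::floor(ix), iyNw = std::floor(iy);
  const double ixNe = ixNw + 1, iySw = iyNw + 1;

  // Each neighbour is weighted by the area of the opposite sub-rectangle.
  const double se = (ix - ixNw) * (iy - iyNw);
  const double sw = (ixNe - ix) * (iy - iyNw);
  const double ne = (ix - ixNw) * (iySw - iy);
  const double nw = (ixNe - ix) * (iySw - iy);

  // The weights always form a partition of unity.
  assert(std::abs((se + sw + ne + nw) - 1.0) < 1e-12);
  return 0;
}

DownwardReSampling() below scatters the incoming error back to the same four neighbours using these same weights.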
- */ - template - void DownwardReSampling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - double iWidth = input.n_rows - 1; - double iHeight = input.n_cols - 1; - - double wRatio = iWidth / (size - 1); - double hRatio = iHeight / (size - 1); - - for (size_t y = 0; y < size; y++) - { - for (size_t x = 0; x < size; x++) - { - double ix = wRatio * x; - double iy = hRatio * y; - - // Get the 4 nearest neighbors. - double ixNw = std::floor(ix); - double iyNw = std::floor(iy); - double ixNe = ixNw + 1; - double iySw = iyNw + 1; - - // Get surfaces to each neighbor. - double se = (ix - ixNw) * (iy - iyNw); - double sw = (ixNe - ix) * (iy - iyNw); - double ne = (ix - ixNw) * (iySw - iy); - double nw = (ixNe - ix) * (iySw - iy); - - double ograd = error(y, x); - - output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd; - output(iyNw, std::min(ixNe, iWidth)) = output(iyNw, - std::min(ixNe, iWidth)) + ne * ograd; - output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight), - ixNw) + sw * ograd; - output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output( - std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd; - } - } - } - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored depth of the input. - size_t inputDepth; - - //! The size of the input units. - size_t inSize; - - //! The used glimpse size (height = width). - size_t size; - - //! The number of patches to crop per glimpse. - size_t depth; - - //! The scale fraction. - size_t scale; - - //! The x and y coordinate of the center of the output glimpse. - arma::mat location; - - //! Locally-stored object to perform the mean pooling operation. - MeanPooling pooling; -}; // class GlimpseLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/hard_tanh_layer.hpp rename to src/mlpack/methods/ann/layer/hard_tanh.hpp diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp deleted file mode 100644 index b3b3dbf0266..00000000000 --- a/src/mlpack/methods/ann/layer/linear_layer.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file linear_layer.hpp - * @author Marcus Edel - * - * Definition of the LinearLayer class also known as fully-connected layer or - * affine transformation. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the LinearLayer class. The LinearLayer class represents a - * single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LinearLayer -{ - public: - /** - * Create the LinearLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - LinearLayer(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Mat& output) - { - arma::Mat data(input.n_elem, 1); - - for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++) - { - for (size_t i = 0; i < data.n_cols; i++, c++) - { - data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) * - input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise( - input.slice(c), 1)); - } - } - - output = weights * data; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = weights.t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const InputType& input, - const ErrorType& error, - GradientType& gradient) - { - GradientDelta(input, error, gradient); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } - - private: - /* - * Calculate the gradient using the output delta (3rd order tensor) and the - * input activation (3rd order tensor). 
- * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - arma::Mat data = arma::Mat(d.n_cols, - input.n_elem / d.n_cols); - - for (size_t s = 0, c = 0; s < input.n_slices / - data.n_rows; s++) - { - for (size_t i = 0; i < data.n_rows; i++, c++) - { - data.row(i).subvec(s * input.n_rows * - input.n_cols, (s + 1) * - input.n_rows * - input.n_cols - 1) = arma::vectorise( - input.slice(c), 1); - } - } - - g.slice(0) = d * data / d.n_cols; - } - - /* - * Calculate the gradient (3rd order tensor) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, g.slice(0)); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Mat& g) - { - arma::Cube grad = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, grad); - g = grad.slice(0); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Mat& g) - { - g = d * input.t(); - } - - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LinearLayer - -/** - * Linear Mapping layer to map between 3rd order tensors and dense matrices. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::mat -> -using LinearMappingLayer = LinearLayer; - -//! Layer traits for the linear layer. 
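A compact sketch of the dense forward/backward/gradient triple that LinearLayer implements above (output = W * x, backpropagated error = W^T * delta, dL/dW = delta * x^T); Armadillo is assumed and the sizes are arbitrary.

#include <armadillo>

int main()
{
  arma::mat weights = arma::randn<arma::mat>(3, 5);  // outSize x inSize
  arma::vec input = arma::randn<arma::vec>(5);
  arma::vec delta = arma::randn<arma::vec>(3);       // error from the layer above

  arma::vec output = weights * input;        // Forward()
  arma::vec backprop = weights.t() * delta;  // Backward()
  arma::mat gradient = delta * input.t();    // Gradient(), same shape as weights

  output.print("forward:");
  gradient.print("weight gradient:");
  return 0;
}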
-template< - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp deleted file mode 100644 index 2b417e32b61..00000000000 --- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * @file log_softmax_layer.hpp - * @author Marcus Edel - * - * Definition of the LogSoftmaxLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the log softmax layer. The log softmax loss layer computes - * the multinomial logistic loss of the softmax of its inputs. This layer is - * meant to be used in combination with the negative log likelihood layer - * (NegativeLogLikelihoodLayer), which expects that the input contains - * log-probabilities for each class. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LogSoftmaxLayer -{ - public: - /** - * Create the LogSoftmaxLayer object. - */ - LogSoftmaxLayer() { /* Nothing to do here. */ } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
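The Forward() pass above is a max-shifted log-softmax with a fast exp approximation; below is a plain-Armadillo sketch of the exact quantity it approximates, log softmax(x) = x - max(x) - log(sum(exp(x - max(x)))), with made-up input values.

#include <armadillo>
#include <cmath>

int main()
{
  arma::vec input = {2.0, 1.0, 0.1};

  // Subtracting the maximum keeps the exponentials in a safe range.
  const double maxInput = input.max();
  const arma::vec shifted = input - maxInput;
  const arma::vec logSoftmax =
      shifted - std::log(arma::accu(arma::exp(shifted)));

  // Exponentiating the result recovers a proper probability vector.
  arma::vec softmax = arma::exp(logSoftmax);
  softmax.print("softmax (sums to 1):");
  return 0;
}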
- */ - template - void Backward(const arma::Mat& input, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - InputDataType& Delta() const { return delta; } - //! Modify the delta. - InputDataType& Delta() { return delta; } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LogSoftmaxLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp deleted file mode 100644 index 6ccd2fc65fd..00000000000 --- a/src/mlpack/methods/ann/layer/lstm_layer.hpp +++ /dev/null @@ -1,418 +0,0 @@ -/** - * @file lstm_layer.hpp - * @author Marcus Edel - * - * Definition of the LSTMLayer class, which implements a lstm network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a lstm network layer. - * - * This class allows specification of the type of the activation functions used - * for the gates and cells and also of the type of the function used to - * initialize and update the peephole weights. - * - * @tparam GateActivationFunction Activation function used for the gates. - * @tparam StateActivationFunction Activation function used for the state. - * @tparam OutputActivationFunction Activation function used for the output. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - class GateActivationFunction = LogisticFunction, - class StateActivationFunction = TanhFunction, - class OutputActivationFunction = TanhFunction, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LSTMLayer -{ - public: - /** - * Create the LSTMLayer object using the specified parameters. - * - * @param outSize The number of output units. - * @param peepholes The flag used to indicate if peephole connections should - * be used (Default: false). - * @param WeightInitRule The weight initialization rule used to initialize the - * weight matrix. 
- */ - LSTMLayer(const size_t outSize, const bool peepholes = false) : - outSize(outSize), - peepholes(peepholes), - seqLen(1), - offset(0) - { - if (peepholes) - { - peepholeWeights.set_size(outSize, 3); - peepholeDerivatives = arma::zeros(outSize, 3); - } - else - { - peepholeWeights.set_size(0, 0); - } - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (inGate.n_cols < seqLen) - { - inGate = arma::zeros(outSize, seqLen); - inGateAct = arma::zeros(outSize, seqLen); - inGateError = arma::zeros(outSize, seqLen); - outGate = arma::zeros(outSize, seqLen); - outGateAct = arma::zeros(outSize, seqLen); - outGateError = arma::zeros(outSize, seqLen); - forgetGate = arma::zeros(outSize, seqLen); - forgetGateAct = arma::zeros(outSize, seqLen); - forgetGateError = arma::zeros(outSize, seqLen); - state = arma::zeros(outSize, seqLen); - stateError = arma::zeros(outSize, seqLen); - cellAct = arma::zeros(outSize, seqLen); - } - - // Split up the inputactivation into the 3 parts (inGate, forgetGate, - // outGate). - inGate.col(offset) = input.submat(0, 0, outSize - 1, 0); - - forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0); - outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0); - - if (peepholes && offset > 0) - { - inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1); - forgetGate.col(offset) += peepholeWeights.col(1) % - state.col(offset - 1); - } - - arma::Col inGateActivation = inGateAct.unsafe_col(offset); - GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation); - - arma::Col forgetGateActivation = forgetGateAct.unsafe_col(offset); - GateActivationFunction::fn(forgetGate.unsafe_col(offset), - forgetGateActivation); - - arma::Col cellActivation = cellAct.unsafe_col(offset); - StateActivationFunction::fn(input.submat(outSize * 2, 0, - (outSize * 3) - 1, 0), cellActivation); - - state.col(offset) = inGateAct.col(offset) % cellActivation; - - if (offset > 0) - state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1); - - if (peepholes) - outGate.col(offset) += peepholeWeights.col(2) % state.col(offset); - - arma::Col outGateActivation = outGateAct.unsafe_col(offset); - GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation); - - OutputActivationFunction::fn(state.unsafe_col(offset), output); - output = outGateAct.col(offset) % output; - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
- */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - queryOffset = seqLen - offset - 1; - - arma::Col outGateDerivative; - GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset), - outGateDerivative); - - arma::Col stateActivation; - StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation); - - outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation; - - arma::Col stateDerivative; - StateActivationFunction::deriv(stateActivation, stateDerivative); - - stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) % - stateDerivative; - - if (queryOffset < (seqLen - 1)) - { - stateError.col(queryOffset) += stateError.col(queryOffset + 1) % - forgetGateAct.col(queryOffset + 1); - - if (peepholes) - { - stateError.col(queryOffset) += inGateError.col(queryOffset + 1) % - peepholeWeights.col(0); - stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) % - peepholeWeights.col(1); - } - } - - if (peepholes) - { - stateError.col(queryOffset) += outGateError.col(queryOffset) % - peepholeWeights.col(2); - } - - arma::Col cellDerivative; - StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative); - - arma::Col cellError = inGateAct.col(queryOffset) % cellDerivative % - stateError.col(queryOffset); - - if (queryOffset > 0) - { - arma::Col forgetGateDerivative; - GateActivationFunction::deriv(forgetGateAct.col(queryOffset), - forgetGateDerivative); - - forgetGateError.col(queryOffset) = forgetGateDerivative % - stateError.col(queryOffset) % state.col(queryOffset - 1); - } - - arma::Col inGateDerivative; - GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative); - - inGateError.col(queryOffset) = inGateDerivative % - stateError.col(queryOffset) % cellAct.col(queryOffset); - - if (peepholes) - { - peepholeDerivatives.col(2) += outGateError.col(queryOffset) % - state.col(queryOffset); - - if (queryOffset > 0) - { - peepholeDerivatives.col(0) += inGateError.col(queryOffset) % - state.col(queryOffset - 1); - peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) % - state.col(queryOffset - 1); - } - } - - g = arma::zeros >(outSize * 4, 1); - g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset); - g.submat(outSize, 0, (outSize * 2) - 1, 0) = - forgetGateError.col(queryOffset); - g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError; - g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) = - outGateError.col(queryOffset); - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of the lstm layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& /* gy */, - GradientDataType& /* g */) - { - if (peepholes && offset == 0) - { - peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() * - (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) * - inGate.col(queryOffset).t()); - - peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() * - (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) * - forgetGate.col(queryOffset).t()); - - peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() * - (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) * - outGate.col(queryOffset).t()); - - peepholeDerivatives.zeros(); - } - } - - //! Get the peephole weights. 
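A condensed sketch of the gate layout consumed by the LSTM Forward()/Backward() pair above: the incoming activation is a single column of length 4 * outSize holding [inGate; forgetGate; cell; outGate], the gates pass through the logistic function and the cell/state through tanh. Armadillo is assumed; outSize = 2 and the random inputs are illustrative, and peephole connections are omitted.

#include <armadillo>
#include <cmath>
#include <cstddef>

// Element-wise logistic function (the default GateActivationFunction).
arma::vec Sigmoid(arma::vec x)
{
  x.transform([](double v) { return 1.0 / (1.0 + std::exp(-v)); });
  return x;
}

int main()
{
  const std::size_t outSize = 2;
  arma::vec preActivation = arma::randn<arma::vec>(4 * outSize);
  arma::vec prevState = arma::zeros<arma::vec>(outSize);

  const arma::vec inGate = Sigmoid(preActivation.subvec(0, outSize - 1));
  const arma::vec forgetGate = Sigmoid(preActivation.subvec(outSize, 2 * outSize - 1));
  const arma::vec cell = arma::tanh(preActivation.subvec(2 * outSize, 3 * outSize - 1));
  const arma::vec outGate = Sigmoid(preActivation.subvec(3 * outSize, 4 * outSize - 1));

  const arma::vec state = inGate % cell + forgetGate % prevState;
  const arma::vec output = outGate % arma::tanh(state);
  output.print("lstm output:");
  return 0;
}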
- OutputDataType const& Weights() const { return peepholeWeights; } - //! Modify the peephole weights. - OutputDataType& Weights() { return peepholeWeights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the peephole gradient. - OutputDataType const& Gradient() const { return peepholeGradient; } - //! Modify the peephole gradient. - OutputDataType& Gradient() { return peepholeGradient; } - - //! Get the sequence length. - size_t SeqLen() const { return seqLen; } - //! Modify the sequence length. - size_t& SeqLen() { return seqLen; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(peepholes, "peepholes"); - - if (peepholes) - { - ar & data::CreateNVP(peepholeWeights, "peepholeWeights"); - - if (Archive::is_loading::value) - { - peepholeDerivatives = arma::zeros( - peepholeWeights.n_rows, 3); - } - } - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored peephole indication flag. - bool peepholes; - - //! Locally-stored length of the the input sequence. - size_t seqLen; - - //! Locally-stored sequence offset. - size_t offset; - - //! Locally-stored query offset. - size_t queryOffset; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored ingate object. - InputDataType inGate; - - //! Locally-stored ingate activation object. - InputDataType inGateAct; - - //! Locally-stored ingate error object. - InputDataType inGateError; - - //! Locally-stored outgate object. - InputDataType outGate; - - //! Locally-stored outgate activation object. - InputDataType outGateAct; - - //! Locally-stored outgate error object. - InputDataType outGateError; - - //! Locally-stored forget object. - InputDataType forgetGate; - - //! Locally-stored forget activation object. - InputDataType forgetGateAct; - - //! Locally-stored forget error object. - InputDataType forgetGateError; - - //! Locally-stored state object. - InputDataType state; - - //! Locally-stored state erro object. - InputDataType stateError; - - //! Locally-stored cell activation object. - InputDataType cellAct; - - //! Locally-stored peephole weight object. - OutputDataType peepholeWeights; - - //! Locally-stored derivatives object. - OutputDataType peepholeDerivatives; - - //! Locally-stored peephole gradient object. - OutputDataType peepholeGradient; -}; // class LSTMLayer - -//! Layer traits for the lstm layer. 
-template< - class GateActivationFunction, - class StateActivationFunction, - class OutputActivationFunction, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = true; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp deleted file mode 100644 index 7705b52205a..00000000000 --- a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file multiclass_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the MulticlassClassificationLayer class, which implements a - * multiclass classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a multiclass classification layer that can be used as - * output layer. - * - * A convenience typedef is given: - * - * - ClassificationLayer - */ -class MulticlassClassificationLayer -{ - public: - /** - * Create the MulticlassClassificationLayer object. - */ - MulticlassClassificationLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - } -}; // class MulticlassClassificationLayer - -//! Layer traits for the multiclass classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -/*** - * Alias ClassificationLayer. 
- */ -using ClassificationLayer = MulticlassClassificationLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp deleted file mode 100644 index afa0f42e63c..00000000000 --- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp +++ /dev/null @@ -1,113 +0,0 @@ -/** - * @file multiply_constant_layer.hpp - * @author Marcus Edel - * - * Definition of the MultiplyConstantLayer class, which multiplies the input by - * a (non-learnable) constant. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the multiply constant layer. The multiply constant layer - * multiplies the input by a (non-learnable) constant. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class MultiplyConstantLayer -{ - public: - /** - * Create the BaseLayer object. - */ - MultiplyConstantLayer(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network. Multiply the input with the - * specified constant scalar value. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& input, OutputType& output) - { - output = input * scalar; - } - - /** - * Ordinary feed backward pass of a neural network. The backward pass - * multiplies the error with the specified constant scalar value. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* input */, const DataType& gy, DataType& g) - { - g = gy * scalar; - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } - - private: - //! Locally-stored constant scalar value. - const double scalar; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. 
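For completeness, a tiny sketch of the constant-scaling pair above: the backward pass scales the incoming error by the same non-learnable constant. Armadillo is assumed and the values are illustrative.

#include <armadillo>

int main()
{
  const double scalar = 0.5;

  arma::vec input = {1.0, 2.0, 3.0};
  arma::vec gy = {0.1, 0.2, 0.3};  // error arriving from the next layer

  arma::vec output = input * scalar;  // Forward()
  arma::vec g = gy * scalar;          // Backward()

  output.print("scaled forward:");
  g.print("scaled error:");
  return 0;
}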
- OutputDataType outputParameter; -}; // class MultiplyConstantLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp deleted file mode 100644 index 63200b2c335..00000000000 --- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/** - * @file one_hot_layer.hpp - * @author Shangtong Zhang - * - * Definition of the OneHotLayer class, which implements a standard network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a one hot classification layer that can be used as - * output layer. - */ -class OneHotLayer -{ - public: - /** - * Create the OneHotLayer object. - */ - OneHotLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - output.zeros(); - - arma::uword maxIndex = 0; - inputActivations.max(maxIndex); - output(maxIndex) = 1; - } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } -}; // class OneHotLayer - -//! Layer traits for the one-hot class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - - -#endif diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp deleted file mode 100644 index e8a205f44f8..00000000000 --- a/src/mlpack/methods/ann/layer/pooling_layer.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/** - * @file pooling_layer.hpp - * @author Marcus Edel - * @author Nilay Jain - * - * Definition of the PoolingLayer class, which attaches various pooling - * functions to the embedding layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
- */ -#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the pooling layer. The pooling layer works as a metaclass - * which attaches various functions to the embedding layer. - * - * @tparam PoolingRule Pooling function used for the embedding layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename PoolingRule = MeanPooling, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class PoolingLayer -{ - public: - /** - * Create the PoolingLayer object using the specified number of units. - * - * @param kSize Size of the pooling window. - * @param stride The stride of the convolution operation. - * @param pooling The pooling strategy. - */ - PoolingLayer(const size_t kSize, - const size_t stride = 1, - PoolingRule pooling = PoolingRule()) : - kSize(kSize), - stride(stride), - pooling(pooling) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - Pooling(input, output); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::zeros >((input.n_rows - kSize) / stride + 1, - (input.n_cols - kSize) / stride + 1, input.n_slices); - - for (size_t s = 0; s < input.n_slices; s++) - Pooling(input.slice(s), output.slice(s)); - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, inputParameter.n_slices); - - for (size_t s = 0; s < gy.n_slices; s++) - { - Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s)); - } - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube from the error matrix. 
- arma::Cube mappedError = arma::zeros(outputParameter.n_rows, - outputParameter.n_cols, outputParameter.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * outputParameter.n_rows * outputParameter.n_cols, - (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - outputParameter.n_rows, outputParameter.n_cols); - } - } - - Backward(inputParameter, mappedError, g); - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - InputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - InputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kSize, "kSize"); - ar & data::CreateNVP(pooling, "pooling"); - ar & data::CreateNVP(stride, "stride"); - } - - private: - /** - * Apply pooling to the input and store the results. - * - * @param input The input to be apply the pooling rule. - * @param output The pooled result. - */ - template - void Pooling(const arma::Mat& input, arma::Mat& output) - { - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride) - { - for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride) - { - output(i, j) += pooling.Pooling(input( - arma::span(rowidx, rowidx + rStep - 1), - arma::span(colidx, colidx + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - //! Locally-stored size of the pooling window. - size_t kSize; - - //! Locally-stored stride value by which we move filter. - size_t stride; - - //! Locally-stored pooling strategy. - PoolingRule pooling; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class PoolingLayer - -//! Layer traits for the pooling layer. 
-template< - typename PoolingRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp deleted file mode 100644 index 5e231a7480b..00000000000 --- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp +++ /dev/null @@ -1,192 +0,0 @@ -/** - * @file recurrent_layer.hpp - * @author Marcus Edel - * - * Definition of the RecurrentLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the RecurrentLayer class. Recurrent layers can be used - * similarly to feed-forward layers except that the input isn't stored in the - * inputParameter, instead it's in stored in the recurrentParameter. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class RecurrentLayer -{ - public: - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - RecurrentLayer(const size_t inSize, const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(inSize, 1)) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param outSize The number of output units. - */ - RecurrentLayer(const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(outSize, 1)) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + weights * recurrentParameter; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::mat& g) - { - g = (weights).t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input activation. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - GradientDataType& g) - { - g = d * recurrentParameter.t(); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the input parameter. - InputDataType const& RecurrentParameter() const { return recurrentParameter; } - //! Modify the input parameter. - InputDataType& RecurrentParameter() { return recurrentParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(recurrentParameter, "recurrentParameter"); - ar & data::CreateNVP(weights, "weights"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored recurrent parameter object. - InputDataType recurrentParameter; -}; // class RecurrentLayer - -//! Layer traits for the recurrent layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp deleted file mode 100644 index 655e443b1e5..00000000000 --- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/** - * @file reinforce_normal_layer.hpp - * @author Marcus Edel - * - * Definition of the ReinforceNormalLayer class, which implements the REINFORCE - * algorithm for the normal distribution. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the reinforce normal layer. 
The reinforce normal layer - * implements the REINFORCE algorithm for the normal distribution. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class ReinforceNormalLayer -{ - public: - /** - * Create the ReinforceNormalLayer object. - * - * @param stdev Standard deviation used during the forward and backward pass. - */ - ReinforceNormalLayer(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - } - else - { - // Use maximum a posteriori. - output = input; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& /* gy */, - DataType& g) - { - g = (input - inputParameter) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= -reward; - } - - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - //! Get the value of the reward parameter. - double Reward() const { return reward; } - //! Modify the value of the deterministic parameter. - double& Reward() { return reward; } - - private: - //! Standard deviation used during the forward and backward pass. - const double stdev; - - //! Locally-stored reward parameter. - double reward; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! If true use maximum a posteriori during the forward pass. 
- bool deterministic; -}; // class ReinforceNormalLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/softmax_layer.hpp deleted file mode 100644 index a2d3323eed8..00000000000 --- a/src/mlpack/methods/ann/layer/softmax_layer.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/** - * @file softmax_layer.hpp - * @author Marcus Edel - * - * Definition of the SoftmaxLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the softmax layer. The softmax loss layer computes the - * multinomial logistic loss of the softmax of its inputs. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SoftmaxLayer -{ - public: - /** - * Create the SoftmaxLayer object. - */ - SoftmaxLayer() - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = arma::trunc_exp(input - - arma::repmat(arma::max(input), input.n_rows, 1)); - output /= arma::accu(output); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Mat& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - InputDataType const& Delta() const { return delta; } - //! Modify the delta. - InputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. 
- OutputDataType outputParameter; -}; // class SoftmaxLayer - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp deleted file mode 100644 index c3b723f17d9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * @file sparse_bias_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the SparseBiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a bias layer design for sparse autoencoder. - * The BiasLayer class represents a single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseBiasLayer -{ - public: - /** - * Create the SparseBiasLayer object using the specified number of units and - * bias parameter. - * - * @param outSize The number of output units. - * @param batchSize The batch size used to train the network. - * @param bias The bias value. - */ - SparseBiasLayer(const size_t outSize, const size_t batchSize) : - outSize(outSize), - batchSize(batchSize) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + arma::repmat(weights, 1, input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - InputDataType& g) - { - g = arma::sum(d, 1) / static_cast( - batchSize); - } - - //! Get the batch size - size_t BatchSize() const { return batchSize; } - //! Modify the batch size - size_t& BatchSize() { return batchSize; } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. 
- InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(batchSize, "batchSize"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! The batch size used to train the network. - size_t batchSize; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseBiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp deleted file mode 100644 index 6b1d9d118f9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp +++ /dev/null @@ -1,180 +0,0 @@ -/** - * @file sparse_input_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the sparse input class which serve as the first layer - * of the sparse autoencoder - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP - -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseInputLayer. The SparseInputLayer class represents - * the first layer of sparse autoencoder - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat - > -class SparseInputLayer -{ - public: - /** - * Create the SparseInputLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param lambda L2-regularization parameter. 
- */ - SparseInputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001) : - inSize(inSize), - outSize(outSize), - lambda(lambda) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseInputLayer - -//! Layer traits for the SparseInputLayer. 
-template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp deleted file mode 100644 index 33a2a72f7f3..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/** - * @file sparse_output_layer.hpp - * @author Tham Ngap Wei - * - * This is the fourth layer of sparse autoencoder. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseOutputLayer class. The SparseOutputLayer class - * represents the fourth layer of the sparse autoencoder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseOutputLayer -{ - public: - /** - * Create the SparseLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - SparseOutputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - inSize(inSize), - outSize(outSize), - lambda(lambda), - beta(beta), - rho(rho) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - // Average activations of the hidden layer. - rhoCap = arma::sum(input, 1) / static_cast(input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const arma::Mat& gy, - arma::Mat& g) - { - const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) / - (1 - rhoCap)); - - // NOTE: if the armadillo version high enough, find_nonfinite can prevents - // overflow value: - // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros(); - g = weights.t() * gy + - arma::repmat(klDivGrad, 1, input.n_cols); - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType input, const arma::Mat& d, arma::Mat& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Sets the KL divergence parameter. - void Beta(const double b) - { - beta = b; - } - - //! Gets the KL divergence parameter. - double Beta() const - { - return beta; - } - - //! Sets the sparsity parameter. - void Rho(const double r) - { - rho = r; - } - - //! Gets the sparsity parameter. - double Rho() const - { - return rho; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the RhoCap. - OutputDataType const& RhoCap() const { return rhoCap; } - //! Modify the RhoCap. - OutputDataType& RhoCap() { return rhoCap; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - ar & data::CreateNVP(beta, "beta"); - ar & data::CreateNVP(rho, "rho"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! KL divergence parameter. - double beta; - - //! Sparsity parameter. - double rho; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Average activations of the hidden layer. - OutputDataType rhoCap; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseOutputLayer - -//! Layer traits for the SparseOutputLayer. -template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp deleted file mode 100644 index 5b4da8ed0b7..00000000000 --- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp +++ /dev/null @@ -1,171 +0,0 @@ -/** - * @file vr_class_reward_layer.hpp - * @author Marcus Edel - * - * Definition of the VRClassRewardLayer class, which implements the variance - * reduced classification reinforcement layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. 
You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the variance reduced classification reinforcement layer. - * This layer is meant to be used in combination with the reinforce normal layer - * (ReinforceNormalLayer), which expects that an reward: - * (1 for success, 0 otherwise). - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::field, - typename OutputDataType = arma::field -> -class VRClassRewardLayer -{ - public: - /** - * Create the VRClassRewardLayer object. - * - * @param scale Parameter used to scale the reward. - * @param sizeAverage Take the average over all batches. - */ - VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. - */ - template - double Forward(const arma::field >& input, - const arma::Mat& target) - { - return Forward(input(0, 0), target); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. - */ - template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return -reward / input.n_cols; - } - - return -reward; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - double Backward(const arma::field >& input, - const arma::Mat& /* gy */, - arma::field >& g) - { - g = arma::field >(2, 1); - g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols); - - double vrReward = reward - arma::as_scalar(input(1, 0)); - if (sizeAverage) - { - vrReward /= input(0, 0).n_cols; - } - - const double norm = sizeAverage ? 2.0 / input.n_cols : 2.0; - - g(1, 0) = norm * (input(1, 0) - reward); - - return vrReward; - } - - //! Get the input parameter. - InputDataType& InputParameter() const {return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. 
- OutputDataType& OutputParameter() const {return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const {return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - private: - //! Locally-stored value to scale the reward. - const double scale; - - //! If true take the average over all batches. - const bool sizeAverage; - - //! Locally stored reward parameter. - double reward; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; -}; // class VRClassRewardLayer - -}; // namespace ann -}; // namespace mlpack - -#endif From c5d74f1a5a0123c005fbfb6b327c2ff4baa07a34 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 9 Nov 2016 01:24:04 +0100 Subject: [PATCH 18/82] Increase the number of template arguments for the boost list class. --- src/mlpack/prereqs.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index 4849487deec..c036ba98356 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -35,6 +35,12 @@ #define _USE_MATH_DEFINES #include +// Increase the number of template arguments for the boost list class. +#undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#undef BOOST_MPL_LIMIT_LIST_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_LIST_SIZE 30 + // For tgamma(). #include From 120263d49eb55a3a81279d47d4efe1c1ea7cc14b Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 9 Nov 2016 22:38:06 +0100 Subject: [PATCH 19/82] Move pooling rules into the pooling class. So that we can use the MaxPooling and MeanPooling class names for the actual module name. --- .../methods/ann/pooling_rules/CMakeLists.txt | 15 ----- .../methods/ann/pooling_rules/max_pooling.hpp | 56 ------------------- .../ann/pooling_rules/mean_pooling.hpp | 56 ------------------- 3 files changed, 127 deletions(-) delete mode 100644 src/mlpack/methods/ann/pooling_rules/CMakeLists.txt delete mode 100644 src/mlpack/methods/ann/pooling_rules/max_pooling.hpp delete mode 100644 src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt deleted file mode 100644 index 99b6b803bb1..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - max_pooling.hpp - mean_pooling.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). 
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp deleted file mode 100644 index f50b0419fe3..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file max_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MaxPooling class, which implements max pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The max pooling rule for convolution neural networks. Take the maximum value - * within the receptive block. - */ -class MaxPooling -{ - public: - /* - * Return the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return input.max(); - } - - /* - * Set the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp deleted file mode 100644 index 7ab88c329f4..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file mean_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MeanPooling class, which implements mean pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The mean pooling rule for convolution neural networks. Average all values - * within the receptive block. - */ -class MeanPooling -{ - public: - /* - * Return the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return arma::mean(arma::mean(input)); - } - - /* - * Set the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. 
- */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif From 2016462b5deaa7c332b0045bdc51d2427d852ba7 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 26 Nov 2016 20:48:26 +0100 Subject: [PATCH 20/82] Use the stride parameter inside the convolution function. --- .../ann/convolution_rules/fft_convolution.hpp | 30 +++++++++-- .../convolution_rules/naive_convolution.hpp | 54 +++++++++++++------ .../ann/convolution_rules/svd_convolution.hpp | 24 +++++++-- 3 files changed, 82 insertions(+), 26 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index bbcfecdaebb..af1b6f5c9e9 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -47,13 +47,17 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -82,13 +86,17 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -130,11 +138,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -162,11 +174,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -191,11 +207,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index fc7fc6926fb..6fe5bf6e672 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -41,16 +41,20 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - output = arma::zeros >(input.n_rows - filter.n_rows + 1, - input.n_cols - filter.n_cols + 1); + output = arma::zeros >((input.n_rows - filter.n_rows + 1) / + dW, (input.n_cols - filter.n_cols + 1) / dH); // It seems to be about 3.5 times faster to use pointers instead of // filter(ki, kj) * input(leftInput + ki, topInput + kj) and output(i, j). @@ -63,7 +67,7 @@ class NaiveConvolution const eT* kernelPtr = filter.memptr(); for (size_t kj = 0; kj < filter.n_cols; ++kj) { - const eT* inputPtr = input.colptr(kj + j) + i; + const eT* inputPtr = input.colptr(kj + j * dW) + i * dH; for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr) *outputPtr += *kernelPtr * (*inputPtr); } @@ -77,13 +81,17 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); @@ -92,11 +100,11 @@ class NaiveConvolution arma::Mat inputPadded = arma::zeros >(outputRows, outputCols); inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1, - filter.n_rows - 1 + input.n_rows - 1, - filter.n_cols - 1 + input.n_cols - 1) = input; + filter.n_rows - 1 + input.n_rows - 1, + filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output); + output, dW, dH); } /* @@ -105,15 +113,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -122,7 +134,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -133,15 +145,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input, filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, filter.n_slices); @@ -150,7 +166,7 @@ class NaiveConvolution for (size_t i = 1; i < filter.n_slices; i++) { NaiveConvolution::Convolution(input, filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -161,15 +177,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter, - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -178,7 +198,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter, - output.slice(i)); + output.slice(i), dW, dH); } } diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index a0b317ebb0c..e61b735bf8d 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -49,11 +49,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -113,11 +117,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -142,11 +150,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -170,11 +182,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
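+ *
+ * As with the FFT rule, the SVD-based rule computes the dense convolution;
+ * the dW and dH arguments are accepted only for interface compatibility and
+ * are not evaluated.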
*/ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From ac174b1c3322d7479f2ef0c7f5144268a7e6fe9d Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 3 Dec 2016 22:56:33 +0100 Subject: [PATCH 21/82] Increase the number of template arguments for the boost list class. --- .../methods/ann/layer/{leaky_relu_layer.hpp => leaky_relu.hpp} | 0 ...ive_log_likelihood_layer.hpp => negative_log_likelihood.hpp} | 0 src/mlpack/prereqs.hpp | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename src/mlpack/methods/ann/layer/{leaky_relu_layer.hpp => leaky_relu.hpp} (100%) rename src/mlpack/methods/ann/layer/{negative_log_likelihood_layer.hpp => negative_log_likelihood.hpp} (100%) diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/leaky_relu_layer.hpp rename to src/mlpack/methods/ann/layer/leaky_relu.hpp diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp rename to src/mlpack/methods/ann/layer/negative_log_likelihood.hpp diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index c036ba98356..3c3cfa89175 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -39,7 +39,7 @@ #undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS #undef BOOST_MPL_LIMIT_LIST_SIZE #define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS -#define BOOST_MPL_LIMIT_LIST_SIZE 30 +#define BOOST_MPL_LIMIT_LIST_SIZE 40 // For tgamma(). #include From e9f9eb432ccba1abbfc71c7de07152fc4e4ff110 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:03:20 +0100 Subject: [PATCH 22/82] Remove stride paramater from svd and fft convolution rule. 
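The FFT and SVD rules compute the dense convolution and never evaluate the
stride arguments introduced in the previous commits, so the parameters are
dropped again here; only NaiveConvolution keeps dW and dH. A minimal sketch of
the remaining strided rule, assuming the ValidConvolution border mode from
border_modes.hpp (illustrative only, not part of this patch):

    using namespace mlpack::ann;

    arma::mat input(8, 8, arma::fill::randu);
    arma::mat filter(3, 3, arma::fill::randu);
    arma::mat output;

    // Valid-mode convolution with a stride of 2 in both directions.
    NaiveConvolution<ValidConvolution>::Convolution(input, filter, output, 2, 2);

    // The strided rule produces (8 - 3 + 1) / 2 = 3 rows and 3 columns.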
--- .../ann/convolution_rules/fft_convolution.hpp | 20 +++++-------------- .../convolution_rules/naive_convolution.hpp | 6 +++--- .../ann/convolution_rules/svd_convolution.hpp | 16 ++++----------- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index af1b6f5c9e9..225626e34b8 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -55,9 +55,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -94,9 +92,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -144,9 +140,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -180,9 +174,7 @@ class FFTConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -213,9 +205,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index 6fe5bf6e672..c90574293ba 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -93,8 +93,8 @@ class NaiveConvolution const size_t dW = 1, const size_t dH = 1) { - const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); - const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); + const size_t outputRows = (input.n_rows + 2 * (filter.n_rows - 1)) * dW; + const size_t outputCols = (input.n_cols + 2 * (filter.n_cols - 1)) * dH; // Pad filter and input to the working output shape. 
arma::Mat inputPadded = arma::zeros >(outputRows, @@ -104,7 +104,7 @@ class NaiveConvolution filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output, dW, dH); + output, 1, 1); } /* diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index e61b735bf8d..5206ec1996f 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -55,9 +55,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -123,9 +121,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -156,9 +152,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -188,9 +182,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From 63a6c4ef9b0939b109738a651a50317c8e719a09 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:27:30 +0100 Subject: [PATCH 23/82] Refactor ann layer. 
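Each layer now lives in its own header and exposes a uniform interface:
Forward(), Backward() and Gradient() take rvalue references, and accessors such
as Parameters(), Delta() and Gradient() expose the state that the containers
wire together through the LayerTypes/visitor machinery. A standalone sketch
using the new Add module introduced below (illustrative only, not part of this
patch):

    mlpack::ann::Add<> bias(5);    // Applies a 5x1 bias term.
    bias.Parameters().randn();

    arma::mat input(5, 1, arma::fill::randu);
    arma::mat output, g, gy(5, 1, arma::fill::ones);

    // Forward pass: output = input + weights.
    bias.Forward(std::move(input), std::move(output));

    // Backward pass simply passes the error through: g = gy.
    bias.Backward(std::move(input), std::move(gy), std::move(g));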
--- src/mlpack/methods/ann/layer/add.hpp | 149 +++++ src/mlpack/methods/ann/layer/add_merge.hpp | 153 +++++ src/mlpack/methods/ann/layer/base_layer.hpp | 63 +-- src/mlpack/methods/ann/layer/concat.hpp | 285 ++++++++++ .../methods/ann/layer/concat_performance.hpp | 150 +++++ src/mlpack/methods/ann/layer/constant.hpp | 131 +++++ src/mlpack/methods/ann/layer/convolution.hpp | 524 ++++++++++++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 263 +++++++++ src/mlpack/methods/ann/layer/dropout.hpp | 194 +++++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 86 +-- src/mlpack/methods/ann/layer/join.hpp | 119 ++++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 78 +-- src/mlpack/methods/ann/layer/linear.hpp | 180 ++++++ .../methods/ann/layer/linear_no_bias.hpp | 174 ++++++ src/mlpack/methods/ann/layer/log_softmax.hpp | 131 +++++ src/mlpack/methods/ann/layer/lookup.hpp | 161 ++++++ src/mlpack/methods/ann/layer/lstm.hpp | 516 +++++++++++++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 375 +++++++++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 322 +++++++++++ .../methods/ann/layer/mean_squared_error.hpp | 98 ++++ .../methods/ann/layer/multiply_constant.hpp | 108 ++++ .../ann/layer/negative_log_likelihood.hpp | 46 +- src/mlpack/methods/ann/layer/recurrent.hpp | 356 ++++++++++++ src/mlpack/methods/ann/layer/select.hpp | 127 +++++ src/mlpack/methods/ann/layer/sequential.hpp | 292 ++++++++++ 25 files changed, 4875 insertions(+), 206 deletions(-) create mode 100644 src/mlpack/methods/ann/layer/add.hpp create mode 100644 src/mlpack/methods/ann/layer/add_merge.hpp create mode 100644 src/mlpack/methods/ann/layer/concat.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_performance.hpp create mode 100644 src/mlpack/methods/ann/layer/constant.hpp create mode 100644 src/mlpack/methods/ann/layer/convolution.hpp create mode 100644 src/mlpack/methods/ann/layer/dropconnect.hpp create mode 100644 src/mlpack/methods/ann/layer/dropout.hpp create mode 100644 src/mlpack/methods/ann/layer/join.hpp create mode 100644 src/mlpack/methods/ann/layer/linear.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_no_bias.hpp create mode 100644 src/mlpack/methods/ann/layer/log_softmax.hpp create mode 100644 src/mlpack/methods/ann/layer/lookup.hpp create mode 100644 src/mlpack/methods/ann/layer/lstm.hpp create mode 100644 src/mlpack/methods/ann/layer/max_pooling.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_pooling.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_squared_error.hpp create mode 100644 src/mlpack/methods/ann/layer/multiply_constant.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent.hpp create mode 100644 src/mlpack/methods/ann/layer/select.hpp create mode 100644 src/mlpack/methods/ann/layer/sequential.hpp diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp new file mode 100644 index 00000000000..be8fc60e31b --- /dev/null +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -0,0 +1,149 @@ +/** + * @file add.hpp + * @author Marcus Edel + * + * Definition of the Add class that applies a bias term to the incoming data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_HPP + +#include +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Add module class. The Add module applies a bias term + * to the incoming data. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Add +{ + public: + /** + * Create the Add object using the specified number of output units. + * + * @param outSize The number of output units. + */ + Add(const size_t outSize) : outSize(outSize) + { + weights.set_size(outSize, 1); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = input + weights; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + } + + private: + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. 
+ OutputDataType outputParameter; +}; // class Add + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp new file mode 100644 index 00000000000..7a01792d250 --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -0,0 +1,153 @@ +/** + * @file add_merge.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the AddMerge module class. The AddMerge class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class AddMerge +{ + public: + //! Create the AddMerge object. + AddMerge() + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& /* input */, OutputType&& output) + { + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. 
+ OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(network, "network"); + } + + private: + std::vector network; + + //! Locally-stored delete visitor module object. + DeleteVisitor deleteVisitor; + + //! Locally-stored output parameter visitor module object. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor module object. + DeltaVisitor deltaVisitor; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class AddMerge + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp index 2b915a116d5..68afe1cefeb 100644 --- a/src/mlpack/methods/ann/layer/base_layer.hpp +++ b/src/mlpack/methods/ann/layer/base_layer.hpp @@ -32,7 +32,6 @@ namespace ann /** Artificial Neural Network. */ { * - IdentityLayer * - ReLULayer * - TanHLayer - * - BaseLayer2D * * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, @@ -64,33 +63,14 @@ class BaseLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { ActivationFunction::fn(input, output); } /** * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - ActivationFunction::deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed + * f(x) by propagating x backwards trough f. Using the results from the feed * forward pass. * * @param input The propagated input activation. @@ -98,30 +78,13 @@ class BaseLayer * @param g The calculated gradient. */ template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; + arma::Mat derivative; ActivationFunction::deriv(input, derivative); - g = mappedError % derivative; + g = gy % derivative; } //! Get the input parameter. 
@@ -205,18 +168,6 @@ template < using TanHLayer = BaseLayer< ActivationFunction, InputDataType, OutputDataType>; -/** - * Standard Base-Layer2D using the logistic activation function. - */ -template < - class ActivationFunction = LogisticFunction, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using BaseLayer2D = BaseLayer< - ActivationFunction, InputDataType, OutputDataType>; - - } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp new file mode 100644 index 00000000000..bd836c7e26b --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -0,0 +1,285 @@ +/** + * @file concat.hpp + * @author Marcus Edel + * + * Definition of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Concat class. The Concat class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Concat +{ + public: + /** + * Create the Concat object using the specified parameters. + * + * @param model Expose all network modules. + * @param same Merge the error in the backward pass. + */ + Concat(const bool model = true, const bool same = true) : + model(model), + same(same) + { + parameters.set_size(0, 0); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. 
+ * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } + } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + private: + //! 
Parameter which indicates if the modules should be exposed. + bool model; + + //! If true merge the error in the backward pass. + bool same; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; +}; // class Concat + + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp new file mode 100644 index 00000000000..0f03cbc72e1 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -0,0 +1,150 @@ +/** + * @file concat_performance.hpp + * @author Marcus Edel + * + * Definition of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the concat performance class. The class works as a + * feed-forward fully connected network container which plugs performance layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename OutputLayerType = NegativeLogLikelihood<>, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ConcatPerformance +{ + public: + /** + * Create the ConcatPerformance object. + * + * @param inSize The number of inputs. + * @param outputLayer Output layer used to evaluate the network. + */ + ConcatPerformance(const size_t inSize, + OutputLayerType&& outputLayer = OutputLayerType()) : + inSize(inSize), + outputLayer(std::move(outputLayer)) + { + /* Nothing to do here. */ + } + + /* + * Computes the Negative log likelihood. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, arma::Mat&& target) + { + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; + } + + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. 
The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored number of inputs. + size_t inSize; + + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConcatPerformance + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp new file mode 100644 index 00000000000..58816acfbd3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -0,0 +1,131 @@ +/** + * @file constant.hpp + * @author Marcus Edel + * + * Definition of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the constant layer. The constant layer outputs a given + * constant value given any input value. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Constant +{ + public: + /** + * Create the Constant object that outputs a given constant scalar value + * given any input value. + * + * @param outSize The number of output units. 
+ * @param scalar The constant value used to create the constant output. + */ + Constant(const size_t outSize, const double scalar) : + inSize(0), + outSize(outSize) + { + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); + } + + /** + * Ordinary feed forward pass of a neural network. The forward pass fills the + * output with the specified constant parameter. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; + } + + /** + * Ordinary feed backward pass of a neural network. The backward pass of the + * constant layer is returns always a zero output error matrix. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) + { + g = arma::zeros(inSize, 1); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(constantOutput, "constantOutput"); + } + + private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored constant output matrix. + OutputDataType constantOutput; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConstantLayer + +}; // namespace ann +}; // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp new file mode 100644 index 00000000000..be7fb7d6a5b --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -0,0 +1,524 @@ +/** + * @file convolution.hpp + * @author Marcus Edel + * + * Definition of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP + +#include + +#include +#include +#include +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Convolution class. The Convolution class represents a + * single layer of a neural network. + * + * @tparam ForwardConvolutionRule Convolution to perform forward process. + * @tparam BackwardConvolutionRule Convolution to perform backward process. 
+ * @tparam GradientConvolutionRule Convolution to calculate gradient. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename ForwardConvolutionRule = NaiveConvolution, + typename BackwardConvolutionRule = NaiveConvolution, + typename GradientConvolutionRule = NaiveConvolution, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Convolution +{ +public: + //! Create the Convolution object. + Convolution() + { + /* Nothing to do here. */ + } + + /** + * Create the Convolution object using the specified number of input maps, + * output maps, filter size, stride and padding parameter. + * + * @param inSize The number of input maps. + * @param outSize The number of output maps. + * @param kW Width of the filter/kernel. + * @param kH Height of the filter/kernel. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. + * @param padW Padding width of the input. + * @param padH Padding height of the input. + * @param inputWidth The widht of the input data. + * @param inputHeight The height of the input data. + */ + Convolution(const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const size_t padW = 0, + const size_t padH = 0, + const size_t inputWidth = 0, + const size_t inputHeight = 0) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) + { + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); + } + + /* + * Set the weight and bias term. + */ + void Reset() + { + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false,false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
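+ *
+ * Each output map is the sum of the strided convolutions of all input maps
+ * with the corresponding filter slices, plus that map's bias; its spatial
+ * size is floor((size + 2 * pad - k) / stride) + 1 in each dimension.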
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + + + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
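+ *
+ * The filter gradients are obtained by convolving each (optionally padded)
+ * input map with the backpropagated error of the corresponding output map,
+ * and the bias gradient of an output map is the sum of its error entries.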
+ */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) + { + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. 
+ size_t& OutputHeight() { return outputHeight; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); + } + + private: + + /* + * Return the convolution output size. + * + * @param size The size of the input (row or column). + * @param k The size of the filter (width or height). + * @param s The stride size (x or y direction). + * @param p The size of the padding (width or height). + * @return The convolution output size. + */ + size_t ConvOutSize(const size_t size, + const size_t k, + const size_t s, + const size_t p) + { + return std::floor(size + p * 2 - k) / s + 1; + } + + /* + * Rotates a 3rd-order tensor counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Cube& input, arma::Cube& output) + { + output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); + + // * left-right flip, up-down flip */ + for (size_t s = 0; s < output.n_slices; s++) + output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); + } + + /* + * Rotates a dense matrix counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Mat& input, arma::Mat& output) + { + // * left-right flip, up-down flip */ + output = arma::fliplr(arma::flipud(input)); + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Mat& input, + size_t wPad, + size_t hPad, + arma::Mat& output) + { + if (output.n_rows != input.n_rows + wPad * 2 || + output.n_cols != input.n_cols + hPad * 2) + { + output = arma::zeros(input.n_rows + wPad * 2, input.n_cols + hPad * 2); + } + + output.submat(wPad, hPad, wPad + input.n_rows - 1, + hPad + input.n_cols - 1) = input; + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Cube& input, + size_t wPad, + size_t hPad, + arma::Cube& output) + { + output = arma::zeros(input.n_rows + wPad * 2, + input.n_cols + hPad * 2, input.n_slices); + + for (size_t i = 0; i < input.n_slices; ++i) + { + Pad(input.slice(i), wPad, hPad, output.slice(i)); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored filter/kernel width. + size_t kW; + + //! Locally-stored filter/kernel height. + size_t kH; + + //! Locally-stored stride of the filter in x-direction. + size_t dW; + + //! Locally-stored stride of the filter in y-direction. + size_t dH; + + //! Locally-stored padding width. + size_t padW; + + //! 
Locally-stored padding height. + size_t padH; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight object. + arma::cube weight; + + //! Locally-stored bias term object. + arma::mat bias; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed padded input parameter. + arma::cube inputPaddedTemp; + + //! Locally-stored transformed error parameter. + arma::cube gTemp; + + //! Locally-stored transformed gradient parameter. + arma::cube gradientTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Convolution + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp new file mode 100644 index 00000000000..6180c812572 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -0,0 +1,263 @@ +/** + * @file dropconnect.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Definition of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "linear.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The DropConnect layer is a regularizer that randomly with probability + * ratio sets the connection values to zero and scales the remaining + * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) + * when deterministic is false. In the deterministic mode(during testing), + * the layer just computes the output. The output is computed according + * to the input layer. If no input layer is given, it will take a linear layer + * as default. + * + * Note: + * During training you should set deterministic to false and during testing + * you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @inproceedings{WanICML2013, + * title={Regularization of Neural Networks using DropConnect}, + * booktitle = {Proceedings of the 30th International Conference on Machine + * Learning(ICML - 13)}, + * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and + * Rob Fergus}, + * year = {2013} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
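+ *
+ * For example, with ratio = 0.25 each connection is dropped with probability
+ * 0.25 during training and the resulting output is scaled by
+ * 1 / (1 - 0.25) ~ 1.33, so the expected activation matches the
+ * deterministic (test time) forward pass.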
+ */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class DropConnect +{ + public: + //! Create the DropConnect object. + DropConnect() + { + /* Nothing to do here. */ + } + + /** + * Creates the DropConnect Layer as a Linear Object that takes input size, + * output size and ratio as parameter. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param ratio The probability of setting a value to zero. + */ + DropConnect(const size_t inSize, + const size_t outSize, + const double ratio = 0.5) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) + { + network.push_back(baseLayer); + } + + ~DropConnect() + { + boost::apply_visitor(DeleteVisitor(), baseLayer); + } + + /** + * Ordinary feed forward pass of the DropConnect layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + + output = output * scale; + } + } + + /** + * Ordinary feed backward pass of the DropConnect layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) + { + boost::apply_visitor( + BackwardVisitor( + std::move(input), + std::move(gy), + std::move(g) + ), + baseLayer); + } + + /** + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param d The calculated error. + * @param g The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. + boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. 
+ OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + + //! Modify the value of the deterministic parameter. + bool &Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + +private: + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mask object. + OutputDataType mask; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Denoise mask for the weights. + OutputDataType denoise; + + //! Locally-stored layer module. + LayerTypes baseLayer; + + //! Locally-stored network modules. + std::vector network; +}; // class DropConnect. + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp new file mode 100644 index 00000000000..57e76f6170d --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -0,0 +1,194 @@ +/** + * @file dropout.hpp + * @author Marcus Edel + * + * Definition of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The dropout layer is a regularizer that randomly with probability ratio + * sets input values to zero and scales the remaining elements by factor 1 / + * (1 - ratio). If rescale is true the input is scaled with 1 / (1-p) when + * deterministic is false. In the deterministic mode (during testing), the layer + * just scales the output. + * + * Note: During training you should set deterministic to false and during + * testing you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @article{Hinton2012, + * author = {Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, + * Ilya Sutskever, Ruslan Salakhutdinov}, + * title = {Improving neural networks by preventing co-adaptation of feature + * detectors}, + * journal = {CoRR}, + * volume = {abs/1207.0580}, + * year = {2012}, + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
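+ *
+ * A minimal usage sketch (the ratio and input size below are illustrative):
+ *
+ * @code
+ * Dropout<> dropout(0.3); // Drop each unit with probability 0.3.
+ * dropout.Deterministic() = false; // Training mode.
+ *
+ * arma::mat input = arma::randu<arma::mat>(10, 1);
+ * arma::mat output;
+ * dropout.Forward(std::move(input), std::move(output));
+ * @endcode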
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Dropout +{ + public: + /** + * Create the Dropout object using the specified ratio and rescale + * parameter. + * + * @param ratio The probability of setting a value to zero. + * @param rescale If true the input is rescaled when deterministic is False. + */ + Dropout(const double ratio = 0.5, + const bool rescale = true) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of the dropout layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } + } + + /** + * Ordinary feed backward pass of the dropout layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy % mask * scale; + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the detla. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + + //! The value of the rescale parameter. + bool Rescale() const {return rescale; } + //! Modify the value of the rescale parameter. + bool& Rescale() {return rescale; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); + } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mast object. + OutputDataType mask; + + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! If true the input is rescaled when deterministic is False. 
+ bool rescale; +}; // class Dropout + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index c707017fcbf..76b19f964af 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -1,16 +1,16 @@ /** - * @file hard_tanh_layer.hpp + * @file hard_tanh.hpp * @author Dhawal Arora * - * Definition and implementation of the HardTanHLayer layer. + * Definition and implementation of the HardTanH layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP #include @@ -46,18 +46,18 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class HardTanHLayer +class HardTanH { public: /** - * Create the HardTanHLayer object using the specified parameters. The range + * Create the HardTanH object using the specified parameters. The range * of the linear region can be adjusted by specifying the maxValue and * minValue. Default (maxValue = 1, minValue = -1). * * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanHLayer(const double maxValue = 1, const double minValue = -1) : + HardTanH(const double maxValue = 1, const double minValue = -1) : maxValue(maxValue), minValue(minValue) { // Nothing to do here. @@ -71,9 +71,14 @@ class HardTanHLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { - Fn(input, output); + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? minValue : output(i))); + } } /** @@ -86,49 +91,18 @@ class HardTanHLayer * @param g The calculated gradient. */ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const DataType&& input, + DataType&& gy, + DataType&& g) { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + g = gy; + for (size_t i = 0; i < input.n_elem; i++) { - for (size_t i = 0; i < gy.n_cols; i++) + if (input(i) < minValue || input(i) > maxValue) { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); + g(i) = 0; } } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; } //! Get the input parameter. @@ -197,20 +171,6 @@ class HardTanHLayer std::max( val, minValue ), maxValue ); } ); } - /** - * Computes the HardTanH function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - Fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the HardTanH function. * @@ -229,7 +189,7 @@ class HardTanHLayer * @param x The resulting derivatives. */ template - void Deriv(const InputType& x, OutputType& y) + void Deriv(const InputType&& x, OutputType& y) { y = x; @@ -251,7 +211,7 @@ class HardTanHLayer //! Minimum value for the HardTanH function. double minValue; -}; // class HardTanHLayer +}; // class HardTanH } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp new file mode 100644 index 00000000000..2933ef181ef --- /dev/null +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -0,0 +1,119 @@ +/** + * @file join.hpp + * @author Marcus Edel + * + * Definition of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Join module class. The Join class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Join +{ + public: + //! Create the Join object. + Join() + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. 
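+ *
+ * The backpropagated error is reshaped (without copying) back to the row
+ * and column dimensions recorded by Forward().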
+ */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); + } + + private: + //! Locally-stored number of input rows. + size_t inSizeRows; + + //! Locally-stored number of input cols. + size_t inSizeCols; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Join + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp index a87792e4295..8e69712b7f9 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -1,8 +1,8 @@ /** - * @file leaky_relu_layer.hpp + * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLULayer layer first introduced + * Definition and implementation of LeakyReLU layer first introduced * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP #include @@ -41,17 +41,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LeakyReLULayer +class LeakyReLU { public: /** - * Create the LeakyReLULayer object using the specified parameters. + * Create the LeakyReLU object using the specified parameters. * The non zero gradient can be adjusted by specifying tha parameter * alpha in the range 0 to 1. Default (alpha = 0.03) * * @param alpha Non zero gradient */ - LeakyReLULayer(const double alpha = 0.03) : alpha(alpha) + LeakyReLU(const double alpha = 0.03) : alpha(alpha) { // Nothing to do here. } @@ -64,7 +64,7 @@ class LeakyReLULayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { Fn(input, output); } @@ -79,51 +79,13 @@ class LeakyReLULayer * @param g The calculated gradient. 
*/ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) + void Backward(const DataType&& input, DataType&& gy, DataType&& g) { DataType derivative; Deriv(input, derivative); g = gy % derivative; } - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } - //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } //! Modify the input parameter. @@ -177,20 +139,6 @@ class LeakyReLULayer y = arma::max(x, alpha * x); } - /** - * Computes the LeakyReLU function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the LeakyReLU function. * @@ -215,11 +163,11 @@ class LeakyReLULayer y = x; for (size_t i = 0; i < x.n_elem; i++) + { y(i) = Deriv(x(i)); + } } - - //! Locally-stored delta object. OutputDataType delta; @@ -232,9 +180,9 @@ class LeakyReLULayer //! Leakyness Parameter in the range 0 + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Linear layer class. The Linear class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Linear +{ + public: + //! Create the Linear object. + Linear() {} + + /** + * Create the Linear layer object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + Linear(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize + outSize, 1); + } + + /* + * Reset the layer parameter. + */ + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
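+ *
+ * The layer computes the affine transformation f(x) = W x + b, where W is
+ * the (outSize x inSize) weight matrix and b the bias vector; both are
+ * views into the single weights parameter set up by Reset().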
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = (weight * input) + bias; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* unused */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = weight.t() * gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight paramters. + OutputDataType weight; + + //! Locally-stored bias term parameters. + OutputDataType bias; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Linear + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp new file mode 100644 index 00000000000..92064727822 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -0,0 +1,174 @@ +/** + * @file linear.hpp + * @author Marcus Edel + * + * Definition of the LinearNoBias class also known as fully-connected layer or + * affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP + +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the LinearNoBias class. The LinearNoBias class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LinearNoBias +{ + public: + //! Create the LinearNoBias object. + LinearNoBias() {} + /** + * Create the LinearNoBias object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + LinearNoBias(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize, 1); + } + + /* + * Reset the layer parameter. + */ + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weight * input; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = weight.t() * gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. 
+ OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight parameter. + OutputDataType weight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LinearNoBias + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp new file mode 100644 index 00000000000..95a79c97b91 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -0,0 +1,131 @@ +/** + * @file log_softmax.hpp + * @author Marcus Edel + * + * Definition of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the log softmax layer. The log softmax loss layer computes + * the multinomial logistic loss of the softmax of its inputs. This layer is + * meant to be used in combination with the negative log likelihood layer + * (NegativeLogLikelihoodLayer), which expects that the input contains + * log-probabilities for each class. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LogSoftMax +{ + public: + /** + * Create the LogSoftmax object. + */ + LogSoftMax() { /* Nothing to do here. */ } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. 
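+ // The polynomial below is the fourth-order Taylor expansion of
+ // exp(x / 8); squaring the result three times raises it to the 8th
+ // power, giving exp(x), and the reciprocal then yields exp(-x). For
+ // x >= 13, exp(-x) is below 3e-6, so 0 is returned directly.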
+ static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy - arma::exp(input) * arma::accu(gy); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + InputDataType& Delta() const { return delta; } + //! Modify the delta. + InputDataType& Delta() { return delta; } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LogSoftmax + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp new file mode 100644 index 00000000000..0f9f0b4978d --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -0,0 +1,161 @@ +/** + * @file lookup.hpp + * @author Marcus Edel + * + * Definition of the Lookup class a particular convolution, where the width of + * the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP + +#include +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Lookup class. The Lookup class is a particular + * convolution, where the width of the convolution is 1. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Lookup +{ + public: + /** + * Create the Lookup object using the specified number of input and output + * units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. 
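+ *
+ * The layer stores an (outSize x inSize) embedding matrix; Forward() expects
+ * a column of 1-based indices in [1, inSize] and returns the corresponding
+ * embedding columns. A small illustrative sketch (sizes are arbitrary, and
+ * the weights are typically initialized by the enclosing network):
+ *
+ * @code
+ * Lookup<> lookup(5, 3); // 5 embeddings, each of dimension 3.
+ * @endcode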
+ */ + Lookup(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize, inSize); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weights.cols(arma::conv_to::from(input) - 1); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Lookup + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp new file mode 100644 index 00000000000..6cd9dc630fb --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -0,0 +1,516 @@ +/** + * @file lstm.hpp + * @author Marcus Edel + * + * Definition of the LSTM class, which implements a lstm network + * layer. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * An implementation of a lstm network layer. + * + * This class allows specification of the type of the activation functions used + * for the gates and cells and also of the type of the function used to + * initialize and update the peephole weights. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LSTM +{ + public: + //! Create the LSTM object. + LSTM() { /* Nothing to do here */ } + + /** + * Create the LSTM layer object using the specified parameters. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + LSTM(const size_t inSize, const size_t outSize, const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + { + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
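+ *
+ * This performs one step of the standard LSTM recurrence: the input and the
+ * previous output are passed through the two linear modules, the result is
+ * split into the input, candidate (hidden), forget and output gate
+ * activations, the cell state is updated as c_t = i % g + f % c_{t-1}, and
+ * the output is h_t = o % tanh(c_t).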
+ */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(prevOutput), + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)) + ), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(0, 0, 1 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)) + ), + outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. + arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor( + ForwardVisitor( + std::move(nextCell), + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) + ), + cellModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)) + ), + cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. 
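+ *
+ * The error is propagated back through the output gate, the cell activation
+ * and the remaining gates; the stacked per-gate deltas are stored in
+ * prevError for use by Gradient().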
+ */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)), + std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, + cellActivationModule)) + ), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)), + std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)), + std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)), + std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)), + std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) + ), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) + ), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
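+ *
+ * The gradients of the two linear modules are computed from the stacked gate
+ * errors (prevError) and, for the recurrent connection, from the output
+ * stored at the corresponding time step.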
+ */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) + { + boost::apply_visitor( + GradientVisitor( + std::move(input), + std::move(prevError) + ), + input2GateModule); + + boost::apply_visitor( + GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError) + ), + output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } + } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the maximum number of steps to backpropagate through time (BPTT). + size_t Rho() const { return rho; } + //! Modify the maximum number of steps to backpropagate through time (BPTT). + size_t& Rho() { return rho; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the model modules. + std::vector& Model() { return network; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored previous output. + arma::mat prevOutput; + + //! Locally-stored previous cell state. + arma::mat prevCell; + + //! Locally-stored input 2 gate module. + LayerTypes input2GateModule; + + //! Locally-stored output 2 gate module. + LayerTypes output2GateModule; + + //! Locally-stored input gate module. + LayerTypes inputGateModule; + + //! Locally-stored hidden state module. + LayerTypes hiddenStateModule; + + //! Locally-stored forget gate module. + LayerTypes forgetGateModule; + + //! Locally-stored output gate module. + LayerTypes outputGateModule; + + //! Locally-stored cell module. + LayerTypes cellModule; + + //! Locally-stored cell activation module. + LayerTypes cellActivationModule; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored list of network modules. + std::vector network; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. 
+ size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! Locally-stored cell parameters. + std::vector cellParameter; + + //! Locally-stored output parameters. + std::vector outParameter; + + //! Locally-stored previous error. + arma::mat prevError; + + //! Locally-stored cell activation error. + arma::mat cellActivationError; + + //! Locally-stored foget gate error. + arma::mat forgetGateError; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LSTM + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp new file mode 100644 index 00000000000..e93077ebbad --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -0,0 +1,375 @@ +/** + * @file max_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP + +#include +// #include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/* + * The max pooling rule for convolution neural networks. Take the maximum value + * within the receptive block. + */ +class MaxPoolingRule +{ + public: + /* + * Return the maximum value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + size_t Pooling(const MatType& input) + { + return arma::as_scalar(arma::find(input.max() == input, 1)); + } +}; + +/** + * Implementation of the MaxPooling layer. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MaxPooling +{ +public: + //! Create the PoolingLayer object. + MaxPooling() + { + /* Nothing to do here */ + } + + /** + * Create the MaxPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + * @param floor Rounding operator (floor or ceil). + */ + MaxPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. 
+ * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. 
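+  //! (When deterministic is true, e.g. at prediction time, Forward() skips
+  //! storing the pooling indices that Backward() would otherwise consume,
+  //! which saves memory.)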
+ bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void PoolingOperation(const arma::Mat& input, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dW) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dH) + { + arma::mat subInput = input(arma::span(rowidx, rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + const size_t idx = pooling.Pooling(subInput); + output(i, j) = subInput(idx); + + if (!deterministic) + { + arma::Mat subIndices = indices(arma::span(rowidx, + rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + poolingIndices(i, j) = subIndices(idx); + } + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param error The backward error. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void Unpooling(const arma::Mat& error, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t i = 0; i < poolingIndices.n_elem; ++i) + { + output(poolingIndices(i)) += error(i); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored pooling strategy. + MaxPoolingRule pooling; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored indices matrix parameter. + arma::Mat indices; + + //! Locally-stored indices column parameter. + arma::Col indicesCol; + + //! Locally-stored pooling indicies. 
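+  //! (While training, one entry is pushed by every call to Forward() and
+  //! popped again by Backward(), so successive forward passes are unwound
+  //! in reverse order.)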
+ std::vector poolingIndices; +}; // class MaxPooling + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp new file mode 100644 index 00000000000..e0c097f6ddc --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -0,0 +1,322 @@ +/** + * @file mean_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the MeanPooling. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanPooling +{ +public: + //! Create the MeanPooling object. + MeanPooling() + { + /* Nothing to do here */ + } + + /** + * Create the MeanPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + */ + MeanPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) + + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. 
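+   * (The input argument is unused here; the layer instead reuses the
+   * activations that Forward() cached as a cube.)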
+ * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const arma::Mat& input, arma::Mat& output) + { + const size_t rStep = kW; + const size_t cStep = kH; + + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dH) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dW) + { + arma::mat subInput = input( + arma::span(rowidx, rowidx + rStep - 1 - offset), + arma::span(colidx, colidx + cStep - 1 - offset)); + + output(i, j) = arma::mean(arma::mean(subInput)); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param output The pooled result. 
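+   * @param error The backward error to be distributed over the input area.
+   *
+   * As a small worked example (values chosen purely for illustration): for a
+   * 2x2 pooling region with an incoming error of 0.8, each of the four input
+   * positions receives 0.8 / 4 = 0.2, since every element contributed
+   * equally to the mean.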
+ */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows - offset; + const size_t cStep = input.n_cols / error.n_cols - offset; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols - cStep; j += cStep) + { + for (size_t i = 0; i < input.n_rows - rStep; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + unpooledError = arma::Mat(inputArea.n_rows, inputArea.n_cols); + unpooledError.fill(error(i / rStep, j / cStep) / inputArea.n_elem); + + output(arma::span(i, i + rStep - 1 - offset), + arma::span(j, j + cStep - 1 - offset)) += unpooledError; + } + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanPooling + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp new file mode 100644 index 00000000000..6abdc15c21c --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -0,0 +1,98 @@ +/** + * @file mean_squared_error.hpp + * @author Marcus Edel + * + * Definition and implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The mean squared error performance function measures the network's + * performance according to the mean of squared errors. + * + * @tparam ActivationFunction Activation function used for the embedding layer. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanSquaredError +{ + public: + /** + * Create the MeanSquaredError object. + */ + MeanSquaredError() { /* Nothing to do here. */ } + + /* + * Computes the mean squared error function. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + return arma::mean(arma::mean(arma::square(input - target))); + } + + /** + * Ordinary feed backward pass of a neural network. + * + * @param input The propagated input activation. + * @param target The target vector. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + output = (input - target); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanSquaredError + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp new file mode 100644 index 00000000000..2caa8a37a35 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -0,0 +1,108 @@ +/** + * @file multiply_constant.hpp + * @author Marcus Edel + * + * Definition of the MultiplyConstantLayer class, which multiplies the input by + * a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the multiply constant layer. The multiply constant layer + * multiplies the input by a (non-learnable) constant. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MultiplyConstant +{ + public: + /** + * Create the MultiplyConstant object. + */ + MultiplyConstant(const double scalar) : scalar(scalar) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network. Multiply the input with the + * specified constant scalar value. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
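+   *
+   * A minimal usage sketch, assuming the default template parameters (the
+   * variable names are illustrative only):
+   *
+   *   MultiplyConstant<> layer(2.0);
+   *   arma::mat input(3, 1, arma::fill::ones), output;
+   *   layer.Forward(std::move(input), std::move(output));
+   *   // output is now a 3x1 matrix with every element equal to 2.0.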
+ */ + template + void Forward(const InputType&& input, OutputType&& output) + { + output = input * scalar; + } + + /** + * Ordinary feed backward pass of a neural network. The backward pass + * multiplies the error with the specified constant scalar value. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) + { + g = gy * scalar; + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(scalar, "scalar"); + } + + private: + //! Locally-stored constant scalar value. + const double scalar; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MultiplyConstant + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 6c08698f790..2f5a863fd8d 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -1,16 +1,16 @@ /** - * @file negative_log_likelihood_layer.hpp + * @file negative_log_likelihood.hpp * @author Marcus Edel * - * Definition of the NegativeLogLikelihoodLayer class. + * Definition of the NegativeLogLikelihood class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP -#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP #include @@ -19,11 +19,10 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of the negative log likelihood layer. The negative log - * likelihood layer expects that the input contains log-probabilities for each + * likelihood layer expectes that the input contains log-probabilities for each * class. The layer also expects a class index, in the range between 1 and the * number of classes, as target when calling the Forward function. * - * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, @@ -33,26 +32,22 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class NegativeLogLikelihoodLayer +class NegativeLogLikelihood { public: /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihoodLayer() { /* Nothing to do here. */ } + NegativeLogLikelihood() { /* Nothing to do here. */ } - /** - * Ordinary feed forward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for - * each class. The layer also expects a class index, in the range between 1 - * and the number of classes, as target when calling the Forward function. + /* + * Computes the Negative log likelihood. * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. */ template - double Forward(const arma::Mat& input, const arma::Mat& target) + double Forward(const arma::Mat&& input, arma::Mat&& target) { double output = 0; @@ -70,7 +65,7 @@ class NegativeLogLikelihoodLayer /** * Ordinary feed backward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for + * likelihood layer expectes that the input contains log-probabilities for * each class. The layer also expects a class index, in the range between 1 * and the number of classes, as target when calling the Forward function. * @@ -80,10 +75,13 @@ class NegativeLogLikelihoodLayer * @param output The calculated error. */ template - void Backward(const arma::Mat& input, - const arma::Mat& target, - arma::Mat& output) + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) { + // std::cout << "------------------------------------------------------\n"; + // std::cout << "NegativeLogLikelihood\n"; + output = arma::zeros >(input.n_rows, input.n_cols); for (size_t i = 0; i < input.n_cols; ++i) { @@ -93,6 +91,10 @@ class NegativeLogLikelihoodLayer output(currentTarget, i) = -1; } + + // std::cout << "output: \n" << output << std::endl; + + // std::cout << "------------------------------------------------------\n"; } //! Get the input parameter. @@ -119,7 +121,7 @@ class NegativeLogLikelihoodLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class NegativeLogLikelihoodLayer +}; // class NegativeLogLikelihood }; // namespace ann }; // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp new file mode 100644 index 00000000000..5870071faef --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -0,0 +1,356 @@ +/** + * @file recurrent.hpp + * @author Marcus Edel + * + * Definition of the LinearLayer class also known as fully-connected layer or + * affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the RecurrentLayer class. Recurrent layers can be used + * similarly to feed-forward layers. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Recurrent +{ + public: + /** + * Create the Recurrent object using the specified modules. + * + * @param start The start module. + * @param start The input module. + * @param start The feedback module. + * @param start The transfer module. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + template + Recurrent(const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + + { + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
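+   *
+   * A hypothetical construction sketch (the concrete module types below are
+   * illustrative only; any compatible layer types can be supplied):
+   *
+   *   Linear<> start(10, 10), input(10, 10), feedback(10, 10);
+   *   TanHLayer<> transfer;
+   *   Recurrent<> recurrent(start, input, feedback, transfer, 4);
+   *
+   * On the first call Forward() evaluates the initial module; on subsequent
+   * calls it feeds the transfer module's previous output back through the
+   * feedback module, until rho (here 4) steps have been unrolled.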
+ */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. + if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
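+   *
+   * (For all but the final unrolled step, the gradient is pushed through the
+   * recurrent, input and feedback modules using the feedback outputs saved
+   * during the forward pass; the final step instead updates the initial
+   * module.)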
+ */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + } + + private: + //! Locally-stored start module. + LayerTypes startModule; + + //! Locally-stored input module. + LayerTypes inputModule; + + //! Locally-stored feedback module. + LayerTypes feedbackModule; + + //! Locally-stored transfer module. + LayerTypes transferModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! 
Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; +}; // class Recurrent + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp new file mode 100644 index 00000000000..15ecec36d01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -0,0 +1,127 @@ +/** + * @file select.hpp + * @author Marcus Edel + * + * Definition and implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The select module selects the specified column from a given input matrix. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Select +{ + public: + /** + * Create the Select object. + * + * @param index The column which should be extracted from the given input. + * @param index The number of elements that should be used. + */ + Select(const size_t index, const size_t elements = 0) : + index(index), + elements(elements) + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. 
+ OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored column index. + size_t index; + + //! Locally-stored number of elements selected. + size_t elements; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Select + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp new file mode 100644 index 00000000000..277b3342dc2 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -0,0 +1,292 @@ +/** + * @file sequential.hpp + * @author Marcus Edel + * + * Definition of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Sequential class. The sequential class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Sequential +{ + public: + + /** + * Create the Sequential object using the specified parameters. + * + * @param model Expose the all network modules. + */ + Sequential(const bool model = true) : model(model), reset(false) + { + /* Nothing to do here. */ + } + + //! Destroy the Sequential object. + ~Sequential() + { + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. 
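+        // (The width and height recorded from the previous module are
+        // forwarded here so that modules which depend on their input
+        // dimensions, e.g. convolution or pooling layers, can configure
+        // themselves on the first pass.)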
+ boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } + + output = boost::apply_visitor(outputParameterVisitor, network.back()); + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. 
+ arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! Indicator if we already initialized the model. + bool reset; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! The input width. + size_t width; + + //! The input height. + size_t height; +}; // class Sequential + + +} // namespace ann +} // namespace mlpack + +#endif From ed538ba49a9e2d0ad59930e7e559fee6eb185112 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:29:09 +0100 Subject: [PATCH 24/82] Remove the rmva model for the CmakeLists file. --- src/mlpack/methods/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index f292e9756c9..dde69de0b1b 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -51,7 +51,6 @@ set(DIRS randomized_svd range_search rann - rmva regularized_svd softmax_regression sparse_autoencoder From 2db9ef78b303e95b6ccaed2463a693a559336051 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 5 Dec 2016 01:01:25 +0100 Subject: [PATCH 25/82] Add visitor function set; which abstracts away the different types of layers. --- .../methods/ann/layer/layer_visitor.hpp | 1034 ++++++++++++++ .../methods/ann/layer/layer_visitor_impl.hpp | 1241 +++++++++++++++++ 2 files changed, 2275 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/layer_visitor.hpp create mode 100644 src/mlpack/methods/ann/layer/layer_visitor_impl.hpp diff --git a/src/mlpack/methods/ann/layer/layer_visitor.hpp b/src/mlpack/methods/ann/layer/layer_visitor.hpp new file mode 100644 index 00000000000..a311b0addbc --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor.hpp @@ -0,0 +1,1034 @@ +/** + * @file layer_visitor.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP + +#include +#include + +#include + +namespace mlpack { +namespace ann { + +/** + * LoadOutputParameterVisitor restores the output parameter using the given + * parameter set. + */ +class LoadOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Restore the output parameter given a parameter set. + LoadOutputParameterVisitor(std::vector&& parameter); + + //! Restore the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Restore the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Restore the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * SaveOutputParameterVisitor saves the output parameter into the given + * parameter set. + */ +class SaveOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Save the output parameter into the given parameter set. + SaveOutputParameterVisitor(std::vector&& parameter); + + //! Save the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Save the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Save the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * DeleteVisitor executes the destructor of the instantiated object. + */ +class DeleteVisitor : public boost::static_visitor +{ + public: + //! Execute the destructor. + template + void operator()(LayerType* layer) const; +}; + +/** + * ForwardOutputVisitor executes the Forward() function given the input and + * output parameter. + */ +class ForwardOutputVisitor : public boost::static_visitor +{ + public: + //! Execute the Foward() function given the input and output parameter. + ForwardOutputVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Foward() function. + template + double operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * ForwardVisitor executes the Forward() function given the input and output + * parameter. + */ +class ForwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Foward() function given the input and output parameter. + ForwardVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Foward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * BackwardVisitor executes the Backward() function given the input, error and + * delta parameter. + */ +class BackwardVisitor : public boost::static_visitor +{ + public: + //! 
Execute the Backward() function given the input, error and delta + //! parameter. + BackwardVisitor(arma::mat&& input, arma::mat&& error, arma::mat&& delta); + + //! Execute the Backward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The error parameter. + arma::mat&& error; + + //! The delta parameter. + arma::mat&& delta; +}; + +/** + * ResetVisitor executes the Reset() function. + */ +class ResetVisitor : public boost::static_visitor +{ + public: + //! Execute the Reset() function. + template + void operator()(LayerType* layer) const; + + private: + //! Execute the Reset() function for a module which implements the Reset() + //! function. + template + typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Model() + //! function. + template + typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Reset() + //! and Model() function. + template + typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Do not execute the Reset() function for a module which doesn't implement + // the Reset() or Model() function. + template + typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; +}; + +/** + * InputParameterVisitor exposes the input parameter of the given module. + */ +class InputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the input parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * OutputParameterVisitor exposes the output parameter of the given module. + */ +class OutputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the output parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * DeltaVisitor exposes the delta parameter of the given module. + */ +class DeltaVisitor : public boost::static_visitor +{ + public: + //! Return the delta parameter. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * ParametersVisitor exposes the parameters set of the given module and stores + * the parameters set into the given matrix. + */ +class ParametersVisitor : public boost::static_visitor +{ + public: + //! Store the parameters set into the given parameters matrix. + ParametersVisitor(arma::mat&& parameters); + + //! Set the parameters set. + template + void operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not set the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Set the parameters set if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * ParametersSetVisitor update the parameters set using the given matrix. + */ +class ParametersSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters set given the parameters matrix. 
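+  //! (Like the other visitors in this file, this one is applied to a
+  //! LayerTypes variant through boost::apply_visitor; a hypothetical call is
+  //!   boost::apply_visitor(ParametersSetVisitor(std::move(weights)), layer);
+  //! which dispatches to whichever concrete layer type the variant holds.)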
+ ParametersSetVisitor(arma::mat&& parameters); + + //! Update the parameters set. + template + void operator()(LayerType *layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not update the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Update the parameters set if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * WeightSizeVisitor returns the number of weights of the given module. + */ +class WeightSizeVisitor : public boost::static_visitor +{ + public: + //! Return the number of weights. + template + size_t operator()(LayerType* layer) const; + + private: + //! If the module doesn't implement the Parameters() or Model() function + //! return 0. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Model() + //! function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the accumulated number of parameters if the module implements the + //! Parameters() and Model() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; +}; + +/** + * SetInputWidthVisitor updates the input width parameter with the given input + * width. + */ +class SetInputWidthVisitor : public boost::static_visitor +{ + public: + //! Update the input width parameter with the given input width. + SetInputWidthVisitor(const size_t inputWidth = 0, const bool reset = false); + + //! Update the input width parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input width parameter. + size_t inputWidth; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() or + //! Model() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; +}; + +/** + * SetInputHeightVisitor updates the input height parameter with the given input + * height. 
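+ * The height is only written if it is still unset (zero) or if the reset flag
+ * is given; modules exposed through Model() are updated recursively.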
+ */ +class SetInputHeightVisitor : public boost::static_visitor +{ + public: + //! Update the input height parameter with the given input height. + SetInputHeightVisitor(const size_t inputHeight = 0, const bool reset = false); + + //! Update the input height parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input height parameter. + size_t inputHeight; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() or + //! Model() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class OutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputHeight() method of the given module. + */ +class OutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. 
+ template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * LastOutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class LastOutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * LastOutputHeightVisitor exposes the OutputHeight() method of the given module. + */ +class LastOutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * WeightSetVisitor update the module parameters given the parameters set. + */ +class WeightSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters given the parameters set and offset. + WeightSetVisitor(arma::mat&& weight, const size_t offset = 0); + + //! Update the parameters set. + template + size_t operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& weight; + + //! The parameters offset. + const size_t offset; + + //! Do not update the parameters if the module doesn't implement the + //! Parameters() or Model() function. 
+ template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() and + //! Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; +}; + +/** + * RhoVisitor exposes the Rho() method of the given module. + */ +class RhoVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; +}; + +/** + * DeterministicSetVisitor set the deterministic parameter given the + * deterministic value. + */ +class DeterministicSetVisitor : public boost::static_visitor +{ + public: + //! Set the deterministic parameter given the current deterministic value. + DeterministicSetVisitor(const bool deterministic = true); + + //! Set the deterministic parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The deterministic parameter. + const bool deterministic; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. 
+ template + typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; +}; + +/** + * AddVisitor exposes the Add() method of the given module. + */ +class AddVisitor : public boost::static_visitor +{ + public: + //! Exposes the Add() method of the given module. + template + AddVisitor(T newLayer); + + //! Exposes the Add() method. + template + void operator()(LayerType* layer) const; + + private: + //! The layer that should be added. + LayerTypes newLayer; + + //! Only add the layer if the module implements the Add() function. + template + typename std::enable_if< + HasAddCheck::value, void>::type + LayerAdd(T* layer) const; + + //! Do not add the layer if the module doesn't implement the Add() function. + template + typename std::enable_if< + !HasAddCheck::value, void>::type + LayerAdd(T* layer) const; +}; + +/** + * GradientSetVisitor update the gradient parameter given the gradient set. + */ +class GradientSetVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientSetVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + + +/** + * GradientUpdateVisitor update the gradient parameter given the gradient set. + */ +class GradientUpdateVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientUpdateVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. 
+ template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + +/* + * GradientZeroVisitor set the gradient to zero for the given module. + */ +class GradientZeroVisitor : public boost::static_visitor +{ + public: + //! Set the gradient to zero for the given module. + GradientZeroVisitor(); + + //! Set the gradient to zero. + template + void operator()(LayerType* layer) const; + + private: + //! Set the gradient to zero if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not set the gradient to zero if the module doesn't implement the + //! Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * SearchModeVisitor executes the Gradient() method of the given module using + * the input and delta parameter. + */ +class GradientVisitor : public boost::static_visitor +{ + public: + //! Executes the Gradient() method of the given module using the input and + //! delta parameter. + GradientVisitor(arma::mat&& input, arma::mat&& delta); + + //! Executes the Gradient() method. + template + void operator()(LayerType* layer) const; + + private: + //! The input set. + arma::mat&& input; + + //! The delta parameter. + arma::mat&& delta; + + //! Execute the Gradient() function if the module implements the Gradient() + //! function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not execute the Gradient() function if the module doesn't implement + //! the Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * RewardSetVisitor set the reward parameter given the reward value. + */ +class RewardSetVisitor : public boost::static_visitor +{ + public: + //! Set the reward parameter given the reward value. + RewardSetVisitor(const double reward); + + //! Set the reward parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The reward value. + const double reward; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. 
+ template + typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; +}; + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "layer_visitor_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp new file mode 100644 index 00000000000..482b8e10ad3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp @@ -0,0 +1,1241 @@ +/** + * @file layer_visitor_impl.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP + +// In case it hasn't been included yet. +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann { + +//! LoadOutputParameterVisitor visitor class. +inline LoadOutputParameterVisitor::LoadOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void LoadOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(LoadOutputParameterVisitor(std::move(parameter)), + layer->Model()[layer->Model().size() - i - 1]); + } + + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +//! SaveOutputParameterVisitor visitor class. +inline SaveOutputParameterVisitor::SaveOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void SaveOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SaveOutputParameterVisitor(std::move(parameter)), + layer->Model()[i]); + } +} + +//! DeleteVisitor visitor class. +template +inline void DeleteVisitor::operator()(LayerType* layer) const +{ + if (layer) + delete layer; +} + +//! ForwardOutputVisitor visitor class. +inline ForwardOutputVisitor::ForwardOutputVisitor(arma::mat&& input, + arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. 
*/ +} + +template +inline double ForwardOutputVisitor::operator()(LayerType* layer) const +{ + return layer->Forward(std::move(input), std::move(output)); +} + +//! ForwardVisitor visitor class. +inline ForwardVisitor::ForwardVisitor(arma::mat&& input, arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline void ForwardVisitor::operator()(LayerType* layer) const +{ + layer->Forward(std::move(input), std::move(output)); +} + +//! BackwardVisitor visitor class. +inline BackwardVisitor::BackwardVisitor(arma::mat&& input, + arma::mat&& error, + arma::mat&& delta) : + input(std::move(input)), + error(std::move(error)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void BackwardVisitor::operator()(LayerType* layer) const +{ + layer->Backward(std::move(input), std::move(error), std::move(delta)); +} + +//! ResetVisitor visitor class. +template +inline void ResetVisitor::operator()(LayerType* layer) const +{ + ResetParameter(layer); +} + +template +inline typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } + + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! InputParameterVisitor visitor class. +template +inline arma::mat& InputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->InputParameter(); +} + +//! OutputParameterVisitor visitor class. +template +inline arma::mat& OutputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->OutputParameter(); +} + +//! DeltaVisitor visitor class. +template +inline arma::mat& DeltaVisitor::operator()(LayerType *layer) const +{ + return layer->Delta(); +} + +//! ParametersVisitor visitor class. +inline ParametersVisitor::ParametersVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* layer, P& /* output */) const +{ + parameters = layer->Parameters(); +} + +//! ParametersSetVisitor visitor class. +inline ParametersSetVisitor::ParametersSetVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. 
*/ +} + +template +inline void ParametersSetVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* layer, P& /* output */) const +{ + layer->Parameters() = parameters; +} + +//! WeightSizeVisitor visitor class. +template +inline size_t WeightSizeVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* /* layer */, P& /* output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +//! SetInputWidthVisitor visitor class. +inline SetInputWidthVisitor::SetInputWidthVisitor(const size_t inputWidth, + const bool reset) : + inputWidth(inputWidth), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerInputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +//! 
SetInputHeightVisitor visitor class. +inline SetInputHeightVisitor::SetInputHeightVisitor(const size_t inputHeight, + const bool reset) : + inputHeight(inputHeight), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerInputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +//! OutputWidthVisitor visitor class. +template +inline size_t OutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! OutputHeightVisitor visitor class. 
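+// As with OutputWidthVisitor, the layer's own OutputHeight() is preferred; if
+// it is unset (zero), the submodules exposed through Model() are searched from
+// the back and the first non-zero height is returned.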
+template +inline size_t OutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! LastOutputWidthVisitor visitor class. +template +inline size_t LastOutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(LastOutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! LastOutputHeightVisitor visitor class. 
+template +inline size_t LastOutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(LastOutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! WeightSetVisitor visitor class. +inline WeightSetVisitor::WeightSetVisitor(arma::mat&& weight, + const size_t offset) : + weight(std::move(weight)), + offset(offset) +{ + /* Nothing to do here. */ +} + +template +inline size_t WeightSetVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, std::move(layer->OutputParameter())); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* /* layer */, P&& /*output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /*output */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +//! RhoVisitor visitor class. 
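+// Rho is the number of steps to backpropagate through time. The visitor
+// returns the first non-zero Rho() found in the layer or its submodules, and
+// 0 if none of them provides one.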
+template +inline size_t RhoVisitor::operator()(LayerType* layer) const +{ + return LayerRho(layer); +} + +template +inline typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + size_t moduleRho = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + moduleRho = boost::apply_visitor(RhoVisitor(), layer->Model()[i]); + if (moduleRho != 0) + { + return moduleRho; + } + } + + return moduleRho; +} + +template +inline typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +template +inline typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +//! DeterministicSetVisitor visitor class. +inline DeterministicSetVisitor::DeterministicSetVisitor( + const bool deterministic) : deterministic(deterministic) +{ + /* Nothing to do here. */ +} + +template +inline void DeterministicSetVisitor::operator()(LayerType* layer) const +{ + LayerDeterministic(layer); +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* /* input */) const +{ + /* Nothing to do here. */ +} + +//! AddVisitor visitor class. +template +inline AddVisitor::AddVisitor(T newLayer) : + newLayer(std::move(newLayer)) +{ + /* Nothing to do here. */ +} + +template +inline void AddVisitor::operator()(LayerType* layer) const +{ + LayerAdd(layer); +} + +template +inline typename std::enable_if< + HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* layer) const +{ + layer->Add(newLayer); +} + +template +inline typename std::enable_if< + !HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! GradientSetVisitor visitor class. +inline GradientSetVisitor::GradientSetVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientSetVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientUpdateVisitor visitor class. +inline GradientUpdateVisitor::GradientUpdateVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientUpdateVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientZeroVisitor visitor class. +inline GradientZeroVisitor::GradientZeroVisitor() +{ + /* Nothing to do here. */ +} + +template +inline void GradientZeroVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient().zeros(); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! GradientVisitor visitor class. +inline GradientVisitor::GradientVisitor(arma::mat&& input, arma::mat&& delta) : + input(std::move(input)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void GradientVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient(std::move(input), std::move(delta), + std::move(layer->Gradient())); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! RewardSetVisitor visitor class. +inline RewardSetVisitor::RewardSetVisitor(const double reward) : reward(reward) +{ + /* Nothing to do here. 
*/ +} + +template +inline void RewardSetVisitor::operator()(LayerType* layer) const +{ + LayerReward(layer); +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* /* input */) const +{ + /* Nothing to do here. */ +} + +} // namespace ann +} // namespace mlpack + +#endif From 913430934ce4ca46b5823546863998acc2c54937 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 5 Dec 2016 16:54:23 +0100 Subject: [PATCH 26/82] Minor style fixes. --- src/mlpack/methods/ann/layer/dropout.hpp | 2 +- src/mlpack/methods/ann/layer/negative_log_likelihood.hpp | 7 ------- src/mlpack/methods/ann/layer/select.hpp | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index 57e76f6170d..b5bfa9a3e2c 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -62,7 +62,7 @@ class Dropout * @param rescale If true the input is rescaled when deterministic is False. */ Dropout(const double ratio = 0.5, - const bool rescale = true) : + const bool rescale = true) : ratio(ratio), scale(1.0 / (1.0 - ratio)), rescale(rescale) diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 2f5a863fd8d..0de8cb7cd5b 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -79,9 +79,6 @@ class NegativeLogLikelihood const arma::Mat&& target, arma::Mat&& output) { - // std::cout << "------------------------------------------------------\n"; - // std::cout << "NegativeLogLikelihood\n"; - output = arma::zeros >(input.n_rows, input.n_cols); for (size_t i = 0; i < input.n_cols; ++i) { @@ -91,10 +88,6 @@ class NegativeLogLikelihood output(currentTarget, i) = -1; } - - // std::cout << "output: \n" << output << std::endl; - - // std::cout << "------------------------------------------------------\n"; } //! Get the input parameter. diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index 15ecec36d01..d683830cc88 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -36,7 +36,7 @@ class Select * Create the Select object. * * @param index The column which should be extracted from the given input. - * @param index The number of elements that should be used. + * @param elements The number of elements that should be used. 
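+   * For example, Select(1, 3) uses only the first 3 elements of column 1 of
+   * the input; if elements is 0, the complete column is forwarded.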
*/ Select(const size_t index, const size_t elements = 0) : index(index), From 148a8a7a5ce39b3aabd7388c56c304d37a3ed4db Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 6 Dec 2016 22:29:46 +0100 Subject: [PATCH 27/82] Refactor recurrent network test. --- src/mlpack/tests/recurrent_network_test.cpp | 169 ++++++++++---------- 1 file changed, 81 insertions(+), 88 deletions(-) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index c49ae423ae2..ff5daae9ede 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,26 +3,13 @@ * @author Marcus Edel * * Tests the recurrent network. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include -#include -#include -#include -#include -#include - -#include -#include #include -#include -#include - #include +#include +#include +#include #include #include "test_tools.hpp" @@ -78,19 +65,29 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) // times, I'm fine with that. All I want to know is that the network is able // to escape from local minima and to solve the task. size_t successes = 0; + const size_t rho = 10; for (size_t trial = 0; trial < 5; ++trial) { - // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features. - arma::mat input, labels; - GenerateNoisySines(input, labels, 10, 6); + // Generate 12 (2 * 6) noisy sines. A single sine contains rho points/features. + arma::mat input, labelsTemp; + GenerateNoisySines(input, labelsTemp, rho, 6); + + arma::mat labels = arma::zeros(rho, labelsTemp.n_cols); + for (size_t i = 0; i < labelsTemp.n_cols; ++i) + { + const int value = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + labels.col(i).fill(value); + } /* - * Construct a network with 1 input unit, 4 hidden units and 2 output units. - * The hidden layer is connected to itself. The network structure looks like: + * Construct a network with 1 input unit, 4 hidden units and 10 output + * units. The hidden layer is connected to itself. The network structure + * looks like: * * Input Hidden Output - * Layer(1) Layer(4) Layer(2) + * Layer(1) Layer(4) Layer(10) * +-----+ +-----+ +-----+ * | | | | | | * | +------>| +------>| | @@ -100,38 +97,45 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) * . . * ....... 
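   *
   * In the construction below the recurrence is expressed with the Recurrent<>
   * module: add acts as the start (bias) module, lookup maps the input into
   * the hidden space, linear feeds the hidden activation back, sigmoidLayer is
   * the transfer function, and rho is the number of unrolled time steps.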
*/ - LinearLayer<> linearLayer0(1, 4); - RecurrentLayer<> recurrentLayer0(4); - BaseLayer inputBaseLayer; + Add<> add(4); + Linear<> lookup(1, 4); + SigmoidLayer<> sigmoidLayer; + Linear<> linear(4, 4); + Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho); - LinearLayer<> hiddenLayer(4, 2); - BaseLayer hiddenBaseLayer; + RNN<> model(rho); + model.Add >(); + model.Add(recurrent); + model.Add >(4, 10); + model.Add >(); - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, inputBaseLayer, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 500 * input.n_cols, -100); - - net.Train(input, labels, opt); + SGD opt(model, 0.1, 500 * input.n_cols, -100); + model.Train(input, labels, opt); arma::mat prediction; - net.Predict(input, prediction); + model.Predict(input, prediction); size_t error = 0; - for (size_t i = 0; i < labels.n_cols; i++) + for (size_t i = 0; i < prediction.n_cols; ++i) { - if (arma::sum(arma::sum(arma::abs(prediction.col(i) - labels.col(i)))) == 0) + arma::mat singlePrediction = prediction.submat((rho - 1) * rho, i, + rho * rho - 1, i); + + const int predictionValue = arma::as_scalar(arma::find( + arma::max(singlePrediction.col(0)) == + singlePrediction.col(0), 1) + 1); + + const int targetValue = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + + if (predictionValue == targetValue) { error++; } } - double classificationError = 1 - double(error) / labels.n_cols; + double classificationError = 1 - double(error) / prediction.n_cols; + if (classificationError <= 0.2) { ++successes; @@ -279,9 +283,7 @@ void GenerateNextEmbeddedReber(const arma::Mat& transitions, /** * Train the specified network and the construct a Reber grammar dataset. */ -template -void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, - bool embedded = false) +void ReberGrammarTestNetwork(bool embedded = false) { // Reber state transition matrix. (The last two columns are the indices to the // next path). @@ -346,36 +348,34 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, * | | | | | | * | +------>| +------>| | * | | ..>| | | | - * +-----+ . +--+--+ +-----+ + * +-----+ . +--+--+ +-- ---+ * . . * . . * ....... */ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(7, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - LinearLayer<>hiddenLayer(10, 7); - BaseLayer hiddenBaseLayer; + RNN > model(rho); - BinaryClassificationLayer classOutputLayer; + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 15; i++) + for (size_t i = 0; i < 40; i++) { for (size_t j = 0; j < trainReberGrammarCount; j++) { inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + + model.Train(inputTemp, labelsTemp, opt); } } @@ -384,10 +384,11 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, // Ask the network to predict the next Reber grammar in the given sequence. 
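  // Each prediction is binarized and translated back into Reber symbols before
  // being checked against the grammar.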
for (size_t i = 0; i < testReberGrammarCount; i++) { - arma::mat output; + arma::mat output, prediction; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); const size_t reberGrammerSize = 7; std::string inputReber = ""; @@ -429,8 +430,7 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, */ BOOST_AUTO_TEST_CASE(ReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM); + ReberGrammarTestNetwork(false); } /** @@ -438,8 +438,7 @@ BOOST_AUTO_TEST_CASE(ReberGrammarTest) */ BOOST_AUTO_TEST_CASE(EmbeddedReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM, true); + ReberGrammarTestNetwork(true); } /* @@ -490,7 +489,6 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) for (size_t i = 2; i < 8; i++) input(2 + rand() % 6, index(i)) = 1; - // Set the prompts which direct the network to give an answer. input(8, 8) = 1; input(9, 9) = 1; @@ -503,8 +501,7 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) * Train the specified network and the construct distracted sequence recall * dataset. */ -template -void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) +void DistractedSequenceRecallTestNetwork() { const size_t trainDistractedSequenceCount = 1000; const size_t testDistractedSequenceCount = 1000; @@ -538,22 +535,18 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) * . . * ....... */ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(10, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); - - LinearLayer<> hiddenLayer(10, 3); - TanHLayer<> hiddenBaseLayer; - - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); + const size_t outputSize = 3; + const size_t inputSize = 10; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN net(modules, classOutputLayer); + RNN > model(rho); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - SGD opt(net, 0.04, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; for (size_t i = 0; i < 40; i++) @@ -563,7 +556,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); } } @@ -576,7 +569,8 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) arma::mat output; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, output); + data::Binarize(output, output, 0.5); if (arma::accu(arma::abs(testLabels.at(0, i) - output)) != 0) error += 1; @@ -597,8 +591,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) */ BOOST_AUTO_TEST_CASE(DistractedSequenceRecallTest) { - LSTMLayer<> hiddenLayerLSTMPeephole(10, true); - DistractedSequenceRecallTestNetwork(hiddenLayerLSTMPeephole); + DistractedSequenceRecallTestNetwork(); } BOOST_AUTO_TEST_SUITE_END(); From c0311c422eaaa47fde73a875eaf8b9b0479ce557 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 7 Dec 2016 19:21:41 +0100 Subject: [PATCH 28/82] Remove unused pooling test. 
--- src/mlpack/tests/pooling_rules_test.cpp | 80 ------------------------- 1 file changed, 80 deletions(-) delete mode 100644 src/mlpack/tests/pooling_rules_test.cpp diff --git a/src/mlpack/tests/pooling_rules_test.cpp b/src/mlpack/tests/pooling_rules_test.cpp deleted file mode 100644 index 0dd2c9dcb24..00000000000 --- a/src/mlpack/tests/pooling_rules_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file convolution_test.cpp - * @author Marcus Edel - * - * Tests for various convolution strategies. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PoolingTest); - -/** - * Test the max pooling rule. - */ -BOOST_AUTO_TEST_CASE(MaxPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 36; - - MaxPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - BOOST_REQUIRE_EQUAL(arma::accu(output), input.max()); -} - -/** - * Test the mean pooling rule. - */ -BOOST_AUTO_TEST_CASE(MeanPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 18.5; - - MeanPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - bool b = arma::all(arma::vectorise(output) == (input.max() / input.n_elem)); - BOOST_REQUIRE_EQUAL(b, true); -} - -BOOST_AUTO_TEST_SUITE_END(); From efa533ac99b4debf8734cae4b06b1e0379b1d344 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 8 Dec 2016 23:19:17 +0100 Subject: [PATCH 29/82] Refactor FNN class; works for CNNs and FFNs --- src/mlpack/methods/ann/ffn.hpp | 398 +++++++----------------- src/mlpack/methods/ann/ffn_impl.hpp | 461 ++++++++++++++++------------ 2 files changed, 383 insertions(+), 476 deletions(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index f9bc4d53a8b..6a6f013683d 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -14,10 +14,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include namespace mlpack { @@ -26,25 +26,20 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of a standard feed forward network. 
* - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> + +// NguyenWidrowInitialization +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class FFN { public: //! Convenience typedef for the internal model construction. - using NetworkType = FFN; + using NetworkType = FFN; /** * Create the FFN object with the given predictors and responses set (this is @@ -52,100 +47,32 @@ class FFN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + FFN(OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the FFN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the FNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. + * @param responses Outputs results from input training variables. * @param outputLayer Output layer used to evaluate the network. 
* @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, const arma::mat& responses); + FFN(const arma::mat& predictors, + const arma::mat& responses, + OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Train the feedforward network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); + //! Destructor to release allocated memory. + ~FFN(); /** * Train the feedforward network on the given input data using the given @@ -170,7 +97,7 @@ class FFN /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -184,7 +111,7 @@ class FFN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ double Evaluate(const arma::mat& parameters, const size_t i, @@ -203,6 +130,21 @@ class FFN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -216,214 +158,61 @@ class FFN void Serialize(Archive& ar, const unsigned int /* version */); private: + // Helper functions. 
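  /*
   * Usage sketch (illustrative; not part of this patch): with the refactored
   * API the network is composed at run time through Add(), and Train() uses
   * the instantiated optimizer, which in turn calls Evaluate()/Gradient() one
   * data point at a time. The layer types and the SGD call below follow the
   * updated tests; the sizes, data names and the optimizer template argument
   * are assumptions made for illustration only.
   *
   *   arma::mat predictors, responses;   // one column per data point
   *
   *   FFN<> model;
   *   model.Add<Linear<> >(predictors.n_rows, 10);
   *   model.Add<SigmoidLayer<> >();
   *   model.Add<Linear<> >(10, outputSize);
   *
   *   SGD<decltype(model)> opt(model, 0.1, 500 * predictors.n_cols, -100);
   *   model.Train(predictors, responses, opt);
   *
   *   arma::mat predictions;
   *   model.Predict(predictors, predictions);
   */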
/** - * Reset the network by zeroing the layer activations and by setting the - * layer status. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. * - * enable_if (SFINAE) is used to iterate through the network. The general - * case peels off the first type and recurses, as usual with - * variadic function templates. + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } + void Forward(arma::mat&& input); /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } + void Backward(); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void Gradient(); /** - * Link the calculated activation with the connection layer. + * Reset the module infomration (weights/parameters). */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Measures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + void ResetParameters(); /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. 
*/ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + void ResetDeterministic(); /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + void ResetGradients(arma::mat& gradient); - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated feedforward network. - LayerTypes network; - - //! The output layer used to evaluate the network + //! Instantiated outputlayer used to evaluate the network. OutputLayerType outputLayer; - //! Performance strategy used to calculate the error. - PerformanceFunction performanceFunc; + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The input width. + size_t width; - //! Matrix of (trained) parameters. - arma::mat parameter; + //! The input height. + size_t height; + + //! Indicator if we already trained the model. + bool reset; + + //! Locally-stored model modules. + std::vector network; //! The matrix of data points (predictors). arma::mat predictors; @@ -431,11 +220,56 @@ class FFN //! The matrix of responses to the input data points. arma::mat responses; + //! Matrix of (trained) parameters. + arma::mat parameter; + //! The number of separable functions (the number of predictor points). size_t numFunctions; - //! Locally stored backward error. + //! The current error for the backward pass. arma::mat error; + + //! THe current input of the forward/backward pass. + arma::mat currentInput; + + //! THe current target of the forward/backward pass. + arma::mat currentTarget; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! 
Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! The current evaluation mode (training or testing). + bool deterministic; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient parameter. + arma::mat gradient; }; // class FFN } // namespace ann diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index 5b1cc611871..d640781fbb2 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -19,170 +19,76 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) +template +FFN::FFN(OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, +template +FFN::FFN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) + OutputLayerType&& outputLayer, + InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + } } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::~FFN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deleteVisitor)); } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void FFN::Train(const arma::mat& predictors, const arma::mat& responses, OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } -template -template< - template class OptimizerType -> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. 
Timer::Start("ffn_optimization"); const double out = optimizer.Optimize(parameter); @@ -192,101 +98,268 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +template +void FFN::Predict(arma::mat& predictors, arma::mat& responses) { - deterministic = true; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } arma::mat responsesTemp; - ResetParameter(network); - Forward(arma::mat(predictors.colptr(0), predictors.n_rows, 1, false, true), - network); - OutputPrediction(responsesTemp, network); + Forward(std::move(arma::mat(predictors.colptr(0), + predictors.n_rows, 1, false, true))); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()).col(0); responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); responses.col(0) = responsesTemp.col(0); for (size_t i = 1; i < predictors.n_cols; i++) { - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true))); - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()); responses.col(i) = responsesTemp.col(0); } } -template -double FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, +template +double FFN::Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } - ResetParameter(network); + currentInput = std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true)); - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(currentInput)); - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); + currentTarget = arma::mat(responses.colptr(i), responses.n_rows, + 1, false, true); + + double res = outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); + + return res; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, +template +void FFN::Gradient(const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } + else + { + gradient.zeros(); + } + + Evaluate(parameters, i, false); + outputLayer.Backward(std::move(boost::apply_visitor(outputParameterVisitor, + network.back())), std::move(currentTarget), std::move(error)); - Evaluate(parameter, i, false); + Backward(); + ResetGradients(gradient); + Gradient(); +} - NetworkGradients(gradient, network); +template +void FFN::ResetParameters() +{ + size_t weights = 0; + for (size_t i = 0; i < network.size(); ++i) + { + weights += boost::apply_visitor(weightSizeVisitor, network[i]); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, 
parameter.n_elem, 1); + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); - Backward<>(error, network); - UpdateGradients<>(network); + boost::apply_visitor(resetVisitor, network[i]); + } +} + +template +void FFN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void FFN::ResetGradients(arma::mat& gradient) +{ + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), network[i]); + } } -template +template +void FFN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. 
+ if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } +} + +template +void FFN::Backward() +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(error), std::move( + boost::apply_visitor(deltaVisitor, network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } +} + +template +void FFN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - 2])), std::move(error)), + network[network.size() - 1]); +} + +template template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void FFN::Serialize(Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(width, "width"); + ar & data::CreateNVP(height, "height"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } } } From b00fc86fc2cbd3283b53d57a8e0264adeaa96b7f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 9 Dec 2016 21:40:20 +0100 Subject: [PATCH 30/82] Refactor RNN class; works will all current modules including the updated recurrent module. --- src/mlpack/methods/ann/rnn.hpp | 749 +++++----------------------- src/mlpack/methods/ann/rnn_impl.hpp | 525 ++++++++++--------- 2 files changed, 415 insertions(+), 859 deletions(-) diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp index 6b9483cd831..6c5c69968d8 100644 --- a/src/mlpack/methods/ann/rnn.hpp +++ b/src/mlpack/methods/ann/rnn.hpp @@ -14,39 +14,29 @@ #include -#include - -#include -#include -#include -#include +#include +#include +#include #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of a standard recurrent neural network. + * Implementation of a standard recurrent neural network container. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. 
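 *
 * Example (a condensed sketch of the updated SequenceClassificationTest in
 * recurrent_network_test.cpp; a few of that test's modules are omitted here
 * and the optimizer template argument is an assumption, so treat the layer
 * sizes below as illustrative only):
 *
 *   const size_t rho = 10;
 *
 *   Add<> add(4);
 *   Linear<> lookup(1, 4);
 *   SigmoidLayer<> sigmoidLayer;
 *   Linear<> linear(4, 4);
 *   Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho);
 *
 *   RNN<> model(rho);
 *   model.Add(recurrent);
 *   model.Add<Linear<> >(4, 10);
 *
 *   SGD<decltype(model)> opt(model, 0.1, 500 * input.n_cols, -100);
 *   model.Train(input, labels, opt);
 *   model.Predict(input, prediction);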
*/ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class RNN { public: //! Convenience typedef for the internal model construction. - using NetworkType = RNN; + using NetworkType = RNN; /** * Create the RNN object with the given predictors and responses set (this is @@ -54,100 +44,40 @@ class RNN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + RNN(const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the RNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the RNN object with an empty predictors and responses set and - * default optimizer. 
Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Train the recurrent neural network on the given input data. By default, the - * SGD optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::RMSprop). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the recurrent neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(OptimizerType& optimizer); + //! Destructor to release allocated memory. + ~RNN(); /** * Train the recurrent neural network on the given input data using the given @@ -172,7 +102,7 @@ class RNN /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -186,9 +116,9 @@ class RNN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ - double Evaluate(const arma::mat& parameters, + double Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic = true); @@ -206,6 +136,29 @@ class RNN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. 
+ */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -219,575 +172,117 @@ class RNN void Serialize(Archive& ar, const unsigned int /* version */); private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const DataType& input, DataType& output) - { - deterministic = true; - seqLen = input.n_rows / inputSize; - ResetParameter(network); - - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) - { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); - - // Retrieve output of the subsequence. - if (seqOutput) - { - DataType seqOutput; - OutputPrediction(seqOutput, network); - output = arma::join_cols(output, seqOutput); - } - } - - // Retrieve output of the complete sequence. - if (!seqOutput) - OutputPrediction(output, network); - } - - /** - * Reset the network by clearing the layer activations and by setting the - * layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) - { - activations.clear(); - } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetSeqLen(std::get(network)); - ResetRecurrent(std::get(network), std::get(network).InputParameter()); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * for all layer that implement the Deterministic function. - */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } - - /** - * Reset the layer sequence length by setting the current seqLen parameter - * for all layer that implement the SeqLen function. - */ - template - typename std::enable_if< - HasSeqLenCheck::value, void>::type - ResetSeqLen(T& layer) - { - layer.SeqLen() = seqLen; - } - - template - typename std::enable_if< - !HasSeqLenCheck::value, void>::type - ResetSeqLen(T& /* unused */) { /* Nothing to do here */ } - - /** - * Distinguish between recurrent layer and non-recurrent layer when resetting - * the recurrent parameter. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& layer, P& /* unused */) - { - layer.RecurrentParameter().zeros(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& /* unused */, P& /* unused */) - { - /* Nothing to do here */ - } - + // Helper functions. /** - * Initialize the network by setting the input size and output size. - */ - template - typename std::enable_if::type - InitLayer(const InputDataType& /* unused */, - const TargetDataType& target, - std::tuple& /* unused */) - { - seqOutput = outputSize < target.n_elem ? 
true : false; - } - - template - typename std::enable_if::type - InitLayer(const InputDataType& input, - const TargetDataType& target, - std::tuple& network) - { - Init(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - InitLayer(input, target, - network); - } - - /** - * Retrieve the weight matrix for all layer that implement the Weights - * function to extract the input size and output size. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. + * + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Init(T& layer, P& /* unused */, D& /* unused */) - { - // Initialize the input size only once. - if (!inputSize) - inputSize = layer.Weights().n_cols; - - outputSize = layer.Weights().n_rows; - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Init(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + void Forward(arma::mat&& input); /** - * Save the network layer activations. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& /* unused */) - { - Save(I, std::get(network), std::get(network).InputParameter()); - LinkRecurrent(network); - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& network) - { - Save(I, std::get(network), std::get(network).InputParameter()); - SaveActivations(network); - } + void Backward(); /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.OutputParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter(); - } - - /** - * Load the network layer activations. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - std::get<0>(network).InputParameter() = input; - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... 
Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - LoadActivations(input, network); - } + void Gradient(); - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + /* + * Predict the response of the given input sequence. + * + * @param predictors Input predictors. + * @param responses Vector to put output prediction of a response into. */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum); - } + void SinglePredict(const arma::mat& predictors, arma::mat& responses); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Reset the module infomration (weights/parameters). */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void ResetParameters(); /** - * Link the calculated activation with the correct layer. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - } - - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } + void ResetDeterministic(); /** - * Link the calculated activation with the correct recurrent layer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& network) - { - UpdateRecurrent(std::get(network), std::get(network).InputParameter(), - std::get(network).OutputParameter()); - LinkRecurrent(network); - } + //! Number of steps to backpropagate through time (BPTT). 
+ size_t rho; - /** - * Distinguish between recurrent layer and non-recurrent layer when updating - * the recurrent activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& layer, P& /* unused */, D& output) - { - layer.RecurrentParameter() = output; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - void Backward(DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, std::tuple& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - BackwardRecurrent(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + //! The input size. + size_t inputSize; - /* - * Update the delta of the recurrent layer. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& layer, P& /* unused */, D& delta) - { - if (!layer.Delta().is_empty()) - delta += layer.Delta(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! The output size. + size_t outputSize; - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - } - - template::value - 2, - typename... 
Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value && - HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& delta2) - { - layer.Gradient(layer.InputParameter(), delta2, layer.Gradient()); - } - - template - typename std::enable_if< - (!HasGradientCheck::value && - !HasRecurrentParameterCheck::value) || - (!HasGradientCheck::value && - HasRecurrentParameterCheck::value), void>::type - Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if< - HasGradientCheck::value && - !HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - layer.Gradient(layer.InputParameter(), delta1, layer.Gradient()); - } + //! The target size. + size_t targetSize; - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } + //! Indicator if we already trained the model. + bool reset; - //! Instantiated recurrent neural network. - LayerTypes network; + //! Only predict the last element of the input sequence. + bool single; - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; + //! Locally-stored model modules. + std::vector network; - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; + //! The matrix of data points (predictors). + arma::mat predictors; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The matrix of responses to the input data points. + arma::mat responses; //! Matrix of (trained) parameters. arma::mat parameter; - //! The matrix of data points (predictors). - arma::mat predictors; + //! The number of separable functions (the number of predictor points). + size_t numFunctions; - //! The matrix of responses to the input data points. - arma::mat responses; + //! The current error for the backward pass. + arma::mat error; - //! Locally stored network input size. - size_t inputSize; + //! THe current input of the forward/backward pass. + arma::mat currentInput; - //! Locally stored network output size. - size_t outputSize; + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; - //! The index of the current sequence number. - size_t seqNum; + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; - //! Locally stored number of samples in one input sequence. - size_t seqLen; + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; - //! Locally stored parameter that indicates if the input is a sequence. - bool seqOutput; + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; - //! The activation storage we are using to perform the feed backward pass. - boost::ptr_vector activations; + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; - //! 
The number of separable functions (the number of predictor points). - size_t numFunctions; + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; - //! Locally stored backward error. - arma::mat error; + //! The current evaluation mode (training or testing). + bool deterministic; }; // class RNN } // namespace ann diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index d8d2f07f0ac..a2abb2ce6c3 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -19,176 +19,91 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols), +template +RNN::RNN(const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, +template +RNN::RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); -} - -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - inputSize(0), - outputSize(0) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + if (!reset) + { + ResetParameters(); + reset = true; + } } -template -template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +template +RNN::~RNN() { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." << std::endl; + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void RNN::Train(const arma::mat& predictors, const arma::mat& responses, OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } -template -template< - template class OptimizerType -> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("rnn_optimization"); const double out = optimizer.Optimize(parameter); @@ -198,96 +113,128 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +template +void RNN::Predict(arma::mat& predictors, arma::mat& responses) { - arma::mat responsesTemp; - SinglePredict(arma::mat(predictors.colptr(0), predictors.n_rows, - 1, false, true), responsesTemp); + if (parameter.is_empty()) + { + ResetParameters(); + } - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } + + responses = arma::zeros(outputSize * rho, predictors.n_cols); + arma::mat responsesTemp = responses.col(0); + + for (size_t i = 0; i < predictors.n_cols; i++) + { + SinglePredict( + arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), + responsesTemp); + + responses.col(i) = responsesTemp; + } +} - for (size_t i = 1; i < predictors.n_cols; i++) +template +void RNN::SinglePredict(const arma::mat& predictors, arma::mat& responses) +{ + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - SinglePredict(arma::mat(predictors.colptr(i), predictors.n_rows, - 1, false, true), responsesTemp); - responses.col(i) = responsesTemp.col(0); + currentInput = predictors.rows(seqNum * inputSize, + (seqNum + 1) * inputSize - 1); + Forward(std::move(currentInput)); + + responses.rows(seqNum * outputSize, (seqNum + 1) * outputSize - 1) = + boost::apply_visitor(outputParameterVisitor, network.back()); } } -template -double RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, +template +double RNN::Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true); arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Initialize the activation storage only once. - if (activations.empty()) - InitLayer(input, target, network); - - double networkError = 0; - seqLen = input.n_rows / inputSize; - ResetParameter(network); + if (!inputSize) + { + inputSize = input.n_elem / rho; + targetSize = target.n_elem / rho; + } - error = arma::mat(outputSize, outputSize < target.n_elem ? seqLen : 1); + double performance = 0; - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Perform the forward pass and save the activations. 
- Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); + currentInput = input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1); + arma::mat currentTarget = target.rows(seqNum * targetSize, + (seqNum + 1) * targetSize - 1); - // Retrieve output error of the subsequence. - if (seqOutput) + Forward(std::move(currentInput)); + + if (!deterministic) { - arma::mat seqError = error.unsafe_col(seqNum); - arma::mat seqTarget = target.submat(seqNum * outputSize, 0, - (seqNum + 1) * outputSize - 1, 0); - networkError += OutputError(seqTarget, seqError, network); + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } } + + performance += outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); } - // Retrieve output error of the complete sequence. - if (!seqOutput) - return OutputError(target, error, network); + if (!outputSize) + { + outputSize = boost::apply_visitor(outputParameterVisitor, + network.back()).n_elem; + } - return networkError; + return performance; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, +template +void RNN::Gradient(const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } else @@ -295,59 +242,173 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction gradient.zeros(); } - Evaluate(parameter, i, false); + Evaluate(parameters, i, false); - arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols); - NetworkGradients(currentGradient, network); + arma::mat currentGradient = arma::zeros(parameter.n_rows, + parameter.n_cols); + ResetGradients(currentGradient); - const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + 1, false, true); + arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Iterate through the input sequence and perform the feed backward pass. - for (seqNum = seqLen - 1; seqNum >= 0; seqNum--) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Load the network activation for the upcoming backward pass. - LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) * - inputSize - 1), network); + currentGradient.zeros(); + + arma::mat currentTarget = target.rows((rho - seqNum - 1) * targetSize, + (rho - seqNum) * targetSize - 1); + currentInput = input.rows((rho - seqNum - 1) * inputSize, + (rho - seqNum) * inputSize - 1); - // Perform the backward pass. - if (seqOutput) + for (size_t l = 0; l < network.size(); ++l) { - arma::mat seqError = error.unsafe_col(seqNum); - Backward(seqError, network); + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (single && seqNum > 0) + { + error.zeros(); } else { - Backward(error, network); + outputLayer.Backward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget), + std::move(error)); } - // Link the parameters and update the gradients. - LinkParameter(network); - UpdateGradients<>(network); - - // Update the overall gradient. 
+ Backward(); + Gradient(); gradient += currentGradient; + } +} + +template +void RNN::ResetParameters() +{ + size_t weights = 0; + for (LayerTypes& layer : network) + { + weights += boost::apply_visitor(weightSizeVisitor, layer); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } +} + +template +void RNN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} - if (seqNum == 0) break; +template +void RNN::ResetGradients(arma::mat& gradient) +{ + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), layer); } } -template +template +void RNN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + } +} + +template +void RNN::Backward() +{ + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network.back())), + std::move(error), std::move(boost::apply_visitor(deltaVisitor, + network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + network[network.size() - i])), std::move(boost::apply_visitor( + deltaVisitor, network[network.size() - i + 1])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i]))), + network[network.size() - i]); + } +} + +template +void RNN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(deltaVisitor, network[i + 1]))), + network[i]); + } +} + +template template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void RNN::Serialize(Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(single, "single"); + ar & data::CreateNVP(inputSize, "inputSize"); + ar & data::CreateNVP(outputSize, "outputSize"); + ar & data::CreateNVP(targetSize, "targetSize"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } } } From 558d05bf471f15b004aa07c1461f3e7c2df3c49f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 10 Dec 2016 21:46:54 +0100 Subject: [PATCH 31/82] Include all layer modules. 
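
With the refactored RNN class earlier in this series and the umbrella headers
added here, a recurrent model is meant to be assembled from the individual
layer modules and driven through the Train()/Predict() interface shown in
rnn_impl.hpp. The sketch below is illustrative only and is not part of this
patch: it assumes the RNN class template supplies the usual defaults (a
NegativeLogLikelihood<> output layer and default-constructed remaining
constructor arguments), that the network exposes an Add<LayerType>(...) helper
which appends modules to its internal std::vector<LayerTypes>, that Linear,
LSTM and LogSoftMax take (inSize, outSize[, rho]) constructor arguments, and
that the SGD optimizer from mlpack/core/optimizers/sgd is available. Only
Train() and Predict() are taken directly from the diff.

#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/sgd/sgd.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/rnn.hpp>

using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;

int main()
{
  // Each column of predictors holds one flattened sequence of rho time steps
  // (rho * inputSize rows); each column of responses holds the rho class
  // labels (in 1..numClasses) for that sequence, matching the layout used by
  // SinglePredict() and Evaluate() in rnn_impl.hpp.
  const size_t rho = 10, inputSize = 4, hiddenSize = 16, numClasses = 3;
  arma::mat predictors(rho * inputSize, 100, arma::fill::randu);
  arma::mat responses = arma::conv_to<arma::mat>::from(
      arma::randi<arma::imat>(rho, 100, arma::distr_param(1, (int) numClasses)));

  RNN<> model(rho);                                // template defaults assumed
  model.Add<Linear<> >(inputSize, hiddenSize);     // Add<>() assumed, see note
  model.Add<LSTM<> >(hiddenSize, hiddenSize, rho);
  model.Add<Linear<> >(hiddenSize, numClasses);
  model.Add<LogSoftMax<> >();

  SGD<decltype(model)> opt(model, 0.01, 40000, 1e-5);
  model.Train(predictors, responses, opt);         // Train() as in the diff
  arma::mat predictions;
  model.Predict(predictors, predictions);          // Predict() as in the diff

  return 0;
}
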
--- src/mlpack/methods/ann/layer/layer.hpp | 30 +++++ src/mlpack/methods/ann/layer/layer_types.hpp | 117 +++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/layer.hpp create mode 100644 src/mlpack/methods/ann/layer/layer_types.hpp diff --git a/src/mlpack/methods/ann/layer/layer.hpp b/src/mlpack/methods/ann/layer/layer.hpp new file mode 100644 index 00000000000..c1de77741e9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer.hpp @@ -0,0 +1,30 @@ +/** + * @file layer.hpp + * @author Marcus Edel + * + * This includes various layers to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_HPP + +#include "add_merge.hpp" +#include "concat_performance.hpp" +#include "convolution.hpp" +#include "dropconnect.hpp" +#include "glimpse.hpp" +#include "layer_types.hpp" +#include "linear.hpp" +#include "linear_no_bias.hpp" +#include "lstm.hpp" +#include "recurrent.hpp" +#include "recurrent_attention.hpp" +#include "sequential.hpp" +#include "concat.hpp" +#include "vr_class_reward.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp new file mode 100644 index 00000000000..156616864db --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_types.hpp @@ -0,0 +1,117 @@ +/** + * @file layer_types.hpp + * @author Marcus Edel + * + * This provides a list of all modules that can be used to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP + +#include + +// Layer modules. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Convolution modules. 
+#include +#include +#include + +namespace mlpack { +namespace ann { + +template class AddMerge; +template class Concat; +template class DropConnect; +template class Glimpse; +template class Linear; +template class LinearNoBias; +template class LSTM; +template class Recurrent; +template class Sequential; +template class VRClassReward; + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +class ConcatPerformance; + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +class Convolution; + +template< + typename InputDataType, + typename OutputDataType +> +class RecurrentAttention; + +using LayerTypes = boost::variant< + Add*, + AddMerge*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + Concat*, + ConcatPerformance, + arma::mat, arma::mat>*, + Constant*, + Convolution, + NaiveConvolution, + NaiveConvolution, arma::mat, arma::mat>*, + DropConnect*, + Dropout*, + Glimpse*, + HardTanH*, + Join*, + LeakyReLU*, + Linear*, + LinearNoBias*, + LogSoftMax*, + Lookup*, + LSTM*, + MaxPooling*, + MeanPooling*, + MeanSquaredError*, + MultiplyConstant*, + NegativeLogLikelihood*, + Recurrent*, + RecurrentAttention*, + ReinforceNormal*, + Select*, + Sequential*, + VRClassReward* +>; + +} // namespace ann +} // namespace mlpack + +#endif From 30adb3e505afc189322b8d6f4623a43e5ca8d6ef Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 13:52:50 +0100 Subject: [PATCH 32/82] Minor style fixes. --- src/mlpack/methods/ann/layer/convolution.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index be7fb7d6a5b..03477c6ec1e 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -99,7 +99,7 @@ class Convolution void Reset() { weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false,false); + outSize * inSize, false, false); bias = arma::mat(weights.memptr() + weight.n_elem, outSize, 1, false, false); } @@ -196,8 +196,6 @@ class Convolution { gTemp.slice(inMap) += output; } - - } } From e0753876840a6269b264e959472f4b1784f54d0b Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 13:55:03 +0100 Subject: [PATCH 33/82] Add layer traits to check for the input width, height and model function. --- src/mlpack/methods/ann/layer/layer_traits.hpp | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/ann/layer/layer_traits.hpp b/src/mlpack/methods/ann/layer/layer_traits.hpp index a8671d62f5d..ff4fbf2d387 100644 --- a/src/mlpack/methods/ann/layer/layer_traits.hpp +++ b/src/mlpack/methods/ann/layer/layer_traits.hpp @@ -64,27 +64,42 @@ HAS_MEM_FUNC(Gradient, HasGradientCheck); // function. HAS_MEM_FUNC(Deterministic, HasDeterministicCheck); -// This gives us a HasRecurrentParameterCheck type (where U is a function -// pointer) we can use with SFINAE to catch when a type has a -// RecurrentParameter() function. -HAS_MEM_FUNC(RecurrentParameter, HasRecurrentParameterCheck); +// This gives us a HasParametersCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. 
+HAS_MEM_FUNC(Parameters, HasParametersCheck); -// This gives us a HasSeqLenCheck type (where U is a function pointer) we -// can use with SFINAE to catch when a type has a SeqLen() function. -HAS_MEM_FUNC(SeqLen, HasSeqLenCheck); +// This gives us a HasAddCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Add, HasAddCheck); -// This gives us a HasWeightsCheck type (where U is a function pointer) we +// This gives us a HasModelCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Weights() function. -HAS_MEM_FUNC(Weights, HasWeightsCheck); +HAS_MEM_FUNC(Model, HasModelCheck); // This gives us a HasLocationCheck type (where U is a function pointer) // we can use with SFINAE to catch when a type has a Location() function. HAS_MEM_FUNC(Location, HasLocationCheck); +// This gives us a HasResetCheck type (where U is a function pointer) +// we can use with SFINAE to catch when a type has a Location() function. +HAS_MEM_FUNC(Reset, HasResetCheck); + // This gives us a HasRewardCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Reward() function. HAS_MEM_FUNC(Reward, HasRewardCheck); +// This gives us a HasInputWidth type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a InputWidth() function. +HAS_MEM_FUNC(InputWidth, HasInputWidth); + +// This gives us a HasInputHeight type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a InputHeight() function. +HAS_MEM_FUNC(InputHeight, HasInputHeight); + +// This gives us a HasRho type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Rho() function. +HAS_MEM_FUNC(InputHeight, HasRho); + } // namespace ann } // namespace mlpack From a7059a1e946d97714b1ef61fa318d9da2bf7ac25 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:47:53 +0100 Subject: [PATCH 34/82] Refactor neural visual attention modules. --- src/mlpack/methods/ann/layer/glimpse.hpp | 592 ++++++++++++++++++ .../methods/ann/layer/recurrent_attention.hpp | 408 ++++++++++++ .../methods/ann/layer/reinforce_normal.hpp | 140 +++++ .../methods/ann/layer/vr_class_reward.hpp | 191 ++++++ 4 files changed, 1331 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/glimpse.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_attention.hpp create mode 100644 src/mlpack/methods/ann/layer/reinforce_normal.hpp create mode 100644 src/mlpack/methods/ann/layer/vr_class_reward.hpp diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp new file mode 100644 index 00000000000..37db36b1edc --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -0,0 +1,592 @@ +/** + * @file glimpse.hpp + * @author Marcus Edel + * + * Definition of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + * + * For more information, see the following. 
+ * + * @code + * @article{CoRR2014, + * author = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, + * title = {Recurrent Models of Visual Attention}, + * journal = {CoRR}, + * volume = {abs/1406.6247}, + * year = {2014}, + * } + * @endcode + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP + +#include + +#include "layer_types.hpp" +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + + +/* + * The mean pooling rule for convolution neural networks. Average all values + * within the receptive block. + */ +class MeanPoolingRule +{ + public: + /* + * Return the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + double Pooling(const MatType& input) + { + return arma::mean(arma::mean(input)); + } + + /* + * Set the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + * @param value The unpooled value. + * @param output The unpooled output data. + */ + template + void Unpooling(const MatType& input, const double value, MatType& output) + { + output = arma::zeros(input.n_rows, input.n_cols); + const double mean = arma::mean(arma::mean(input)); + + output.elem(arma::find(mean == input, 1)).fill(value); + } +}; + +/** + * The glimpse layer returns a retina-like representation + * (down-scaled cropped images) of increasing scale around a given location in a + * given image. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Glimpse +{ + public: + + /** + * Create the GlimpseLayer object using the specified ratio and rescale + * parameter. + * + * @param inSize The size of the input units. + * @param size The used glimpse size (height = width). + * @param depth The number of patches to crop per glimpse. + * @param scale The scaling factor used to create the increasing retina-like + * representation. + * @param inputWidth The input width of the given input data. + * @param inputHeight The input height of the given input data. + */ + Glimpse(const size_t inSize, + const size_t size, + const size_t depth = 3, + const size_t scale = 2, + const size_t inputWidth = 0, + const size_t inputHeight = 0) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of the glimpse layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } + + /** + * Ordinary feed backward pass of the glimpse layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType& InputParameter() const {return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const {return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the detla. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Set the locationthe x and y coordinate of the center of the output + //! glimpse. + void Location(const arma::mat& location) + { + this->location = location; + } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. 
+ size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + private: + /* + * Transform the given input by changing rows to columns. + * + * @param w The input matrix used to perform the transformation. + */ + void Transform(arma::mat& w) + { + arma::mat t = w; + + for (size_t i = 0, k = 0; i < w.n_elem; k++) + { + for (size_t j = 0; j < w.n_cols; j++, i++) + { + w(k, j) = t(i); + } + } + } + + /* + * Transform the given input by changing rows to columns. + * + * @param w The input matrix used to perform the transformation. + */ + void Transform(arma::cube& w) + { + for (size_t i = 0; i < w.n_slices; i++) + { + arma::mat t = w.slice(i); + Transform(t); + w.slice(i) = t; + } + } + + /** + * Apply pooling to the input and store the results to the output parameter. + * + * @param kSize the kernel size used to perform the pooling operation. + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const size_t kSize, + const arma::Mat& input, + arma::Mat& output) + { + + const size_t rStep = kSize; + const size_t cStep = kSize; + + for (size_t j = 0; j < input.n_cols; j += cStep) + { + for (size_t i = 0; i < input.n_rows; i += rStep) + { + output(i / rStep, j / cStep) += pooling.Pooling( + input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1))); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param error The error used to perform the unpooling operation. + * @param output The pooled result. + */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows; + const size_t cStep = input.n_cols / error.n_cols; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols; j += cStep) + { + for (size_t i = 0; i < input.n_rows; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + pooling.Unpooling(inputArea, error(i / rStep, j / cStep), + unpooledError); + + output(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)) += unpooledError; + } + } + } + + /** + * Apply ReSampling to the input and store the results in the output + * parameter. + * + * @param input The input to be apply the ReSampling rule. + * @param output The pooled result. + */ + template + void ReSampling(const arma::Mat& input, arma::Mat& output) + { + double wRatio = (double) (input.n_rows - 1) / (size - 1); + double hRatio = (double) (input.n_cols - 1) / (size - 1); + + double iWidth = input.n_rows - 1; + double iHeight = input.n_cols - 1; + + for (size_t y = 0; y < size; y++) + { + for (size_t x = 0; x < size; x++) + { + double ix = wRatio * x; + double iy = hRatio * y; + + // Get the 4 nearest neighbors. + double ixNw = std::floor(ix); + double iyNw = std::floor(iy); + double ixNe = ixNw + 1; + double iySw = iyNw + 1; + + // Get surfaces to each neighbor. + double se = (ix - ixNw) * (iy - iyNw); + double sw = (ixNe - ix) * (iy - iyNw); + double ne = (ix - ixNw) * (iySw - iy); + double nw = (ixNe - ix) * (iySw - iy); + + // Calculate the weighted sum. 
+ output(y, x) = input(iyNw, ixNw) * nw + + input(iyNw, std::min(ixNe, iWidth)) * ne + + input(std::min(iySw, iHeight), ixNw) * sw + + input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se; + } + } + } + + /** + * Apply DownwardReSampling to the input and store the results into the output + * parameter. + * + * @param input The input to be apply the DownwardReSampling rule. + * @param error The error used to perform the DownwardReSampling operation. + * @param output The DownwardReSampled result. + */ + template + void DownwardReSampling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + double iWidth = input.n_rows - 1; + double iHeight = input.n_cols - 1; + + double wRatio = iWidth / (size - 1); + double hRatio = iHeight / (size - 1); + + for (size_t y = 0; y < size; y++) + { + for (size_t x = 0; x < size; x++) + { + double ix = wRatio * x; + double iy = hRatio * y; + + // Get the 4 nearest neighbors. + double ixNw = std::floor(ix); + double iyNw = std::floor(iy); + double ixNe = ixNw + 1; + double iySw = iyNw + 1; + + // Get surfaces to each neighbor. + double se = (ix - ixNw) * (iy - iyNw); + double sw = (ixNe - ix) * (iy - iyNw); + double ne = (ix - ixNw) * (iySw - iy); + double nw = (ixNe - ix) * (iySw - iy); + + double ograd = error(y, x); + + output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd; + output(iyNw, std::min(ixNe, iWidth)) = output(iyNw, + std::min(ixNe, iWidth)) + ne * ograd; + output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight), + ixNw) + sw * ograd; + output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output( + std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd; + } + } + } + + //! The size of the input units. + size_t inSize; + + //! The used glimpse size (height = width). + size_t size; + + //! The number of patches to crop per glimpse. + size_t depth; + + //! The scale fraction. + size_t scale; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored depth of the input. + size_t inputDepth; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! The x and y coordinate of the center of the output glimpse. + arma::mat location; + + //! Locally-stored object to perform the mean pooling operation. + MeanPoolingRule pooling; + + //! Location-stored module location parameter. + std::vector locationParameter; + + //! Location-stored transformed gradient paramter. + arma::cube gTemp; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; +}; // class GlimpseLayer + +}; // namespace ann +}; // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp new file mode 100644 index 00000000000..1d1405d8863 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -0,0 +1,408 @@ +/** + * @file recurrent_attention.hpp + * @author Marcus Edel + * + * Definition of the RecurrentAttention class. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * This class implements the Recurrent Model for Visual Attention, using a + * variety of possible layer implementations. + * + * For more information, see the following paper. + * + * @code + * @article{MnihHGK14, + * title={Recurrent Models of Visual Attention}, + * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, + * journal={CoRR}, + * volume={abs/1406.6247}, + * year={2014} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class RecurrentAttention +{ + public: + /** + * Create the RecurrentAttention object using the specified modules. + * + * @param start The module output size. + * @param start The recurrent neural network module. + * @param start The action module. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + template + RecurrentAttention(const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) + { + network.push_back(rnnModule); + network.push_back(actionModule); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. 
+ if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. + for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. 
+ OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); + } + + private: + //! Calculate the gradient of the attention module. + void IntermediateGradient() + { + intermediateGradient.zeros(); + + // Gradient of the action module. + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(initialInput), + std::move(actionError)), actionModule); + } + else + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError)), + actionModule); + } + + // Gradient of the recurrent module. + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError)), + rnnModule); + + attentionGradient += intermediateGradient; + } + + //! Locally-stored module output size. + size_t outSize; + + //! Locally-stored start module. + LayerTypes rnnModule; + + //! Locally-stored input module. + LayerTypes actionModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; + + //! 
Locally-stored action error parameter. + arma::mat actionError; + + //! Locally-stored action delta. + arma::mat actionDelta; + + //! Locally-stored recurrent delta. + arma::mat rnnDelta; + + //! Locally-stored initial action input. + arma::mat initialInput; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored attention gradient. + arma::mat attentionGradient; + + //! Locally-stored intermediate gradient for the attention module. + arma::mat intermediateGradient; +}; // class RecurrentAttention + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp new file mode 100644 index 00000000000..bc938d1a766 --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -0,0 +1,140 @@ +/** + * @file reinforce_normal.hpp + * @author Marcus Edel + * + * Definition of the ReinforceNormalLayer class, which implements the REINFORCE + * algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the reinforce normal layer. The reinforce normal layer + * implements the REINFORCE algorithm for the normal distribution. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ReinforceNormal +{ + public: + /** + * Create the ReinforceNormal object. + * + * @param stdev Standard deviation used during the forward and backward pass. + */ + ReinforceNormal(const double stdev) : stdev(stdev) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) + { + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); + } + + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. 
+ OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the value of the reward parameter. + double Reward() const { return reward; } + //! Modify the value of the deterministic parameter. + double& Reward() { return reward; } + + private: + //! Standard deviation used during the forward and backward pass. + const double stdev; + + //! Locally-stored reward parameter. + double reward; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored output module parameter parameters. + std::vector moduleInputParameter; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; +}; // class ReinforceNormal + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp new file mode 100644 index 00000000000..d2802dacabc --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -0,0 +1,191 @@ +/** + * @file vr_class_reward.hpp + * @author Marcus Edel + * + * Definition of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the variance reduced classification reinforcement layer. + * This layer is meant to be used in combination with the reinforce normal layer + * (ReinforceNormalLayer), which expects that an reward: + * (1 for success, 0 otherwise). + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class VRClassReward +{ + public: + /** + * Create the VRClassReward object. + * + * @param scale Parameter used to scale the reward. + * @param sizeAverage Take the average over all batches. + */ + VRClassReward(const double scale = 1, const bool sizeAverage = true) : + scale(scale), + sizeAverage(sizeAverage) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data that contains the log-probabilities for each class. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. 
+ */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; + } + + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); + } + + //! Get the input parameter. + InputDataType& InputParameter() const {return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const {return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const {return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + private: + //! Locally-stored value to scale the reward. + const double scale; + + //! If true take the average over all batches. + const bool sizeAverage; + + //! Locally stored reward parameter. + double reward; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored network modules. 
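  // As an illustration of the coupling implemented above (numbers
  // hypothetical): the leading columns of the input hold the class
  // log-probabilities and are trained with the usual negative log-likelihood,
  // while the trailing column (read as input(0, 1) above) carries a learned
  // reward baseline b. The REINFORCE modules registered via Add() receive the
  // variance-reduced reward (reward - b), and b itself is pushed towards the
  // observed reward through the squared-error gradient 2 * (b - reward)
  // written into output(0, 1). With scale = 1, a correct prediction
  // (reward = 1) and a current baseline b = 0.3, the attached modules are
  // handed 0.7 and the baseline gradient is 2 * (0.3 - 1) = -1.4; both values
  // are additionally divided by (input.n_cols - 1) when sizeAverage is set.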
+ std::vector network; +}; // class VRClassReward + +}; // namespace ann +}; // namespace mlpack + +#endif From acd05e3d1331a59b7be5a7fe6041fe56ee48e364 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:50:59 +0100 Subject: [PATCH 35/82] Use refactored rnn,ffn classes for the ann tests. --- src/mlpack/tests/CMakeLists.txt | 2 +- .../tests/activation_functions_test.cpp | 40 +- src/mlpack/tests/convolution_test.cpp | 3 +- .../tests/convolutional_network_test.cpp | 108 ++---- src/mlpack/tests/feedforward_network_test.cpp | 360 +++++++----------- src/mlpack/tests/recurrent_network_test.cpp | 5 + 6 files changed, 193 insertions(+), 325 deletions(-) diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 3b3ab0d30b1..b1dc438eaa2 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(mlpack_test ada_delta_test.cpp akfn_test.cpp aknn_test.cpp + ann_layer_test.cpp arma_extend_test.cpp armadillo_svd_test.cpp aug_lagrangian_test.cpp @@ -56,7 +57,6 @@ add_executable(mlpack_test mlpack_test.cpp nbc_test.cpp nca_test.cpp - network_util_test.cpp nmf_test.cpp nystroem_method_test.cpp octree_test.cpp diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp index bebca0de09b..94df3b59380 100644 --- a/src/mlpack/tests/activation_functions_test.cpp +++ b/src/mlpack/tests/activation_functions_test.cpp @@ -12,23 +12,13 @@ */ #include +#include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -125,7 +115,7 @@ void CheckInverseCorrect(const arma::colvec input) /* * Implementation of the HardTanH activation function test. The function is - * implemented as a HardTanH Layer in hard_tanh_layer.hpp + * implemented as a HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -133,11 +123,11 @@ void CheckInverseCorrect(const arma::colvec input) void CheckHardTanHActivationCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the activation function using the entire vector as input. arma::colvec activations; - htf.Forward(input, activations); + htf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -146,7 +136,7 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, /* * Implementation of the HardTanH activation function derivative test. The - * derivative is implemented as HardTanH Layer in hard_tanh_layer.hpp + * derivative is implemented as HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -154,14 +144,15 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, void CheckHardTanHDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. 
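  // The refactored layer API takes rvalue-reference arguments, so the tests
  // wrap every matrix in std::move() to bind to those references; the inputs
  // are still usable afterwards, as the reuse below shows. A minimal sketch
  // with the renamed HardTanH module and its default [-1, 1] bounds (variable
  // names hypothetical):
  //
  //   HardTanH<> htf;
  //   arma::colvec x = arma::linspace<arma::colvec>(-2, 2, 5);
  //   arma::colvec y, dy = arma::ones<arma::colvec>(5), dx;
  //   htf.Forward(std::move(x), std::move(y));        // y = clamp(x, -1, 1)
  //   htf.Backward(std::move(x), std::move(dy), std::move(dx));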
- arma::colvec error(input.n_elem); - htf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + htf.Backward(std::move(input), std::move(error), std::move(derivatives)); + for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); @@ -170,7 +161,7 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, /* * Implementation of the LeakyReLU activation function test. The function is - * implemented as LeakyReLU layer in the file leaky_relu_layer.hpp + * implemented as LeakyReLU layer in the file leaky_relu.hpp * * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. @@ -178,11 +169,11 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, void CheckLeakyReLUActivationCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the activation function using the entire vector as input. arma::colvec activations; - lrf.Forward(input, activations); + lrf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -197,18 +188,17 @@ void CheckLeakyReLUActivationCorrect(const arma::colvec input, * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. */ - void CheckLeakyReLUDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. - arma::colvec error(input.n_elem); - lrf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + lrf.Backward(std::move(input), std::move(error), std::move(derivatives)); for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); diff --git a/src/mlpack/tests/convolution_test.cpp b/src/mlpack/tests/convolution_test.cpp index a277b9cb41b..180ca8bab36 100644 --- a/src/mlpack/tests/convolution_test.cpp +++ b/src/mlpack/tests/convolution_test.cpp @@ -3,12 +3,11 @@ * @author Shangtong Zhang * @author Marcus Edel * - * Tests for various convolution strategies. - * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. + * Tests for various convolution strategies. */ #include diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 52e1a6c6394..0c99722ca1c 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -11,21 +11,9 @@ */ #include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include #include - -#include -#include +#include +#include #include #include "test_tools.hpp" @@ -38,12 +26,9 @@ using namespace mlpack::optimization; BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** - * Train and evaluate a vanilla network with the specified structure. + * Train the vanilla network on a larger dataset. 
*/ -template< - typename PerformanceFunction -> -void BuildVanillaNetwork() +BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { arma::mat X; X.load("mnist_first250_training_4s_and_9s.arm"); @@ -56,23 +41,19 @@ void BuildVanillaNetwork() } // Build the target matrix. - arma::mat Y = arma::zeros(10, nPoints); + arma::mat Y = arma::zeros(1, nPoints); for (size_t i = 0; i < nPoints; i++) { if (i < nPoints / 2) { - Y.col(i)(5) = 1; + Y(i) = 4; } else { - Y.col(i)(8) = 1; + Y(i) = 9; } } - arma::cube input = arma::cube(28, 28, nPoints); - for (size_t i = 0; i < nPoints; i++) - input.slice(i) = arma::mat(X.colptr(i), 28, 28); - /* * Construct a convolutional neural network with a 28x28x1 input layer, * 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer @@ -90,57 +71,46 @@ void BuildVanillaNetwork() * | | +-+ | +-+ | +-+ | +-+ | | | * +---+ +---+ +---+ +---+ +---+ +---+ */ - - ConvLayer<> convLayer0(1, 8, 5, 5); - BiasLayer2D<> biasLayer0(8); - BaseLayer2D<> baseLayer0; - PoolingLayer<> poolingLayer0(2); - - ConvLayer<> convLayer1(8, 12, 5, 5); - BiasLayer2D<> biasLayer1(12); - BaseLayer2D<> baseLayer1; - PoolingLayer<> poolingLayer1(2); - - LinearMappingLayer<> linearLayer0(4608, 10); - BiasLayer<> biasLayer2(10); - SoftmaxLayer<> softmaxLayer0; - - OneHotLayer outputLayer; - - auto modules = std::tie(convLayer0, baseLayer0, linearLayer0, softmaxLayer0); - - CNN net(modules, outputLayer); - biasLayer0.Weights().zeros(); - biasLayer1.Weights().zeros(); - - RMSprop opt(net, 0.01, 0.88, 1e-8, 10 * input.n_slices, 0); - - net.Train(input, Y, opt); - - arma::mat prediction; - net.Predict(input, prediction); + FFN > model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 30); + model.Add >(); + model.Add >(30, 10); + model.Add >(); + + RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; - for (size_t i = 0; i < nPoints; i++) + for (size_t i = 0; i < X.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - Y.col(i)))) == 0) + if (prediction(i) == Y(i)) { error++; } } - double classificationError = 1 - double(error) / nPoints; - BOOST_REQUIRE_LE(classificationError, 0.6); -} - -/** - * Train the vanilla network on a larger dataset. - */ -BOOST_AUTO_TEST_CASE(VanillaNetworkTest) -{ - BuildVanillaNetwork(); + double classificationError = 1 - double(error) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 4477bf22568..883fe9e6b00 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -12,21 +12,9 @@ */ #include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include #include +#include +#include #include #include "test_tools.hpp" @@ -40,16 +28,12 @@ BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest); /** * Train and evaluate a vanilla network with the specified structure. 
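 *
 * @param trainData Training examples, one example per column.
 * @param trainLabels Row vector of 1-based class labels for trainData.
 * @param testData Test examples, one example per column.
 * @param testLabels Row vector of 1-based class labels for testData.
 * @param outputSize Number of classes the network has to separate.
 * @param hiddenLayerSize Number of units in the single hidden layer.
 * @param maxEpochs Maximum number of optimizer passes over the training data.
 * @param classificationErrorThreshold Largest acceptable classification error
 *     on the test set.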
*/ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildVanillaNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -76,35 +60,32 @@ void BuildVanillaNetwork(MatType& trainData, * +-----+ +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> inputBiasLayer(hiddenLayerSize); - BaseLayer inputBaseLayer; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows); - BaseLayer outputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - OutputLayerType classOutputLayer; + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer, - hiddenLayer1, hiddenBiasLayer1, outputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - FFN net(modules, classOutputLayer); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -125,23 +106,36 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
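  // The label handling above converts the one-hot rows shipped with the
  // thyroid data into the 1-based class indices that the refactored networks
  // are trained on. The same pattern recurs in every test in this file; a
  // small helper with that behaviour could look like this (hypothetical, not
  // part of mlpack):
  //
  //   arma::mat OneHotToLabels(const arma::mat& oneHot)
  //   {
  //     arma::mat labels = arma::zeros<arma::mat>(1, oneHot.n_cols);
  //     for (size_t i = 0; i < oneHot.n_cols; ++i)
  //     {
  //       labels(i) = arma::as_scalar(arma::find(
  //           arma::max(oneHot.col(i)) == oneHot.col(i), 1)) + 1;
  //     }
  //     return labels;
  //   }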
- BuildVanillaNetwork - (trainData, trainLabels, testData, testLabels, 8, 200, 0.1); + BuildVanillaNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -151,33 +145,22 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 30, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 10, 30, 0.4); + BuildVanillaNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a Dropout network with the specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropoutNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -204,35 +187,33 @@ void BuildDropoutNetwork(MatType& trainData, * +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - DropoutLayer<> dropoutLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, dropoutLayer0, - hiddenLayer1, outputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - FFN net(modules, classOutputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -253,23 +234,36 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + 
+ arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. - BuildDropoutNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropoutNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -279,34 +273,23 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropoutNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a DropConnect network(with a baselayer) with the * specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropConnectNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -334,122 +317,42 @@ void BuildDropConnectNetwork(MatType& trainData, * * */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - DropConnectLayer dropConnectLayer0(hiddenLayer1); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - FFN net(modules, classOutputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + model.Train(std::move(trainData), std::move(trainLabels), opt); - net.Train(trainData, trainLabels, opt); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - MatType prediction; - net.Predict(testData, prediction); - - size_t error = 0; - for (size_t i = 0; i < testData.n_cols; i++) + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; } - double classificationError = 1 - double(error) / testData.n_cols; - BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); -} - -/** - * Train and evaluate a DropConnect network(with a linearlayer) with the - * specified structure. 
- */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> -void BuildDropConnectNetworkLinear(MatType& trainData, - MatType& trainLabels, - MatType& testData, - MatType& testLabels, - const size_t hiddenLayerSize, - const size_t maxEpochs, - const double classificationErrorThreshold) -{ - /* - * Construct a feed forward network with trainData.n_rows input nodes, - * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The - * network struct that looks like: - * - * Input Hidden DropConnect Output - * Layer Layer Layer Layer - * +-----+ +-----+ +-----+ +-----+ - * | | | | | | | | - * | +------>| +------>| +------>| | - * | | +>| | | | | | - * +-----+ | +--+--+ +-----+ +-----+ - * | - * Bias | - * Layer | - * +-----+ | - * | | | - * | +-----+ - * | | - * +-----+ - * - * - */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - DropConnectLayer<> dropConnectLayer0(hiddenLayerSize, trainLabels.n_rows); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); - - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); - size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) + { + error++; + } } double classificationError = 1 - double(error) / testData.n_cols; BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); } + /** * Train the dropconnect network on a larger dataset. */ @@ -461,28 +364,36 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildDropConnectNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); - - BuildDropConnectNetworkLinear - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropConnectNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -492,18 +403,11 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropConnectNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - - BuildDropConnectNetworkLinear - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropConnectNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index ff5daae9ede..f7546e83b50 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,6 +3,11 @@ * @author Marcus Edel * * Tests the recurrent network. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include From 1172374fd176450546887f422f91c43185af36a2 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 13 Dec 2016 23:05:53 +0100 Subject: [PATCH 36/82] Add ann module test. --- src/mlpack/tests/ann_layer_test.cpp | 524 ++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 src/mlpack/tests/ann_layer_test.cpp diff --git a/src/mlpack/tests/ann_layer_test.cpp b/src/mlpack/tests/ann_layer_test.cpp new file mode 100644 index 00000000000..101a0170751 --- /dev/null +++ b/src/mlpack/tests/ann_layer_test.cpp @@ -0,0 +1,524 @@ +/** + * @file ann_layer_test.cpp + * @author Marcus Edel + * + * Tests the ann layer modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::ann; + +BOOST_AUTO_TEST_SUITE(ANNLayerTest); + +// Helper function whcih calls the Reset function of the given module. +template +void ResetFunction( + T& layer, + typename std::enable_if::value>::type* = 0) +{ + layer.Reset(); +} + +template +void ResetFunction( + T& /* layer */, + typename std::enable_if::value>::type* = 0) +{ + /* Nothing to do here */ +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianTest(ModuleType& module, + arma::mat& input, + const double minValue = -2, + const double maxValue = -1, + const double perturbation = 1e-6) +{ + arma::mat output, outputA, outputB, jacobianA, jacobianB; + + // Initialize the input matrix. + RandomInitialization init(minValue, maxValue); + init.Initialize(input, input.n_rows, input.n_cols); + + // Initialize the module parameters. 
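  // In outline, JacobianTest() builds two estimates of the module Jacobian
  // d(output)/d(input) and returns their largest absolute difference. The
  // first perturbs one input element at a time and applies the central
  // difference
  //
  //   jacobianA.row(i) = (f(x + h * e_i) - f(x - h * e_i)) / (2 * h),
  //
  // with h = perturbation, while the second feeds every unit error vector e_j
  // through Backward() and stores the returned delta as jacobianB.col(j). A
  // module passes when the mismatch stays below the 1e-5 tolerance used by
  // the individual layer tests below.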
+ ResetFunction(module); + + // Initialize the jacobian matrix. + module.Forward(std::move(input), std::move(output)); + jacobianA = arma::zeros(input.n_elem, output.n_elem); + + // Share the input paramter matrix. + arma::mat sin = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + double original = sin(i); + sin(i) = original - perturbation; + module.Forward(std::move(input), std::move(outputA)); + sin(i) = original + perturbation; + module.Forward(std::move(input), std::move(outputB)); + sin(i) = original; + + outputB -= outputA; + outputB /= 2 * perturbation; + jacobianA.row(i) = outputB.t(); + } + + // Initialize the derivative parameter. + arma::mat deriv = arma::zeros(output.n_rows, output.n_cols); + + // Share the derivative parameter. + arma::mat derivTemp = arma::mat(deriv.memptr(), deriv.n_rows, deriv.n_cols, + false, false); + + // Initialize the jacobian matrix. + jacobianB = arma::zeros(input.n_elem, output.n_elem); + + for (size_t i = 0; i < derivTemp.n_elem; ++i) + { + deriv.zeros(); + derivTemp(i) = 1; + + arma::mat delta; + module.Backward(std::move(input), std::move(deriv), std::move(delta)); + + jacobianB.col(i) = delta; + } + + return arma::max(arma::max(arma::abs(jacobianA - jacobianB))); +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianPerformanceTest(ModuleType& module, + arma::mat& input, + arma::mat& target, + const double eps = 1e-6) +{ + module.Forward(std::move(input), std::move(target)); + + arma::mat delta; + module.Backward(std::move(input), std::move(target), std::move(delta)); + + arma::mat centralDifference = arma::zeros(delta.n_rows, delta.n_cols); + arma::mat inputTemp = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + arma::mat centralDifferenceTemp = arma::mat(centralDifference.memptr(), + centralDifference.n_rows, centralDifference.n_cols, false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + inputTemp(i) = inputTemp(i) + eps; + double outputA = module.Forward(std::move(input), std::move(target)); + inputTemp(i) = inputTemp(i) - (2 * eps); + double outputB = module.Forward(std::move(input), std::move(target)); + + centralDifferenceTemp(i) = (outputA - outputB) / ( 2 * eps); + inputTemp(i) = inputTemp(i) + eps; + } + + return arma::max(arma::max(arma::abs(centralDifference - delta))); +} + +/** + * Simple add module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddLayerTest) +{ + arma::mat output, input, delta; + Add<> module(10); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(module.Parameters()), arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(10 + arma::accu(module.Parameters()), + arma::accu(output), 1e-3); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_CLOSE(arma::accu(output), arma::accu(delta), 1e-3); +} + +/** + * Jacobian add module test. 
+ */ +BOOST_AUTO_TEST_CASE(JacobianAddLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Add<> module(elements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple constant module test. + */ +BOOST_AUTO_TEST_CASE(SimpleConstantLayerTest) +{ + arma::mat output, input, delta; + Constant<> module(10, 3.0); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian constant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Constant<> module(elements, 1.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple dropout module test. + */ +BOOST_AUTO_TEST_CASE(SimpleDropoutLayerTest) +{ + // Initialize the probability of setting a value to zero and the scale + // parameter. + const double p = 0.2; + const double scale = 1.0 / (1.0 - p); + + // Initialize the input parameter. + arma::mat input(1000, 1); + input.fill(1 - p); + + Dropout<> module(p); + module.Deterministic() = false; + + // Test the Forward function. + arma::mat output; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(output) - (1 - p))), 0.05); + + // Test the Backward function. + arma::mat delta; + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(delta) - (1 - p))), 0.05); + + // Test the Forward function. + module.Deterministic() = true; + module.Rescale() = false; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(input), arma::accu(output)); + + // Test the Forward function. + module.Rescale() = true; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu(input) * scale, arma::accu(output), 1e-3); +} + +/** + * Simple linear module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearLayerTest) +{ + arma::mat output, input, delta; + Linear<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu( + module.Parameters().submat(100, 0, module.Parameters().n_elem - 1, 0)), + arma::accu(output), 1e-3); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear module test. 
+ */ +BOOST_AUTO_TEST_CASE(JacobianLinearLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + Linear<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearNoBiasLayerTest) +{ + arma::mat output, input, delta; + LinearNoBias<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(0, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearNoBiasLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LinearNoBias<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian negative log likelihood module test. + */ +BOOST_AUTO_TEST_CASE(JacobianNegativeLogLikelihoodLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + NegativeLogLikelihood<> module; + const size_t inputElements = math::RandInt(5, 100); + arma::mat input; + RandomInitialization init(0, 1); + init.Initialize(input, inputElements, 1); + + arma::mat target(1, 1); + target(0) = math::RandInt(1, inputElements - 1); + + double error = JacobianPerformanceTest(module, input, target); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian LeakyReLU module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLeakyReLULayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LeakyReLU<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian MultiplyConstant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianMultiplyConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + MultiplyConstant<> module(3.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian HardTanH module test. + */ +BOOST_AUTO_TEST_CASE(JacobianHardTanHLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + HardTanH<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple select module test. + */ +BOOST_AUTO_TEST_CASE(SimpleSelectLayerTest) +{ + arma::mat outputA, outputB, input, delta; + + input = arma::ones(10, 5); + for (size_t i = 0; i < input.n_cols; ++i) + { + input.col(i) *= i; + } + + // Test the Forward function. + Select<> moduleA(3); + moduleA.Forward(std::move(input), std::move(outputA)); + BOOST_REQUIRE_EQUAL(30, arma::accu(outputA)); + + // Test the Forward function. 
+ Select<> moduleB(3, 5); + moduleB.Forward(std::move(input), std::move(outputB)); + BOOST_REQUIRE_EQUAL(15, arma::accu(outputB)); + + // Test the Backward function. + moduleA.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(30, arma::accu(delta)); + + // Test the Backward function. + moduleB.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(15, arma::accu(delta)); +} + +/** + * Simple join module test. + */ +BOOST_AUTO_TEST_CASE(SimpleJoinLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 5); + + // Test the Forward function. + Join<> module; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(50, arma::accu(output)); + + bool b = output.n_rows == 1 || output.n_cols == 1; + BOOST_REQUIRE_EQUAL(b, true); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(50, arma::accu(delta)); + + b = delta.n_rows == input.n_rows && input.n_cols; + BOOST_REQUIRE_EQUAL(b, true); +} + +/** + * Simple add merge module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddMergeLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 1); + + for (size_t i = 0; i < 5; ++i) + { + AddMerge<> module; + const size_t numMergeModules = math::RandInt(2, 10); + for (size_t m = 0; m < numMergeModules; ++m) + { + IdentityLayer<> identityLayer; + identityLayer.Forward(std::move(input), + std::move(identityLayer.OutputParameter())); + + module.Add(identityLayer); + } + + // Test the Forward function. + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(10 * numMergeModules, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + } +} + +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file From 00c43d95ef5f7b5e2e6f21727d147be1fa2a64ca Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 14 Dec 2016 22:46:38 +0100 Subject: [PATCH 37/82] Split layer modules into definition and implementation. 
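In outline, this change moves each ann layer to the declaration/implementation
split used throughout mlpack: the class declaration stays in <name>.hpp, the
template definitions move to <name>_impl.hpp, and the header pulls the
implementation back in at the bottom. A minimal sketch of the resulting layout,
using a hypothetical layer Foo rather than any of the files touched here:

// foo.hpp -- declaration only.
#ifndef MLPACK_METHODS_ANN_LAYER_FOO_HPP
#define MLPACK_METHODS_ANN_LAYER_FOO_HPP

#include <mlpack/core.hpp>

namespace mlpack {
namespace ann {

template<typename InputDataType = arma::mat,
         typename OutputDataType = arma::mat>
class Foo
{
 public:
  Foo(const size_t outSize);

  template<typename eT>
  void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output);

 private:
  size_t outSize;
};

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "foo_impl.hpp"

#endif

// foo_impl.hpp -- template definitions, only ever included by foo.hpp.
#ifndef MLPACK_METHODS_ANN_LAYER_FOO_IMPL_HPP
#define MLPACK_METHODS_ANN_LAYER_FOO_IMPL_HPP

// In case it hasn't yet been included.
#include "foo.hpp"

namespace mlpack {
namespace ann {

template<typename InputDataType, typename OutputDataType>
Foo<InputDataType, OutputDataType>::Foo(const size_t outSize) :
    outSize(outSize)
{
  // Nothing else to do here.
}

template<typename InputDataType, typename OutputDataType>
template<typename eT>
void Foo<InputDataType, OutputDataType>::Forward(
    const arma::Mat<eT>&& input, arma::Mat<eT>&& output)
{
  // Placeholder behaviour for the sketch: pass the input through unchanged.
  output = input;
}

} // namespace ann
} // namespace mlpack

#endif

The split keeps the public interface readable while the library stays
header-only, since the header still includes the implementation at the end.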
--- src/mlpack/methods/ann/CMakeLists.txt | 6 - src/mlpack/methods/ann/layer/CMakeLists.txt | 77 +++- src/mlpack/methods/ann/layer/add.hpp | 28 +- src/mlpack/methods/ann/layer/add_impl.hpp | 68 ++++ src/mlpack/methods/ann/layer/add_merge.hpp | 28 +- .../methods/ann/layer/add_merge_impl.hpp | 61 ++++ src/mlpack/methods/ann/layer/concat.hpp | 125 +------ src/mlpack/methods/ann/layer/concat_impl.hpp | 159 +++++++++ .../methods/ann/layer/concat_performance.hpp | 58 +--- .../ann/layer/concat_performance_impl.hpp | 118 +++++++ src/mlpack/methods/ann/layer/constant.hpp | 37 +- .../methods/ann/layer/constant_impl.hpp | 65 ++++ src/mlpack/methods/ann/layer/convolution.hpp | 198 +---------- .../methods/ann/layer/convolution_impl.hpp | 328 ++++++++++++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 84 +---- .../methods/ann/layer/dropconnect_impl.hpp | 118 +++++++ src/mlpack/methods/ann/layer/dropout.hpp | 48 +-- src/mlpack/methods/ann/layer/dropout_impl.hpp | 84 +++++ src/mlpack/methods/ann/layer/glimpse.hpp | 190 +--------- src/mlpack/methods/ann/layer/glimpse_impl.hpp | 224 ++++++++++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 93 +---- .../methods/ann/layer/hard_tanh_impl.hpp | 72 ++++ src/mlpack/methods/ann/layer/join.hpp | 26 +- src/mlpack/methods/ann/layer/join_impl.hpp | 60 ++++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 29 +- .../methods/ann/layer/leaky_relu_impl.hpp | 60 ++++ src/mlpack/methods/ann/layer/linear.hpp | 45 +-- src/mlpack/methods/ann/layer/linear_impl.hpp | 87 +++++ .../methods/ann/layer/linear_no_bias.hpp | 40 +-- .../methods/ann/layer/linear_no_bias_impl.hpp | 83 +++++ src/mlpack/methods/ann/layer/log_softmax.hpp | 53 +-- .../methods/ann/layer/log_softmax_impl.hpp | 85 +++++ src/mlpack/methods/ann/layer/lookup.hpp | 33 +- src/mlpack/methods/ann/layer/lookup_impl.hpp | 74 ++++ src/mlpack/methods/ann/layer/lstm.hpp | 297 +--------------- src/mlpack/methods/ann/layer/lstm_impl.hpp | 273 +++++++++++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 121 +------ .../methods/ann/layer/max_pooling_impl.hpp | 149 ++++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 94 +---- .../methods/ann/layer/mean_pooling_impl.hpp | 126 +++++++ .../methods/ann/layer/mean_squared_error.hpp | 28 +- .../ann/layer/mean_squared_error_impl.hpp | 57 +++ .../methods/ann/layer/multiply_constant.hpp | 27 +- .../ann/layer/multiply_constant_impl.hpp | 51 +++ .../ann/layer/negative_log_likelihood.hpp | 44 +-- .../layer/negative_log_likelihood_impl.hpp | 76 ++++ src/mlpack/methods/ann/layer/recurrent.hpp | 159 +-------- .../methods/ann/layer/recurrent_attention.hpp | 161 +-------- .../ann/layer/recurrent_attention_impl.hpp | 204 +++++++++++ .../methods/ann/layer/recurrent_impl.hpp | 206 +++++++++++ .../methods/ann/layer/reinforce_normal.hpp | 47 +-- .../ann/layer/reinforce_normal_impl.hpp | 69 ++++ src/mlpack/methods/ann/layer/select.hpp | 46 +-- src/mlpack/methods/ann/layer/select_impl.hpp | 75 ++++ src/mlpack/methods/ann/layer/sequential.hpp | 117 +------ .../methods/ann/layer/sequential_impl.hpp | 154 ++++++++ .../methods/ann/layer/vr_class_reward.hpp | 74 +--- .../ann/layer/vr_class_reward_impl.hpp | 101 ++++++ 58 files changed, 3658 insertions(+), 2042 deletions(-) create mode 100644 src/mlpack/methods/ann/layer/add_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/add_merge_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_performance_impl.hpp create mode 100644 
src/mlpack/methods/ann/layer/constant_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/convolution_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/dropconnect_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/dropout_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/glimpse_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/hard_tanh_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/join_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/leaky_relu_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/log_softmax_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/lookup_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/lstm_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/max_pooling_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_pooling_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/multiply_constant_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/select_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/sequential_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt index 6ff7011404b..46174b88578 100644 --- a/src/mlpack/methods/ann/CMakeLists.txt +++ b/src/mlpack/methods/ann/CMakeLists.txt @@ -1,12 +1,8 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. set(SOURCES - cnn.hpp - cnn_impl.hpp ffn.hpp ffn_impl.hpp - network_util.hpp - network_util_impl.hpp rnn.hpp rnn_impl.hpp ) @@ -23,6 +19,4 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) add_subdirectory(activation_functions) add_subdirectory(init_rules) add_subdirectory(layer) -add_subdirectory(performance_functions) -add_subdirectory(pooling_rules) add_subdirectory(convolution_rules) diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt index b639cdad785..4211aeaaaa7 100644 --- a/src/mlpack/methods/ann/layer/CMakeLists.txt +++ b/src/mlpack/methods/ann/layer/CMakeLists.txt @@ -1,23 +1,68 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. 
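# Each layer contributes a declaration/implementation pair, so a hypothetical
# new layer "foo" would list both foo.hpp and foo_impl.hpp below.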
set(SOURCES - layer_traits.hpp - binary_classification_layer.hpp + add.hpp + add_impl.hpp + add_merge.hpp + add_merge_impl.hpp base_layer.hpp - empty_layer.hpp - bias_layer.hpp - dropout_layer.hpp - dropconnect_layer.hpp - hard_tanh_layer.hpp - leaky_relu_layer.hpp - linear_layer.hpp - conv_layer.hpp - pooling_layer.hpp - recurrent_layer.hpp - lstm_layer.hpp - sparse_bias_layer.hpp - sparse_input_layer.hpp - sparse_output_layer.hpp + concat.hpp + concat_impl.hpp + concat_performance.hpp + concat_performance_impl.hpp + constant.hpp + constant_impl.hpp + convolution.hpp + convolution_impl.hpp + dropconnect.hpp + dropconnect_impl.hpp + dropout.hpp + dropout_impl.hpp + glimpse.hpp + glimpse_impl.hpp + hard_tanh.hpp + hard_tanh_impl.hpp + join.hpp + join_impl.hpp + layer.hpp + layer_traits.hpp + layer_visitor.hpp + layer_visitor_impl.hpp + layer_types.hpp + leaky_relu.hpp + leaky_relu_impl.hpp + linear.hpp + linear_impl.hpp + linear_no_bias.hpp + linear_no_bias_impl.hpp + log_softmax.hpp + log_softmax_impl.hpp + lookup.hpp + lookup_impl.hpp + lstm.hpp + lstm_impl.hpp + max_pooling.hpp + max_pooling_impl.hpp + mean_pooling.hpp + mean_pooling_impl.hpp + mean_squared_error.hpp + mean_squared_error_impl.hpp + multiply_constant.hpp + multiply_constant_impl.hpp + negative_log_likelihood.hpp + negative_log_likelihood_impl.hpp + recurrent.hpp + recurrent_impl.hpp + recurrent_attention.hpp + recurrent_attention_impl.hpp + reinforce_normal.hpp + reinforce_normal_impl.hpp + select.hpp + select_impl.hpp + sequential.hpp + sequential_impl.hpp + vr_class_reward_impl.hpp + vr_class_reward_impl.hpp ) # Add directory name to sources. diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp index be8fc60e31b..1afb1121300 100644 --- a/src/mlpack/methods/ann/layer/add.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -39,10 +39,7 @@ class Add * * @param outSize The number of output units. */ - Add(const size_t outSize) : outSize(outSize) - { - weights.set_size(outSize, 1); - } + Add(const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -52,10 +49,7 @@ class Add * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = input + weights; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -69,10 +63,7 @@ class Add template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -84,10 +75,7 @@ class Add template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -118,10 +106,7 @@ class Add * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of output units. @@ -146,4 +131,7 @@ class Add } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "add_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/add_impl.hpp b/src/mlpack/methods/ann/layer/add_impl.hpp new file mode 100644 index 00000000000..8e87078dcef --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_impl.hpp @@ -0,0 +1,68 @@ +/** + * @file add_impl.hpp + * @author Marcus Edel + * + * Implementation of the Add class that applies a bias term to the incoming + * data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP + +// In case it hasn't yet been included. +#include "add.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Add::Add(const size_t outSize) : + outSize(outSize) +{ + weights.set_size(outSize, 1); +} + +template +template +void Add::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = input + weights; +} + +template +template +void Add::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Add::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = error; +} + +template +template +void Add::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp index 7a01792d250..222c3ef1a2f 100644 --- a/src/mlpack/methods/ann/layer/add_merge.hpp +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -38,10 +38,7 @@ class AddMerge { public: //! Create the AddMerge object. - AddMerge() - { - // Nothing to do here. - } + AddMerge(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -51,15 +48,7 @@ class AddMerge * @param output Resulting output activation. */ template - void Forward(const InputType&& /* input */, OutputType&& output) - { - output = boost::apply_visitor(outputParameterVisitor, network.front()); - - for (size_t i = 1; i < network.size(); ++i) - { - output += boost::apply_visitor(outputParameterVisitor, network[i]); - } - } + void Forward(const InputType&& /* input */, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -73,10 +62,7 @@ class AddMerge template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Add a new module to the model. @@ -120,10 +106,7 @@ class AddMerge * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(network, "network"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: std::vector network; @@ -150,4 +133,7 @@ class AddMerge } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "add_merge_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp new file mode 100644 index 00000000000..0ef62c2d0ac --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -0,0 +1,61 @@ +/** + * @file add_merge_impl.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP + +// In case it hasn't yet been included. +#include "add_merge_impl.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +AddMerge::AddMerge() +{ + // Nothing to do here. +} + +template +template +void AddMerge::Forward( + const InputType&& /* input */, OutputType&& output) +{ + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } +} + +template +template +void AddMerge::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = gy; +} + + +template +template +void AddMerge::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(network, "network"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp index bd836c7e26b..808309f7122 100644 --- a/src/mlpack/methods/ann/layer/concat.hpp +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -45,12 +45,7 @@ class Concat * @param model Expose all network modules. * @param same Merge the error in the backward pass. */ - Concat(const bool model = true, const bool same = true) : - model(model), - same(same) - { - parameters.set_size(0, 0); - } + Concat(const bool model = true, const bool same = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,42 +55,7 @@ class Concat * @param output Resulting output activation. 
*/ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - size_t outSize = 0; - - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (boost::apply_visitor( - outputParameterVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - } - } - - output = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - if (elements < outSize) - { - output.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(outputParameterVisitor, network[i])); - } - else - { - output.col(i) = arma::vectorise(boost::apply_visitor( - outputParameterVisitor, network[i])); - } - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -109,67 +69,7 @@ class Concat template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - size_t outSize = 0; - size_t elements = 0; - - for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) - { - elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - arma::mat delta; - if (gy.n_cols == 1) - { - delta = gy.submat(j, 0, j + elements - 1, 0); - } - else - { - delta = gy.submat(0, i, elements - 1, i); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(delta), std::move( - boost::apply_visitor(deltaVisitor, network[i]))), network[i]); - - if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - } - - if (same) - { - if (i == 0) - { - g = std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g += std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - - if (!same) - { - g = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - if (elements < outSize) - { - g.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g.col(i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -181,14 +81,7 @@ class Concat template void Gradient(arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(error)), network[i]); - } - } + arma::Mat&& /* gradient */); /* * Add a new module to the model. @@ -240,6 +133,12 @@ class Concat //! Modify the gradient. arma::mat& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -278,8 +177,10 @@ class Concat arma::mat gradient; }; // class Concat - } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "concat_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/concat_impl.hpp b/src/mlpack/methods/ann/layer/concat_impl.hpp new file mode 100644 index 00000000000..44efe499231 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_impl.hpp @@ -0,0 +1,159 @@ +/** + * @file concat_impl.hpp + * @author Marcus Edel + * + * Implementation of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Concat::Concat( + const bool model, const bool same) : model(model), same(same) +{ + parameters.set_size(0, 0); +} + +template +template +void Concat::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } +} + +template +template +void Concat::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } +} + +template +template +void Concat::Gradient( + arma::Mat&& /* input */, + 
arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } +} + +template +template +void Concat::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp index 0f03cbc72e1..39b7e10f126 100644 --- a/src/mlpack/methods/ann/layer/concat_performance.hpp +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -47,12 +47,7 @@ class ConcatPerformance * @param outputLayer Output layer used to evaluate the network. */ ConcatPerformance(const size_t inSize, - OutputLayerType&& outputLayer = OutputLayerType()) : - inSize(inSize), - outputLayer(std::move(outputLayer)) - { - /* Nothing to do here. */ - } + OutputLayerType&& outputLayer = OutputLayerType()); /* * Computes the Negative log likelihood. @@ -61,20 +56,7 @@ class ConcatPerformance * @param output Resulting output activation. */ template - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - const size_t elements = input.n_elem / inSize; - - double output = 0; - for (size_t i = 0; i < input.n_elem; i+= elements) - { - arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); - output += outputLayer.Forward(std::move(subInput), std::move(target)); - } - - return output; - } - + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log * likelihood layer expectes that the input contains log-probabilities for @@ -89,28 +71,7 @@ class ConcatPerformance template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - const size_t elements = input.n_elem / inSize; - - arma::mat subInput = input.submat(0, 0, elements - 1, 0); - arma::mat subOutput; - - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output = arma::zeros(subOutput.n_elem, inSize); - output.col(0) = subOutput; - - for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) - { - subInput = input.submat(i, 0, i + elements - 1, 0); - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output.col(j) = subOutput; - } - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -127,6 +88,12 @@ class ConcatPerformance //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored number of inputs. size_t inSize; @@ -144,7 +111,10 @@ class ConcatPerformance OutputDataType outputParameter; }; // class ConcatPerformance -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "concat_performance_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/concat_performance_impl.hpp b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp new file mode 100644 index 00000000000..f014ac33f01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file concat_performance_impl.hpp + * @author Marcus Edel + * + * Implementation of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat_performance.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::ConcatPerformance(const size_t inSize, OutputLayerType&& outputLayer) : + inSize(inSize), + outputLayer(std::move(outputLayer)) +{ + // Nothing to do here. +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +double ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& target) +{ + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Serialize(Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp index 58816acfbd3..b24b44aa802 100644 --- a/src/mlpack/methods/ann/layer/constant.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -41,13 +41,7 @@ class Constant * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. 
*/ - Constant(const size_t outSize, const double scalar) : - inSize(0), - outSize(outSize) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } + Constant(const size_t outSize, const double scalar); /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -57,15 +51,7 @@ class Constant * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - if (inSize == 0) - { - inSize = input.n_elem; - } - - output = constantOutput; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -76,10 +62,9 @@ class Constant * @param g The calculated gradient. */ template - void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) - { - g = arma::zeros(inSize, 1); - } + void Backward(const DataType&& /* input */, + DataType&& /* gy */, + DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -100,10 +85,7 @@ class Constant * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -125,7 +107,10 @@ class Constant OutputDataType outputParameter; }; // class ConstantLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "constant_impl.hpp" #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/constant_impl.hpp b/src/mlpack/methods/ann/layer/constant_impl.hpp new file mode 100644 index 00000000000..09e0624c6e0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant_impl.hpp @@ -0,0 +1,65 @@ +/** + * @file constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Constant::Constant( + const size_t outSize, + const double scalar) : + inSize(0), + outSize(outSize) +{ + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); +} + +template +template +void Constant::Forward( + const InputType&& input, OutputType&& output) +{ + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; +} + +template +template +void Constant::Backward( + const DataType&& /* input */, DataType&& /* gy */, DataType&& g) +{ + g = arma::zeros(inSize, 1); +} + +template +template +void Constant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(constantOutput, "constantOutput"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index 03477c6ec1e..a7f647ed226 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -47,10 +47,7 @@ class Convolution { public: //! Create the Convolution object. - Convolution() - { - /* Nothing to do here. */ - } + Convolution(); /** * Create the Convolution object using the specified number of input maps, @@ -76,33 +73,12 @@ class Convolution const size_t padW = 0, const size_t padH = 0, const size_t inputWidth = 0, - const size_t inputHeight = 0) : - inSize(inSize), - outSize(outSize), - kW(kW), - kH(kH), - dW(dW), - dH(dH), - padW(padW), - padH(padH), - inputWidth(inputWidth), - inputHeight(inputHeight), - outputWidth(0), - outputHeight(0) - { - weights.set_size((outSize * inSize * kW * kH) + outSize, 1); - } + const size_t inputHeight = 0); /* * Set the weight and bias term. */ - void Reset() - { - weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -112,48 +88,7 @@ class Convolution * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); - - if (padW != 0 || padH != 0) - { - Pad(inputTemp, padW, padH, inputPaddedTemp); - } - - size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); - size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); - - outputTemp = arma::zeros >(wConv, hConv, outSize); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat convOutput; - - if (padW != 0 || padH != 0) - { - ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - else - { - ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - - outputTemp.slice(outMap) += convOutput; - } - - outputTemp.slice(outMap) += bias(outMap); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -167,40 +102,7 @@ class Convolution template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), - outputWidth, outputHeight, outSize); - gTemp = arma::zeros >(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weight.slice(outMapIdx), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(mappedError.slice(outMap), - rotatedFilter, output, dW, dH); - - if (padW != 0 || padH != 0) - { - gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, - rotatedFilter.n_cols / 2, - rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, - rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); - } - else - { - gTemp.slice(inMap) += output; - } - } - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -212,74 +114,7 @@ class Convolution template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& gradient) - { - arma::cube mappedError; - if (padW != 0 && padH != 0) - { - mappedError = arma::cube(error.memptr(), outputWidth / padW, - outputHeight / padH, outSize); - } - else - { - mappedError = arma::cube(error.memptr(), outputWidth, - outputHeight, outSize); - } - - gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, - weight.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, - s += outSize) - { - arma::Cube inputSlices; - if (padW != 0 || padH != 0) - { - inputSlices = inputPaddedTemp.slices(inMap, inMap); - } - else - { - inputSlices = inputTemp.slices(inMap, inMap); - } - - arma::Cube deltaSlices = mappedError.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, - output, dW, dH); - - if ((padW != 0 || padH != 0) && - (gradientTemp.n_rows < output.n_rows && - gradientTemp.n_cols < output.n_cols)) - { - for (size_t i = 0; i < output.n_slices; i++) - { - arma::mat subOutput = output.slice(i); - - gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, - subOutput.n_cols / 2, - subOutput.n_rows / 2 + gradientTemp.n_rows - 1, - subOutput.n_cols / 2 + gradientTemp.n_cols - 1); - } - } - else - { - for (size_t i = 0; i < output.n_slices; i++) - { - gradientTemp.slice(s) += output.slice(i); - } - } - } - - gradient.submat(weight.n_elem + outMap, 0, - weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( - outMap, outMap)); - } - - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -330,22 +165,7 @@ class Convolution * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - ar & data::CreateNVP(padW, "padW"); - ar & data::CreateNVP(padH, "padH"); - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inputWidth, "inputWidth"); - ar & data::CreateNVP(inputHeight, "inputHeight"); - ar & data::CreateNVP(outputWidth, "outputWidth"); - ar & data::CreateNVP(outputHeight, "outputHeight"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -515,8 +335,10 @@ class Convolution OutputDataType outputParameter; }; // class Convolution - } // namespace ann } // namespace mlpack +// Include implementation. +#include "convolution_impl.hpp" + #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution_impl.hpp b/src/mlpack/methods/ann/layer/convolution_impl.hpp new file mode 100644 index 00000000000..99164d18166 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution_impl.hpp @@ -0,0 +1,328 @@ +/** + * @file convolution_impl.hpp + * @author Marcus Edel + * + * Implementation of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "convolution.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution() +{ + // Nothing to do here. +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution( + const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const size_t padW, + const size_t padH, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) +{ + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Reset() +{ + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + 
BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + 
ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); +} + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp index 6180c812572..f74c8e6aaac 100644 --- a/src/mlpack/methods/ann/layer/dropconnect.hpp +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -63,10 +63,7 @@ class DropConnect { public: //! Create the DropConnect object. - DropConnect() - { - /* Nothing to do here. */ - } + DropConnect(); /** * Creates the DropConnect Layer as a Linear Object that takes input size, @@ -78,18 +75,9 @@ class DropConnect */ DropConnect(const size_t inSize, const size_t outSize, - const double ratio = 0.5) : - ratio(ratio), - scale(1.0 / (1 - ratio)), - baseLayer(new Linear(inSize, outSize)) - { - network.push_back(baseLayer); - } + const double ratio = 0.5); - ~DropConnect() - { - boost::apply_visitor(DeleteVisitor(), baseLayer); - } + ~DropConnect(); /** * Ordinary feed forward pass of the DropConnect layer. @@ -98,42 +86,7 @@ class DropConnect * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - } - else - { - // Save weights for denoising. - boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); - - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(denoise.n_rows, denoise.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), - baseLayer); - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - - output = output * scale; - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the DropConnect layer. @@ -145,16 +98,7 @@ class DropConnect template void Backward(arma::Mat&& input, arma::Mat&& gy, - arma::Mat&& g) - { - boost::apply_visitor( - BackwardVisitor( - std::move(input), - std::move(gy), - std::move(g) - ), - baseLayer); - } + arma::Mat&& g); /** * Calculate the gradient using the output delta and the input activation. @@ -166,14 +110,7 @@ class DropConnect template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - baseLayer); - - // Denoise the weights. - boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); - } + arma::Mat&& /* gradient */); //! Get the model modules. std::vector& Model() { return network; } @@ -219,6 +156,12 @@ class DropConnect scale = 1.0 / (1.0 - ratio); } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! The probability of setting a value to zero. 
double ratio; @@ -260,4 +203,7 @@ class DropConnect } // namespace ann } // namespace mlpack +// Include implementation. +#include "dropconnect_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_impl.hpp b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp new file mode 100644 index 00000000000..294952b9bcf --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file dropconnect_impl.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Implementation of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropconnect.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +DropConnect::DropConnect() +{ + // Nothing to do here. +} + +template +DropConnect::DropConnect( + const size_t inSize, + const size_t outSize, + const double ratio) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) +{ + network.push_back(baseLayer); +} + +template +DropConnect::~DropConnect() +{ + boost::apply_visitor(DeleteVisitor(), baseLayer); +} + +template +template +void DropConnect::Forward( + arma::Mat&& input, + arma::Mat&& output) +{ + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + + output = output * scale; + } +} + +template +template +void DropConnect::Backward( + arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy), + std::move(g)), baseLayer); +} + +template +template +void DropConnect::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. 
+ boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); +} + +template +template +void DropConnect::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(scale, "scale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index b5bfa9a3e2c..4371f21dad3 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -61,14 +61,7 @@ class Dropout * @param ratio The probability of setting a value to zero. * @param rescale If true the input is rescaled when deterministic is False. */ - Dropout(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } + Dropout(const double ratio = 0.5, const bool rescale = true); /** * Ordinary feed forward pass of the dropout layer. @@ -77,30 +70,7 @@ class Dropout * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the dropout layer. @@ -112,10 +82,7 @@ class Dropout template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy % mask * scale; - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -156,11 +123,7 @@ class Dropout * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored delta object. @@ -191,4 +154,7 @@ class Dropout } // namespace ann } // namespace mlpack +// Include implementation. +#include "dropout_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropout_impl.hpp b/src/mlpack/methods/ann/layer/dropout_impl.hpp new file mode 100644 index 00000000000..b805a94d464 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout_impl.hpp @@ -0,0 +1,84 @@ +/** + * @file dropout_impl.hpp + * @author Marcus Edel + * + * Implementation of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropout.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Dropout::Dropout( + const double ratio, const bool rescale) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) +{ + // Nothing to do here. +} + +template +template +void Dropout::Forward( + const arma::Mat&& input, + arma::Mat&& output) +{ + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } +} + +template +template +void Dropout::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy % mask * scale; +} + +template +template +void Dropout::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp index 37db36b1edc..e007fae30b9 100644 --- a/src/mlpack/methods/ann/layer/glimpse.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -100,16 +100,7 @@ class Glimpse const size_t depth = 3, const size_t scale = 2, const size_t inputWidth = 0, - const size_t inputHeight = 0) : - inSize(inSize), - size(size), - depth(depth), - scale(scale), - inputWidth(inputWidth), - inputHeight(inputHeight) - { - // Nothing to do here. - } + const size_t inputHeight = 0); /** * Ordinary feed forward pass of the glimpse layer. @@ -118,86 +109,7 @@ class Glimpse * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); - outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); - - location = input.submat(0, 1, 1, 1); - - if (!deterministic) - { - locationParameter.push_back(location); - } - - inputDepth = inputTemp.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, - inputTemp.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, - padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, - inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - outputTemp.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); - } - else - { - ReSampling(poolingInput, outputTemp.slice(j)); - } - } - } - } - } - - for (size_t i = 0; i < outputTemp.n_slices; ++i) - { - outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the glimpse layer. @@ -209,90 +121,7 @@ class Glimpse template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(outputWidth, - outputHeight, 1); - - location = locationParameter.back(); - locationParameter.pop_back(); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - mappedError.slice(s + i) = arma::Mat(gy.memptr(), - outputWidth, outputHeight); - } - } - - gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, - inputTemp.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + - padSize * 2, inputTemp.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputTemp.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - gTemp += inputPadded.tube(padSize, padSize, padSize + - inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); - } - } - - Transform(gTemp); - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -341,6 +170,12 @@ class Glimpse //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: /* * Transform the given input by changing rows to columns. @@ -586,7 +421,10 @@ class Glimpse bool deterministic; }; // class GlimpseLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "glimpse_impl.hpp" #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/glimpse_impl.hpp b/src/mlpack/methods/ann/layer/glimpse_impl.hpp new file mode 100644 index 00000000000..40ec37c932d --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse_impl.hpp @@ -0,0 +1,224 @@ +/** + * @file glimpse_impl.hpp + * @author Marcus Edel + * + * Implementation of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP + +// In case it hasn't yet been included. 
+#include "glimpse.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Glimpse::Glimpse( + const size_t inSize, + const size_t size, + const size_t depth, + const size_t scale, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) +{ + // Nothing to do here. +} + +template +template +void Glimpse::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template +template +void Glimpse::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void Glimpse::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(size, "size"); + ar & data::CreateNVP(depth, "depth"); + ar & data::CreateNVP(scale, "scale"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(location, "location"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index 76b19f964af..88c8ad2d853 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -57,11 +57,7 @@ class HardTanH * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanH(const double maxValue = 1, const double minValue = -1) : - maxValue(maxValue), minValue(minValue) - { - // Nothing to do here. - } + HardTanH(const double maxValue = 1, const double minValue = -1); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -71,15 +67,7 @@ class HardTanH * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - output = input; - for (size_t i = 0; i < input.n_elem; i++) - { - output(i) = (output(i) > maxValue ? 
maxValue : - (output(i) < minValue ? minValue : output(i))); - } - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -93,17 +81,7 @@ class HardTanH template void Backward(const DataType&& input, DataType&& gy, - DataType&& g) - { - g = gy; - for (size_t i = 0; i < input.n_elem; i++) - { - if (input(i) < minValue || input(i) > maxValue) - { - g(i) = 0; - } - } - } + DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -134,69 +112,9 @@ class HardTanH * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(maxValue, "maxValue"); - ar & data::CreateNVP(minValue, "minValue"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: - /** - * Computes the HardTanH function. - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - if (x > maxValue) - return maxValue; - else if (x < minValue) - return minValue; - return x; - } - - /** - * Computes the HardTanH function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = x; - y.transform( [&](eT val) { return std::min( - std::max( val, minValue ), maxValue ); } ); - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x > maxValue || x < minValue) ? 0 : 1; - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - template - void Deriv(const InputType&& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - y(i) = Deriv(x(i)); - } - //! Locally-stored delta object. OutputDataType delta; @@ -216,4 +134,7 @@ class HardTanH } // namespace ann } // namespace mlpack +// Include implementation. +#include "hard_tanh_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp new file mode 100644 index 00000000000..55b92812072 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp @@ -0,0 +1,72 @@ +/** + * @file hard_tanh_impl.hpp + * @author Dhawal Arora + * + * Implementation and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP + +// In case it hasn't yet been included. +#include "hard_tanh.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +HardTanH::HardTanH( + const double maxValue, + const double minValue) : + maxValue(maxValue), + minValue(minValue) +{ + // Nothing to do here. +} + +template +template +void HardTanH::Forward( + const InputType&& input, OutputType&& output) +{ + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? 
minValue : output(i))); + } +} + +template +template +void HardTanH::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + g = gy; + for (size_t i = 0; i < input.n_elem; i++) + { + if (input(i) < minValue || input(i) > maxValue) + { + g(i) = 0; + } + } +} + +template +template +void HardTanH::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(maxValue, "maxValue"); + ar & data::CreateNVP(minValue, "minValue"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp index 2933ef181ef..bda31d787ab 100644 --- a/src/mlpack/methods/ann/layer/join.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -34,10 +34,7 @@ class Join { public: //! Create the Join object. - Join() - { - // Nothing to do here. - } + Join(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -47,12 +44,7 @@ class Join * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - inSizeRows = input.n_rows; - inSizeCols = input.n_cols; - output = arma::vectorise(input); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -66,10 +58,7 @@ class Join template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -90,11 +79,7 @@ class Join * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSizeRows, "inSizeRows"); - ar & data::CreateNVP(inSizeCols, "inSizeCols"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input rows. @@ -116,4 +101,7 @@ class Join } // namespace ann } // namespace mlpack +// Include implementation. +#include "join_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/join_impl.hpp b/src/mlpack/methods/ann/layer/join_impl.hpp new file mode 100644 index 00000000000..47584324871 --- /dev/null +++ b/src/mlpack/methods/ann/layer/join_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file join_impl.hpp + * @author Marcus Edel + * + * Implementation of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP + +// In case it hasn't yet been included. +#include "join.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Join::Join() +{ + // Nothing to do here. 
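// [Editor's sketch -- not part of this patch.] What the Join module does in
// isolation: Forward remembers the input shape and flattens it, Backward
// reinterprets the flat gradient with the remembered shape without copying.
#include <armadillo>

int main()
{
  arma::mat input(3, 4, arma::fill::randu);

  // Forward: store the shape, then flatten column by column.
  const arma::uword inSizeRows = input.n_rows;
  const arma::uword inSizeCols = input.n_cols;
  arma::vec output = arma::vectorise(input);

  // Backward: view the flat gradient as a matrix of the original shape.
  arma::vec gy(output.n_elem, arma::fill::randu);
  arma::mat g(gy.memptr(), inSizeRows, inSizeCols, false, false);

  return 0;
}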
+} + +template +template +void Join::Forward( + const InputType&& input, OutputType&& output) +{ + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); +} + +template +template +void Join::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); +} + +template +template +void Join::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp index 8e69712b7f9..d8160f1a50b 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -2,8 +2,8 @@ * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLU layer first introduced - * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * Definition of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * * mlpack is free software; you may redistribute it and/or modify it under the @@ -51,10 +51,7 @@ class LeakyReLU * * @param alpha Non zero gradient */ - LeakyReLU(const double alpha = 0.03) : alpha(alpha) - { - // Nothing to do here. - } + LeakyReLU(const double alpha = 0.03); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,10 +61,7 @@ class LeakyReLU * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - Fn(input, output); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,12 +73,7 @@ class LeakyReLU * @param g The calculated gradient. */ template - void Backward(const DataType&& input, DataType&& gy, DataType&& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } + void Backward(const DataType&& input, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -110,10 +99,7 @@ class LeakyReLU * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(alpha, "alpha"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: /** @@ -185,4 +171,7 @@ class LeakyReLU } // namespace ann } // namespace mlpack +// Include implementation. +#include "leaky_relu_impl.hpp" + #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp b/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp new file mode 100644 index 00000000000..24bc13d9769 --- /dev/null +++ b/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file leaky_relu_impl.hpp + * @author Dhawal Arora + * + * Implementation of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
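// [Editor's sketch -- not part of this patch.] The mapping computed by the
// LeakyReLU Forward and Backward passes (via its Fn()/Deriv() helpers), written
// with plain Armadillo; the function names are illustrative, not mlpack API.
#include <armadillo>

// f(x) = x for x > 0 and alpha * x otherwise.
arma::mat LeakyReLUForwardSketch(const arma::mat& x, const double alpha = 0.03)
{
  return arma::max(x, alpha * x);
}

// f'(x) = 1 for x > 0 and alpha otherwise; Backward is then gy % f'(x).
arma::mat LeakyReLUBackwardSketch(const arma::mat& x,
                                  const arma::mat& gy,
                                  const double alpha = 0.03)
{
  arma::mat derivative = arma::ones<arma::mat>(arma::size(x));
  derivative.elem(arma::find(x < 0)).fill(alpha);
  return gy % derivative;
}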
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_IMPL_HPP + +// In case it hasn't yet been included. +#include "leaky_relu.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LeakyReLU::LeakyReLU( + const double alpha) : alpha(alpha) +{ + // Nothing to do here. +} + +template +template +void LeakyReLU::Forward( + const InputType&& input, OutputType&& output) +{ + Fn(input, output); +} + +template +template +void LeakyReLU::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; +} + +template +template +void LeakyReLU::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(alpha, "alpha"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear.hpp b/src/mlpack/methods/ann/layer/linear.hpp index d7f8e6f9982..0f6be78b647 100644 --- a/src/mlpack/methods/ann/layer/linear.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -37,7 +37,7 @@ class Linear { public: //! Create the Linear object. - Linear() {} + Linear(); /** * Create the Linear layer object using the specified number of units. @@ -45,22 +45,12 @@ class Linear * @param inSize The number of input units. * @param outSize The number of output units. */ - Linear(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize + outSize, 1); - } + Linear(const size_t inSize, const size_t outSize);; /* * Reset the layer parameter. */ - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -70,10 +60,7 @@ class Linear * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = (weight * input) + bias; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -85,12 +72,9 @@ class Linear * @param g The calculated gradient. */ template - void Backward(const arma::Mat&& /* unused */, + void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = weight.t() * gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -102,12 +86,7 @@ class Linear template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -138,12 +117,7 @@ class Linear * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -177,4 +151,7 @@ class Linear } // namespace ann } // namespace mlpack +// Include implementation. 
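// [Editor's sketch -- not part of this patch.] The three expressions the
// Linear layer needs -- forward map, backward map, and parameter gradient --
// written out with plain Armadillo for a single input column.
#include <armadillo>

int main()
{
  const arma::uword inSize = 4, outSize = 3;
  arma::mat weight(outSize, inSize, arma::fill::randu);
  arma::vec bias(outSize, arma::fill::randu);
  arma::vec input(inSize, arma::fill::randu);

  // Forward: y = W x + b.
  arma::vec output = weight * input + bias;

  // Backward: propagate an upstream gradient gy to the input, g = W^T gy.
  arma::vec gy(outSize, arma::fill::randu);
  arma::vec g = weight.t() * gy;

  // Parameter gradients: dL/dW = gy x^T and dL/db = gy.
  arma::mat weightGradient = gy * input.t();
  arma::vec biasGradient = gy;

  return 0;
}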
+#include "linear_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_impl.hpp b/src/mlpack/methods/ann/layer/linear_impl.hpp new file mode 100644 index 00000000000..8b726e5d255 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_impl.hpp @@ -0,0 +1,87 @@ +/** + * @file linear_impl.hpp + * @author Marcus Edel + * + * Implementation of the Linear layer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Linear::Linear() +{ + // Nothing to do here. +} + +template +Linear::Linear( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize + outSize, 1); +} + +template +void Linear::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template +template +void Linear::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = (weight * input) + bias; +} + +template +template +void Linear::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void Linear::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; +} + +template +template +void Linear::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp index 92064727822..972f72db8ec 100644 --- a/src/mlpack/methods/ann/layer/linear_no_bias.hpp +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -37,27 +37,19 @@ class LinearNoBias { public: //! Create the LinearNoBias object. - LinearNoBias() {} + LinearNoBias(); /** * Create the LinearNoBias object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ - LinearNoBias(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize, 1); - } + LinearNoBias(const size_t inSize, const size_t outSize); /* * Reset the layer parameter. */ - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -67,10 +59,7 @@ class LinearNoBias * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weight * input; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -84,10 +73,7 @@ class LinearNoBias template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = weight.t() * gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -99,11 +85,7 @@ class LinearNoBias template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -134,12 +116,7 @@ class LinearNoBias * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -171,4 +148,7 @@ class LinearNoBias } // namespace ann } // namespace mlpack +// Include implementation. +#include "linear_no_bias_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp new file mode 100644 index 00000000000..15f96cdb537 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp @@ -0,0 +1,83 @@ +/** + * @file linear_no_bias_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearNoBias class also known as fully-connected layer + * or affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear_no_bias.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LinearNoBias::LinearNoBias() +{ + // Nothing to do here. 
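// [Editor's sketch -- not part of this patch.] A quick finite-difference check
// of the gradient expression dL/dW = error * input^T used by LinearNoBias,
// assuming a squared-error loss; purely illustrative, not mlpack API.
#include <armadillo>
#include <iostream>

int main()
{
  arma::mat weight(3, 4, arma::fill::randu);
  arma::vec input(4, arma::fill::randu), target(3, arma::fill::randu);

  // For L = 0.5 * || W input - target ||^2 the error is W input - target and
  // the analytic weight gradient is error * input^T.
  arma::vec error = weight * input - target;
  arma::mat analytic = error * input.t();

  // Numeric gradient of a single entry by central differences.
  const double eps = 1e-6;
  arma::mat wPlus = weight, wMinus = weight;
  wPlus(1, 2) += eps;
  wMinus(1, 2) -= eps;
  const double lPlus = 0.5 * arma::accu(arma::square(wPlus * input - target));
  const double lMinus = 0.5 * arma::accu(arma::square(wMinus * input - target));
  const double numeric = (lPlus - lMinus) / (2 * eps);

  std::cout << analytic(1, 2) - numeric << std::endl;  // Should be close to 0.
  return 0;
}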
+} + +template +LinearNoBias::LinearNoBias( + const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize, 1); +} + +template +void LinearNoBias::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); +} + +template +template +void LinearNoBias::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weight * input; +} + +template +template +void LinearNoBias::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void LinearNoBias::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); +} + +template +template +void LinearNoBias::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp index 95a79c97b91..df9872d6c8a 100644 --- a/src/mlpack/methods/ann/layer/log_softmax.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -39,7 +39,7 @@ class LogSoftMax /** * Create the LogSoftmax object. */ - LogSoftMax() { /* Nothing to do here. */ } + LogSoftMax(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -49,38 +49,7 @@ class LogSoftMax * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -94,10 +63,7 @@ class LogSoftMax template void Backward(const arma::Mat&& input, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -114,6 +80,12 @@ class LogSoftMax //! Modify the delta. InputDataType& Delta() { return delta; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -125,7 +97,10 @@ class LogSoftMax OutputDataType outputParameter; }; // class LogSoftmax -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. 
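// [Editor's sketch -- not part of this patch.] The quantity that LogSoftMax's
// Forward pass approximates with the fast exp(-x) series: for a column x, the
// numerically stable log-softmax is x - max(x) - log(sum(exp(x - max(x)))).
#include <armadillo>
#include <cmath>

arma::vec LogSoftmaxReference(const arma::vec& x)
{
  const double maxVal = x.max();
  return x - maxVal - std::log(arma::accu(arma::exp(x - maxVal)));
}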
+#include "log_softmax_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_impl.hpp b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp new file mode 100644 index 00000000000..68fba8d8459 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp @@ -0,0 +1,85 @@ +/** + * @file log_softmax_impl.hpp + * @author Marcus Edel + * + * Implementation of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP + +// In case it hasn't yet been included. +#include "log_softmax.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LogSoftMax::LogSoftMax() +{ + // Nothing to do here. +} + +template +template +void LogSoftMax::Forward( + const InputType&& input, OutputType&& output) +{ + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. + static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); +} + +template +template +void LogSoftMax::Backward( + const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy - arma::exp(input) * arma::accu(gy); +} + +template +template +void LogSoftMax::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp index 0f9f0b4978d..080d4b5746e 100644 --- a/src/mlpack/methods/ann/layer/lookup.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -42,12 +42,7 @@ class Lookup * @param inSize The number of input units. * @param outSize The number of output units. */ - Lookup(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } + Lookup(const size_t inSize, const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -57,10 +52,7 @@ class Lookup * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weights.cols(arma::conv_to::from(input) - 1); - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -74,10 +66,7 @@ class Lookup template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -89,11 +78,7 @@ class Lookup template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient = arma::zeros >(weights.n_rows, weights.n_cols); - gradient.cols(arma::conv_to::from(input) - 1) = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -124,12 +109,7 @@ class Lookup * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -158,4 +138,7 @@ class Lookup } // namespace ann } // namespace mlpack +// Include implementation. +#include "lookup_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lookup_impl.hpp b/src/mlpack/methods/ann/layer/lookup_impl.hpp new file mode 100644 index 00000000000..cbe24468ed3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup_impl.hpp @@ -0,0 +1,74 @@ +/** + * @file lookup_impl.hpp + * @author Marcus Edel + * + * Implementation of the Lookup class a particular convolution, where the width + * of the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP + +// In case it hasn't yet been included. +#include "lookup.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Lookup::Lookup( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize, inSize); +} + +template +template +void Lookup::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weights.cols(arma::conv_to::from(input) - 1); +} + +template +template +void Lookup::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Lookup::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; +} + +template +template +void Lookup::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp index 6cd9dc630fb..cf70e344d94 100644 --- a/src/mlpack/methods/ann/layer/lstm.hpp +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -44,7 +44,7 @@ class LSTM { public: //! Create the LSTM object. - LSTM() { /* Nothing to do here */ } + LSTM(); /** * Create the LSTM layer object using the specified parameters. @@ -53,42 +53,7 @@ class LSTM * @param outSize The number of output units. * @param rho Maximum number of steps to backpropagate through time (BPTT). 
*/ - LSTM(const size_t inSize, const size_t outSize, const size_t rho) : - inSize(inSize), - outSize(outSize), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - { - input2GateModule = new Linear<>(inSize, 4 * outSize); - output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); - - network.push_back(input2GateModule); - network.push_back(output2GateModule); - - inputGateModule = new SigmoidLayer<>(); - hiddenStateModule = new TanHLayer<>(); - forgetGateModule = new SigmoidLayer<>(); - outputGateModule = new SigmoidLayer<>(); - - network.push_back(inputGateModule); - network.push_back(hiddenStateModule); - network.push_back(forgetGateModule); - network.push_back(outputGateModule); - - cellModule = new IdentityLayer<>(); - cellActivationModule = new TanHLayer<>(); - - network.push_back(cellModule); - network.push_back(cellActivationModule); - - prevOutput = arma::zeros(outSize, 1); - prevCell = arma::zeros(outSize, 1); - prevError = arma::zeros(4 * outSize, 1); - cellActivationError = arma::zeros(outSize, 1); - } + LSTM(const size_t inSize, const size_t outSize, const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -98,112 +63,7 @@ class LSTM * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - cellParameter.push_back(prevCell); - outParameter.push_back(prevOutput); - } - - arma::mat output1; - arma::mat output2; - arma::mat output3; - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(prevOutput), - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)) - ), - output2GateModule); - - output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + - boost::apply_visitor(outputParameterVisitor, output2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(0, 0, 1 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)) - ), - outputGateModule); - - arma::mat cell = prevCell; - - // Input gate * hidden state. - arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % boost::apply_visitor(outputParameterVisitor, - hiddenStateModule); - - // Forget gate * cell. 
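// [Editor's sketch -- not part of this patch.] One LSTM cell step with plain
// Armadillo, mirroring the gate layout used in this Forward pass: input gate,
// candidate hidden state, forget gate and output gate stacked into a single
// 4 * outSize column. All names here are illustrative only.
#include <armadillo>
#include <cmath>

arma::vec SigmoidSketch(const arma::vec& x)
{
  arma::vec y = x;
  y.transform([](const double v) { return 1.0 / (1.0 + std::exp(-v)); });
  return y;
}

void LstmStepSketch(const arma::vec& gateInput,  // W x_t + U h_{t-1}, 4n rows.
                    arma::vec& cell,             // c_{t-1}, updated in place.
                    arma::vec& output,           // h_t, written on return.
                    const arma::uword n)         // outSize.
{
  const arma::vec i = SigmoidSketch(gateInput.rows(0, n - 1));          // Input gate.
  const arma::vec z = arma::tanh(gateInput.rows(n, 2 * n - 1));         // Candidate state.
  const arma::vec f = SigmoidSketch(gateInput.rows(2 * n, 3 * n - 1));  // Forget gate.
  const arma::vec o = SigmoidSketch(gateInput.rows(3 * n, 4 * n - 1));  // Output gate.

  cell = i % z + f % cell;        // c_t = i .* z + f .* c_{t-1}.
  output = o % arma::tanh(cell);  // h_t = o .* tanh(c_t).
}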
- arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cell; - - arma::mat nextCell = cmul1 + cmul2; - - boost::apply_visitor( - ForwardVisitor( - std::move(nextCell), - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) - ), - cellModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)) - ), - cellActivationModule); - - output = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % boost::apply_visitor(outputParameterVisitor, - outputGateModule); - - prevCell = nextCell; - prevOutput = output; - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - prevOutput.zeros(); - prevCell.zeros(); - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -217,121 +77,7 @@ class LSTM template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (backwardStep > 0) - { - gy += boost::apply_visitor(deltaVisitor, output2GateModule); - } - - arma::mat g1 = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % gy; - - arma::mat g2 = boost::apply_visitor(outputParameterVisitor, - outputGateModule) % gy; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)), - std::move(g2), - std::move(boost::apply_visitor(deltaVisitor, - cellActivationModule)) - ), - cellActivationModule); - - cellActivationError = boost::apply_visitor(deltaVisitor, - cellActivationModule); - - if (backwardStep > 0) - { - cellActivationError += forgetGateError; - } - - arma::mat g4 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % cellActivationError; - - arma::mat g5 = boost::apply_visitor(outputParameterVisitor, - hiddenStateModule) % cellActivationError; - - forgetGateError = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cellActivationError; - - arma::mat g7 = cellParameter[cellParameter.size() - - backwardStep - 1] % cellActivationError; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)), - std::move(g5), - std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)), - std::move(g4), - std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)), - std::move(g7), - std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)), - std::move(g1), - std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) - ), - outputGateModule); - - prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, inputGateModule); - prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, hiddenStateModule); - prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, forgetGateModule); - prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( - 
deltaVisitor, outputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) - ), - output2GateModule); - - backwardStep++; - if (backwardStep == rho) - { - backwardStep = 0; - cellParameter.clear(); - } - - g = boost::apply_visitor(deltaVisitor, input2GateModule); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -343,29 +89,7 @@ class LSTM template void Gradient(arma::Mat&& input, arma::Mat&& /* error */, - arma::Mat&& /* gradient */) - { - boost::apply_visitor( - GradientVisitor( - std::move(input), - std::move(prevError) - ), - input2GateModule); - - boost::apply_visitor( - GradientVisitor( - std::move(outParameter[outParameter.size() - gradientStep - 1]), - std::move(prevError) - ), - output2GateModule); - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - outParameter.clear(); - } - } + arma::Mat&& /* gradient */); //! The value of the deterministic parameter. bool Deterministic() const { return deterministic; } @@ -409,13 +133,7 @@ class LSTM * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(rho, "rho"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -513,4 +231,7 @@ class LSTM } // namespace ann } // namespace mlpack +// Include implementation. +#include "lstm_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lstm_impl.hpp b/src/mlpack/methods/ann/layer/lstm_impl.hpp new file mode 100644 index 00000000000..6ebe89b2907 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm_impl.hpp @@ -0,0 +1,273 @@ +/** + * @file lstm_impl.hpp + * @author Marcus Edel + * + * Implementation of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LSTM::LSTM() +{ + // Nothing to do here. 
+} + +template +LSTM::LSTM( + const size_t inSize, + const size_t outSize, + const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); +} + +template +template +void LSTM::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, input2GateModule))), + input2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(prevOutput), std::move( + boost::apply_visitor(outputParameterVisitor, output2GateModule))), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 0, 0, 1 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule))), inputGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 1 * outSize, 0, 2 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule))), hiddenStateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 2 * outSize, 0, 3 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule))), forgetGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 3 * outSize, 0, 4 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule))), outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. 
+ arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor(ForwardVisitor(std::move(nextCell), std::move( + boost::apply_visitor(outputParameterVisitor, cellModule))), cellModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellModule)), std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule))), cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } +} + +template +template +void LSTM::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule)), std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, cellActivationModule))), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule)), std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule))), + inputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule)), std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule))), + hiddenStateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule)), std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule))), + forgetGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule)), std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule))), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, input2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, 
input2GateModule))), + input2GateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, output2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule))), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); +} + +template +template +void LSTM::Gradient( + arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(prevError)), + input2GateModule); + + boost::apply_visitor(GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError)), output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } +} + +template +template +void LSTM::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp index e93077ebbad..6bf179b7a5a 100644 --- a/src/mlpack/methods/ann/layer/max_pooling.hpp +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -53,11 +53,8 @@ template < class MaxPooling { public: - //! Create the PoolingLayer object. - MaxPooling() - { - /* Nothing to do here */ - } + //! Create the MaxPooling object. + MaxPooling(); /** * Create the MaxPooling object using the specified number of units. @@ -69,25 +66,10 @@ class MaxPooling * @param floor Rounding operator (floor or ceil). */ MaxPooling(const size_t kW, - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - reset(false), - floor(floor), - offset(0), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - deterministic(false) - { - /* Nothing to do here. */ - } + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -97,63 +79,7 @@ class MaxPooling * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - const size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - if (!deterministic) - { - poolingIndices.push_back(outputTemp); - } - - if (!reset) - { - size_t elements = inputWidth * inputHeight; - indicesCol = arma::linspace >(0, (elements - 1), - elements); - - indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); - - reset = true; - } - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - if (!deterministic) - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - poolingIndices.back().slice(s)); - } - else - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - inputTemp.slice(s)); - } - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -167,24 +93,7 @@ class MaxPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(mappedError.slice(s), gTemp.slice(s), - poolingIndices.back().slice(s)); - } - - poolingIndices.pop_back(); - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -230,13 +139,7 @@ class MaxPooling * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -368,8 +271,10 @@ class MaxPooling std::vector poolingIndices; }; // class MaxPooling - } // namespace ann } // namespace mlpack -#endif \ No newline at end of file +// Include implementation. +#include "max_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling_impl.hpp b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp new file mode 100644 index 00000000000..95aeea86049 --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp @@ -0,0 +1,149 @@ +/** + * @file max_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
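// [Editor's sketch -- not part of this patch.] The output-size rule used by
// the pooling Forward passes: out = floor((in - k) / d + 1) when `floor` is
// set, and the ceil variant otherwise. For example, a 7x7 input pooled with a
// 2x2 kernel at stride 2 gives 3x3 with floor rounding and 4x4 with ceil.
#include <cmath>
#include <cstddef>

std::size_t PooledSizeSketch(const std::size_t in, const std::size_t k,
                             const std::size_t d, const bool floorMode)
{
  const double raw = (static_cast<double>(in) - k) / d + 1;
  return static_cast<std::size_t>(floorMode ? std::floor(raw) : std::ceil(raw));
}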
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "max_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MaxPooling::MaxPooling() +{ + // Nothing to do here. +} + +template +MaxPooling::MaxPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) +{ + // Nothing to do here. +} + +template +template +void MaxPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MaxPooling::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MaxPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp index e0c097f6ddc..70e061c7e2b 100644 --- a/src/mlpack/methods/ann/layer/mean_pooling.hpp +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -34,10 +34,7 @@ class MeanPooling { public: //! Create the MeanPooling object. - MeanPooling() - { - /* Nothing to do here */ - } + MeanPooling(); /** * Create the MeanPooling object using the specified number of units. @@ -48,26 +45,10 @@ class MeanPooling * @param dH Width of the stride operation. 
*/ MeanPooling(const size_t kW, - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - reset(false), - floor(floor), - deterministic(false), - offset(0) - - { - /* Nothing to do here. */ - } + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -77,41 +58,7 @@ class MeanPooling * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - - Pooling(inputTemp.slice(s), outputTemp.slice(s)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -125,21 +72,7 @@ class MeanPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -185,13 +118,7 @@ class MeanPooling * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -319,4 +246,7 @@ class MeanPooling } // namespace ann } // namespace mlpack -#endif \ No newline at end of file +// Include implementation. +#include "mean_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp new file mode 100644 index 00000000000..5008763752b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp @@ -0,0 +1,126 @@ +/** + * @file mean_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
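// [Editor's sketch -- not part of this patch.] Mean pooling of a single slice
// with a kW x kH window and strides dW, dH, written with plain Armadillo. The
// helper below is illustrative; it is not the Pooling() member used here.
#include <armadillo>

arma::mat MeanPoolSketch(const arma::mat& input, const arma::uword kW,
                         const arma::uword kH, const arma::uword dW,
                         const arma::uword dH)
{
  const arma::uword outRows = (input.n_rows - kW) / dW + 1;
  const arma::uword outCols = (input.n_cols - kH) / dH + 1;
  arma::mat output(outRows, outCols);

  for (arma::uword j = 0; j < outCols; ++j)
    for (arma::uword i = 0; i < outRows; ++i)
      output(i, j) = arma::accu(input.submat(i * dW, j * dH,
          i * dW + kW - 1, j * dH + kH - 1)) / (kW * kH);

  return output;
}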
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanPooling::MeanPooling() +{ + // Nothing to do here. +} + +template +MeanPooling::MeanPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) +{ + // Nothing to do here. +} + +template +template +void MeanPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MeanPooling::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MeanPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp index 6abdc15c21c..280b6e79b11 100644 --- a/src/mlpack/methods/ann/layer/mean_squared_error.hpp +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -2,7 +2,7 @@ * @file mean_squared_error.hpp * @author Marcus Edel * - * Definition and implementation of the mean squared error performance function. + * Definition of the mean squared error performance function. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -37,7 +37,7 @@ class MeanSquaredError /** * Create the MeanSquaredError object. */ - MeanSquaredError() { /* Nothing to do here. */ } + MeanSquaredError(); /* * Computes the mean squared error function. @@ -46,11 +46,7 @@ class MeanSquaredError * @param output Resulting output activation. 
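// [Editor's sketch -- not part of this patch.] What the MeanSquaredError pair
// computes: Forward is the mean of the squared element-wise differences, and
// Backward returns the unscaled residual (input - target), matching the
// implementation this patch moves into mean_squared_error_impl.hpp.
#include <armadillo>

double MseForwardSketch(const arma::mat& input, const arma::mat& target)
{
  // Column means first, then the mean of those: the overall mean of the
  // squared differences.
  return arma::mean(arma::mean(arma::square(input - target)));
}

arma::mat MseBackwardSketch(const arma::mat& input, const arma::mat& target)
{
  return input - target;
}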
*/ template - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - return arma::mean(arma::mean(arma::square(input - target))); - } - + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. * @@ -61,10 +57,7 @@ class MeanSquaredError template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = (input - target); - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -81,6 +74,12 @@ class MeanSquaredError //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -92,7 +91,10 @@ class MeanSquaredError OutputDataType outputParameter; }; // class MeanSquaredError -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_squared_error_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp new file mode 100644 index 00000000000..037a9445295 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp @@ -0,0 +1,57 @@ +/** + * @file mean_squared_error_impl.hpp + * @author Marcus Edel + * + * Implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_squared_error.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanSquaredError::MeanSquaredError() +{ + // Nothing to do here. +} + +template +template +double MeanSquaredError::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + return arma::mean(arma::mean(arma::square(input - target))); +} + +template +template +void MeanSquaredError::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = (input - target); +} + +template +template +void MeanSquaredError::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp index 2caa8a37a35..b2985b27b83 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -32,10 +32,7 @@ class MultiplyConstant /** * Create the MultiplyConstant object. */ - MultiplyConstant(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } + MultiplyConstant(const double scalar); /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -45,10 +42,7 @@ class MultiplyConstant * @param output Resulting output activation. 
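   *
   * A rough sketch (hypothetical values, assuming the default template
   * parameters); each element is simply scaled by the constant:
   *
   * @code
   * MultiplyConstant<> mul(2.0);
   * arma::mat input("1 2 3");
   * arma::mat output;
   * mul.Forward(std::move(input), std::move(output));  // output is [2 4 6].
   * @endcode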
*/ template - void Forward(const InputType&& input, OutputType&& output) - { - output = input * scalar; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass @@ -59,10 +53,7 @@ class MultiplyConstant * @param g The calculated gradient. */ template - void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) - { - g = gy * scalar; - } + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -83,10 +74,7 @@ class MultiplyConstant * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored constant scalar value. @@ -102,7 +90,10 @@ class MultiplyConstant OutputDataType outputParameter; }; // class MultiplyConstant -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "multiply_constant_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp new file mode 100644 index 00000000000..07dc4799779 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp @@ -0,0 +1,51 @@ +/** + * @file multiply_constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the MultiplyConstantLayer class, which multiplies the + * input by a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "multiply_constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MultiplyConstant::MultiplyConstant( + const double scalar) : scalar(scalar) +{ + // Nothing to do here. +} + +template +template +void MultiplyConstant::Forward( + const InputType&& input, OutputType&& output) +{ + output = input * scalar; +} + +template +template +void MultiplyConstant::Backward( + const DataType&& /* input */, DataType&& gy, DataType&& g) +{ + g = gy * scalar; +} + +template +template +void MultiplyConstant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(scalar, "scalar"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 0de8cb7cd5b..84c6a2d7904 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -38,7 +38,7 @@ class NegativeLogLikelihood /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihood() { /* Nothing to do here. */ } + NegativeLogLikelihood(); /* * Computes the Negative log likelihood. @@ -47,21 +47,7 @@ class NegativeLogLikelihood * @param output Resulting output activation. 
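   *
   * Note that target values are expected to lie between 1 and the number of
   * classes; they are not zero-indexed. A rough sketch (hypothetical values,
   * assuming the default template parameters):
   *
   * @code
   * // Log-probabilities for three classes and two points (one per column).
   * arma::mat input("-1.61 -0.69; -1.20 -0.92; -0.69 -2.30");
   * arma::mat target("3 1");  // Point 0 has class 3, point 1 has class 1.
   * NegativeLogLikelihood<> nll;
   * // Sums -input(target(i) - 1, i) over columns: -(-0.69) - (-0.69) = 1.38.
   * const double loss = nll.Forward(std::move(input), std::move(target));
   * @endcode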
*/ template - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - return output; - } + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log @@ -77,18 +63,7 @@ class NegativeLogLikelihood template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -105,6 +80,12 @@ class NegativeLogLikelihood //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -116,7 +97,10 @@ class NegativeLogLikelihood OutputDataType outputParameter; }; // class NegativeLogLikelihood -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "negative_log_likelihood_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp new file mode 100644 index 00000000000..2ec7799efe0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp @@ -0,0 +1,76 @@ +/** + * @file negative_log_likelihood_impl.hpp + * @author Marcus Edel + * + * Implementation of the NegativeLogLikelihood class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP + +// In case it hasn't yet been included. +#include "negative_log_likelihood.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +NegativeLogLikelihood::NegativeLogLikelihood() +{ + // Nothing to do here. 
+} + +template +template +double NegativeLogLikelihood::Forward( + const arma::Mat&& input, arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + return output; +} + +template +template +void NegativeLogLikelihood::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } +} + +template +template +void NegativeLogLikelihood::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp index 5870071faef..297127a6ae8 100644 --- a/src/mlpack/methods/ann/layer/recurrent.hpp +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -56,41 +56,7 @@ class Recurrent const InputModuleType& input, const FeedbackModuleType& feedback, const TransferModuleType& transfer, - const size_t rho) : - startModule(new StartModuleType(start)), - inputModule(new InputModuleType(input)), - feedbackModule(new FeedbackModuleType(feedback)), - transferModule(new TransferModuleType(transfer)), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - - { - initialModule = new Sequential<>(); - mergeModule = new AddMerge<>(); - recurrentModule = new Sequential<>(false); - - boost::apply_visitor(AddVisitor(inputModule), initialModule); - boost::apply_visitor(AddVisitor(startModule), initialModule); - boost::apply_visitor(AddVisitor(transferModule), initialModule); - - boost::apply_visitor(weightSizeVisitor, startModule); - boost::apply_visitor(weightSizeVisitor, inputModule); - boost::apply_visitor(weightSizeVisitor, feedbackModule); - boost::apply_visitor(weightSizeVisitor, transferModule); - - boost::apply_visitor(AddVisitor(inputModule), mergeModule); - boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); - boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); - boost::apply_visitor(AddVisitor(transferModule), recurrentModule); - - network.push_back(initialModule); - network.push_back(mergeModule); - network.push_back(feedbackModule); - network.push_back(recurrentModule); - } + const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -100,48 +66,7 @@ class Recurrent * @param output Resulting output activation. 
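   *
   * Note: on the first of every rho consecutive calls the initial module
   * (input + start + transfer) is evaluated; later calls feed the previous
   * transfer output back through the feedback module before running the
   * recurrent module. After rho calls the internal step counters reset.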
*/ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - initialModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, inputModule))), - inputModule); - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, transferModule)), std::move( - boost::apply_visitor(outputParameterVisitor, feedbackModule))), - feedbackModule); - - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - recurrentModule); - } - - output = boost::apply_visitor(outputParameterVisitor, transferModule); - - // Save the feedback output parameter when training the module. - if (!deterministic) - { - feedbackOutputParameter.push_back(output); - } - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - backwardStep = 0; - - if (!recurrentError.is_empty()) - { - recurrentError.zeros(); - } - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -155,44 +80,7 @@ class Recurrent template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (!recurrentError.is_empty()) - { - recurrentError += gy; - } - else - { - recurrentError = gy; - } - - if (backwardStep < (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, recurrentModule)), std::move(recurrentError), - std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), - recurrentModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, inputModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), - inputModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, feedbackModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( - boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, initialModule)), std::move(recurrentError), - std::move(g)), initialModule); - } - - recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); - backwardStep++; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -204,38 +92,7 @@ class Recurrent template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - if (gradientStep < (rho - 1)) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - recurrentModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); - - boost::apply_visitor(GradientVisitor(std::move( - feedbackOutputParameter[feedbackOutputParameter.size() - 2 - - gradientStep]), std::move(boost::apply_visitor(deltaVisitor, - mergeModule))), feedbackModule); - } - else - { - boost::apply_visitor(GradientZeroVisitor(), recurrentModule); - boost::apply_visitor(GradientZeroVisitor(), inputModule); - boost::apply_visitor(GradientZeroVisitor(), feedbackModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, startModule))), initialModule); - } - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - feedbackOutputParameter.clear(); - } - } + arma::Mat&& /* gradient */); //! Get the model modules. std::vector& Model() { return network; } @@ -274,10 +131,7 @@ class Recurrent * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored start module. @@ -353,4 +207,7 @@ class Recurrent } // namespace ann } // namespace mlpack +// Include implementation. +#include "recurrent_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp index 1d1405d8863..ffb7320b232 100644 --- a/src/mlpack/methods/ann/layer/recurrent_attention.hpp +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -62,18 +62,7 @@ class RecurrentAttention RecurrentAttention(const size_t outSize, const RNNModuleType& rnn, const ActionModuleType& action, - const size_t rho) : - outSize(outSize), - rnnModule(new RNNModuleType(rnn)), - actionModule(new ActionModuleType(action)), - rho(rho), - forwardStep(0), - backwardStep(0), - deterministic(false) - { - network.push_back(rnnModule); - network.push_back(actionModule); - } + const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -83,57 +72,7 @@ class RecurrentAttention * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // Initialize the action input. - if (initialInput.is_empty()) - { - initialInput = arma::zeros(outSize, input.n_cols); - } - - // Propagate through the action and recurrent module. - for (forwardStep = 0; forwardStep < rho; ++forwardStep) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( - boost::apply_visitor(outputParameterVisitor, actionModule))), - actionModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( - outputParameterVisitor, actionModule))), actionModule); - } - - // Initialize the glimpse input. 
- arma::mat glimpseInput = arma::zeros(input.n_elem, 2); - glimpseInput.col(0) = input; - glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, - actionModule).n_elem - 1, 1) = boost::apply_visitor( - outputParameterVisitor, actionModule); - - boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), - std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), - rnnModule); - - // Save the output parameter when training the module. - if (!deterministic) - { - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(SaveOutputParameterVisitor( - std::move(moduleOutputParameter)), network[l]); - } - } - } - - output = boost::apply_visitor(outputParameterVisitor, rnnModule); - - forwardStep = 0; - backwardStep = 0; - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -147,81 +86,7 @@ class RecurrentAttention template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (intermediateGradient.is_empty() && backwardStep == 0) - { - // Initialize the attention gradients. - size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + - boost::apply_visitor(weightSizeVisitor, actionModule); - - intermediateGradient = arma::zeros(weights, 1); - attentionGradient = arma::zeros(weights, 1); - - // Initialize the action error. - actionError = arma::zeros( - boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, - boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); - } - - // Propagate the attention gradients. - if (backwardStep == 0) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), rnnModule); - boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), actionModule); - - attentionGradient.zeros(); - } - - // Back-propagate through time. - for (; backwardStep < rho; backwardStep++) - { - if (backwardStep == 0) - { - recurrentError = gy; - } - else - { - recurrentError = actionDelta; - } - - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(LoadOutputParameterVisitor( - std::move(moduleOutputParameter)), network[network.size() - 1 - l]); - } - - if (backwardStep == (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, actionModule)), std::move(actionError), - std::move(actionDelta)), actionModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(initialInput), - std::move(actionError), std::move(actionDelta)), actionModule); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(recurrentError), - std::move(rnnDelta)), rnnModule); - - if (backwardStep == 0) - { - g = rnnDelta.col(1); - } - else - { - g += rnnDelta.col(1); - } - - IntermediateGradient(); - } - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -233,14 +98,7 @@ class RecurrentAttention template void Gradient(arma::Mat&& /* input */, arma::Mat&& /* error */, - arma::Mat&& /* gradient */) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), rnnModule); - boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), actionModule); - } + arma::Mat&& /* gradient */); //! Get the model modules. 
std::vector& Model() { return network; } @@ -279,13 +137,7 @@ class RecurrentAttention * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(forwardStep, "forwardStep"); - ar & data::CreateNVP(backwardStep, "backwardStep"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Calculate the gradient of the attention module. @@ -405,4 +257,7 @@ class RecurrentAttention } // namespace ann } // namespace mlpack +// Include implementation. +#include "recurrent_attention_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp new file mode 100644 index 00000000000..6642894fa88 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp @@ -0,0 +1,204 @@ +/** + * @file recurrent_attention_impl.hpp + * @author Marcus Edel + * + * Implementation of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent_attention.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +template +RecurrentAttention::RecurrentAttention( + const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) +{ + network.push_back(rnnModule); + network.push_back(actionModule); +} + +template +template +void RecurrentAttention::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. 
+ if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; +} + +template +template +void RecurrentAttention::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. + for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } +} + +template +template +void RecurrentAttention::Gradient( + arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); +} + +template +template +void RecurrentAttention::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_impl.hpp new file mode 100644 index 00000000000..8f1525c4551 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_impl.hpp @@ -0,0 +1,206 @@ +/** + * @file recurrent_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearLayer class also known as fully-connected layer + * or affine transformation. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +template< + typename StartModuleType, + typename InputModuleType, + typename FeedbackModuleType, + typename TransferModuleType +> +Recurrent::Recurrent( + const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); +} + +template +template +void Recurrent::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. 
+ if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } +} + +template +template +void Recurrent::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; +} + +template +template +void Recurrent::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } +} + +template +template +void Recurrent::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp index bc938d1a766..fd192f0a1f2 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -34,10 +34,7 @@ class ReinforceNormal * * @param stdev Standard deviation used during the forward and backward pass. */ - ReinforceNormal(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } + ReinforceNormal(const double stdev); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -47,22 +44,7 @@ class ReinforceNormal * @param output Resulting output activation. 
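   *
   * During training (deterministic == false) the output is sampled from a
   * normal distribution centred on the input with standard deviation stdev,
   * and the input is stored for the backward pass; at test time the input
   * (the mean) is passed through unchanged.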
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - - moduleInputParameter.push_back(input); - } - else - { - // Use maximum a posteriori. - output = input; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -74,17 +56,7 @@ class ReinforceNormal * @param g The calculated gradient. */ template - void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) - { - g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= reward; - g *= -1; - - moduleInputParameter.pop_back(); - } - + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -111,6 +83,12 @@ class ReinforceNormal //! Modify the value of the deterministic parameter. double& Reward() { return reward; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -134,7 +112,10 @@ class ReinforceNormal bool deterministic; }; // class ReinforceNormal -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "reinforce_normal_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp new file mode 100644 index 00000000000..1eaa25d8fae --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp @@ -0,0 +1,69 @@ +/** + * @file reinforce_normal_impl.hpp + * @author Marcus Edel + * + * Implementation of the ReinforceNormalLayer class, which implements the + * REINFORCE algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "reinforce_normal.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +ReinforceNormal::ReinforceNormal( + const double stdev) : stdev(stdev) +{ + // Nothing to do here. +} + +template +template +void ReinforceNormal::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } +} + +template +template +void ReinforceNormal::Backward( + const DataType&& input, DataType&& /* gy */, DataType&& g) +{ + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); +} + +template +template +void ReinforceNormal::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. 
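+  // (Presumably there is nothing to save: stdev is fixed at construction
+  // time and the remaining members are transient, per-pass state.)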
+} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index d683830cc88..d3c42a008c9 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -2,7 +2,7 @@ * @file select.hpp * @author Marcus Edel * - * Definition and implementation of the Select module. + * Definition of the Select module. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -38,12 +38,7 @@ class Select * @param index The column which should be extracted from the given input. * @param elements The number of elements that should be used. */ - Select(const size_t index, const size_t elements = 0) : - index(index), - elements(elements) - { - /* Nothing to do here. */ - } + Select(const size_t index, const size_t elements = 0); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -53,17 +48,7 @@ class Select * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (elements == 0) - { - output = input.col(index); - } - else - { - output = input.submat(0, index, elements - 1, index); - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,17 +62,7 @@ class Select template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (elements == 0) - { - g = gy; - } - else - { - g = gy.submat(0, 0, elements - 1, 0); - } - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -104,6 +79,12 @@ class Select //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! Locally-stored column index. size_t index; @@ -121,7 +102,10 @@ class Select OutputDataType outputParameter; }; // class Select -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "select_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/select_impl.hpp b/src/mlpack/methods/ann/layer/select_impl.hpp new file mode 100644 index 00000000000..a40cb968461 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select_impl.hpp @@ -0,0 +1,75 @@ +/** + * @file select_impl.hpp + * @author Marcus Edel + * + * Implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Select::Select( + const size_t index, + const size_t elements) : + index(index), + elements(elements) + { + // Nothing to do here. 
+ } + +template +template +void Select::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } +} + +template +template +void Select::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } +} + +template +template +void Select::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(index, "index"); + ar & data::CreateNVP(elements, "elements"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp index 277b3342dc2..ca729c9da13 100644 --- a/src/mlpack/methods/ann/layer/sequential.hpp +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -47,22 +47,10 @@ class Sequential * * @param model Expose the all network modules. */ - Sequential(const bool model = true) : model(model), reset(false) - { - /* Nothing to do here. */ - } + Sequential(const bool model = true); //! Destroy the Sequential object. - ~Sequential() - { - if (!model) - { - for (LayerTypes& layer : network) - { - boost::apply_visitor(deleteVisitor, layer); - } - } - } + ~Sequential(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -72,64 +60,7 @@ class Sequential * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network.front()))), - network.front()); - - if (!reset) - { - if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network.front()); - } - - if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network.front()); - } - } - - for (size_t i = 1; i < network.size(); ++i) - { - if (!reset) - { - // Set the input width. - boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); - - // Set the input height. - boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); - } - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (!reset) - { - // Get the output width. - if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network[i]); - } - - // Get the output height. 
- if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network[i]); - } - } - } - - if (!reset) - { - reset = true; - } - - output = boost::apply_visitor(outputParameterVisitor, network.back()); - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -143,24 +74,7 @@ class Sequential template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network.back())), std::move(gy), - std::move(boost::apply_visitor(deltaVisitor, network.back()))), - network.back()); - - for (size_t i = 2; i < network.size() + 1; ++i) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[network.size() - i])), std::move( - boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), - std::move(boost::apply_visitor(deltaVisitor, - network[network.size() - i]))), network[network.size() - i]); - } - - g = boost::apply_visitor(deltaVisitor, network.front()); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -172,18 +86,7 @@ class Sequential template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - network.front()); - - for (size_t i = 1; i < network.size() - 1; ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); - } - } + arma::Mat&& /* gradient */); /* * Add a new module to the model. @@ -235,6 +138,12 @@ class Sequential //! Modify the gradient. arma::mat& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -285,8 +194,10 @@ class Sequential size_t height; }; // class Sequential - } // namespace ann } // namespace mlpack +// Include implementation. +#include "sequential_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/sequential_impl.hpp b/src/mlpack/methods/ann/layer/sequential_impl.hpp new file mode 100644 index 00000000000..df20a7a7668 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential_impl.hpp @@ -0,0 +1,154 @@ +/** + * @file sequential_impl.hpp + * @author Marcus Edel + * + * Implementation of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Sequential::Sequential( + const bool model) : model(model), reset(false) +{ + // Nothing to do here. 
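+  // Note: when model is false this container takes ownership of the layers
+  // added to it, and the destructor below deletes them.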
+} + +template +Sequential::~Sequential() +{ + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } +} + +template +template +void Sequential::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + +if (!reset) +{ + reset = true; +} + + output = boost::apply_visitor(outputParameterVisitor, network.back()); +} + +template +template +void Sequential::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); +} + +template +template +void Sequential::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } +} + +template +template +void Sequential::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp index d2802dacabc..f820e351aa8 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -40,12 +40,7 @@ class VRClassReward * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. 
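   *
   * A minimal construction sketch (hypothetical values, assuming the default
   * template parameters):
   *
   * @code
   * // Scale the reward by 2 and average it over the batch.
   * VRClassReward<> reward(2, true);
   * @endcode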
*/ - VRClassReward(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } + VRClassReward(const double scale = 1, const bool sizeAverage = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -56,35 +51,7 @@ class VRClassReward * between 1 and the number of classes. */ template - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols - 1; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols - 1; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return output - reward / (input.n_cols - 1); - } - - return output - reward; - } + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log @@ -100,29 +67,7 @@ class VRClassReward template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < (input.n_cols - 1); ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - - double vrReward = reward - input(0, 1); - if (sizeAverage) - { - vrReward /= input.n_cols - 1; - } - - const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; - - output(0, 1) = norm * (input(0, 1) - reward); - boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -159,6 +104,12 @@ class VRClassReward */ void Add(LayerTypes layer) { network.push_back(layer); } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored value to scale the reward. const double scale; @@ -185,7 +136,10 @@ class VRClassReward std::vector network; }; // class VRClassReward -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "vr_class_reward_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp new file mode 100644 index 00000000000..a4a2703545b --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp @@ -0,0 +1,101 @@ +/** + * @file vr_class_reward_impl.hpp + * @author Marcus Edel + * + * Implementation of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP + +// In case it hasn't yet been included. +#include "vr_class_reward.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +VRClassReward::VRClassReward( + const double scale, + const bool sizeAverage) : + scale(scale), + sizeAverage(sizeAverage) +{ + // Nothing to do here. 
+} + +template +template +double VRClassReward::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; +} + +template +template +void VRClassReward::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); +} + +template +template +void VRClassReward::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif From aa04427e6a085621f9c5d9aa4bc6c6dcb5c1ef32 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 6 Nov 2016 23:20:43 +0100 Subject: [PATCH 38/82] Remove the RMVA model. --- src/mlpack/methods/rmva/CMakeLists.txt | 17 - src/mlpack/methods/rmva/rmva.hpp | 963 ------------------------- src/mlpack/methods/rmva/rmva_impl.hpp | 740 ------------------- src/mlpack/methods/rmva/rmva_main.cpp | 285 -------- 4 files changed, 2005 deletions(-) delete mode 100644 src/mlpack/methods/rmva/CMakeLists.txt delete mode 100644 src/mlpack/methods/rmva/rmva.hpp delete mode 100644 src/mlpack/methods/rmva/rmva_impl.hpp delete mode 100644 src/mlpack/methods/rmva/rmva_main.cpp diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt deleted file mode 100644 index ced53a30ea7..00000000000 --- a/src/mlpack/methods/rmva/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - rmva.hpp - rmva_impl.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) - -add_cli_executable(rmva) diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp deleted file mode 100644 index 5f4f03112c8..00000000000 --- a/src/mlpack/methods/rmva/rmva.hpp +++ /dev/null @@ -1,963 +0,0 @@ -/** - * @file rmva.hpp - * @author Marcus Edel - * - * Definition of the RecurrentNeuralAttention class, which implements the - * Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. 
If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP -#define __MLPACK_METHODS_RMVA_RMVA_HPP - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * This class implements the Recurrent Model for Visual Attention, using a - * variety of possible layer implementations. - * - * For more information, see the following paper. - * - * @code - * @article{MnihHGK14, - * title={Recurrent Models of Visual Attention}, - * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * journal={CoRR}, - * volume={abs/1406.6247}, - * year={2014} - * } - * @endcode - * - * @tparam LocatorType Type of locator network. - * @tparam LocationSensorType Type of location sensor network. - * @tparam GlimpseSensorType Type of glimpse sensor network. - * @tparam GlimpseType Type of glimpse network. - * @tparam StartType Type of start network. - * @tparam FeedbackType Type of feedback network. - * @tparam TransferType Type of transfer network. - * @tparam ClassifierType Type of classifier network. - * @tparam RewardPredictorType Type of reward predictor network. - * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam MatType Matrix type (arma::mat or arma::sp_mat). - */ -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType = RandomInitialization, - typename MatType = arma::mat -> -class RecurrentNeuralAttention -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType>; - - /** - * Construct the RecurrentNeuralAttention object, which will construct the - * recurrent model for visual attentionh using the specified networks. - * - * @param locator The locator network. - * @param locationSensor The location sensor network. - * @param glimpseSensor The glimpse sensor network. - * @param glimpse The glimpse network. - * @param start The start network. - * @param feedback The feedback network. - * @param transfer The transfer network. - * @param classifier The classifier network. - * @param rewardPredictor The reward predictor network. - * @param nStep Number of steps for the back-propagate through time. - * @param initializeRule Rule used to initialize the weight matrix. - */ - template - RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule = - InitializationRuleType()); - /** - * Train the network on the given input data using the given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. 
- * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::mat& predictors, arma::mat& responses); - - /** - * Evaluate the network with the given parameters. This function is usually - * called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. - */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the network with the given parameters, and with - * respect to only one point in the dataset. This is useful for - * optimizers such as SGD, which require a separable objective function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - //! Return the number of steps to back-propagate through time. - const size_t& Rho() const { return nStep; } - //! Modify the number of steps to back-propagate through time. - size_t& Rho() { return nStep; } - - //! Return the current location. - const arma::mat& Location(); - - //! Serialize the model. - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const InputType& input, OutputType& output) - { - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Sample an initial starting actions by forwarding zeros through the - // locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - - // Location sensor forward pass. 
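-      // The locator's output (in the reference model, a sampled two-element
-      // glimpse location) is what gets fed into the location sensor network.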
- Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - output = std::get::value - 1>( - classifier).OutputParameter(); - } - - /** - * Update the layer reward for all layer that implement the Rewards function. - */ - template - typename std::enable_if::type - ResetReward(const double reward, std::tuple& network) - { - SetReward(reward, std::get(network)); - ResetReward(reward, network); - } - - template - typename std::enable_if::type - ResetReward(const double /* reward */, std::tuple& /* network */) - { - } - - template - typename std::enable_if< - HasRewardCheck::value, void>::type - SetReward(const double reward, T& layer) - { - layer.Reward() = reward; - } - - template - typename std::enable_if< - !HasRewardCheck::value, void>::type - SetReward(const double /* reward */, T& /* layer */) - { - /* Nothing to do here */ - } - - /** - * Reset the network by clearing the delta and by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* layer */) { /* Nothing to do here */ } - - /** - * Reset the location by updating the location for all layer that implement - * the Location function. - */ - template - typename std::enable_if::type - ResetLocation(const arma::mat& /* location */, - std::tuple& /* network */) - { - // Nothing to do here. 
- } - - template - typename std::enable_if::type - ResetLocation(const arma::mat& location, std::tuple& network) - { - SetLocation(std::get(network), location); - ResetLocation(location, network); - } - - template - typename std::enable_if< - HasLocationCheck::value, void>::type - SetLocation(T& layer, const arma::mat& location) - { - layer.Location(location); - } - - template - typename std::enable_if< - !HasLocationCheck::value, void>::type - SetLocation(T& /* layer */, const arma::mat& /* location */) - { - // Nothing to do here. - } - - /** - * Save the network layer activations. - */ - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activations, - std::tuple& network, - size_t& activationCounter) - { - Save(I, activations, std::get(network), - std::get(network).InputParameter()); - - activationCounter++; - SaveActivations(activations, network, activationCounter); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activations */, - std::tuple& /* network */, - size_t& /* activationCounter */) - { - // Nothing to do here. - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.RecurrentParameter())); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t /* layerNumber */, - boost::ptr_vector& activations, - T& layer, - P& /* unused */) - { - activations.push_back(new MatType(layer.OutputParameter())); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - std::get(network), std::get(network).OutputParameter()); - - SaveActivations( - activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - SaveActivations(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& /* network */) - { - // Nothing to do here. - } - - template - void Save(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* unused */) - { - activationsA.push_back(new DataTypeA(layer.OutputParameter())); - dataTypeACounter++; - } - - template - void Save(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* unused */) - { - activationsB.push_back(new DataTypeB(layer.OutputParameter())); - dataTypeBCounter++; - } - - /** - * Load the network layer activations. 
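-   * Recurrent layers are restored through RecurrentParameter() and all other
-   * layers through OutputParameter(), mirroring how they were saved.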
- */ - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activations */, - size_t& /* activationCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activations, - size_t& activationCounter, - std::tuple& network) - { - Load(--activationCounter, activations, - std::get(network), - std::get(network).InputParameter()); - - LoadActivations(input, activations, - activationCounter, network); - } - - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.RecurrentParameter() = activations[layerNumber]; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, - boost::ptr_vector& activations, - T& layer, - P& /* output */) - { - layer.OutputParameter() = activations[layerNumber]; - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& activationsA, - boost::ptr_vector& activationsB, - size_t& dataTypeACounter, - size_t& dataTypeBCounter, - std::tuple& network) - { - Load(activationsA, - activationsB, - dataTypeACounter, - dataTypeBCounter, - std::get(network), - std::get(network).OutputParameter()); - - LoadActivations( - input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter, - network); - } - - template - typename std::enable_if::type - LoadActivations(DataType& input, - boost::ptr_vector& /* activationsA */, - boost::ptr_vector& /* activationsB */, - size_t& /* dataTypeACounter */, - size_t& /* dataTypeBCounter */, - std::tuple& network) - { - std::get<0>(network).InputParameter() = input; - LinkParameter(network); - } - - template - void Load(boost::ptr_vector& activationsA, - boost::ptr_vector& /* activationsB */, - size_t& dataTypeACounter, - size_t& /* dataTypeBCounter */, - T& layer, - DataTypeA& /* output */) - { - layer.OutputParameter() = activationsA[--dataTypeACounter]; - } - - template - void Load(boost::ptr_vector& /* activationsA */, - boost::ptr_vector& activationsB, - size_t& /* dataTypeACounter */, - size_t& dataTypeBCounter, - T& layer, - DataTypeB& /* output */) - { - layer.OutputParameter() = activationsB[--dataTypeBCounter]; - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& t) - { - std::get(t).InputParameter() = input; - std::get(t).Forward(std::get(t).InputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& t) - { - std::get(t).Forward(std::get(t).OutputParameter(), - std::get(t).OutputParameter()); - - ForwardTail(t); - } - - /** - * Run a single iteration of the backward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. 
- */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - } - - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), error, - std::get(t).Delta()); - - BackwardTail(error, t); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* error */, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& t) - { - std::get(t).Backward( - std::get(t).OutputParameter(), - std::get(t).Delta(), - std::get(t).Delta()); - - BackwardTail(error, t); - } - - /** - * Link the calculated activation with the correct layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* network */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template - void UpdateGradients(const InputType& input, - const ErrorType& error, - std::tuple& network) - { - Update(std::get<0>(network), - input, - std::get<1>(network).Delta(), - std::get<1>(network).OutputParameter()); - - UpdateGradients<1, ErrorType, Tp...>(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta(), - std::get(network).OutputParameter()); - - UpdateGradients(error, network); - } - - template - typename std::enable_if::type - UpdateGradients(const ErrorType& error, std::tuple& network) - { - Update(std::get(network), - std::get(network).InputParameter(), - error, - std::get(network).OutputParameter()); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(LayerType& layer, - const InputType& input, - const ErrorType& error, - GradientType& /* gradient */) - { - layer.Gradient(input, error, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(LayerType& /* layer */, - const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - // Nothing to do here - } - - //! The locator network. - LocatorType locator; - - //! The location sensor network. - LocationSensorType locationSensor; - - //! The glimpse sensor network. - GlimpseSensorType glimpseSensor; - - //! The glimpse network. - GlimpseType glimpse; - - //! The start network. - StartType start; - - //! The feedback network. - FeedbackType feedback; - - //! The transfer network. - TransferType transfer; - - //! The classifier network. - ClassifierType classifier; - - //! The reward predictor network. - RewardPredictorType rewardPredictor; - - //! The number of steps for the back-propagate through time. - size_t nStep; - - //! Locally stored network input size. - size_t inputSize; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! The index of the current step. - size_t step; - - //! 
The activation storage we are using to perform the feed backward pass for - //! the glimpse network. - boost::ptr_vector glimpseActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator network. - boost::ptr_vector locatorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the feedback network. - boost::ptr_vector feedbackActivations; - - //! The activation storage we are using to save the feedback network input. - boost::ptr_vector feedbackActivationsInput; - - //! The activation storage we are using to perform the feed backward pass for - //! the transfer network. - boost::ptr_vector transferActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the location sensor network. - boost::ptr_vector locationSensorActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the glimpse sensor network. - boost::ptr_vector glimpseSensorMatActivations; - boost::ptr_vector glimpseSensorCubeActivations; - - //! The activation storage we are using to perform the feed backward pass for - //! the locator input. - boost::ptr_vector locatorInput; - - //! The storage we are using to save the location. - boost::ptr_vector location; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatCounter; - size_t glimpseSensorCubeCounter; - - //! The current number of activations in the glimpse network. - size_t glimpseActivationsCounter; - - //! The current number of activations in the glimpse start network. - size_t startActivationsCounter; - - //! The current number of activations in the feedback network. - size_t feedbackActivationsCounter; - - //! The current number of activations in the transfer network. - size_t transferActivationsCounter; - - //! The current number of activations in the locator network. - size_t locatorActivationsCounter; - - //! The current number of activations in the location sensor network. - size_t locationSensorActivationsCounter; - - //! The current number of activations in the glimpse sensor network. - size_t glimpseSensorMatActivationsCounter; - size_t glimpseSensorCubeActivationsCounter; - - //! The current number of location for the location storage. - size_t locationCounter; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::mat predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Storage the merge the reward input. - arma::field rewardInput; - - //! The current input. - arma::cube input; - - //! The current target. - arma::mat target; - - //! Locally stored performance functions. - NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction; - VRClassRewardLayer<> vRClassRewardFunction; - - //! Locally stored size of the locator network. - size_t locatorSize; - - //! Locally stored size of the location sensor network. - size_t locationSensorSize; - - //! Locally stored size of the glimpse sensor network. - size_t glimpseSensorSize; - - //! Locally stored size of the glimpse network. - size_t glimpseSize; - - //! Locally stored size of the start network. - size_t startSize; - - //! Locally stored size of the feedback network. - size_t feedbackSize; - - //! Locally stored size of the transfer network. - size_t transferSize; - - //! 
Locally stored size of the classifier network. - size_t classifierSize; - - //! Locally stored size of the reward predictor network. - size_t rewardPredictorSize; - - //! Locally stored recurrent gradient. - arma::mat recurrentGradient; - - //! Locally stored action error. - arma::mat actionError; - - //! Locally stored current location. - arma::mat evaluationLocation; -}; // class RecurrentNeuralAttention - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "rmva_impl.hpp" - -#endif diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp deleted file mode 100644 index cfb310b3993..00000000000 --- a/src/mlpack/methods/rmva/rmva_impl.hpp +++ /dev/null @@ -1,740 +0,0 @@ -/** - * @file rmva_impl.hpp - * @author Marcus Edel - * - * Implementation of the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP -#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP - -// In case it hasn't been included yet. -#include "rmva.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template< - typename TypeLocator, - typename TypeLocationSensor, - typename TypeGlimpseSensor, - typename TypeGlimpse, - typename TypeStart, - typename TypeFeedback, - typename TypeTransfer, - typename TypeClassifier, - typename TypeRewardPredictor -> -RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::RecurrentNeuralAttention(TypeLocator&& locator, - TypeLocationSensor&& locationSensor, - TypeGlimpseSensor&& glimpseSensor, - TypeGlimpse&& glimpse, - TypeStart&& start, - TypeFeedback&& feedback, - TypeTransfer&& transfer, - TypeClassifier&& classifier, - TypeRewardPredictor&& rewardPredictor, - const size_t nStep, - InitializationRuleType initializeRule) : - locator(std::forward(locator)), - locationSensor(std::forward(locationSensor)), - glimpseSensor(std::forward(glimpseSensor)), - glimpse(std::forward(glimpse)), - start(std::forward(start)), - feedback(std::forward(feedback)), - transfer(std::forward(transfer)), - classifier(std::forward(classifier)), - rewardPredictor(std::forward(rewardPredictor)), - nStep(nStep), - inputSize(0) -{ - // Set the network size. 
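-  // Each sub-network's parameter count is queried first so that the single
-  // flat 'parameter' vector can be partitioned into per-network blocks by
-  // offset below.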
- locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - initializeRule.Initialize(parameter, locatorSize + locationSensorSize + glimpseSensorSize + - glimpseSize + feedbackSize + transferSize + classifierSize + rewardPredictorSize + startSize, 1); - - // Set the network weights. - NetworkWeights(initializeRule, parameter, this->locator); - NetworkWeights(initializeRule, parameter, this->locationSensor, locatorSize); - NetworkWeights(initializeRule, parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(initializeRule, parameter, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkWeights(initializeRule, parameter, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkWeights(initializeRule, parameter, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(initializeRule, parameter, this->classifier, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize); - NetworkWeights(initializeRule, parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(initializeRule, parameter, this->start, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize + rewardPredictorSize); - - rewardInput = arma::field(2, 1); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template class OptimizerType> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Predict(arma::mat& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - SinglePredict(arma::cube(predictors.colptr(0), 28, 28, 1), responsesTemp); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < predictors.n_cols; i++) - { - SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp); - responses.col(i) = responsesTemp.col(0); - } -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -double RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - input = arma::cube(predictors.colptr(i), 28, 28, 1); - target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - - // Get the locator input size. - if (!inputSize) - { - inputSize = NetworkInputSize(locator); - } - - glimpseSensorMatCounter = 0; - glimpseSensorCubeCounter = 0; - glimpseActivationsCounter = 0; - locatorActivationsCounter = 0; - locationSensorActivationsCounter = 0; - glimpseSensorMatActivationsCounter = 0; - glimpseSensorCubeActivationsCounter = 0; - locationCounter = 0; - feedbackActivationsCounter = 0; - transferActivationsCounter = 0; - - // Reset networks. - ResetParameter(locator); - ResetParameter(locationSensor); - ResetParameter(glimpseSensor); - ResetParameter(glimpse); - ResetParameter(feedback); - ResetParameter(transfer); - ResetParameter(classifier); - ResetParameter(rewardPredictor); - ResetParameter(start); - - // Reset activation storage. - glimpseActivations.clear(); - locatorActivations.clear(); - locationSensorActivations.clear(); - glimpseSensorMatActivations.clear(); - glimpseSensorCubeActivations.clear(); - feedbackActivations.clear(); - transferActivations.clear(); - locatorInput.clear(); - location.clear(); - feedbackActivationsInput.clear(); - - // Sample an initial starting actions by forwarding zeros through the locator. - locatorInput.push_back(new arma::cube(arma::zeros(inputSize, 1, - input.n_slices))); - - // Forward pass throught the recurrent network. - for (step = 0; step < nStep; step++) - { - // Locator forward pass. - Forward(locatorInput.back(), locator); - SaveActivations(locatorActivations, locator, locatorActivationsCounter); - - // Location sensor forward pass. 
- Forward(std::get::value - 1>( - locator).OutputParameter(), locationSensor); - SaveActivations(locationSensorActivations, locationSensor, - locationSensorActivationsCounter); - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(std::get::value - 1>( - locator).OutputParameter(), glimpseSensor); - - // Save the location for the backward path. - location.push_back(new arma::mat(std::get::value - 1>(locator).OutputParameter())); - - // Glimpse sensor forward pass. - Forward(input, glimpseSensor); - SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations, - glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location sensor and - // glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Glimpse forward pass. - Forward(concatLayerOutput, glimpse); - SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter); - - if (step == 0) - { - // Start forward pass. - Forward(std::get::value - 1>( - glimpse).OutputParameter(), start); - - // Transfer forward pass. - Forward(std::get::value - 1>( - start).OutputParameter(), transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - else - { - // Feedback forward pass. - Forward(std::get::value - 1>( - transfer).OutputParameter(), feedback); - SaveActivations(feedbackActivations, feedback, - feedbackActivationsCounter); - - feedbackActivationsInput.push_back(new arma::mat( - std::get::value - 1>( - transfer).OutputParameter().memptr(), - std::get::value - 1>( - transfer).OutputParameter().n_rows, - std::get::value - 1>( - transfer).OutputParameter().n_cols)); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Transfer forward pass. - Forward(feedbackLayerOutput, transfer); - SaveActivations(transferActivations, transfer, - transferActivationsCounter); - } - - // Update the input for the next run - locatorInput.push_back(new arma::cube( - std::get::value - 1>( - transfer).OutputParameter().memptr(), locatorInput.back().n_rows, - locatorInput.back().n_cols, locatorInput.back().n_slices)); - } - - // Classifier forward pass. - Forward(locatorInput.back().slice(0), classifier); - - // Reward predictor forward pass. - Forward(std::get::value - 1>( - classifier).OutputParameter(), rewardPredictor); - - double performanceError = negativeLogLikelihoodFunction.Forward( - std::get::value - 1>( - classifier).OutputParameter(), target); - - // Create the input for the vRClassRewardFunction function. - // For which we use the output from the classifier and the rewardPredictor. 
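-  // rewardInput(0, 0) carries the class scores and rewardInput(1, 0) the
-  // predicted baseline reward used for variance reduction.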
- rewardInput(0, 0) = std::get::value - 1>( - classifier).OutputParameter(); - rewardInput(1, 0) = std::get::value - 1>( - rewardPredictor).OutputParameter(); - - performanceError += vRClassRewardFunction.Forward(rewardInput, target); - - return performanceError; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - // Reset the gradient. - if (gradient.is_empty()) - { - gradient = arma::zeros(parameter.n_rows, parameter.n_cols); - } - else - { - gradient.zeros(); - } - - // Reset the recurrent gradient. - if (recurrentGradient.is_empty()) - { - recurrentGradient = arma::zeros(parameter.n_rows, - parameter.n_cols); - - actionError = arma::zeros( - std::get::value - 1>( - locator).OutputParameter().n_rows, - std::get::value - 1>( - locator).OutputParameter().n_cols); - } - else - { - recurrentGradient.zeros(); - } - - // Set the recurrent gradient. - NetworkGradients(recurrentGradient, this->locator); - NetworkGradients(recurrentGradient, this->locationSensor, locatorSize); - NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkGradients(recurrentGradient, this->glimpse, locatorSize + - locationSensorSize + glimpseSensorSize); - NetworkGradients(recurrentGradient, this->feedback, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize); - NetworkGradients(recurrentGradient, this->transfer, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize); - - // Set the gradient. - NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkGradients(gradient, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkGradients(gradient, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - - // Negative log likelihood backward pass. - negativeLogLikelihoodFunction.Backward(std::get::value - 1>(classifier).OutputParameter(), target, - negativeLogLikelihoodFunction.OutputParameter()); - - const double reward = vRClassRewardFunction.Backward(rewardInput, target, - vRClassRewardFunction.OutputParameter()); - - // Propogate reward through all modules. - ResetReward(reward, locator); - ResetReward(reward, locationSensor); - ResetReward(reward, glimpseSensor); - ResetReward(reward, glimpse); - ResetReward(reward, classifier); - - // RewardPredictor backward pass. - Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor); - - arma::mat classifierError = - negativeLogLikelihoodFunction.OutputParameter() + - vRClassRewardFunction.OutputParameter()(0, 0) + - std::get<0>(rewardPredictor).Delta(); - - // Classifier backward pass. 
- Backward(classifierError, classifier); - - // Set the initial recurrent error for the first backward step. - arma::mat recurrentError = std::get<0>(classifier).Delta(); - - for (step = nStep - 1; nStep >= 0; step--) - { - // Load the locator activations. - LoadActivations(locatorInput[step], locatorActivations, - locatorActivationsCounter, locator); - - // Load the location sensor activations. - LoadActivations(std::get::value - 1>( - locator).OutputParameter(), locationSensorActivations, - locationSensorActivationsCounter, locationSensor); - - // Load the glimpse sensor activations. - LoadActivations(input, glimpseSensorMatActivations, - glimpseSensorCubeActivations, glimpseSensorMatCounter, - glimpseSensorCubeCounter, glimpseSensor); - - // Concat the parameter activation from the location and glimpse sensor. - arma::mat concatLayerOutput = arma::join_cols( - std::get::value - 1>( - locationSensor).OutputParameter(), - std::get::value - 1>( - glimpseSensor).OutputParameter()); - - // Load the glimpse activations. - LoadActivations(concatLayerOutput, glimpseActivations, - glimpseActivationsCounter, glimpse); - - - if (step == 0) - { - // Load the transfer activations. - LoadActivations(std::get::value - 1>( - start).OutputParameter(), transferActivations, - transferActivationsCounter, transfer); - } - else - { - // Load the feedback activations. - LoadActivations(std::get::value - 1>( - transfer).OutputParameter(), feedbackActivations, - feedbackActivationsCounter, feedback); - - arma::mat feedbackLayerOutput = - std::get::value - 1>( - glimpse).OutputParameter() + - std::get::value - 1>( - feedback).OutputParameter(); - - // Load the transfer activations. - LoadActivations(feedbackLayerOutput, transferActivations, - transferActivationsCounter, transfer); - } - - // Set the location parameter for all layer that implement a Location - // function e.g. GlimpseLayer. - ResetLocation(location[step], glimpseSensor); - - // Locator backward pass. - Backward(actionError, locator); - - // Transfer backward pass. - Backward(recurrentError, transfer); - - // glimpse network - Backward(std::get<0>(transfer).Delta(), glimpse); - - // Split up the error of the concat layer. - arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat( - 0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0); - arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat( - std::get<0>(glimpse).Delta().n_elem / 2, 0, - std::get<0>(glimpse).Delta().n_elem - 1, 0); - - // Location sensor backward pass. - Backward(locationSensorError, locationSensor); - - // Glimpse sensor backward pass. - Backward(glimpseSensorError, glimpseSensor); - - if (step != 0) - { - // Feedback backward pass. - Backward(std::get<0>(transfer).Delta(), feedback); - } - - // Update the recurrent network gradients. - UpdateGradients(std::get<0>(locationSensor).Delta(), locator); - UpdateGradients(std::get<0>(transfer).Delta(), glimpse); - UpdateGradients(std::get<0>(transfer).Delta(), locationSensor); - UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor); - - // Feedback module. - if (step != 0) - { - UpdateGradients(feedbackActivationsInput[step - 1], - std::get<0>(transfer).Delta(), feedback); - } - else - { - // Set the feedback gradient to zero. 
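-      // At the first step there is no previous time step, so the feedback
-      // network gets no gradient and its block of the recurrent gradient is
-      // cleared instead.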
- recurrentGradient.submat(locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros(); - - UpdateGradients(std::get<0>(transfer).Delta(), start); - } - - // Update the overall recurrent gradient. - gradient += recurrentGradient; - - if (step != 0) - { - // Update the recurrent error for the next backward step. - recurrentError = std::get<0>(locator).Delta() + - std::get<0>(feedback).Delta(); - } - else - { - break; - } - } - - // Reward predictor gradient update. - UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0), - rewardPredictor); - - // Classifier gradient update. - UpdateGradients(std::get<1>(classifier).Delta(), classifier); -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -const arma::mat& RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Location() -{ - if (!location.empty()) - { - evaluationLocation = arma::mat(location[0].n_elem, location.size()); - - for (size_t i = 0; i < location.size(); i++) - { - evaluationLocation.col(i) = arma::vectorise(location[i]); - } - } - - return evaluationLocation; -} - -template< - typename LocatorType, - typename LocationSensorType, - typename GlimpseSensorType, - typename GlimpseType, - typename StartType, - typename FeedbackType, - typename TransferType, - typename ClassifierType, - typename RewardPredictorType, - typename InitializationRuleType, - typename MatType -> -template -void RecurrentNeuralAttention< - LocatorType, - LocationSensorType, - GlimpseSensorType, - GlimpseType, - StartType, - FeedbackType, - TransferType, - ClassifierType, - RewardPredictorType, - InitializationRuleType, - MatType ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(inputSize, "inputSize"); - ar & data::CreateNVP(nStep, "nStep"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - // Set the netork size. - locatorSize = NetworkSize(this->locator); - locationSensorSize = NetworkSize(this->locationSensor); - glimpseSensorSize = NetworkSize(this->glimpseSensor); - glimpseSize = NetworkSize(this->glimpse); - feedbackSize = NetworkSize(this->feedback); - transferSize = NetworkSize(this->transfer); - classifierSize = NetworkSize(this->classifier); - rewardPredictorSize = NetworkSize(this->rewardPredictor); - startSize = NetworkSize(this->start); - - // Set the network weights. 
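-    // When loading, the per-network weight aliases are re-bound into the
-    // freshly deserialized 'parameter' vector using the same offsets as the
-    // constructor.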
- NetworkWeights(parameter, this->locator); - NetworkWeights(parameter, this->locationSensor, locatorSize); - NetworkWeights(parameter, this->glimpseSensor, locatorSize + - locationSensorSize); - NetworkWeights(parameter, this->glimpse, locatorSize + locationSensorSize + - glimpseSensorSize); - NetworkWeights(parameter, this->feedback, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize); - NetworkWeights(parameter, this->transfer, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize); - NetworkWeights(parameter, this->classifier, locatorSize + locationSensorSize - + glimpseSensorSize + glimpseSize + feedbackSize + transferSize); - NetworkWeights(parameter, this->rewardPredictor, locatorSize + - locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize + - transferSize + classifierSize); - NetworkWeights(parameter, this->start, locatorSize + locationSensorSize + - glimpseSensorSize + glimpseSize + feedbackSize + transferSize + - classifierSize + rewardPredictorSize); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp deleted file mode 100644 index a3483d63de6..00000000000 --- a/src/mlpack/methods/rmva/rmva_main.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * @file rmva_main.cpp - * @author Marcus Edel - * - * Main executable for the Recurrent Model for Visual Attention. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include "rmva.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace mlpack; -using namespace mlpack::ann; -using namespace mlpack::optimization; -using namespace std; - -PROGRAM_INFO("Recurrent Model for Visual Attention", - "This program trains the Recurrent Model for Visual Attention on the given " - "labeled training set, or loads a model from the given model file, and then" - " may use that trained model to classify the points in a given test set." - "\n\n" - "Labels are expected to be passed in separately as their own file " - "(--labels_file). If training is not desired, a pre-existing model can be " - "loaded with the --input_model_file (-m) option." - "\n\n" - "If classifying a test set is desired, the test set should be in the file " - "specified with the --test_file (-T) option, and the classifications will " - "be saved to the file specified with the --output_file (-o) option. If " - "saving a trained model is desired, the --output_model_file (-M) option " - "should be given."); - -// Model loading/saving. -PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for " - "Visual Attention.", "m", ""); -PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for" - " Visual Attention to.", "M"); - -// Training parameters. 
-PARAM_MATRIX_IN("training", "Matrix containing the training set.", "t"); -PARAM_MATRIX_IN("labels", "Matrix containing labels for the training set.", - "l"); - -PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or " - "'lbfgs'.", "O", "minibatch-sgd"); - -PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp" - " (0 indicates no limit).", "n", 500000); -PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or " - "RMSProp.", "e", 1e-7); - -PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent " - "(alpha),", "a", 0.01); -PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are " - "visited for SGD or mini-batch SGD.", "L"); -PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20); - -PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r", - 7); - -PARAM_INT_IN("classes", "The number of classes.", "c", 10); - -PARAM_INT_IN("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0); - -// Test parameters. -PARAM_MATRIX_IN("test", "Matrix containing the test set.", "T"); -PARAM_MATRIX_OUT("output", "The matrix in which the predicted labels for the " - "test set will be written.", "o"); - -int main(int argc, char** argv) -{ - CLI::ParseCommandLine(argc, argv); - - // Check input parameters. - if (CLI::HasParam("training") && CLI::HasParam("input_model_file")) - Log::Fatal << "Cannot specify both --training_file (-t) and " - << "--input_model_file (-m)!" << endl; - - if (!CLI::HasParam("training") && !CLI::HasParam("input_model_file")) - Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are" - << " specified!" << endl; - - if (!CLI::HasParam("training") && CLI::HasParam("labels")) - Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is " - << "not specified." << endl; - - if (!CLI::HasParam("output") && !CLI::HasParam("output_model_file")) - Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) " - << "specified; no output will be saved!" << endl; - - if (CLI::HasParam("output") && !CLI::HasParam("test")) - Log::Warn << "--output_file (-o) ignored because no test file specified " - << "with --test_file (-T)." << endl; - - if (!CLI::HasParam("output") && CLI::HasParam("test")) - Log::Warn << "--test_file (-T) specified, but classification results will " - << "not be saved because --output_file (-o) is not specified." << endl; - - const string optimizerType = CLI::GetParam("optimizer"); - - if ((optimizerType != "sgd") && (optimizerType != "lbfgs") && - (optimizerType != "minibatch-sgd")) - { - Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be " - << "'sgd', 'minibatch-sgd', or 'lbfgs'!" 
<< endl; - } - - const double stepSize = CLI::GetParam("step_size"); - const size_t maxIterations = (size_t) CLI::GetParam("max_iterations"); - const double tolerance = CLI::GetParam("tolerance"); - const bool shuffle = !CLI::HasParam("linear_scan"); - const size_t batchSize = (size_t) CLI::GetParam("batch_size"); - const size_t rho = (size_t) CLI::GetParam("rho"); - const size_t numClasses = (size_t) CLI::GetParam("classes"); - - const size_t hiddenSize = 256; - const double unitPixels = 13; - const double locatorStd = 0.11; - const size_t imageSize = 28; - const size_t locatorHiddenSize = 128; - const size_t glimpsePatchSize = 8; - const size_t glimpseDepth = 1; - const size_t glimpseScale = 2; - const size_t glimpseHiddenSize = 128; - const size_t imageHiddenSize = 256; - - - // Locator network. - LinearMappingLayer<> linearLayer0(hiddenSize, 2); - BiasLayer<> biasLayer0(2, 1); - HardTanHLayer<> hardTanhLayer0; - ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd); - HardTanHLayer<> hardTanhLayer1; - MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize); - auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0, - reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0); - - // Location sensor network. - LinearLayer<> linearLayer1(2, locatorHiddenSize); - BiasLayer<> biasLayer1(locatorHiddenSize, 1); - ReLULayer<> rectifierLayer0; - auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0); - - // Glimpse sensor network. - GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale); - LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize); - BiasLayer<> biasLayer2(glimpseHiddenSize, 1); - ReLULayer<> rectifierLayer1; - auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2, - rectifierLayer1); - - // Glimpse network. - LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize, - imageHiddenSize); - BiasLayer<> biasLayer3(imageHiddenSize, 1); - ReLULayer<> rectifierLayer2; - LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize); - BiasLayer<> biasLayer4(hiddenSize, 1); - auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2, - linearLayer4, biasLayer4); - - // Feedback network. - LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize); - BiasLayer<> recurrentLayerBias0(hiddenSize, 1); - auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0); - - // Start network. - AdditionLayer<> startLayer0(hiddenSize, 1); - auto start = std::tie(startLayer0); - - // Transfer network. - ReLULayer<> rectifierLayer3; - auto transfer = std::tie(rectifierLayer3); - - // Classifier network. - LinearLayer<> linearLayer5(hiddenSize, numClasses); - BiasLayer<> biasLayer6(numClasses, 1); - LogSoftmaxLayer<> logSoftmaxLayer0; - auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0); - - // Reward predictor network. - ConstantLayer<> constantLayer0(1, 1); - AdditionLayer<> additionLayer0(1, 1); - auto rewardPredictor = std::tie(constantLayer0, additionLayer0); - - // Recurrent Model for Visual Attention. - RecurrentNeuralAttention - net(locator, locationSensor, glimpseSensor, glimpse, start, feedback, - transfer, classifier, rewardPredictor, rho); - - // Either we have to train a model, or load a model. - if (CLI::HasParam("training")) - { - arma::mat trainingData = std::move(CLI::GetParam("training")); - - arma::mat labels; - - // Did the user pass in labels? - if (CLI::HasParam("labels")) - { - // Load labels. 
- labels = std::move(CLI::GetParam("labels")); - - // Do the labels need to be transposed? - if (labels.n_cols == 1) - labels = labels.t(); - } - - // Now run the optimization. - if (optimizerType == "sgd") - { - SGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - else if (optimizerType == "minibatch-sgd") - { - MiniBatchSGD opt(net); - opt.StepSize() = stepSize; - opt.MaxIterations() = maxIterations; - opt.Tolerance() = tolerance; - opt.Shuffle() = shuffle; - opt.BatchSize() = batchSize; - - Timer::Start("rmva_training"); - net.Train(trainingData, labels, opt); - Timer::Stop("rmva_training"); - } - } - else - { - // Load the model from file. - data::Load(CLI::GetParam("input_model_file"), "rmva_model", net); - } - - // Do we need to do testing? - if (CLI::HasParam("test")) - { - arma::mat testingData = std::move(CLI::GetParam("test")); - - // Time the running of the Naive Bayes Classifier. - arma::mat results; - Timer::Start("rmva_testing"); - net.Predict(testingData, results); - Timer::Stop("rmva_testing"); - - if (CLI::HasParam("output")) - CLI::GetParam("output") = std::move(results); - } - - // Save the model, if requested. - if (CLI::HasParam("output_model_file")) - data::Save(CLI::GetParam("output_model_file"), "rmva_model", net); -} From e989fb2de9347108d86ec41e3a83a5291075f431 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 7 Nov 2016 22:13:00 +0100 Subject: [PATCH 39/82] Remove unused ann functions. --- src/mlpack/methods/ann/cnn.hpp | 448 ------------------ src/mlpack/methods/ann/cnn_impl.hpp | 289 ----------- src/mlpack/methods/ann/network_traits.hpp | 55 --- src/mlpack/methods/ann/network_util.hpp | 247 ---------- src/mlpack/methods/ann/network_util_impl.hpp | 286 ----------- .../ann/performance_functions/CMakeLists.txt | 17 - .../performance_functions/cee_function.hpp | 74 --- .../performance_functions/mse_function.hpp | 61 --- .../performance_functions/sparse_function.hpp | 141 ------ .../performance_functions/sse_function.hpp | 64 --- src/mlpack/tests/network_util_test.cpp | 149 ------ .../tests/performance_functions_test.cpp | 54 --- 12 files changed, 1885 deletions(-) delete mode 100644 src/mlpack/methods/ann/cnn.hpp delete mode 100644 src/mlpack/methods/ann/cnn_impl.hpp delete mode 100644 src/mlpack/methods/ann/network_traits.hpp delete mode 100644 src/mlpack/methods/ann/network_util.hpp delete mode 100644 src/mlpack/methods/ann/network_util_impl.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/CMakeLists.txt delete mode 100644 src/mlpack/methods/ann/performance_functions/cee_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/mse_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/sparse_function.hpp delete mode 100644 src/mlpack/methods/ann/performance_functions/sse_function.hpp delete mode 100644 src/mlpack/tests/network_util_test.cpp delete mode 100644 src/mlpack/tests/performance_functions_test.cpp diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp deleted file mode 100644 index 72e0803f179..00000000000 --- a/src/mlpack/methods/ann/cnn.hpp +++ /dev/null @@ -1,448 +0,0 @@ -/** - * @file cnn.hpp - * @author Shangtong Zhang - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. 
- * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_HPP -#define MLPACK_METHODS_ANN_CNN_HPP - -#include - -#include -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard convolutional network. - * - * @tparam LayerTypes Contains all layer modules used to construct the network. - * @tparam OutputLayerType The outputlayer type used to evaluate the network. - * @tparam PerformanceFunction Performance strategy used to calculate the error. - */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> -> -class CNN -{ - public: - //! Convenience typedef for the internal model construction. - using NetworkType = CNN; - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network) and the given optimizer. - * Optionally, specify which initialize rule and performance function should - * be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template class OptimizerType> - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the CNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Outputlayer used to evaluate the network. 
- * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network paramter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to claculate the error. - */ - template - CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - /** - * Train the convolutional neural network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, const arma::mat& responses); - - /** - * Train the convolutional neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); - - /** - * Train the convolutional neural network on the given input data using the - * given optimizer. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer); - - /** - * Predict the responses to a given set of predictors. The responses will - * reflect the output of the given output layer as returned by the - * OutputClass() function. - * - * @param predictors Input predictors. - * @param responses Matrix to put output predictions of responses into. - */ - void Predict(arma::cube& predictors, arma::mat& responses); - - /** - * Evaluate the convolutional neural network with the given parameters. This - * function is usually called by the optimizer to train the model. - * - * @param parameters Matrix model parameters. - * @param i Index of point to use for objective function evaluation. - * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. 
- */ - double Evaluate(const arma::mat& parameters, - const size_t i, - const bool deterministic = true); - - /** - * Evaluate the gradient of the convolutional neural network with the given - * parameters, and with respect to only one point in the dataset. This is - * useful for optimizers such as SGD, which require a separable objective - * function. - * - * @param parameters Matrix of the model parameters to be optimized. - * @param i Index of points to use for objective function gradient evaluation. - * @param gradient Matrix to output gradient into. - */ - void Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient); - - //! Return the number of separable functions (the number of predictor points). - size_t NumFunctions() const { return numFunctions; } - - //! Return the initial point for the optimization. - const arma::mat& Parameters() const { return parameter; } - //! Modify the initial point for the optimization. - arma::mat& Parameters() { return parameter; } - - /** - * Serialize the convolutional neural network. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /** - * Reset the network by setting the layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. - */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ - } - - /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. - */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - /** - * Link the calculated activation with the connection layer. - */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. 
- outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } - - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } - - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } - - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated convolutional neural network. - LayerTypes network; - - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; - - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; - - //! The current evaluation mode (training or testing). - bool deterministic; - - //! Matrix of (trained) parameters. - arma::mat parameter; - - //! The matrix of data points (predictors). - arma::cube predictors; - - //! The matrix of responses to the input data points. - arma::mat responses; - - //! The number of separable functions (the number of predictor points). - size_t numFunctions; - - //! Locally stored backward error. - arma::mat error; - - //! Locally stored sample size. - size_t sampleSize; -}; // class CNN - -} // namespace ann -} // namespace mlpack - -// Include implementation. 
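ResetParameter(), Forward(), Backward(), and UpdateGradients() above all rely on the same compile-time recursion: an index I walks the std::tuple of layers, and std::enable_if selects a terminating overload once I reaches the tuple size. A minimal, self-contained sketch of that pattern, independent of mlpack (the "layers" here are just an int and a double standing in for real layer objects):

#include <cstddef>
#include <iostream>
#include <tuple>
#include <type_traits>

// Terminating overload: enabled once the index has walked past the last element.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<I == sizeof...(Tp), void>::type
VisitAll(std::tuple<Tp...>& /* layers */) { /* Nothing left to visit. */ }

// Recursive overload: visit element I, then recurse into the tail.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<(I < sizeof...(Tp)), void>::type
VisitAll(std::tuple<Tp...>& layers)
{
  std::cout << std::get<I>(layers) << std::endl;  // "Forward" through layer I.
  VisitAll<I + 1, Tp...>(layers);
}

int main()
{
  int layer1 = 1;
  double layer2 = 2.5;

  // std::tie builds a tuple of references, which is how the network classes
  // in this patch receive their layer modules.
  auto layers = std::tie(layer1, layer2);
  VisitAll(layers);  // Prints 1 then 2.5, visiting each "layer" in order.
}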
-#include "cnn_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/cnn_impl.hpp b/src/mlpack/methods/ann/cnn_impl.hpp deleted file mode 100644 index ba774ba3097..00000000000 --- a/src/mlpack/methods/ann/cnn_impl.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file cnn_impl.hpp - * @author Marcus Edel - * - * Definition of the CNN class, which implements convolutional neural networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_CNN_IMPL_HPP -#define MLPACK_METHODS_ANN_CNN_IMPL_HPP - -// In case it hasn't been included yet. -#include "cnn.hpp" - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - - -template -template class OptimizerType -> -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - const arma::cube& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - Train(predictors, responses); -} - -template -template -CNN::CNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::cube& predictors, - const arma::mat& responses, - OptimizerType& optimizer) -{ - numFunctions = responses.n_cols; - sampleSize = predictors.n_slices / responses.n_cols; - this->predictors = predictors; - this->responses = responses; - - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template< - template class OptimizerType -> -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ - // Train the model. - Timer::Start("cnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("cnn_optimization"); - - Log::Info << "CNN::CNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Predict(arma::cube& predictors, arma::mat& responses) -{ - deterministic = true; - - arma::mat responsesTemp; - ResetParameter(network); - Forward(predictors.slices(0, sampleSize - 1), network); - OutputPrediction(responsesTemp, network); - - responses = arma::mat(responsesTemp.n_elem, predictors.n_slices); - responses.col(0) = responsesTemp.col(0); - - for (size_t i = 1; i < (predictors.n_slices / sampleSize); i++) - { - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); - responses.col(i) = responsesTemp.col(0); - } -} - -template -double CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, - const size_t i, - const bool deterministic) -{ - this->deterministic = deterministic; - - ResetParameter(network); - Forward(predictors.slices(i, (i + 1) * sampleSize - 1), network); - - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); -} - -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, - const size_t i, - arma::mat& gradient) -{ - Evaluate(parameter, i, false); - - NetworkGradients(gradient, network); - - Backward<>(error, network); - UpdateGradients<>(network); -} - -template -template -void CNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Serialize(Archive& ar, const unsigned int /* version */) -{ - ar & data::CreateNVP(parameter, "parameter"); - ar & data::CreateNVP(sampleSize, "sampleSize"); - - // If we are loading, we need to initialize the weights. - if (Archive::is_loading::value) - { - NetworkWeights(parameter, network); - } -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/network_traits.hpp b/src/mlpack/methods/ann/network_traits.hpp deleted file mode 100644 index 5aa91e8ae60..00000000000 --- a/src/mlpack/methods/ann/network_traits.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file network_traits.hpp - * @author Marcus Edel - * - * NetworkTraits class, a template class to get information about various - * networks. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP -#define MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP - -namespace mlpack { -namespace ann { - -/** - * This is a template class that can provide information about various - * networks. By default, this class will provide the weakest possible - * assumptions on networks, and each network should override values as - * necessary. If a network doesn't need to override a value, then there's no - * need to write a NetworkTraits specialization for that class. - */ -template -class NetworkTraits -{ - public: - /** - * This is true if the network is a feed forward neural network. - */ - static const bool IsFNN = false; - - /** - * This is true if the network is a recurrent neural network. - */ - static const bool IsRNN = false; - - /** - * This is true if the network is a convolutional neural network. 
- */ - static const bool IsCNN = false; - - /** - * This is true if the network is a sparse autoencoder. - */ - static const bool IsSAE = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp deleted file mode 100644 index 93bdf044355..00000000000 --- a/src/mlpack/methods/ann/network_util.hpp +++ /dev/null @@ -1,247 +0,0 @@ -/** - * @file network_util.hpp - * @author Marcus Edel - * - * Neural network utilities. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP - -#include - -#include - -/** - * Neural network utility functions. - */ -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Auxiliary function to get the number of weights of the specified network. - * - * @param network The network used for specifying the number of weights. - * @return The number of weights. - */ -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkSize(std::tuple& network); - -/** - * Auxiliary function to get the number of weights of the specified layer. - * - * @param layer The layer used for specifying the number of weights. - * @param output The layer output parameter. - * @return The number of weights. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network. - * - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer. - * - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. - */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output); - -/** - * Auxiliary function to set the gradients of the specified network. - * - * @param gradients The gradients used to set the gradient of the network. - * @param network The network used to set the gradients. - * @param offset The memory offset of the gradients. - * return The number of gradients. 
- */ -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkGradients(arma::mat& gradients, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the gradients of the specified layer. - * - * @param layer The layer used to set the gradients. - * @param gradients The gradients used to set the gradient of the layer. - * @param offset The memory offset of the gradients. - * @param output The output parameter of the layer. - * @return The number of gradients. - */ -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output); - -/** - * Auxiliary function to get the input size of the specified network. - * - * @param network The network used for specifying the input size. - * @return The input size. - */ -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network); - -/** - * Auxiliary function to get the input size of the specified layer. - * - * @param layer The layer used for specifying the input size. - * @param output The layer output parameter. - * @return The input size. - */ -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& output); - -/** - * Auxiliary function to set the weights of the specified network using a given - * initialize rule. - * - * @param initializeRule The rule used to initialize the network weights. - * @param weights The weights used to set the weights of the network. - * @param network The network used to set the weights. - * @param offset The memory offset of the weights. - */ -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset = 0); - -/** - * Auxiliary function to set the weights of the specified layer using the given - * initialize rule. - * - * @param initializeRule The rule used to initialize the layer weights. - * @param layer The layer used to set the weights. - * @param weights The weights used to set the weights of the layer. - * @param offset The memory offset of the weights. - * @param output The output parameter of the layer. - * @return The number of weights. 
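The NetworkWeights()/LayerWeights() functions declared above (their definitions follow later in this patch) work by making each layer's weight matrix a non-owning alias into one flat parameter matrix, using Armadillo's advanced constructor with copy_aux_mem = false. A small standalone sketch of that aliasing trick; the 10x10 and 10x100 shapes are arbitrary, chosen only to match the NetworkWeightsInitTest removed further down:

#include <armadillo>
#include <iostream>

int main()
{
  // One flat parameter vector, as produced by a network initialization rule.
  arma::mat parameter = arma::zeros<arma::mat>(10 * 10 + 10 * 100, 1);

  // Alias the first 100 elements as a 10x10 weight matrix: no copy is made,
  // so the "layer" and the flat vector share the same memory.
  arma::mat w1(parameter.memptr(), 10, 10, false, false);

  // Alias the next 1000 elements as a 10x100 weight matrix.
  arma::mat w2(parameter.memptr() + w1.n_elem, 10, 100, false, false);

  // Updating the flat parameter vector is visible through both aliases.
  parameter.fill(1.0);
  std::cout << arma::accu(w1) << " " << arma::accu(w2) << std::endl;  // 100 1000

  return 0;
}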
- */ -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& output); - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& output); - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - P& output); - -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "network_util_impl.hpp" - -#endif diff --git a/src/mlpack/methods/ann/network_util_impl.hpp b/src/mlpack/methods/ann/network_util_impl.hpp deleted file mode 100644 index 32034576290..00000000000 --- a/src/mlpack/methods/ann/network_util_impl.hpp +++ /dev/null @@ -1,286 +0,0 @@ -/** - * @file network_util_impl.hpp - * @author Marcus Edel - * - * Implementation of the network auxiliary functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP -#define MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP - -#include "network_util_impl.hpp" - -#include - -namespace mlpack { -namespace ann { - -template -typename std::enable_if::type -NetworkSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkSize(std::tuple& network) -{ - return LayerSize(std::get(network), std::get( - network).OutputParameter()) + NetworkSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(weights, network, - offset + LayerWeights(std::get(network), weights, - offset, std::get(network).OutputParameter())); - -} - -template -typename std::enable_if::type -NetworkWeights(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* unused */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* unused */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerWeights(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& 
gradients, - std::tuple& network, - size_t offset) -{ - NetworkGradients(gradients, network, - offset + LayerGradients(std::get(network), gradients, - offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkGradients(arma::mat& /* unused */, - std::tuple& /* unused */, - size_t /* unused */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::mat& /* unused */) -{ - layer.Gradient() = arma::mat(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasGradientCheck::value, size_t>::type -LayerGradients(T& layer, - arma::mat& gradients, - size_t offset, - arma::cube& /* unused */) -{ - layer.Gradient() = arma::cube(gradients.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - !HasGradientCheck::value, size_t>::type -LayerGradients(T& /* unused */, - arma::mat& /* unused */, - size_t /* unused */, - P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkInputSize(std::tuple& network) -{ - const size_t inputSize = LayerInputSize(std::get(network), std::get( - network).OutputParameter()); - - if (inputSize) - { - return inputSize; - } - - return NetworkInputSize(network); -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerInputSize(T& layer, P& /* unused */) -{ - return layer.Weights().n_cols; -} - -template -typename std::enable_if< - !HasWeightsCheck::value, size_t>::type -LayerInputSize(T& /* unused */, P& /* unused */) -{ - return 0; -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& initializeRule, - arma::mat& weights, - std::tuple& network, - size_t offset) -{ - NetworkWeights(initializeRule, weights, - network, offset + LayerWeights(initializeRule, std::get(network), - weights, offset, std::get(network).OutputParameter())); -} - -template -typename std::enable_if::type -NetworkWeights(InitializationRuleType& /* initializeRule */, - arma::mat& /* weights */, - std::tuple& /* network */, - size_t /* offset */) -{ - /* Nothing to do here */ -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::mat& /* output */) -{ - layer.Weights() = arma::mat(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& initializeRule, - T& layer, - arma::mat& weights, - size_t offset, - arma::cube& /* output */) -{ - layer.Weights() = arma::cube(weights.memptr() + offset, - layer.Weights().n_rows, layer.Weights().n_cols, - layer.Weights().n_slices, false, false); - - initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows, - layer.Weights().n_cols); - - return layer.Weights().n_elem; -} - -template -typename std::enable_if< - 
!HasWeightsCheck::value, size_t>::type -LayerWeights(InitializationRuleType& /* initializeRule */, - T& /* layer */, - arma::mat& /* weights */, - size_t /* offset */, - P& /* output */) -{ - return 0; -} - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt b/src/mlpack/methods/ann/performance_functions/CMakeLists.txt deleted file mode 100644 index c64f7263cea..00000000000 --- a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - mse_function.hpp - sse_function.hpp - cee_function.hpp - sparse_function.hpp -) - -# Add directory name to sources. -set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp deleted file mode 100644 index 34244521dc8..00000000000 --- a/src/mlpack/methods/ann/performance_functions/cee_function.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @file cee_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the cross-entropy error performance - * function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cross-entropy error performance function measures the network's - * performance according to the cross entropy errors. The log in the cross- - * entropy take sinto account the closeness of a prediction and is a more - * granular way to calculate the error. - * - * @tparam Layer The layer that is connected with the output layer. - */ -template< - class Layer = LinearLayer< > -> -class CrossEntropyErrorFunction -{ - public: - /** - * Computes the cross-entropy error function.. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the cross-entropy error function. - * - * @param input Input data. - * @param target Target data. - * @return cross-entropy error. 
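A quick worked check of the non-binary branch of Error() just below, using the same values as the CrossEntropyErrorTest that this patch also removes: with input x = (e^{-2}, e^{-1}) and target t = (1, 1),

  CEE = -sum_i t_i * log(x_i) = -(log(e^{-2}) + log(e^{-1})) = -(-2 - 1) = 3,

which is exactly the value that test asserts.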
- */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - if (LayerTraits::IsBinary) - return -arma::dot(arma::trunc_log(arma::abs(target - input)), target); - - return -arma::dot(arma::trunc_log(input), target); - } - -}; // class CrossEntropyErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp deleted file mode 100644 index d2f19334d6f..00000000000 --- a/src/mlpack/methods/ann/performance_functions/mse_function.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file mse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the mean squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The mean squared error performance function measures the network's - * performance according to the mean of squared errors. - */ -class MeanSquaredErrorFunction -{ - public: - /** - * Computes the mean squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the mean squared error function. - * - * @param input Input data. - * @param target Target data. - * @return mean of squared errors. - */ - template - static double Error(const DataType& input, const DataType& target, const DataType&) - { - return arma::mean(arma::mean(arma::square(target - input))); - } - -}; // class MeanSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp deleted file mode 100644 index 145a0b64aff..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/** - * @file sparse_function.hpp - * @author Siddharth Agrawal - * @author Tham Ngap Wei - * - * Definition and implementation of the sparse performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ - -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The cost function design for the sparse autoencoder. - */ -template -class SparseErrorFunction -{ - public: - /** - * Computes the cost of sparse autoencoder. - * - * @param lambda L2-regularization parameter. 
- * @param beta KL divergence parameter. - * @param rho Sparsity parameter. - */ - SparseErrorFunction(const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - lambda(lambda), beta(beta), rho(rho) - { - // Nothing to do here. - } - - SparseErrorFunction(SparseErrorFunction &&layer) noexcept - { - *this = std::move(layer); - } - - SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept - { - lambda = layer.lambda; - beta = layer.beta; - rho = layer.rho; - - return *this; - } - - //! Get the KL divergence parameter. - double Beta() const { return beta; } - //! Modify the KL divergence parameter. - void Beta(double value) { beta = value;} - - //! Get the L2-regularization parameter. - double Lambda() const { return lambda; } - //! Modify the L2-regularization parameter. - void Lambda(double value) { lambda = value;} - - //! Get the sparsity parameter. - double Rho() const { return rho; } - //! Modify the sparsity parameter. - void Rho(double value) { rho = value;} - - /** - * Computes the cost of sparse autoencoder. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const Tp& network, - const InType& target, const InType &error) - { - return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(), - std::get<3>(network).RhoCap(), target, error); - } - - /** - * Computes the cost of sparse autoencoder. - * - * @param w1 weights of hidden layer - * @param w2 weights of output layer - * @param rhoCap Average activations of the hidden layer - * @param target Target data. - * @param error different between output and the input - * @return sum of squared errors. - */ - template - double Error(const InType& w1, const InType& w2, - const InType& rhoCap, const InType& target, - const InType& error) - { - // Calculate squared L2-norms of w1 and w2. - const double wL2SquaredNorm = - arma::accu(w1 % w1) + arma::accu(w2 % w2); - - // Calculate the reconstruction error, the regularization cost and the KL - // divergence cost terms. 'sumOfSquaresError' is the average squared l2-norm - // of the reconstructed data difference. 'weightDecay' is the squared l2-norm - // of the weights w1 and w2. 'klDivergence' is the cost of the hidden layer - // activations not being low. It is given by the following formula: - // KL = sum_over_hSize(rho*log(rho/rhoCaq) + (1-rho)*log((1-rho)/(1-rhoCap))) - const double sumOfSquaresError = - 0.5 * arma::accu(error % error) / target.n_cols; - - const double weightDecay = 0.5 * lambda * wL2SquaredNorm; - const double klDivergence = - beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) * - arma::trunc_log((1 - rho) / (1 - rhoCap))); - - // The cost is the sum of the terms calculated above. - return sumOfSquaresError + weightDecay + klDivergence; - } - - private: - //! Locally stored L2-regularization parameter. - double lambda; - - //! Locally stored KL divergence parameter. - double beta; - - //! Locally stored sparsity parameter. 
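For reference, the total cost that Error() above assembles term by term, written as a single expression (N is target.n_cols and rhoCap_j is the average activation of hidden unit j):

  J = accu(error % error) / (2N)
      + (lambda / 2) * (||w1||_F^2 + ||w2||_F^2)
      + beta * sum_j [ rho * log(rho / rhoCap_j) + (1 - rho) * log((1 - rho) / (1 - rhoCap_j)) ],

i.e. the reconstruction error, the L2 weight decay, and the KL-divergence sparsity penalty.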
- double rho; - -}; // class SparseErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp deleted file mode 100644 index 34055fb74c0..00000000000 --- a/src/mlpack/methods/ann/performance_functions/sse_function.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file sse_function.hpp - * @author Marcus Edel - * - * Definition and implementation of the sum squared error performance function. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP -#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The sum squared error performance function measures the network's performance - * according to the sum of squared errors. - */ -class SumSquaredErrorFunction -{ - public: - /** - * Computes the sum squared error function. - * - * @param network Network type of FFN, CNN or RNN - * @param target Target data. - * @param error same as place holder - * @return sum of squared errors. - */ - template - static double Error(const std::tuple& network, - const DataType& target, - const DataType &error) - { - return Error(std::get(network).OutputParameter(), - target, error); - } - - /** - * Computes the sum squared error function. - * - * @param input Input data. - * @param target Target data. - * @return sum of squared errors. - */ - template - static double Error(const DataType& input, - const DataType& target, - const DataType&) - { - return arma::sum(arma::square(target - input)); - } - -}; // class SumSquaredErrorFunction - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/tests/network_util_test.cpp b/src/mlpack/tests/network_util_test.cpp deleted file mode 100644 index 4f0fcf105e0..00000000000 --- a/src/mlpack/tests/network_util_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/** - * @file network_util_test.cpp - * @author Marcus Edel - * - * Simple tests for things in the network_util file. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(NetworkUtilTest); - -/** - * Test the network size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. 
- auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkSize(weightNetwork), 1100); -} - -/** - * Test the layer size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerSize(baseLayer, baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(10, 10); - BOOST_REQUIRE_EQUAL(LayerSize(linearLayer, - linearLayer.OutputParameter()), 100); -} - -/** - * Test the network input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(NetworkInputSizeTest) -{ - // Create a two layer network without weights. - BaseLayer<> baseLayer1; - BaseLayer<> baseLayer2; - auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(noneWeightNetwork), 0); - - // Create a two layer network. - LinearLayer<> linearLayer1(5, 10); - LinearLayer<> linearLayer2(10, 100); - - // Reuse the layer form the first network. - auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2, - baseLayer2); - - BOOST_REQUIRE_EQUAL(NetworkInputSize(weightNetwork), 5); -} - -/** - * Test the layer input size auxiliary function. - */ -BOOST_AUTO_TEST_CASE(LayerInputSizeTest) -{ - // Create layer without weights. - BaseLayer<> baseLayer; - BOOST_REQUIRE_EQUAL(LayerInputSize(baseLayer, - baseLayer.OutputParameter()), 0); - - // Create layer with weights. - LinearLayer<> linearLayer(5, 10); - BOOST_REQUIRE_EQUAL(LayerInputSize(linearLayer, - linearLayer.OutputParameter()), 5); -} - -/** - * Test the network weight auxiliary function using the given initialization - * rule. - */ -BOOST_AUTO_TEST_CASE(NetworkWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - LinearLayer<> linearLayer2(10, 100); - - arma::mat parameter = arma::zeros(1100, 1); - - // Create the network. - auto network = std::tie(linearLayer1, linearLayer2); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - NetworkWeights(constantInit, parameter, network); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer2.Weights()), 1000); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 1100); -} - -/** - * Test the layer weight auxiliary function using the given initialization rule. - */ -BOOST_AUTO_TEST_CASE(LayerWeightsInitTest) -{ - // Create a two layer network. - LinearLayer<> linearLayer1(10, 10); - - arma::mat parameter = arma::zeros(100, 1); - - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0); - - RandomInitialization constantInit(1, 1); - arma::mat output; - LayerWeights(constantInit, linearLayer1, parameter, 0, output); - - BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100); - BOOST_REQUIRE_EQUAL(arma::accu(parameter), 100); -} - -BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/performance_functions_test.cpp b/src/mlpack/tests/performance_functions_test.cpp deleted file mode 100644 index 35902911422..00000000000 --- a/src/mlpack/tests/performance_functions_test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file performance_functions_test.cpp - * @author Marcus Edel - * - * Tests for the various performance functions. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. 
If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PerformanceFunctionsTest); - -// Test the mean squared error performance function. -BOOST_AUTO_TEST_CASE(MeanSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(MeanSquaredErrorFunction::Error(input, target), 0.5); -} - -// Test the cross entropy performance function. -BOOST_AUTO_TEST_CASE(CrossEntropyErrorTest) -{ - arma::colvec input; - input << std::exp(-2.0) << std::exp(-1.0); - arma::colvec target = arma::ones(2); - - BOOST_REQUIRE_EQUAL(CrossEntropyErrorFunction<>::Error(input, target), 3); -} - -// Test the sum squared error performance function. -BOOST_AUTO_TEST_CASE(SumSquaredErrorTest) -{ - arma::colvec input("1.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0"); - arma::colvec target = arma::zeros(8); - - BOOST_REQUIRE_EQUAL(SumSquaredErrorFunction::Error(input, target), 4); -} - -BOOST_AUTO_TEST_SUITE_END(); From 251288a6d15407b6fd667d114bbc917afe5302e9 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 8 Nov 2016 22:46:11 +0100 Subject: [PATCH 40/82] Remove unused ann layer. --- src/mlpack/methods/ann/layer/bias_layer.hpp | 208 -------- .../ann/layer/binary_classification_layer.hpp | 106 ---- .../methods/ann/layer/constant_layer.hpp | 121 ----- src/mlpack/methods/ann/layer/conv_layer.hpp | 324 ------------ .../methods/ann/layer/dropconnect_layer.hpp | 361 ------------- .../methods/ann/layer/dropout_layer.hpp | 252 --------- src/mlpack/methods/ann/layer/empty_layer.hpp | 133 ----- .../methods/ann/layer/glimpse_layer.hpp | 484 ------------------ .../{hard_tanh_layer.hpp => hard_tanh.hpp} | 0 src/mlpack/methods/ann/layer/linear_layer.hpp | 289 ----------- .../methods/ann/layer/log_softmax_layer.hpp | 131 ----- src/mlpack/methods/ann/layer/lstm_layer.hpp | 418 --------------- .../layer/multiclass_classification_layer.hpp | 98 ---- .../ann/layer/multiply_constant_layer.hpp | 113 ---- .../methods/ann/layer/one_hot_layer.hpp | 96 ---- .../methods/ann/layer/pooling_layer.hpp | 267 ---------- .../methods/ann/layer/recurrent_layer.hpp | 192 ------- .../ann/layer/reinforce_normal_layer.hpp | 139 ----- .../methods/ann/layer/softmax_layer.hpp | 114 ----- .../methods/ann/layer/sparse_bias_layer.hpp | 177 ------- .../methods/ann/layer/sparse_input_layer.hpp | 180 ------- .../methods/ann/layer/sparse_output_layer.hpp | 227 -------- .../ann/layer/vr_class_reward_layer.hpp | 171 ------- 23 files changed, 4601 deletions(-) delete mode 100644 src/mlpack/methods/ann/layer/bias_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/binary_classification_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/constant_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/conv_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/dropconnect_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/dropout_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/empty_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/glimpse_layer.hpp rename src/mlpack/methods/ann/layer/{hard_tanh_layer.hpp => hard_tanh.hpp} (100%) delete mode 100644 src/mlpack/methods/ann/layer/linear_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/log_softmax_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/lstm_layer.hpp delete mode 100644 
src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/multiply_constant_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/one_hot_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/pooling_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/recurrent_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/softmax_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_bias_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_input_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/sparse_output_layer.hpp delete mode 100644 src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp deleted file mode 100644 index 0be535dec7b..00000000000 --- a/src/mlpack/methods/ann/layer/bias_layer.hpp +++ /dev/null @@ -1,208 +0,0 @@ -/** - * @file bias_layer.hpp - * @author Marcus Edel - * - * Definition of the BiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a standard bias layer. The BiasLayer class represents a - * single layer of a neural network. - * - * A convenient typedef is given: - * - * - 2DBiasLayer - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class BiasLayer -{ - public: - /** - * Create the BiasLayer object using the specified number of units and bias - * parameter. - * - * @param outSize The number of output units. - * @param bias The bias value. - */ - BiasLayer(const size_t outSize, const double bias = 1) : - outSize(outSize), - bias(bias) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + (weights * bias); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = input; - for (size_t s = 0; s < input.n_slices; s++) - { - output.slice(s) += weights(s) * bias; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. 
- * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const arma::Mat& /* input */, - const ErrorType& error, - GradientType& gradient) - { - gradient = error * bias; - } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(bias, "bias"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored bias value. - double bias; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class BiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::cube -> -using BiasLayer2D = BiasLayer; - -/** - * Standard 2D-Bias-Layer. - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -using AdditionLayer = BiasLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp deleted file mode 100644 index 1b3d6172a74..00000000000 --- a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file binary_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the BinaryClassificationLayer class, which implements a - * binary class classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. 
If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a binary classification layer that can be used as - * output layer. - */ -class BinaryClassificationLayer -{ - public: - /** - * Create the BinaryClassificationLayer object. - * - * @param confidence The confidence used for the output class transformation. - */ - BinaryClassificationLayer(const double confidence = 0.5) : - confidence(confidence) - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - - for (size_t i = 0; i < output.n_elem; i++) - output(i) = output(i) > confidence ? 1 : 0; - } - - //! Get the confidence parameter. - double const& Confidence() const { return confidence; } - //! Modify the confidence parameter. - double& Confidence() { return confidence; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(confidence, "confidence"); - } - - private: - double confidence; - -}; // class BinaryClassificationLayer - -//! Layer traits for the binary class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant_layer.hpp deleted file mode 100644 index 31da87e7d9d..00000000000 --- a/src/mlpack/methods/ann/layer/constant_layer.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/** - * @file constant_layer.hpp - * @author Marcus Edel - * - * Definition of the ConstantLayer class, which outputs a constant value given - * any input. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the constant layer. The constant layer outputs a given - * constant value given any input value. 
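As an aside on the BinaryClassificationLayer removed above: its two operations amount to an element-wise threshold against the confidence value plus a plain difference between activations and targets. A minimal standalone sketch, assuming plain Armadillo column vectors and arbitrary example values (not taken from any test in this patch):

#include <armadillo>

int main()
{
  // Activations produced by an output layer and the matching targets
  // (illustrative values only).
  arma::colvec activations("0.1 0.7 0.4 0.9");
  arma::colvec targets("0 1 0 1");

  const double confidence = 0.5;

  // OutputClass(): threshold every activation at the confidence value.
  arma::colvec classes = arma::conv_to<arma::colvec>::from(
      activations > confidence);

  // CalculateError(): plain difference between activations and targets.
  arma::colvec error = activations - targets;

  classes.print("predicted classes:");
  error.print("error:");
  return 0;
}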
- * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class ConstantLayer -{ - public: - /** - * Create the ConstantLayer object that outputs a given constant scalar value - * given any input value. - * - * @param outSize The number of output units. - * @param scalar The constant value used to create the constant output. - */ - ConstantLayer(const size_t outSize, const double scalar) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } - - /** - * Ordinary feed forward pass of a neural network. The forward pass fills the - * output with the specified constant parameter. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& /* input */, arma::Mat& output) - { - output = constantOutput; - } - - /** - * Ordinary feed backward pass of a neural network. The backward pass of the - * constant layer is returns always a zero output error matrix. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Mat& /* input */, - const arma::Mat& /* gy */, - arma::Mat& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols); - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } - - private: - //! Locally-stored constant output matrix. - OutputDataType constantOutput; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConstantLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp deleted file mode 100644 index bbb918c8a49..00000000000 --- a/src/mlpack/methods/ann/layer/conv_layer.hpp +++ /dev/null @@ -1,324 +0,0 @@ -/** - * @file conv_layer.hpp - * @author Marcus Edel - * - * Definition of the ConvLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP - -#include -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. 
*/ { - -/** - * Implementation of the ConvLayer class. The ConvLayer class represents a - * single layer of a neural network. - * - * @tparam ForwardConvolutionRule Convolution to perform forward process. - * @tparam BackwardConvolutionRule Convolution to perform backward process. - * @tparam GradientConvolutionRule Convolution to calculate gradient. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename ForwardConvolutionRule = NaiveConvolution, - typename BackwardConvolutionRule = NaiveConvolution, - typename GradientConvolutionRule = NaiveConvolution, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class ConvLayer -{ - public: - /** - * Create the ConvLayer object using the specified number of input maps, - * output maps, filter size, stride and padding parameter. - * - * @param inMaps The number of input maps. - * @param outMaps The number of output maps. - * @param wfilter Width of the filter/kernel. - * @param wfilter Height of the filter/kernel. - * @param xStride Stride of filter application in the x direction. - * @param yStride Stride of filter application in the y direction. - * @param wPad Spatial padding width of the input. - * @param hPad Spatial padding height of the input. - */ - ConvLayer(const size_t inMaps, - const size_t outMaps, - const size_t wfilter, - const size_t hfilter, - const size_t xStride = 1, - const size_t yStride = 1, - const size_t wPad = 0, - const size_t hPad = 0) : - wfilter(wfilter), - hfilter(hfilter), - inMaps(inMaps), - outMaps(outMaps), - xStride(xStride), - yStride(yStride), - wPad(wPad), - hPad(hPad) - { - weights.set_size(wfilter, hfilter, inMaps * outMaps); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad); - const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad); - - output = arma::zeros >(wConv, hConv, outMaps); - for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++) - { - arma::Mat convOutput; - ForwardConvolutionRule::Convolution(input.slice(inMap), - weights.slice(outMap), convOutput); - - output.slice(outMap) += convOutput; - } - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
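The output dimensions allocated in ConvLayer::Forward() above come from the ConvOutSize() helper defined further down in this file; the arithmetic is the usual convolution size formula. A small self-contained sketch of that calculation (hypothetical sizes, standard integer form of the formula):

#include <cstddef>
#include <iostream>

// Spatial output size of a convolution:
// out = floor((n + 2 * pad - kernel) / stride) + 1.
std::size_t ConvOutSize(const std::size_t n, const std::size_t k,
                        const std::size_t s, const std::size_t p)
{
  return (n + 2 * p - k) / s + 1;
}

int main()
{
  std::cout << ConvOutSize(32, 5, 1, 0) << "\n";  // 28: a 5x5 kernel shrinks 32 -> 28
  std::cout << ConvOutSize(32, 5, 2, 0) << "\n";  // 14: stride 2 roughly halves the output
  std::cout << ConvOutSize(32, 5, 1, 2) << "\n";  // 32: padding 2 preserves the size
  return 0;
}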
- */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++) - { - for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter, - output); - - g.slice(outMap) += output; - } - } - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The input parameter used for calculating the gradient. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Cube& d, - arma::Cube& g) - { - g = arma::zeros >(weights.n_rows, weights.n_cols, - weights.n_slices); - - for (size_t outMap = 0; outMap < outMaps; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps) - { - arma::Cube inputSlices = input.slices(inMap, inMap); - arma::Cube deltaSlices = d.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output); - - for (size_t i = 0; i < output.n_slices; i++) - g.slice(s) += output.slice(i); - } - } - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(wfilter, "wfilter"); - ar & data::CreateNVP(hfilter, "hfilter"); - ar & data::CreateNVP(inMaps, "inMaps"); - ar & data::CreateNVP(outMaps, "outMaps"); - ar & data::CreateNVP(xStride, "xStride"); - ar & data::CreateNVP(yStride, "yStride"); - ar & data::CreateNVP(wPad, "wPad"); - ar & data::CreateNVP(hPad, "hPad"); - } - - private: - /* - * Rotates a 3rd-order tesor counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. - */ - template - void Rotate180(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); - - // * left-right flip, up-down flip */ - for (size_t s = 0; s < output.n_slices; s++) - output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); - } - - /* - * Rotates a dense matrix counterclockwise by 180 degrees. - * - * @param input The input data to be rotated. - * @param output The rotated output. 
- */ - template - void Rotate180(const arma::Mat& input, arma::Mat& output) - { - // * left-right flip, up-down flip */ - output = arma::fliplr(arma::flipud(input)); - } - - /* - * Return the convolution output size. - * - * @param size The size of the input (row or column). - * @param k The size of the filter (width or height). - * @param s The stride size (x or y direction). - * @param p The size of the padding (width or height). - * @return The convolution output size. - */ - size_t ConvOutSize(const size_t size, - const size_t k, - const size_t s, - const size_t p) - { - return std::floor(size + p * 2 - k) / s + 1; - } - - //! Locally-stored filter/kernel width. - size_t wfilter; - - //! Locally-stored filter/kernel height. - size_t hfilter; - - //! Locally-stored number of input maps. - size_t inMaps; - - //! Locally-stored number of output maps. - size_t outMaps; - - //! Locally-stored stride of the filter in x-direction. - size_t xStride; - - //! Locally-stored stride of the filter in y-direction. - size_t yStride; - - //! Locally-stored padding width. - size_t wPad; - - //! Locally-stored padding height. - size_t hPad; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class ConvLayer - -//! Layer traits for the convolution layer. -template< - typename ForwardConvolutionRule, - typename BackwardConvolutionRule, - typename GradientConvolutionRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp deleted file mode 100644 index fdb14cbf11c..00000000000 --- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp +++ /dev/null @@ -1,361 +0,0 @@ -/** - * @file dropconnect_layer.hpp - * @author Palash Ahuja - * - * Definition of the DropConnectLayer class, which implements a regularizer - * that randomly sets connections to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP - -#include - -#include "empty_layer.hpp" -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The DropConnect layer is a regularizer that randomly with probability - * ratio sets the connection values to zero and scales the remaining - * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) - * when deterministic is false. In the deterministic mode(during testing), - * the layer just computes the output. The output is computed according - * to the input layer. If no input layer is given, it will take a linear layer - * as default. 
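A minimal sketch of the masking-and-rescaling behaviour this class description refers to, assuming plain Armadillo, an arbitrary 4x3 weight matrix, and illustrative variable names (the mask and scale are applied exactly as in Forward() below):

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(42);

  const double ratio = 0.5;                  // probability of dropping a weight
  const double scale = 1.0 / (1.0 - ratio);  // rescale factor for the survivors

  arma::mat weights(4, 3, arma::fill::randn);
  arma::colvec input(3, arma::fill::randu);

  // Keep each weight with probability (1 - ratio).
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (double) (val > ratio); });

  arma::colvec output = (weights % mask) * input * scale;
  output.print("dropconnect output:");
  return 0;
}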
- * - * Note: - * During training you should set deterministic to false and during testing - * you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @inproceedings{WanICML2013, - * title={Regularization of Neural Networks using DropConnect}, - * booktitle = {Proceedings of the 30th International Conference on Machine - * Learning(ICML - 13)}, - * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and - * Rob Fergus}, - * year = {2013} - * } - * @endcode - * - * @tparam InputLayer Layer used instead of the internal linear layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template< - typename InputLayer = EmptyLayer, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropConnectLayer -{ - public: - /** - * Creates the DropConnect Layer as a Linear Object that takes input size, - * output size and ratio as parameter. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param ratio The probability of setting a value to zero. - */ - DropConnectLayer (const size_t inSize, - const size_t outSize, - const double ratio = 0.5) : - inSize(inSize), - outSize(outSize), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(false) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the DropConnectLayer object using the specified ratio and rescale - * parameter. This takes the - * - * @param ratio The probability of setting a connection to zero. - * @param inputLayer the layer object that the dropconnect connection would take. - */ - template - DropConnectLayer(InputLayerType &&inputLayer, - const double ratio = 0.5) : - baseLayer(std::forward(inputLayer)), - ratio(ratio), - scale(1.0 / (1 - ratio)), - uselayer(true) - { - static_assert(std::is_same::type, - InputLayer>::value, - "The type of the inputLayer must be InputLayerType"); - } - /** - * Ordinary feed forward pass of the DropConnect layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat &input, arma::Mat &output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (uselayer) - { - baseLayer.Forward(input, output); - } - else - { - output = weights * input; - } - } - else - { - if (uselayer) - { - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(baseLayer.Weights().n_rows, - baseLayer.Weights().n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = baseLayer.Weights(); - - baseLayer.Weights() = baseLayer.Weights() % mask; - - baseLayer.Forward(input, output); - } - else - { - // Scale the input / ( 1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(weights.n_rows, weights.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - // Save weights for denoising. - denoise = weights; - - weights = weights % mask; - output = weights * input; - } - - output = output * scale; - } - } - - /** - * Ordinary feed backward pass of the DropConnect layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. 
- * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, const DataType& gy, DataType& g) - { - if (uselayer) - { - baseLayer.Backward(input, gy, g); - } - else - { - g = weights.t() * gy; - } - } - - /** - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - if (uselayer) - { - baseLayer.Gradient(input, d, g); - - // Denoise the weights. - baseLayer.Weights() = denoise; - } - else - { - g = d * input.t(); - - // Denoise the weights. - weights = denoise; - } - } - - //! Get the weights. - OutputDataType const& Weights() const - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Modify the weights. - OutputDataType& Weights() - { - if (uselayer) - return baseLayer.Weights(); - - return weights; - } - - //! Get the input parameter. - InputDataType &InputParameter() const - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Modify the input parameter. - InputDataType &InputParameter() - { - if (uselayer) - return baseLayer.InputParameter(); - - return inputParameter; - } - - //! Get the output parameter. - OutputDataType &OutputParameter() const - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Modify the output parameter. - OutputDataType &OutputParameter() - { - if (uselayer) - return baseLayer.OutputParameter(); - - return outputParameter; - } - - //! Get the delta. - OutputDataType const& Delta() const - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Modify the delta. - OutputDataType& Delta() - { - if (uselayer) - return baseLayer.Delta(); - - return delta; - } - - //! Get the gradient. - OutputDataType const& Gradient() const - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! Modify the gradient. - OutputDataType& Gradient() - { - if (uselayer) - return baseLayer.Gradient(); - - return gradient; - } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - - //! Modify the value of the deterministic parameter. - bool &Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - -private: - //! Locally-stored layer object. - InputLayer baseLayer; - - //! Locally stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true the default layer is used otherwise a new layer will be created. - bool uselayer; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! If true dropout and scaling is disabled, see notes above. 
- bool deterministic; - - //! Denoise mask for the weights. - OutputDataType denoise; -}; // class DropConnectLayer. - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout_layer.hpp deleted file mode 100644 index 3ed0bd62a60..00000000000 --- a/src/mlpack/methods/ann/layer/dropout_layer.hpp +++ /dev/null @@ -1,252 +0,0 @@ -/** - * @file dropout_layer.hpp - * @author Marcus Edel - * - * Definition of the DropoutLayer class, which implements a regularizer that - * randomly sets units to zero. Preventing units from co-adapting. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The dropout layer is a regularizer that randomly with probability ratio - * sets input values to zero and scales the remaining elements by factor 1 / - * (1 - ratio). If rescale is true the input is scaled with 1 / (1-p) when - * deterministic is false. In the deterministic mode (during testing), the layer - * just scales the output. - * - * Note: During training you should set deterministic to false and during - * testing you should set deterministic to true. - * - * For more information, see the following. - * - * @code - * @article{Hinton2012, - * author = {Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, - * Ilya Sutskever, Ruslan Salakhutdinov}, - * title = {Improving neural networks by preventing co-adaptation of feature - * detectors}, - * journal = {CoRR}, - * volume = {abs/1207.0580}, - * year = {2012}, - * } - * @endcode - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class DropoutLayer -{ - public: - - /** - * Create the DropoutLayer object using the specified ratio and rescale - * parameter. - * - * @param ratio The probability of setting a value to zero. - * @param rescale If true the input is rescaled when deterministic is False. - */ - DropoutLayer(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the dropout layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } - - /** - * Ordinary feed forward pass of the dropout layer. 
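For reference, the training-versus-testing distinction in this layer reduces to a few Armadillo expressions; a standalone sketch with an arbitrary ratio of 0.3 (variable names are illustrative, not taken from the layer):

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(1);

  const double ratio = 0.3;
  const double scale = 1.0 / (1.0 - ratio);

  arma::mat input(5, 2, arma::fill::randu);

  // Training mode: zero each element with probability `ratio`, then rescale
  // the survivors so the expected activation is unchanged.
  arma::mat mask = arma::randu<arma::mat>(input.n_rows, input.n_cols);
  mask.transform([&](double val) { return (double) (val > ratio); });
  arma::mat trainingOutput = input % mask * scale;

  // Deterministic (test) mode with rescale == true: only the scaling remains.
  arma::mat testingOutput = input * scale;

  trainingOutput.print("training output:");
  testingOutput.print("testing output:");
  return 0;
}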
- * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols, - input.n_slices); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } - - /** - * Ordinary feed backward pass of the dropout layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const DataType& gy, - DataType& g) - { - g = gy % mask * scale; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the detla. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! The value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - //! The probability of setting a value to zero. - double Ratio() const { return ratio; } - - //! Modify the probability of setting a value to zero. - void Ratio(const double r) - { - ratio = r; - scale = 1.0 / (1.0 - ratio); - } - - //! The value of the rescale parameter. - bool Rescale() const {return rescale; } - //! Modify the value of the rescale parameter. - bool& Rescale() {return rescale; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored mast object. - OutputDataType mask; - - //! The probability of setting a value to zero. - double ratio; - - //! The scale fraction. - double scale; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; - - //! If true the input is rescaled when deterministic is False. - bool rescale; -}; // class DropoutLayer - -//! Layer traits for the bias layer. -template < - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -/** - * Standard Dropout-Layer2D. 
- */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using DropoutLayer2D = DropoutLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp deleted file mode 100644 index cf5a70e43ae..00000000000 --- a/src/mlpack/methods/ann/layer/empty_layer.hpp +++ /dev/null @@ -1,133 +0,0 @@ -/** - * @file empty_layer.hpp - * @author Palash Ahuja - * - * Definition of the EmptyLayer class, which is basically empty. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP - -namespace mlpack{ -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the EmptyLayer class. The EmptyLayer class represents a - * single layer which is mainly used as placeholder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class EmptyLayer -{ - public: - /** - * Creates the empty layer object. All the methods are - * empty as well. - */ - EmptyLayer() { /* Nothing to do here. */ } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& /* input */, OutputType& /* output */) - { - /* Nothing to do here. */ - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* input */, - const ErrorType& /* gy */, - GradientType& /* g */) - { - /* Nothing to do here. */ - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const ErrorType& /* error */, - GradientType& /* gradient */) - { - /* Nothing to do here. */ - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! 
Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class EmptyLayer - -} //namespace ann -} //namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp deleted file mode 100644 index 3f1e9dffabb..00000000000 --- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp +++ /dev/null @@ -1,484 +0,0 @@ -/** - * @file glimpse_layer.hpp - * @author Marcus Edel - * - * Definition of the GlimpseLayer class, which takes an input image and a - * location to extract a retina-like representation of the input image at - * different increasing scales. - * - * For more information, see the following. - * - * @code - * @article{CoRR2014, - * author = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, - * title = {Recurrent Models of Visual Attention}, - * journal = {CoRR}, - * volume = {abs/1406.6247}, - * year = {2014}, - * } - * @endcode - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * The glimpse layer returns a retina-like representation - * (down-scaled cropped images) of increasing scale around a given location in a - * given image. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class GlimpseLayer -{ - public: - - /** - * Create the GlimpseLayer object using the specified ratio and rescale - * parameter. - * - * @param inSize The size of the input units. - * @param size The used glimpse size (height = width). - * @param depth The number of patches to crop per glimpse. - * @param scale The scaling factor used to create the increasing retina-like - * representation. - */ - GlimpseLayer(const size_t inSize, - const size_t size, - const size_t depth = 3, - const size_t scale = 2) : - inSize(inSize), - size(size), - depth(depth), - scale(scale) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of the glimpse layer. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. 
- */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::Cube(size, size, depth * input.n_slices); - - inputDepth = input.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - input.n_rows + padSize * 2, input.n_cols + padSize * 2, - input.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1, - padSize + input.n_cols - 1) = input.subcube(0, 0, - inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - output.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < output.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, output.slice(j)); - } - else - { - ReSampling(poolingInput, output.slice(j)); - } - } - } - } - } - } - - /** - * Ordinary feed backward pass of the glimpse layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const ErrorType& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. 
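The location handling in the glimpse Forward() pass above boils down to mapping a coordinate in [-1, 1] onto a valid top-left crop offset inside the padded image. A small sketch of that mapping (CropOffset is a hypothetical helper name; the sizes are arbitrary):

#include <algorithm>
#include <cstddef>
#include <iostream>

// Map a glimpse location in [-1, 1] to the top-left offset of the crop
// inside the padded image, mirroring the arithmetic in Forward() above.
std::size_t CropOffset(const double location, const std::size_t padded,
                       const std::size_t glimpseSize)
{
  const std::size_t range = padded - glimpseSize;
  return std::min(range,
      (std::size_t) std::max(0.0, (location + 1) / 2.0 * range));
}

int main()
{
  // A 28x28 image with an 8x8 glimpse: padSize = floor((8 - 1) / 2) = 3.
  const std::size_t padded = 28 + 2 * 3;
  std::cout << CropOffset(-1.0, padded, 8) << "\n";  // 0  (top/left edge)
  std::cout << CropOffset( 0.0, padded, 8) << "\n";  // 13 (centre)
  std::cout << CropOffset( 1.0, padded, 8) << "\n";  // 26 (bottom/right edge)
  return 0;
}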
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - g = arma::zeros(inputParameter.n_rows, inputParameter.n_cols, - inputParameter.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputParameter.n_rows + padSize * 2, inputParameter.n_cols + - padSize * 2, inputParameter.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputParameter.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - g += inputPadded.tube(padSize, padSize, padSize + - inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1); - } - } - - Transform(g); - } - - //! Get the input parameter. - InputDataType& InputParameter() const {return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const {return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the detla. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Set the locationthe x and y coordinate of the center of the output - //! glimpse. - void Location(const arma::mat& location) - { - this->location = location; - } - - private: - /* - * Transform the given input by changing rows to columns. - * - * @param w The input matrix used to perform the transformation. - */ - void Transform(arma::mat& w) - { - arma::mat t = w; - - for (size_t i = 0, k = 0; i < w.n_elem; k++) - { - for (size_t j = 0; j < w.n_cols; j++, i++) - { - w(k, j) = t(i); - } - } - } - - /* - * Transform the given input by changing rows to columns. - * - * @param w The input matrix used to perform the transformation. 
- */ - void Transform(arma::cube& w) - { - for (size_t i = 0; i < w.n_slices; i++) - { - arma::mat t = w.slice(i); - Transform(t); - w.slice(i) = t; - } - } - - /** - * Apply pooling to the input and store the results to the output parameter. - * - * @param kSize the kernel size used to perform the pooling operation. - * @param input The input to be apply the pooling rule. - * @param output The pooled result. - */ - template - void Pooling(const size_t kSize, - const arma::Mat& input, - arma::Mat& output) - { - - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - output(i / rStep, j / cStep) += pooling.Pooling( - input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param error The error used to perform the unpooling operation. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - /** - * Apply ReSampling to the input and store the results in the output - * parameter. - * - * @param input The input to be apply the ReSampling rule. - * @param output The pooled result. - */ - template - void ReSampling(const arma::Mat& input, arma::Mat& output) - { - double wRatio = (double) (input.n_rows - 1) / (size - 1); - double hRatio = (double) (input.n_cols - 1) / (size - 1); - - double iWidth = input.n_rows - 1; - double iHeight = input.n_cols - 1; - - for (size_t y = 0; y < size; y++) - { - for (size_t x = 0; x < size; x++) - { - double ix = wRatio * x; - double iy = hRatio * y; - - // Get the 4 nearest neighbors. - double ixNw = std::floor(ix); - double iyNw = std::floor(iy); - double ixNe = ixNw + 1; - double iySw = iyNw + 1; - - // Get surfaces to each neighbor. - double se = (ix - ixNw) * (iy - iyNw); - double sw = (ixNe - ix) * (iy - iyNw); - double ne = (ix - ixNw) * (iySw - iy); - double nw = (ixNe - ix) * (iySw - iy); - - // Calculate the weighted sum. - output(y, x) = input(iyNw, ixNw) * nw + - input(iyNw, std::min(ixNe, iWidth)) * ne + - input(std::min(iySw, iHeight), ixNw) * sw + - input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se; - } - } - } - - /** - * Apply DownwardReSampling to the input and store the results into the output - * parameter. - * - * @param input The input to be apply the DownwardReSampling rule. - * @param error The error used to perform the DownwardReSampling operation. - * @param output The DownwardReSampled result. 
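The ReSampling() helper above is plain bilinear interpolation; a compact restatement with the row and column ratios written out separately (ReSample is a hypothetical name, and the example sizes are arbitrary):

#include <algorithm>
#include <cmath>
#include <armadillo>

// Bilinear resampling of `input` to a (size x size) output; a restatement of
// the interpolation performed by ReSampling() above.
arma::mat ReSample(const arma::mat& input, const arma::uword size)
{
  arma::mat output(size, size);
  const double rRatio = (double) (input.n_rows - 1) / (size - 1);
  const double cRatio = (double) (input.n_cols - 1) / (size - 1);

  for (arma::uword y = 0; y < size; ++y)
  {
    for (arma::uword x = 0; x < size; ++x)
    {
      const double iy = rRatio * y, ix = cRatio * x;
      const arma::uword y0 = (arma::uword) std::floor(iy);
      const arma::uword x0 = (arma::uword) std::floor(ix);
      const arma::uword y1 = std::min(y0 + 1, input.n_rows - 1);
      const arma::uword x1 = std::min(x0 + 1, input.n_cols - 1);
      const double dy = iy - y0, dx = ix - x0;

      // Weighted sum of the four nearest neighbours.
      output(y, x) = input(y0, x0) * (1 - dy) * (1 - dx) +
                     input(y0, x1) * (1 - dy) * dx +
                     input(y1, x0) * dy * (1 - dx) +
                     input(y1, x1) * dy * dx;
    }
  }

  return output;
}

int main()
{
  arma::mat image(16, 16, arma::fill::randu);
  ReSample(image, 8).print("8x8 glimpse:");
  return 0;
}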
- */ - template - void DownwardReSampling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - double iWidth = input.n_rows - 1; - double iHeight = input.n_cols - 1; - - double wRatio = iWidth / (size - 1); - double hRatio = iHeight / (size - 1); - - for (size_t y = 0; y < size; y++) - { - for (size_t x = 0; x < size; x++) - { - double ix = wRatio * x; - double iy = hRatio * y; - - // Get the 4 nearest neighbors. - double ixNw = std::floor(ix); - double iyNw = std::floor(iy); - double ixNe = ixNw + 1; - double iySw = iyNw + 1; - - // Get surfaces to each neighbor. - double se = (ix - ixNw) * (iy - iyNw); - double sw = (ixNe - ix) * (iy - iyNw); - double ne = (ix - ixNw) * (iySw - iy); - double nw = (ixNe - ix) * (iySw - iy); - - double ograd = error(y, x); - - output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd; - output(iyNw, std::min(ixNe, iWidth)) = output(iyNw, - std::min(ixNe, iWidth)) + ne * ograd; - output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight), - ixNw) + sw * ograd; - output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output( - std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd; - } - } - } - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored depth of the input. - size_t inputDepth; - - //! The size of the input units. - size_t inSize; - - //! The used glimpse size (height = width). - size_t size; - - //! The number of patches to crop per glimpse. - size_t depth; - - //! The scale fraction. - size_t scale; - - //! The x and y coordinate of the center of the output glimpse. - arma::mat location; - - //! Locally-stored object to perform the mean pooling operation. - MeanPooling pooling; -}; // class GlimpseLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/hard_tanh_layer.hpp rename to src/mlpack/methods/ann/layer/hard_tanh.hpp diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp deleted file mode 100644 index b3b3dbf0266..00000000000 --- a/src/mlpack/methods/ann/layer/linear_layer.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * @file linear_layer.hpp - * @author Marcus Edel - * - * Definition of the LinearLayer class also known as fully-connected layer or - * affine transformation. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the LinearLayer class. The LinearLayer class represents a - * single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). 
- */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LinearLayer -{ - public: - /** - * Create the LinearLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - LinearLayer(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Mat& output) - { - arma::Mat data(input.n_elem, 1); - - for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++) - { - for (size_t i = 0; i < data.n_cols; i++, c++) - { - data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) * - input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise( - input.slice(c), 1)); - } - } - - output = weights * data; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = weights.t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param error The calculated error. - * @param gradient The calculated gradient. - */ - template - void Gradient(const InputType& input, - const ErrorType& error, - GradientType& gradient) - { - GradientDelta(input, error, gradient); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } - - private: - /* - * Calculate the gradient using the output delta (3rd order tensor) and the - * input activation (3rd order tensor). 
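Taken together, the three passes of this layer are one matrix product each; a tiny end-to-end sketch for the dense-matrix case (an arbitrary 3-to-2 mapping with random values):

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(7);

  // A 3-unit input mapped to 2 output units, as in LinearLayer(3, 2).
  arma::mat weights(2, 3, arma::fill::randn);
  arma::colvec input(3, arma::fill::randu);

  // Forward(): linear map without a bias term.
  arma::colvec output = weights * input;

  // Backward(): propagate the error gy arriving from the next layer.
  arma::colvec gy(2, arma::fill::randu);
  arma::colvec g = weights.t() * gy;

  // Gradient() (dense case): outer product of the delta and the input.
  arma::mat gradient = gy * input.t();

  output.print("forward output:");
  g.print("backpropagated error:");
  gradient.print("weight gradient:");
  return 0;
}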
- * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - arma::Mat data = arma::Mat(d.n_cols, - input.n_elem / d.n_cols); - - for (size_t s = 0, c = 0; s < input.n_slices / - data.n_rows; s++) - { - for (size_t i = 0; i < data.n_rows; i++, c++) - { - data.row(i).subvec(s * input.n_rows * - input.n_cols, (s + 1) * - input.n_rows * - input.n_cols - 1) = arma::vectorise( - input.slice(c), 1); - } - } - - g.slice(0) = d * data / d.n_cols; - } - - /* - * Calculate the gradient (3rd order tensor) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Cube& g) - { - g = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, g.slice(0)); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (3rd order tensor). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Cube& input, - const arma::Mat& d, - arma::Mat& g) - { - arma::Cube grad = arma::Cube(weights.n_rows, weights.n_cols, 1); - Gradient(input, d, grad); - g = grad.slice(0); - } - - /* - * Calculate the gradient (dense matrix) using the output delta - * (dense matrix) and the input activation (dense matrix). - * - * @param input The input parameter used for calculating the gradient. - * @param d The output delta. - * @param g The calculated gradient. - */ - template - void GradientDelta(const arma::Mat& input, - const arma::Mat& d, - arma::Mat& g) - { - g = d * input.t(); - } - - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LinearLayer - -/** - * Linear Mapping layer to map between 3rd order tensors and dense matrices. - */ -template < - typename InputDataType = arma::cube, - typename OutputDataType = arma::mat -> -using LinearMappingLayer = LinearLayer; - -//! Layer traits for the linear layer. 
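For reference, the three core operations of the LinearLayer removed above reduce to plain matrix algebra: Forward computes W * x, Backward propagates W^T * gy, and the dense GradientDelta accumulates gy * x^T. The following standalone Armadillo sketch mirrors those dense-matrix overloads; it is independent of the mlpack layer API and all variable names are illustrative.

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(42);

  const size_t inSize = 4, outSize = 3, batch = 5;
  arma::mat W(outSize, inSize, arma::fill::randu);   // weights (outSize x inSize)
  arma::mat x(inSize, batch, arma::fill::randu);     // one column per sample

  // Forward: affine map without bias, as in LinearLayer::Forward.
  arma::mat y = W * x;

  // Backward: propagate an upstream error gy through the layer.
  arma::mat gy(outSize, batch, arma::fill::randu);
  arma::mat gx = W.t() * gy;

  // Gradient: outer product of error and input, as in the dense GradientDelta.
  arma::mat gW = gy * x.t();

  y.print("forward output");
  gx.print("backpropagated error");
  gW.print("weight gradient");
  return 0;
}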
-template< - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp deleted file mode 100644 index 2b417e32b61..00000000000 --- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * @file log_softmax_layer.hpp - * @author Marcus Edel - * - * Definition of the LogSoftmaxLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the log softmax layer. The log softmax loss layer computes - * the multinomial logistic loss of the softmax of its inputs. This layer is - * meant to be used in combination with the negative log likelihood layer - * (NegativeLogLikelihoodLayer), which expects that the input contains - * log-probabilities for each class. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LogSoftmaxLayer -{ - public: - /** - * Create the LogSoftmaxLayer object. - */ - LogSoftmaxLayer() { /* Nothing to do here. */ } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
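The LogSoftmaxLayer::Forward removed above is the numerically stable log-softmax, x - (max(x) + log(sum(exp(x - max(x))))), with exp replaced by a fast polynomial approximation credited to Leon Bottou. For comparison, here is a minimal column-wise sketch using Armadillo's exact exp; it is not the layer itself, and small differences (on the order of 1e-5, as the original comment notes) are expected against the approximated version.

#include <armadillo>

// Numerically stable log-softmax applied to each column of the input.
// Uses exact exp() rather than the polynomial approximation in the layer.
arma::mat LogSoftmax(const arma::mat& input)
{
  arma::rowvec maxVals = arma::max(input, 0);   // column-wise maxima
  arma::mat shifted = input - arma::repmat(maxVals, input.n_rows, 1);
  arma::rowvec logSum = arma::log(arma::sum(arma::exp(shifted), 0));
  return shifted - arma::repmat(logSum, input.n_rows, 1);
}

int main()
{
  arma::mat scores = { {2.0, -1.0}, {0.5, 0.0}, {-3.0, 4.0} };
  LogSoftmax(scores).print("log-probabilities");
  // Each column of exp(result) sums to 1.
  return 0;
}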
- */ - template - void Backward(const arma::Mat& input, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - InputDataType& Delta() const { return delta; } - //! Modify the delta. - InputDataType& Delta() { return delta; } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class LogSoftmaxLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp deleted file mode 100644 index 6ccd2fc65fd..00000000000 --- a/src/mlpack/methods/ann/layer/lstm_layer.hpp +++ /dev/null @@ -1,418 +0,0 @@ -/** - * @file lstm_layer.hpp - * @author Marcus Edel - * - * Definition of the LSTMLayer class, which implements a lstm network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a lstm network layer. - * - * This class allows specification of the type of the activation functions used - * for the gates and cells and also of the type of the function used to - * initialize and update the peephole weights. - * - * @tparam GateActivationFunction Activation function used for the gates. - * @tparam StateActivationFunction Activation function used for the state. - * @tparam OutputActivationFunction Activation function used for the output. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - class GateActivationFunction = LogisticFunction, - class StateActivationFunction = TanhFunction, - class OutputActivationFunction = TanhFunction, - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LSTMLayer -{ - public: - /** - * Create the LSTMLayer object using the specified parameters. - * - * @param outSize The number of output units. - * @param peepholes The flag used to indicate if peephole connections should - * be used (Default: false). - * @param WeightInitRule The weight initialization rule used to initialize the - * weight matrix. 
- */ - LSTMLayer(const size_t outSize, const bool peepholes = false) : - outSize(outSize), - peepholes(peepholes), - seqLen(1), - offset(0) - { - if (peepholes) - { - peepholeWeights.set_size(outSize, 3); - peepholeDerivatives = arma::zeros(outSize, 3); - } - else - { - peepholeWeights.set_size(0, 0); - } - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (inGate.n_cols < seqLen) - { - inGate = arma::zeros(outSize, seqLen); - inGateAct = arma::zeros(outSize, seqLen); - inGateError = arma::zeros(outSize, seqLen); - outGate = arma::zeros(outSize, seqLen); - outGateAct = arma::zeros(outSize, seqLen); - outGateError = arma::zeros(outSize, seqLen); - forgetGate = arma::zeros(outSize, seqLen); - forgetGateAct = arma::zeros(outSize, seqLen); - forgetGateError = arma::zeros(outSize, seqLen); - state = arma::zeros(outSize, seqLen); - stateError = arma::zeros(outSize, seqLen); - cellAct = arma::zeros(outSize, seqLen); - } - - // Split up the inputactivation into the 3 parts (inGate, forgetGate, - // outGate). - inGate.col(offset) = input.submat(0, 0, outSize - 1, 0); - - forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0); - outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0); - - if (peepholes && offset > 0) - { - inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1); - forgetGate.col(offset) += peepholeWeights.col(1) % - state.col(offset - 1); - } - - arma::Col inGateActivation = inGateAct.unsafe_col(offset); - GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation); - - arma::Col forgetGateActivation = forgetGateAct.unsafe_col(offset); - GateActivationFunction::fn(forgetGate.unsafe_col(offset), - forgetGateActivation); - - arma::Col cellActivation = cellAct.unsafe_col(offset); - StateActivationFunction::fn(input.submat(outSize * 2, 0, - (outSize * 3) - 1, 0), cellActivation); - - state.col(offset) = inGateAct.col(offset) % cellActivation; - - if (offset > 0) - state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1); - - if (peepholes) - outGate.col(offset) += peepholeWeights.col(2) % state.col(offset); - - arma::Col outGateActivation = outGateAct.unsafe_col(offset); - GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation); - - OutputActivationFunction::fn(state.unsafe_col(offset), output); - output = outGateAct.col(offset) % output; - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. 
- */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - queryOffset = seqLen - offset - 1; - - arma::Col outGateDerivative; - GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset), - outGateDerivative); - - arma::Col stateActivation; - StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation); - - outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation; - - arma::Col stateDerivative; - StateActivationFunction::deriv(stateActivation, stateDerivative); - - stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) % - stateDerivative; - - if (queryOffset < (seqLen - 1)) - { - stateError.col(queryOffset) += stateError.col(queryOffset + 1) % - forgetGateAct.col(queryOffset + 1); - - if (peepholes) - { - stateError.col(queryOffset) += inGateError.col(queryOffset + 1) % - peepholeWeights.col(0); - stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) % - peepholeWeights.col(1); - } - } - - if (peepholes) - { - stateError.col(queryOffset) += outGateError.col(queryOffset) % - peepholeWeights.col(2); - } - - arma::Col cellDerivative; - StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative); - - arma::Col cellError = inGateAct.col(queryOffset) % cellDerivative % - stateError.col(queryOffset); - - if (queryOffset > 0) - { - arma::Col forgetGateDerivative; - GateActivationFunction::deriv(forgetGateAct.col(queryOffset), - forgetGateDerivative); - - forgetGateError.col(queryOffset) = forgetGateDerivative % - stateError.col(queryOffset) % state.col(queryOffset - 1); - } - - arma::Col inGateDerivative; - GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative); - - inGateError.col(queryOffset) = inGateDerivative % - stateError.col(queryOffset) % cellAct.col(queryOffset); - - if (peepholes) - { - peepholeDerivatives.col(2) += outGateError.col(queryOffset) % - state.col(queryOffset); - - if (queryOffset > 0) - { - peepholeDerivatives.col(0) += inGateError.col(queryOffset) % - state.col(queryOffset - 1); - peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) % - state.col(queryOffset - 1); - } - } - - g = arma::zeros >(outSize * 4, 1); - g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset); - g.submat(outSize, 0, (outSize * 2) - 1, 0) = - forgetGateError.col(queryOffset); - g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError; - g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) = - outGateError.col(queryOffset); - - offset = (offset + 1) % seqLen; - } - - /** - * Ordinary feed backward pass of the lstm layer. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& /* gy */, - GradientDataType& /* g */) - { - if (peepholes && offset == 0) - { - peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() * - (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) * - inGate.col(queryOffset).t()); - - peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() * - (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) * - forgetGate.col(queryOffset).t()); - - peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() * - (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) * - outGate.col(queryOffset).t()); - - peepholeDerivatives.zeros(); - } - } - - //! Get the peephole weights. 
- OutputDataType const& Weights() const { return peepholeWeights; } - //! Modify the peephole weights. - OutputDataType& Weights() { return peepholeWeights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the peephole gradient. - OutputDataType const& Gradient() const { return peepholeGradient; } - //! Modify the peephole gradient. - OutputDataType& Gradient() { return peepholeGradient; } - - //! Get the sequence length. - size_t SeqLen() const { return seqLen; } - //! Modify the sequence length. - size_t& SeqLen() { return seqLen; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(peepholes, "peepholes"); - - if (peepholes) - { - ar & data::CreateNVP(peepholeWeights, "peepholeWeights"); - - if (Archive::is_loading::value) - { - peepholeDerivatives = arma::zeros( - peepholeWeights.n_rows, 3); - } - } - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored peephole indication flag. - bool peepholes; - - //! Locally-stored length of the the input sequence. - size_t seqLen; - - //! Locally-stored sequence offset. - size_t offset; - - //! Locally-stored query offset. - size_t queryOffset; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored ingate object. - InputDataType inGate; - - //! Locally-stored ingate activation object. - InputDataType inGateAct; - - //! Locally-stored ingate error object. - InputDataType inGateError; - - //! Locally-stored outgate object. - InputDataType outGate; - - //! Locally-stored outgate activation object. - InputDataType outGateAct; - - //! Locally-stored outgate error object. - InputDataType outGateError; - - //! Locally-stored forget object. - InputDataType forgetGate; - - //! Locally-stored forget activation object. - InputDataType forgetGateAct; - - //! Locally-stored forget error object. - InputDataType forgetGateError; - - //! Locally-stored state object. - InputDataType state; - - //! Locally-stored state erro object. - InputDataType stateError; - - //! Locally-stored cell activation object. - InputDataType cellAct; - - //! Locally-stored peephole weight object. - OutputDataType peepholeWeights; - - //! Locally-stored derivatives object. - OutputDataType peepholeDerivatives; - - //! Locally-stored peephole gradient object. - OutputDataType peepholeGradient; -}; // class LSTMLayer - -//! Layer traits for the lstm layer. 
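To make the gate bookkeeping in LSTMLayer::Forward above easier to follow: the incoming activation is a stacked vector of 4 * outSize pre-activations, sliced as input gate, forget gate, cell candidate, and output gate; the gates pass through the logistic function, the candidate and cell state through tanh, and the cell state is carried across time steps. The sketch below performs one such step with the default activations and with peephole connections omitted; it is an illustration only, not the mlpack implementation.

#include <armadillo>

// One LSTM step on pre-activations stacked as in LSTMLayer::Forward:
//   rows [0, n)   -> input gate
//   rows [n, 2n)  -> forget gate
//   rows [2n, 3n) -> cell candidate
//   rows [3n, 4n) -> output gate
// Peephole connections are omitted in this sketch.
void LstmStep(const arma::vec& preAct,   // size 4 * n
              arma::vec& cellState,      // size n, updated in place
              arma::vec& output,         // size n
              const size_t n)
{
  const arma::vec inGate     = 1.0 / (1.0 + arma::exp(-preAct.subvec(0, n - 1)));
  const arma::vec forgetGate = 1.0 / (1.0 + arma::exp(-preAct.subvec(n, 2 * n - 1)));
  const arma::vec candidate  = arma::tanh(preAct.subvec(2 * n, 3 * n - 1));
  const arma::vec outGate    = 1.0 / (1.0 + arma::exp(-preAct.subvec(3 * n, 4 * n - 1)));

  cellState = inGate % candidate + forgetGate % cellState;
  output = outGate % arma::tanh(cellState);
}

int main()
{
  const size_t n = 3;
  arma::vec preAct(4 * n, arma::fill::randn);
  arma::vec cellState(n, arma::fill::zeros);
  arma::vec output(n);

  LstmStep(preAct, cellState, output, n);
  output.print("LSTM output after one step");
  return 0;
}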
-template< - class GateActivationFunction, - class StateActivationFunction, - class OutputActivationFunction, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = true; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp deleted file mode 100644 index 7705b52205a..00000000000 --- a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file multiclass_classification_layer.hpp - * @author Marcus Edel - * - * Definition of the MulticlassClassificationLayer class, which implements a - * multiclass classification layer that can be used as output layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a multiclass classification layer that can be used as - * output layer. - * - * A convenience typedef is given: - * - * - ClassificationLayer - */ -class MulticlassClassificationLayer -{ - public: - /** - * Create the MulticlassClassificationLayer object. - */ - MulticlassClassificationLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - } - - /** - * Serialize the layer - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - } -}; // class MulticlassClassificationLayer - -//! Layer traits for the multiclass classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -/*** - * Alias ClassificationLayer. 
- */ -using ClassificationLayer = MulticlassClassificationLayer; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp deleted file mode 100644 index afa0f42e63c..00000000000 --- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp +++ /dev/null @@ -1,113 +0,0 @@ -/** - * @file multiply_constant_layer.hpp - * @author Marcus Edel - * - * Definition of the MultiplyConstantLayer class, which multiplies the input by - * a (non-learnable) constant. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the multiply constant layer. The multiply constant layer - * multiplies the input by a (non-learnable) constant. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class MultiplyConstantLayer -{ - public: - /** - * Create the BaseLayer object. - */ - MultiplyConstantLayer(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network. Multiply the input with the - * specified constant scalar value. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType& input, OutputType& output) - { - output = input * scalar; - } - - /** - * Ordinary feed backward pass of a neural network. The backward pass - * multiplies the error with the specified constant scalar value. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* input */, const DataType& gy, DataType& g) - { - g = gy * scalar; - } - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } - - private: - //! Locally-stored constant scalar value. - const double scalar; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. 
- OutputDataType outputParameter; -}; // class MultiplyConstantLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp deleted file mode 100644 index 63200b2c335..00000000000 --- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/** - * @file one_hot_layer.hpp - * @author Shangtong Zhang - * - * Definition of the OneHotLayer class, which implements a standard network - * layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a one hot classification layer that can be used as - * output layer. - */ -class OneHotLayer -{ - public: - /** - * Create the OneHotLayer object. - */ - OneHotLayer() - { - // Nothing to do here. - } - - /* - * Calculate the error using the specified input activation and the target. - * The error is stored into the given error parameter. - * - * @param inputActivations Input data used for evaluating the network. - * @param target Target data used for evaluating the network. - * @param error The calculated error with respect to the input activation and - * the given target. - */ - template - void CalculateError(const DataType& inputActivations, - const DataType& target, - DataType& error) - { - error = inputActivations - target; - } - - /* - * Calculate the output class using the specified input activation. - * - * @param inputActivations Input data used to calculate the output class. - * @param output Output class of the input activation. - */ - template - void OutputClass(const DataType& inputActivations, DataType& output) - { - output = inputActivations; - output.zeros(); - - arma::uword maxIndex = 0; - inputActivations.max(maxIndex); - output(maxIndex) = 1; - } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } -}; // class OneHotLayer - -//! Layer traits for the one-hot class classification layer. -template <> -class LayerTraits -{ - public: - static const bool IsBinary = true; - static const bool IsOutputLayer = true; - static const bool IsBiasLayer = false; - static const bool IsConnection = false; -}; - -} // namespace ann -} // namespace mlpack - - -#endif diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp deleted file mode 100644 index e8a205f44f8..00000000000 --- a/src/mlpack/methods/ann/layer/pooling_layer.hpp +++ /dev/null @@ -1,267 +0,0 @@ -/** - * @file pooling_layer.hpp - * @author Marcus Edel - * @author Nilay Jain - * - * Definition of the PoolingLayer class, which attaches various pooling - * functions to the embedding layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
- */ -#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP - -#include -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the pooling layer. The pooling layer works as a metaclass - * which attaches various functions to the embedding layer. - * - * @tparam PoolingRule Pooling function used for the embedding layer. - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename PoolingRule = MeanPooling, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -class PoolingLayer -{ - public: - /** - * Create the PoolingLayer object using the specified number of units. - * - * @param kSize Size of the pooling window. - * @param stride The stride of the convolution operation. - * @param pooling The pooling strategy. - */ - PoolingLayer(const size_t kSize, - const size_t stride = 1, - PoolingRule pooling = PoolingRule()) : - kSize(kSize), - stride(stride), - pooling(pooling) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - Pooling(input, output); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Cube& input, arma::Cube& output) - { - output = arma::zeros >((input.n_rows - kSize) / stride + 1, - (input.n_cols - kSize) / stride + 1, input.n_slices); - - for (size_t s = 0; s < input.n_slices; s++) - Pooling(input.slice(s), output.slice(s)); - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Cube& gy, - arma::Cube& g) - { - g = arma::zeros >(inputParameter.n_rows, - inputParameter.n_cols, inputParameter.n_slices); - - for (size_t s = 0; s < gy.n_slices; s++) - { - Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s)); - } - } - - /** - * Ordinary feed backward pass of a neural network, using 3rd-order tensors as - * input, calculating the function f(x) by propagating x backwards through f. - * Using the results from the feed forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& /* unused */, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube from the error matrix. 
- arma::Cube mappedError = arma::zeros(outputParameter.n_rows, - outputParameter.n_cols, outputParameter.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * outputParameter.n_rows * outputParameter.n_cols, - (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - outputParameter.n_rows, outputParameter.n_cols); - } - } - - Backward(inputParameter, mappedError, g); - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - InputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - InputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kSize, "kSize"); - ar & data::CreateNVP(pooling, "pooling"); - ar & data::CreateNVP(stride, "stride"); - } - - private: - /** - * Apply pooling to the input and store the results. - * - * @param input The input to be apply the pooling rule. - * @param output The pooled result. - */ - template - void Pooling(const arma::Mat& input, arma::Mat& output) - { - const size_t rStep = kSize; - const size_t cStep = kSize; - - for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride) - { - for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride) - { - output(i, j) += pooling.Pooling(input( - arma::span(rowidx, rowidx + rStep - 1), - arma::span(colidx, colidx + cStep - 1))); - } - } - } - - /** - * Apply unpooling to the input and store the results. - * - * @param input The input to be apply the unpooling rule. - * @param output The pooled result. - */ - template - void Unpooling(const arma::Mat& input, - const arma::Mat& error, - arma::Mat& output) - { - const size_t rStep = input.n_rows / error.n_rows; - const size_t cStep = input.n_cols / error.n_cols; - - arma::Mat unpooledError; - for (size_t j = 0; j < input.n_cols; j += cStep) - { - for (size_t i = 0; i < input.n_rows; i += rStep) - { - const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)); - - pooling.Unpooling(inputArea, error(i / rStep, j / cStep), - unpooledError); - - output(arma::span(i, i + rStep - 1), - arma::span(j, j + cStep - 1)) += unpooledError; - } - } - } - - //! Locally-stored size of the pooling window. - size_t kSize; - - //! Locally-stored stride value by which we move filter. - size_t stride; - - //! Locally-stored pooling strategy. - PoolingRule pooling; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class PoolingLayer - -//! Layer traits for the pooling layer. 
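PoolingLayer::Pooling above slides a kSize x kSize window over the input with the given stride and applies the pooling rule to each window, while Unpooling spreads the error back over the corresponding region. A self-contained sketch of the forward pass for mean pooling follows; it is a simplified stand-in for the MeanPooling rule applied to a single matrix, not the mlpack class itself.

#include <armadillo>

// Mean-pool a matrix with a square window; kSize and stride have the same
// meaning as in PoolingLayer.
arma::mat MeanPool(const arma::mat& input, const size_t kSize, const size_t stride)
{
  const size_t outRows = (input.n_rows - kSize) / stride + 1;
  const size_t outCols = (input.n_cols - kSize) / stride + 1;
  arma::mat output(outRows, outCols);

  for (size_t j = 0, c = 0; j < outCols; ++j, c += stride)
    for (size_t i = 0, r = 0; i < outRows; ++i, r += stride)
      output(i, j) = arma::mean(arma::vectorise(
          input.submat(r, c, r + kSize - 1, c + kSize - 1)));

  return output;
}

int main()
{
  arma::mat image(6, 6, arma::fill::randu);
  MeanPool(image, 2, 2).print("2x2 mean-pooled output");
  return 0;
}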
-template< - typename PoolingRule, - typename InputDataType, - typename OutputDataType -> -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - - -} // namespace ann -} // namespace mlpack - -#endif - diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp deleted file mode 100644 index 5e231a7480b..00000000000 --- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp +++ /dev/null @@ -1,192 +0,0 @@ -/** - * @file recurrent_layer.hpp - * @author Marcus Edel - * - * Definition of the RecurrentLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the RecurrentLayer class. Recurrent layers can be used - * similarly to feed-forward layers except that the input isn't stored in the - * inputParameter, instead it's in stored in the recurrentParameter. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class RecurrentLayer -{ - public: - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - RecurrentLayer(const size_t inSize, const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(inSize, 1)) - { - weights.set_size(outSize, inSize); - } - - /** - * Create the RecurrentLayer object using the specified number of units. - * - * @param outSize The number of output units. - */ - RecurrentLayer(const size_t outSize) : - inSize(outSize), - outSize(outSize), - recurrentParameter(arma::zeros(outSize, 1)) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + weights * recurrentParameter; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::mat& g) - { - g = (weights).t() * gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input activation. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - GradientDataType& g) - { - g = d * recurrentParameter.t(); - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the input parameter. - InputDataType const& RecurrentParameter() const { return recurrentParameter; } - //! Modify the input parameter. - InputDataType& RecurrentParameter() { return recurrentParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(recurrentParameter, "recurrentParameter"); - ar & data::CreateNVP(weights, "weights"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! Locally-stored recurrent parameter object. - InputDataType recurrentParameter; -}; // class RecurrentLayer - -//! Layer traits for the recurrent layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp deleted file mode 100644 index 655e443b1e5..00000000000 --- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/** - * @file reinforce_normal_layer.hpp - * @author Marcus Edel - * - * Definition of the ReinforceNormalLayer class, which implements the REINFORCE - * algorithm for the normal distribution. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the reinforce normal layer. 
The reinforce normal layer - * implements the REINFORCE algorithm for the normal distribution. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class ReinforceNormalLayer -{ - public: - /** - * Create the ReinforceNormalLayer object. - * - * @param stdev Standard deviation used during the forward and backward pass. - */ - ReinforceNormalLayer(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - } - else - { - // Use maximum a posteriori. - output = input; - } - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& /* gy */, - DataType& g) - { - g = (input - inputParameter) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= -reward; - } - - - //! Get the input parameter. - InputDataType& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - //! Get the value of the reward parameter. - double Reward() const { return reward; } - //! Modify the value of the deterministic parameter. - double& Reward() { return reward; } - - private: - //! Standard deviation used during the forward and backward pass. - const double stdev; - - //! Locally-stored reward parameter. - double reward; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! If true use maximum a posteriori during the forward pass. 
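The ReinforceNormalLayer above treats its input as the mean of a Gaussian with fixed standard deviation: Forward samples output = mean + stdev * noise (or passes the mean through when deterministic), and Backward uses the REINFORCE score-function gradient, d/dmean log N(sample | mean, stdev^2) = (sample - mean) / stdev^2, scaled by -reward. The standalone sketch below reproduces that computation with illustrative names and values; it does not use the mlpack layer API.

#include <armadillo>
#include <cmath>

int main()
{
  arma::arma_rng::set_seed(7);

  const double stdev = 0.1;
  const double reward = 1.5;           // supplied externally, e.g. a classification reward

  arma::vec mean = {0.2, -0.4, 0.9};   // layer input: the predicted means
  arma::vec sample = mean + stdev * arma::vec(mean.n_elem, arma::fill::randn);

  // Score-function (REINFORCE) gradient with respect to the mean,
  // scaled by -reward as in ReinforceNormalLayer::Backward.
  arma::vec g = -reward * (sample - mean) / std::pow(stdev, 2.0);

  sample.print("sampled action");
  g.print("gradient passed back to the mean");
  return 0;
}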
- bool deterministic; -}; // class ReinforceNormalLayer - -}; // namespace ann -}; // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/softmax_layer.hpp deleted file mode 100644 index a2d3323eed8..00000000000 --- a/src/mlpack/methods/ann/layer/softmax_layer.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/** - * @file softmax_layer.hpp - * @author Marcus Edel - * - * Definition of the SoftmaxLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the softmax layer. The softmax loss layer computes the - * multinomial logistic loss of the softmax of its inputs. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SoftmaxLayer -{ - public: - /** - * Create the SoftmaxLayer object. - */ - SoftmaxLayer() - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = arma::trunc_exp(input - - arma::repmat(arma::max(input), input.n_rows, 1)); - output /= arma::accu(output); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Mat& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - InputDataType const& Delta() const { return delta; } - //! Modify the delta. - InputDataType& Delta() { return delta; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& /* ar */, const unsigned int /* version */) - { - /* Nothing to do here */ - } - - private: - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. 
- OutputDataType outputParameter; -}; // class SoftmaxLayer - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp deleted file mode 100644 index c3b723f17d9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * @file sparse_bias_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the SparseBiasLayer class. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * An implementation of a bias layer design for sparse autoencoder. - * The BiasLayer class represents a single layer of a neural network. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseBiasLayer -{ - public: - /** - * Create the SparseBiasLayer object using the specified number of units and - * bias parameter. - * - * @param outSize The number of output units. - * @param batchSize The batch size used to train the network. - * @param bias The bias value. - */ - SparseBiasLayer(const size_t outSize, const size_t batchSize) : - outSize(outSize), - batchSize(batchSize) - { - weights.set_size(outSize, 1); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = input + arma::repmat(weights, 1, input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& /* unused */, - const ErrorType& gy, - ErrorType& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the bias. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& /* input */, - const arma::Mat& d, - InputDataType& g) - { - g = arma::sum(d, 1) / static_cast( - batchSize); - } - - //! Get the batch size - size_t BatchSize() const { return batchSize; } - //! Modify the batch size - size_t& BatchSize() { return batchSize; } - - //! Get the weights. - InputDataType const& Weights() const { return weights; } - //! Modify the weights. - InputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. 
- InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - InputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - InputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(batchSize, "batchSize"); - } - - private: - //! Locally-stored number of output units. - size_t outSize; - - //! The batch size used to train the network. - size_t batchSize; - - //! Locally-stored weight object. - InputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - InputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseBiasLayer - -//! Layer traits for the bias layer. -template -class LayerTraits > -{ - public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = true; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp deleted file mode 100644 index 6b1d9d118f9..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp +++ /dev/null @@ -1,180 +0,0 @@ -/** - * @file sparse_input_layer.hpp - * @author Tham Ngap Wei - * - * Definition of the sparse input class which serve as the first layer - * of the sparse autoencoder - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP - -#include -#include - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseInputLayer. The SparseInputLayer class represents - * the first layer of sparse autoencoder - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat - > -class SparseInputLayer -{ - public: - /** - * Create the SparseInputLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - * @param lambda L2-regularization parameter. 
- */ - SparseInputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001) : - inSize(inSize), - outSize(outSize), - lambda(lambda) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& /* unused */, - const arma::Mat& gy, - arma::Mat& g) - { - g = gy; - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. - * @param g The calculated gradient. - */ - template - void Gradient(const InputType& input, - const arma::Mat& d, - GradientDataType& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseInputLayer - -//! Layer traits for the SparseInputLayer. 
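SparseInputLayer::Gradient above averages the usual outer-product gradient over the mini-batch and adds L2 weight decay, g = d * x^T / n + lambda * W. A short Armadillo sketch of that expression follows; sizes and names are illustrative only.

#include <armadillo>

int main()
{
  arma::arma_rng::set_seed(1);

  const size_t inSize = 6, outSize = 4, n = 10;   // n = mini-batch size
  const double lambda = 0.0001;                   // L2-regularization parameter

  arma::mat weights(outSize, inSize, arma::fill::randu);
  arma::mat input(inSize, n, arma::fill::randu);  // one column per sample
  arma::mat d(outSize, n, arma::fill::randn);     // backpropagated error

  // Batch-averaged gradient plus weight decay, as in SparseInputLayer::Gradient.
  arma::mat g = d * input.t() / static_cast<double>(n) + lambda * weights;

  g.print("regularized weight gradient");
  return 0;
}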
-template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp deleted file mode 100644 index 33a2a72f7f3..00000000000 --- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/** - * @file sparse_output_layer.hpp - * @author Tham Ngap Wei - * - * This is the fourth layer of sparse autoencoder. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP - -#include -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the SparseOutputLayer class. The SparseOutputLayer class - * represents the fourth layer of the sparse autoencoder. - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class SparseOutputLayer -{ - public: - /** - * Create the SparseLayer object using the specified number of units. - * - * @param inSize The number of input units. - * @param outSize The number of output units. - */ - SparseOutputLayer(const size_t inSize, - const size_t outSize, - const double lambda = 0.0001, - const double beta = 3, - const double rho = 0.01) : - inSize(inSize), - outSize(outSize), - lambda(lambda), - beta(beta), - rho(rho) - { - weights.set_size(outSize, inSize); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const arma::Mat& input, arma::Mat& output) - { - output = weights * input; - // Average activations of the hidden layer. - rhoCap = arma::sum(input, 1) / static_cast(input.n_cols); - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards trough f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const InputType& input, - const arma::Mat& gy, - arma::Mat& g) - { - const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) / - (1 - rhoCap)); - - // NOTE: if the armadillo version high enough, find_nonfinite can prevents - // overflow value: - // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros(); - g = weights.t() * gy + - arma::repmat(klDivGrad, 1, input.n_cols); - } - - /* - * Calculate the gradient using the output delta and the input activation. - * - * @param input The propagated input. - * @param d The calculated error. 
- * @param g The calculated gradient. - */ - template - void Gradient(const InputType input, const arma::Mat& d, arma::Mat& g) - { - g = d * input.t() / static_cast( - input.n_cols) + lambda * weights; - } - - //! Sets the KL divergence parameter. - void Beta(const double b) - { - beta = b; - } - - //! Gets the KL divergence parameter. - double Beta() const - { - return beta; - } - - //! Sets the sparsity parameter. - void Rho(const double r) - { - rho = r; - } - - //! Gets the sparsity parameter. - double Rho() const - { - return rho; - } - - //! Get the weights. - OutputDataType const& Weights() const { return weights; } - //! Modify the weights. - OutputDataType& Weights() { return weights; } - - //! Get the RhoCap. - OutputDataType const& RhoCap() const { return rhoCap; } - //! Modify the RhoCap. - OutputDataType& RhoCap() { return rhoCap; } - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the gradient. - OutputDataType const& Gradient() const { return gradient; } - //! Modify the gradient. - OutputDataType& Gradient() { return gradient; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(lambda, "lambda"); - ar & data::CreateNVP(beta, "beta"); - ar & data::CreateNVP(rho, "rho"); - } - - private: - //! Locally-stored number of input units. - size_t inSize; - - //! Locally-stored number of output units. - size_t outSize; - - //! L2-regularization parameter. - double lambda; - - //! KL divergence parameter. - double beta; - - //! Sparsity parameter. - double rho; - - //! Locally-stored weight object. - OutputDataType weights; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored gradient object. - OutputDataType gradient; - - //! Average activations of the hidden layer. - OutputDataType rhoCap; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; -}; // class SparseOutputLayer - -//! Layer traits for the SparseOutputLayer. -template -class LayerTraits > -{ -public: - static const bool IsBinary = false; - static const bool IsOutputLayer = false; - static const bool IsBiasLayer = false; - static const bool IsLSTMLayer = false; - static const bool IsConnection = true; -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp deleted file mode 100644 index 5b4da8ed0b7..00000000000 --- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp +++ /dev/null @@ -1,171 +0,0 @@ -/** - * @file vr_class_reward_layer.hpp - * @author Marcus Edel - * - * Definition of the VRClassRewardLayer class, which implements the variance - * reduced classification reinforcement layer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. 
You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/** - * Implementation of the variance reduced classification reinforcement layer. - * This layer is meant to be used in combination with the reinforce normal layer - * (ReinforceNormalLayer), which expects that an reward: - * (1 for success, 0 otherwise). - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::field, - typename OutputDataType = arma::field -> -class VRClassRewardLayer -{ - public: - /** - * Create the VRClassRewardLayer object. - * - * @param scale Parameter used to scale the reward. - * @param sizeAverage Take the average over all batches. - */ - VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. - */ - template - double Forward(const arma::field >& input, - const arma::Mat& target) - { - return Forward(input(0, 0), target); - } - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. - */ - template - double Forward(const arma::Mat& input, const arma::Mat& target) - { - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return -reward / input.n_cols; - } - - return -reward; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - double Backward(const arma::field >& input, - const arma::Mat& /* gy */, - arma::field >& g) - { - g = arma::field >(2, 1); - g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols); - - double vrReward = reward - arma::as_scalar(input(1, 0)); - if (sizeAverage) - { - vrReward /= input(0, 0).n_cols; - } - - const double norm = sizeAverage ? 2.0 / input.n_cols : 2.0; - - g(1, 0) = norm * (input(1, 0) - reward); - - return vrReward; - } - - //! Get the input parameter. - InputDataType& InputParameter() const {return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. 
- OutputDataType& OutputParameter() const {return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType& Delta() const {return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the value of the deterministic parameter. - bool Deterministic() const { return deterministic; } - //! Modify the value of the deterministic parameter. - bool& Deterministic() { return deterministic; } - - private: - //! Locally-stored value to scale the reward. - const double scale; - - //! If true take the average over all batches. - const bool sizeAverage; - - //! Locally stored reward parameter. - double reward; - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. - OutputDataType outputParameter; - - //! If true dropout and scaling is disabled, see notes above. - bool deterministic; -}; // class VRClassRewardLayer - -}; // namespace ann -}; // namespace mlpack - -#endif From acf9d9ed8bffb748c4a39a7e84f1a290903a6a25 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 9 Nov 2016 01:24:04 +0100 Subject: [PATCH 41/82] Increase the number of template arguments for the boost list class. --- src/mlpack/prereqs.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index eb172ab15b5..69d396ba443 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -62,6 +62,12 @@ using enable_if_t = typename enable_if::type; #endif #endif +// Increase the number of template arguments for the boost list class. +#undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#undef BOOST_MPL_LIMIT_LIST_SIZE +#define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS +#define BOOST_MPL_LIMIT_LIST_SIZE 30 + // We'll need the necessary boost::serialization features, as well as what we // use with mlpack. In Boost 1.59 and newer, the BOOST_PFTO code is no longer // defined, but we still need to define it (as nothing) so that the mlpack @@ -95,4 +101,4 @@ using enable_if_t = typename enable_if::type; #define ARMA_USE_CXX11 #endif -#endif +#endif \ No newline at end of file From 46e6bc7938572b805a0a1dd5cb610d67fcc2e1b1 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 9 Nov 2016 22:38:06 +0100 Subject: [PATCH 42/82] Move pooling rules into the pooling class. So that we can use the MaxPooling and MeanPooling class names for the actual module name. --- .../methods/ann/pooling_rules/CMakeLists.txt | 15 ----- .../methods/ann/pooling_rules/max_pooling.hpp | 56 ------------------- .../ann/pooling_rules/mean_pooling.hpp | 56 ------------------- 3 files changed, 127 deletions(-) delete mode 100644 src/mlpack/methods/ann/pooling_rules/CMakeLists.txt delete mode 100644 src/mlpack/methods/ann/pooling_rules/max_pooling.hpp delete mode 100644 src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt deleted file mode 100644 index 99b6b803bb1..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Define the files we need to compile -# Anything not in this list will not be compiled into mlpack. -set(SOURCES - max_pooling.hpp - mean_pooling.hpp -) - -# Add directory name to sources. 
-set(DIR_SRCS) -foreach(file ${SOURCES}) - set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) -endforeach() -# Append sources (with directory name) to list of all mlpack sources (used at -# the parent scope). -set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp deleted file mode 100644 index f50b0419fe3..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file max_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MaxPooling class, which implements max pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The max pooling rule for convolution neural networks. Take the maximum value - * within the receptive block. - */ -class MaxPooling -{ - public: - /* - * Return the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return input.max(); - } - - /* - * Set the maximum value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. - */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp deleted file mode 100644 index 7ab88c329f4..00000000000 --- a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file mean_pooling.hpp - * @author Shangtong Zhang - * - * Definition of the MeanPooling class, which implements mean pooling. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP -#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. */ { - -/* - * The mean pooling rule for convolution neural networks. Average all values - * within the receptive block. - */ -class MeanPooling -{ - public: - /* - * Return the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - */ - template - double Pooling(const MatType& input) - { - return arma::mean(arma::mean(input)); - } - - /* - * Set the average value within the receptive block. - * - * @param input Input used to perform the pooling operation. - * @param value The unpooled value. - * @param output The unpooled output data. 
- */ - template - void Unpooling(const MatType& input, const double value, MatType& output) - { - output = MatType(input.n_rows, input.n_cols); - output.fill(value / input.n_elem); - } -}; - -} // namespace ann -} // namespace mlpack - -#endif From 27b46ab83f98e0b5e7c7b72bd7f1d70341708931 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 26 Nov 2016 20:48:26 +0100 Subject: [PATCH 43/82] Use the stride parameter inside the convolution function. --- .../ann/convolution_rules/fft_convolution.hpp | 30 +++++++++-- .../convolution_rules/naive_convolution.hpp | 54 +++++++++++++------ .../ann/convolution_rules/svd_convolution.hpp | 24 +++++++-- 3 files changed, 82 insertions(+), 26 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index bbcfecdaebb..af1b6f5c9e9 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -47,13 +47,17 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -82,13 +86,17 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -130,11 +138,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -162,11 +174,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -191,11 +207,15 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index fc7fc6926fb..6fe5bf6e672 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -41,16 +41,20 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { - output = arma::zeros >(input.n_rows - filter.n_rows + 1, - input.n_cols - filter.n_cols + 1); + output = arma::zeros >((input.n_rows - filter.n_rows + 1) / + dW, (input.n_cols - filter.n_cols + 1) / dH); // It seems to be about 3.5 times faster to use pointers instead of // filter(ki, kj) * input(leftInput + ki, topInput + kj) and output(i, j). @@ -63,7 +67,7 @@ class NaiveConvolution const eT* kernelPtr = filter.memptr(); for (size_t kj = 0; kj < filter.n_cols; ++kj) { - const eT* inputPtr = input.colptr(kj + j) + i; + const eT* inputPtr = input.colptr(kj + j * dW) + i * dH; for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr) *outputPtr += *kernelPtr * (*inputPtr); } @@ -77,13 +81,17 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static typename std::enable_if< std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); @@ -92,11 +100,11 @@ class NaiveConvolution arma::Mat inputPadded = arma::zeros >(outputRows, outputCols); inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1, - filter.n_rows - 1 + input.n_rows - 1, - filter.n_cols - 1 + input.n_cols - 1) = input; + filter.n_rows - 1 + input.n_rows - 1, + filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output); + output, dW, dH); } /* @@ -105,15 +113,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -122,7 +134,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -133,15 +145,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input, filter.slice(0), - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, filter.n_slices); @@ -150,7 +166,7 @@ class NaiveConvolution for (size_t i = 1; i < filter.n_slices; i++) { NaiveConvolution::Convolution(input, filter.slice(i), - output.slice(i)); + output.slice(i), dW, dH); } } @@ -161,15 +177,19 @@ class NaiveConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; NaiveConvolution::Convolution(input.slice(0), filter, - convOutput); + convOutput, dW, dH); output = arma::Cube(convOutput.n_rows, convOutput.n_cols, input.n_slices); @@ -178,7 +198,7 @@ class NaiveConvolution for (size_t i = 1; i < input.n_slices; i++) { NaiveConvolution::Convolution(input.slice(i), filter, - output.slice(i)); + output.slice(i), dW, dH); } } diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index a0b317ebb0c..e61b735bf8d 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -49,11 +49,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -113,11 +117,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -142,11 +150,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -170,11 +182,15 @@ class SVDConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. 
*/ template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From 9a6c2342416778ec44883ab024943884b1a78260 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 3 Dec 2016 22:56:33 +0100 Subject: [PATCH 44/82] Increase the number of template arguments for the boost list class. --- .../methods/ann/layer/{leaky_relu_layer.hpp => leaky_relu.hpp} | 0 ...ative_log_likelihood_layer.hpp => negative_log_likelihood.hpp} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/mlpack/methods/ann/layer/{leaky_relu_layer.hpp => leaky_relu.hpp} (100%) rename src/mlpack/methods/ann/layer/{negative_log_likelihood_layer.hpp => negative_log_likelihood.hpp} (100%) diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/leaky_relu_layer.hpp rename to src/mlpack/methods/ann/layer/leaky_relu.hpp diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp similarity index 100% rename from src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp rename to src/mlpack/methods/ann/layer/negative_log_likelihood.hpp From 86fac9dabe05825c8dd4a80d26b86bbcb11b33f2 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:03:20 +0100 Subject: [PATCH 45/82] Remove stride paramater from svd and fft convolution rule. --- .../ann/convolution_rules/fft_convolution.hpp | 20 +++++-------------- .../convolution_rules/naive_convolution.hpp | 6 +++--- .../ann/convolution_rules/svd_convolution.hpp | 16 ++++----------- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index af1b6f5c9e9..225626e34b8 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -55,9 +55,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -94,9 +92,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -144,9 +140,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -180,9 +174,7 @@ class FFTConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -213,9 +205,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - 
arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index 6fe5bf6e672..c90574293ba 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -93,8 +93,8 @@ class NaiveConvolution const size_t dW = 1, const size_t dH = 1) { - const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1); - const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1); + const size_t outputRows = (input.n_rows + 2 * (filter.n_rows - 1)) * dW; + const size_t outputCols = (input.n_cols + 2 * (filter.n_cols - 1)) * dH; // Pad filter and input to the working output shape. arma::Mat inputPadded = arma::zeros >(outputRows, @@ -104,7 +104,7 @@ class NaiveConvolution filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, - output, dW, dH); + output, 1, 1); } /* diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index e61b735bf8d..5206ec1996f 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -55,9 +55,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -123,9 +121,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -156,9 +152,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -188,9 +182,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From 7ede24fe50d9fd9f7f8487161719c323b0e1ab67 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:27:30 +0100 Subject: [PATCH 46/82] Refactor ann layer. 
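
This refactor replaces the old per-layer headers with one module class per
file (Add, AddMerge, Concat, Linear, LSTM, ...) and switches the module
Forward()/Backward()/Gradient() methods to rvalue-reference arguments, with
the weights exposed through Parameters(). A minimal usage sketch of the new
module interface, based on the Add module introduced in this patch — the
include paths and the main() scaffolding are assumptions for illustration
only and are not part of the patch:

    #include <mlpack/core.hpp>                   // assumed include
    #include <mlpack/methods/ann/layer/add.hpp>  // module added in this patch

    using namespace mlpack::ann;

    int main()
    {
      Add<> bias(3);              // bias module with three output units
      bias.Parameters().zeros();  // weights form an (outSize x 1) vector

      arma::mat input = arma::ones<arma::mat>(3, 1);
      arma::mat output;

      // The refactored interface passes data as rvalue references.
      bias.Forward(std::move(input), std::move(output));  // output = input + weights

      output.print("output");
      return 0;
    }
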
--- src/mlpack/methods/ann/layer/add.hpp | 149 +++++ src/mlpack/methods/ann/layer/add_merge.hpp | 153 +++++ src/mlpack/methods/ann/layer/base_layer.hpp | 63 +-- src/mlpack/methods/ann/layer/concat.hpp | 285 ++++++++++ .../methods/ann/layer/concat_performance.hpp | 150 +++++ src/mlpack/methods/ann/layer/constant.hpp | 131 +++++ src/mlpack/methods/ann/layer/convolution.hpp | 524 ++++++++++++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 263 +++++++++ src/mlpack/methods/ann/layer/dropout.hpp | 194 +++++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 86 +-- src/mlpack/methods/ann/layer/join.hpp | 119 ++++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 78 +-- src/mlpack/methods/ann/layer/linear.hpp | 180 ++++++ .../methods/ann/layer/linear_no_bias.hpp | 174 ++++++ src/mlpack/methods/ann/layer/log_softmax.hpp | 131 +++++ src/mlpack/methods/ann/layer/lookup.hpp | 161 ++++++ src/mlpack/methods/ann/layer/lstm.hpp | 516 +++++++++++++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 375 +++++++++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 322 +++++++++++ .../methods/ann/layer/mean_squared_error.hpp | 98 ++++ .../methods/ann/layer/multiply_constant.hpp | 108 ++++ .../ann/layer/negative_log_likelihood.hpp | 46 +- src/mlpack/methods/ann/layer/recurrent.hpp | 356 ++++++++++++ src/mlpack/methods/ann/layer/select.hpp | 127 +++++ src/mlpack/methods/ann/layer/sequential.hpp | 292 ++++++++++ 25 files changed, 4875 insertions(+), 206 deletions(-) create mode 100644 src/mlpack/methods/ann/layer/add.hpp create mode 100644 src/mlpack/methods/ann/layer/add_merge.hpp create mode 100644 src/mlpack/methods/ann/layer/concat.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_performance.hpp create mode 100644 src/mlpack/methods/ann/layer/constant.hpp create mode 100644 src/mlpack/methods/ann/layer/convolution.hpp create mode 100644 src/mlpack/methods/ann/layer/dropconnect.hpp create mode 100644 src/mlpack/methods/ann/layer/dropout.hpp create mode 100644 src/mlpack/methods/ann/layer/join.hpp create mode 100644 src/mlpack/methods/ann/layer/linear.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_no_bias.hpp create mode 100644 src/mlpack/methods/ann/layer/log_softmax.hpp create mode 100644 src/mlpack/methods/ann/layer/lookup.hpp create mode 100644 src/mlpack/methods/ann/layer/lstm.hpp create mode 100644 src/mlpack/methods/ann/layer/max_pooling.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_pooling.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_squared_error.hpp create mode 100644 src/mlpack/methods/ann/layer/multiply_constant.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent.hpp create mode 100644 src/mlpack/methods/ann/layer/select.hpp create mode 100644 src/mlpack/methods/ann/layer/sequential.hpp diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp new file mode 100644 index 00000000000..be8fc60e31b --- /dev/null +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -0,0 +1,149 @@ +/** + * @file add.hpp + * @author Marcus Edel + * + * Definition of the Add class that applies a bias term to the incoming data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_HPP + +#include +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Add module class. The Add module applies a bias term + * to the incoming data. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Add +{ + public: + /** + * Create the Add object using the specified number of output units. + * + * @param outSize The number of output units. + */ + Add(const size_t outSize) : outSize(outSize) + { + weights.set_size(outSize, 1); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = input + weights; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + } + + private: + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. 
+ OutputDataType outputParameter; +}; // class Add + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp new file mode 100644 index 00000000000..7a01792d250 --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -0,0 +1,153 @@ +/** + * @file add_merge.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the AddMerge module class. The AddMerge class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class AddMerge +{ + public: + //! Create the AddMerge object. + AddMerge() + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& /* input */, OutputType&& output) + { + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. 
+ OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(network, "network"); + } + + private: + std::vector network; + + //! Locally-stored delete visitor module object. + DeleteVisitor deleteVisitor; + + //! Locally-stored output parameter visitor module object. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor module object. + DeltaVisitor deltaVisitor; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class AddMerge + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp index 2b915a116d5..68afe1cefeb 100644 --- a/src/mlpack/methods/ann/layer/base_layer.hpp +++ b/src/mlpack/methods/ann/layer/base_layer.hpp @@ -32,7 +32,6 @@ namespace ann /** Artificial Neural Network. */ { * - IdentityLayer * - ReLULayer * - TanHLayer - * - BaseLayer2D * * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, @@ -64,33 +63,14 @@ class BaseLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { ActivationFunction::fn(input, output); } /** * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - ActivationFunction::deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed + * f(x) by propagating x backwards trough f. Using the results from the feed * forward pass. * * @param input The propagated input activation. @@ -98,30 +78,13 @@ class BaseLayer * @param g The calculated gradient. */ template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; + arma::Mat derivative; ActivationFunction::deriv(input, derivative); - g = mappedError % derivative; + g = gy % derivative; } //! Get the input parameter. 
@@ -205,18 +168,6 @@ template < using TanHLayer = BaseLayer< ActivationFunction, InputDataType, OutputDataType>; -/** - * Standard Base-Layer2D using the logistic activation function. - */ -template < - class ActivationFunction = LogisticFunction, - typename InputDataType = arma::cube, - typename OutputDataType = arma::cube -> -using BaseLayer2D = BaseLayer< - ActivationFunction, InputDataType, OutputDataType>; - - } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp new file mode 100644 index 00000000000..bd836c7e26b --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -0,0 +1,285 @@ +/** + * @file concat.hpp + * @author Marcus Edel + * + * Definition of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Concat class. The Concat class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Concat +{ + public: + /** + * Create the Concat object using the specified parameters. + * + * @param model Expose all network modules. + * @param same Merge the error in the backward pass. + */ + Concat(const bool model = true, const bool same = true) : + model(model), + same(same) + { + parameters.set_size(0, 0); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. 
+ * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } + } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. + arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + private: + //! 
Parameter which indicates if the modules should be exposed. + bool model; + + //! If true merge the error in the backward pass. + bool same; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; +}; // class Concat + + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp new file mode 100644 index 00000000000..0f03cbc72e1 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -0,0 +1,150 @@ +/** + * @file concat_performance.hpp + * @author Marcus Edel + * + * Definition of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the concat performance class. The class works as a + * feed-forward fully connected network container which plugs performance layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename OutputLayerType = NegativeLogLikelihood<>, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ConcatPerformance +{ + public: + /** + * Create the ConcatPerformance object. + * + * @param inSize The number of inputs. + * @param outputLayer Output layer used to evaluate the network. + */ + ConcatPerformance(const size_t inSize, + OutputLayerType&& outputLayer = OutputLayerType()) : + inSize(inSize), + outputLayer(std::move(outputLayer)) + { + /* Nothing to do here. */ + } + + /* + * Computes the Negative log likelihood. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, arma::Mat&& target) + { + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; + } + + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. 
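+   * Such log-probabilities are typically produced by a LogSoftMax layer
+   * (see log_softmax.hpp).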
The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored number of inputs. + size_t inSize; + + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConcatPerformance + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp new file mode 100644 index 00000000000..58816acfbd3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -0,0 +1,131 @@ +/** + * @file constant.hpp + * @author Marcus Edel + * + * Definition of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the constant layer. The constant layer outputs a given + * constant value given any input value. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Constant +{ + public: + /** + * Create the Constant object that outputs a given constant scalar value + * given any input value. + * + * @param outSize The number of output units. 
+ * @param scalar The constant value used to create the constant output. + */ + Constant(const size_t outSize, const double scalar) : + inSize(0), + outSize(outSize) + { + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); + } + + /** + * Ordinary feed forward pass of a neural network. The forward pass fills the + * output with the specified constant parameter. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; + } + + /** + * Ordinary feed backward pass of a neural network. The backward pass of the + * constant layer is returns always a zero output error matrix. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) + { + g = arma::zeros(inSize, 1); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(constantOutput, "constantOutput"); + } + + private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored constant output matrix. + OutputDataType constantOutput; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class ConstantLayer + +}; // namespace ann +}; // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp new file mode 100644 index 00000000000..be7fb7d6a5b --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -0,0 +1,524 @@ +/** + * @file convolution.hpp + * @author Marcus Edel + * + * Definition of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_HPP + +#include + +#include +#include +#include +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Convolution class. The Convolution class represents a + * single layer of a neural network. + * + * @tparam ForwardConvolutionRule Convolution to perform forward process. + * @tparam BackwardConvolutionRule Convolution to perform backward process. 
+ * @tparam GradientConvolutionRule Convolution to calculate gradient. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename ForwardConvolutionRule = NaiveConvolution, + typename BackwardConvolutionRule = NaiveConvolution, + typename GradientConvolutionRule = NaiveConvolution, + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Convolution +{ +public: + //! Create the Convolution object. + Convolution() + { + /* Nothing to do here. */ + } + + /** + * Create the Convolution object using the specified number of input maps, + * output maps, filter size, stride and padding parameter. + * + * @param inSize The number of input maps. + * @param outSize The number of output maps. + * @param kW Width of the filter/kernel. + * @param kH Height of the filter/kernel. + * @param dW Stride of filter application in the x direction. + * @param dH Stride of filter application in the y direction. + * @param padW Padding width of the input. + * @param padH Padding height of the input. + * @param inputWidth The widht of the input data. + * @param inputHeight The height of the input data. + */ + Convolution(const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const size_t padW = 0, + const size_t padH = 0, + const size_t inputWidth = 0, + const size_t inputHeight = 0) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) + { + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); + } + + /* + * Set the weight and bias term. + */ + void Reset() + { + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false,false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
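+   *
+   * As an illustrative sketch (mirroring the ConvOutSize() helper defined
+   * further below), the spatial size of the produced output maps is intended
+   * to be:
+   *
+   * @code
+   * wConv = (inputWidth  + 2 * padW - kW) / dW + 1;
+   * hConv = (inputHeight + 2 * padH - kH) / dH + 1;
+   * @endcode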
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + + + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
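+   *
+   * In outline: the gradient of each weight slice is obtained by convolving
+   * the corresponding input map with the backpropagated error map, and the
+   * bias gradient of an output map accumulates that map's error (see the
+   * implementation below).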
+ */ + template + void Gradient(const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) + { + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. 
+ size_t& OutputHeight() { return outputHeight; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); + } + + private: + + /* + * Return the convolution output size. + * + * @param size The size of the input (row or column). + * @param k The size of the filter (width or height). + * @param s The stride size (x or y direction). + * @param p The size of the padding (width or height). + * @return The convolution output size. + */ + size_t ConvOutSize(const size_t size, + const size_t k, + const size_t s, + const size_t p) + { + return std::floor(size + p * 2 - k) / s + 1; + } + + /* + * Rotates a 3rd-order tensor counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Cube& input, arma::Cube& output) + { + output = arma::Cube(input.n_rows, input.n_cols, input.n_slices); + + // * left-right flip, up-down flip */ + for (size_t s = 0; s < output.n_slices; s++) + output.slice(s) = arma::fliplr(arma::flipud(input.slice(s))); + } + + /* + * Rotates a dense matrix counterclockwise by 180 degrees. + * + * @param input The input data to be rotated. + * @param output The rotated output. + */ + template + void Rotate180(const arma::Mat& input, arma::Mat& output) + { + // * left-right flip, up-down flip */ + output = arma::fliplr(arma::flipud(input)); + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Mat& input, + size_t wPad, + size_t hPad, + arma::Mat& output) + { + if (output.n_rows != input.n_rows + wPad * 2 || + output.n_cols != input.n_cols + hPad * 2) + { + output = arma::zeros(input.n_rows + wPad * 2, input.n_cols + hPad * 2); + } + + output.submat(wPad, hPad, wPad + input.n_rows - 1, + hPad + input.n_cols - 1) = input; + } + + /* + * Pad the given input data. + * + * @param input The input to be padded. + * @param wPad Padding width of the input. + * @param hPad Padding height of the input. + * @param output The padded output data. + */ + template + void Pad(const arma::Cube& input, + size_t wPad, + size_t hPad, + arma::Cube& output) + { + output = arma::zeros(input.n_rows + wPad * 2, + input.n_cols + hPad * 2, input.n_slices); + + for (size_t i = 0; i < input.n_slices; ++i) + { + Pad(input.slice(i), wPad, hPad, output.slice(i)); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored filter/kernel width. + size_t kW; + + //! Locally-stored filter/kernel height. + size_t kH; + + //! Locally-stored stride of the filter in x-direction. + size_t dW; + + //! Locally-stored stride of the filter in y-direction. + size_t dH; + + //! Locally-stored padding width. + size_t padW; + + //! 
Locally-stored padding height. + size_t padH; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight object. + arma::cube weight; + + //! Locally-stored bias term object. + arma::mat bias; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed padded input parameter. + arma::cube inputPaddedTemp; + + //! Locally-stored transformed error parameter. + arma::cube gTemp; + + //! Locally-stored transformed gradient parameter. + arma::cube gradientTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Convolution + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp new file mode 100644 index 00000000000..6180c812572 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -0,0 +1,263 @@ +/** + * @file dropconnect.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Definition of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_HPP + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "linear.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The DropConnect layer is a regularizer that randomly with probability + * ratio sets the connection values to zero and scales the remaining + * elements by factor 1 /(1 - ratio). The output is scaled with 1 / (1 - p) + * when deterministic is false. In the deterministic mode(during testing), + * the layer just computes the output. The output is computed according + * to the input layer. If no input layer is given, it will take a linear layer + * as default. + * + * Note: + * During training you should set deterministic to false and during testing + * you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @inproceedings{WanICML2013, + * title={Regularization of Neural Networks using DropConnect}, + * booktitle = {Proceedings of the 30th International Conference on Machine + * Learning(ICML - 13)}, + * author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and + * Rob Fergus}, + * year = {2013} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
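+ *
+ * A minimal usage sketch (illustrative only; the parameter values are
+ * arbitrary):
+ *
+ * @code
+ * // Wrap a 10 -> 10 linear layer and zero each connection with
+ * // probability 0.3 during training.
+ * DropConnect<> dropConnect(10, 10, 0.3);
+ * @endcode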
+ */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class DropConnect +{ + public: + //! Create the DropConnect object. + DropConnect() + { + /* Nothing to do here. */ + } + + /** + * Creates the DropConnect Layer as a Linear Object that takes input size, + * output size and ratio as parameter. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param ratio The probability of setting a value to zero. + */ + DropConnect(const size_t inSize, + const size_t outSize, + const double ratio = 0.5) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) + { + network.push_back(baseLayer); + } + + ~DropConnect() + { + boost::apply_visitor(DeleteVisitor(), baseLayer); + } + + /** + * Ordinary feed forward pass of the DropConnect layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + + output = output * scale; + } + } + + /** + * Ordinary feed backward pass of the DropConnect layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) + { + boost::apply_visitor( + BackwardVisitor( + std::move(input), + std::move(gy), + std::move(g) + ), + baseLayer); + } + + /** + * Calculate the gradient using the output delta and the input activation. + * + * @param input The propagated input. + * @param d The calculated error. + * @param g The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. + boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. 
+ OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + + //! Modify the value of the deterministic parameter. + bool &Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + +private: + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mask object. + OutputDataType mask; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Denoise mask for the weights. + OutputDataType denoise; + + //! Locally-stored layer module. + LayerTypes baseLayer; + + //! Locally-stored network modules. + std::vector network; +}; // class DropConnect. + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp new file mode 100644 index 00000000000..57e76f6170d --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -0,0 +1,194 @@ +/** + * @file dropout.hpp + * @author Marcus Edel + * + * Definition of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The dropout layer is a regularizer that randomly with probability ratio + * sets input values to zero and scales the remaining elements by factor 1 / + * (1 - ratio). If rescale is true the input is scaled with 1 / (1-p) when + * deterministic is false. In the deterministic mode (during testing), the layer + * just scales the output. + * + * Note: During training you should set deterministic to false and during + * testing you should set deterministic to true. + * + * For more information, see the following. + * + * @code + * @article{Hinton2012, + * author = {Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, + * Ilya Sutskever, Ruslan Salakhutdinov}, + * title = {Improving neural networks by preventing co-adaptation of feature + * detectors}, + * journal = {CoRR}, + * volume = {abs/1207.0580}, + * year = {2012}, + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
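+ *
+ * A minimal usage sketch (illustrative only; the parameter value is
+ * arbitrary):
+ *
+ * @code
+ * // Zero each input value with probability 0.3 and rescale the rest.
+ * Dropout<> dropout(0.3);
+ * dropout.Deterministic() = false;  // training mode
+ * @endcode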
+ */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Dropout +{ + public: + /** + * Create the Dropout object using the specified ratio and rescale + * parameter. + * + * @param ratio The probability of setting a value to zero. + * @param rescale If true the input is rescaled when deterministic is False. + */ + Dropout(const double ratio = 0.5, + const bool rescale = true) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of the dropout layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } + } + + /** + * Ordinary feed backward pass of the dropout layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy % mask * scale; + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the detla. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! The probability of setting a value to zero. + double Ratio() const { return ratio; } + + //! Modify the probability of setting a value to zero. + void Ratio(const double r) + { + ratio = r; + scale = 1.0 / (1.0 - ratio); + } + + //! The value of the rescale parameter. + bool Rescale() const {return rescale; } + //! Modify the value of the rescale parameter. + bool& Rescale() {return rescale; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); + } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored mast object. + OutputDataType mask; + + //! The probability of setting a value to zero. + double ratio; + + //! The scale fraction. + double scale; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! If true the input is rescaled when deterministic is False. 
+ bool rescale; +}; // class Dropout + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index c707017fcbf..76b19f964af 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -1,16 +1,16 @@ /** - * @file hard_tanh_layer.hpp + * @file hard_tanh.hpp * @author Dhawal Arora * - * Definition and implementation of the HardTanHLayer layer. + * Definition and implementation of the HardTanH layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP #include @@ -46,18 +46,18 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class HardTanHLayer +class HardTanH { public: /** - * Create the HardTanHLayer object using the specified parameters. The range + * Create the HardTanH object using the specified parameters. The range * of the linear region can be adjusted by specifying the maxValue and * minValue. Default (maxValue = 1, minValue = -1). * * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanHLayer(const double maxValue = 1, const double minValue = -1) : + HardTanH(const double maxValue = 1, const double minValue = -1) : maxValue(maxValue), minValue(minValue) { // Nothing to do here. @@ -71,9 +71,14 @@ class HardTanHLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { - Fn(input, output); + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? minValue : output(i))); + } } /** @@ -86,49 +91,18 @@ class HardTanHLayer * @param g The calculated gradient. */ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const DataType&& input, + DataType&& gy, + DataType&& g) { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + g = gy; + for (size_t i = 0; i < input.n_elem; i++) { - for (size_t i = 0; i < gy.n_cols; i++) + if (input(i) < minValue || input(i) > maxValue) { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); + g(i) = 0; } } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; } //! Get the input parameter. @@ -197,20 +171,6 @@ class HardTanHLayer std::max( val, minValue ), maxValue ); } ); } - /** - * Computes the HardTanH function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - Fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the HardTanH function. * @@ -229,7 +189,7 @@ class HardTanHLayer * @param x The resulting derivatives. */ template - void Deriv(const InputType& x, OutputType& y) + void Deriv(const InputType&& x, OutputType& y) { y = x; @@ -251,7 +211,7 @@ class HardTanHLayer //! Minimum value for the HardTanH function. double minValue; -}; // class HardTanHLayer +}; // class HardTanH } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp new file mode 100644 index 00000000000..2933ef181ef --- /dev/null +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -0,0 +1,119 @@ +/** + * @file join.hpp + * @author Marcus Edel + * + * Definition of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Join module class. The Join class accumulates + * the output of various modules. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template< + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Join +{ + public: + //! Create the Join object. + Join() + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. 
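+   *
+   * The backward pass simply reshapes the backpropagated error back to the
+   * row and column dimensions recorded during the forward pass (see below).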
+ */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); + } + + private: + //! Locally-stored number of input rows. + size_t inSizeRows; + + //! Locally-stored number of input cols. + size_t inSizeCols; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Join + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp index a87792e4295..8e69712b7f9 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -1,8 +1,8 @@ /** - * @file leaky_relu_layer.hpp + * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLULayer layer first introduced + * Definition and implementation of LeakyReLU layer first introduced * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP #include @@ -41,17 +41,17 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class LeakyReLULayer +class LeakyReLU { public: /** - * Create the LeakyReLULayer object using the specified parameters. + * Create the LeakyReLU object using the specified parameters. * The non zero gradient can be adjusted by specifying tha parameter * alpha in the range 0 to 1. Default (alpha = 0.03) * * @param alpha Non zero gradient */ - LeakyReLULayer(const double alpha = 0.03) : alpha(alpha) + LeakyReLU(const double alpha = 0.03) : alpha(alpha) { // Nothing to do here. } @@ -64,7 +64,7 @@ class LeakyReLULayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { Fn(input, output); } @@ -79,51 +79,13 @@ class LeakyReLULayer * @param g The calculated gradient. 
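   *
   * For reference: since f(x) = max(x, alpha * x), the derivative used in the
   * backward pass is 1 for positive activations and alpha otherwise.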
*/ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) + void Backward(const DataType&& input, DataType&& gy, DataType&& g) { DataType derivative; Deriv(input, derivative); g = gy % derivative; } - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) - { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); - } - } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; - } - //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } //! Modify the input parameter. @@ -177,20 +139,6 @@ class LeakyReLULayer y = arma::max(x, alpha * x); } - /** - * Computes the LeakyReLU function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the LeakyReLU function. * @@ -215,11 +163,11 @@ class LeakyReLULayer y = x; for (size_t i = 0; i < x.n_elem; i++) + { y(i) = Deriv(x(i)); + } } - - //! Locally-stored delta object. OutputDataType delta; @@ -232,9 +180,9 @@ class LeakyReLULayer //! Leakyness Parameter in the range 0 + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Linear layer class. The Linear class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Linear +{ + public: + //! Create the Linear object. + Linear() {} + + /** + * Create the Linear layer object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + Linear(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize + outSize, 1); + } + + /* + * Reset the layer parameter. + */ + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
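+   *
+   * The forward pass computes the affine transformation f(x) = W x + b,
+   * where W is the (outSize x inSize) weight matrix and b the bias vector
+   * set up in Reset().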
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = (weight * input) + bias; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* unused */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = weight.t() * gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight paramters. + OutputDataType weight; + + //! Locally-stored bias term parameters. + OutputDataType bias; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Linear + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp new file mode 100644 index 00000000000..92064727822 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -0,0 +1,174 @@ +/** + * @file linear.hpp + * @author Marcus Edel + * + * Definition of the LinearNoBias class also known as fully-connected layer or + * affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_HPP + +#include + +#include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the LinearNoBias class. The LinearNoBias class represents a + * single layer of a neural network. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LinearNoBias +{ + public: + //! Create the LinearNoBias object. + LinearNoBias() {} + /** + * Create the LinearNoBias object using the specified number of units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + */ + LinearNoBias(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize, 1); + } + + /* + * Reset the layer parameter. + */ + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weight * input; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = weight.t() * gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. 
+ OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored weight parameter. + OutputDataType weight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LinearNoBias + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp new file mode 100644 index 00000000000..95a79c97b91 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -0,0 +1,131 @@ +/** + * @file log_softmax.hpp + * @author Marcus Edel + * + * Definition of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the log softmax layer. The log softmax loss layer computes + * the multinomial logistic loss of the softmax of its inputs. This layer is + * meant to be used in combination with the negative log likelihood layer + * (NegativeLogLikelihoodLayer), which expects that the input contains + * log-probabilities for each class. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LogSoftMax +{ + public: + /** + * Create the LogSoftmax object. + */ + LogSoftMax() { /* Nothing to do here. */ } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output) + { + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. 
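+      // A note on the constants below: the polynomial
+      //   A0 + x * (A1 + x * (A2 + x * (A3 + x * A4)))
+      // approximates exp(x / 8) (the coefficients are (1/8)^k / k!).
+      // Squaring the result three times raises it to the 8th power, giving
+      // exp(x), and taking the reciprocal yields exp(-x). For x >= 13,
+      // exp(-x) is below 3e-6, so the result is treated as zero.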
+ static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) + { + g = gy - arma::exp(input) * arma::accu(gy); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + InputDataType& Delta() const { return delta; } + //! Modify the delta. + InputDataType& Delta() { return delta; } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LogSoftmax + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp new file mode 100644 index 00000000000..0f9f0b4978d --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -0,0 +1,161 @@ +/** + * @file lookup.hpp + * @author Marcus Edel + * + * Definition of the Lookup class a particular convolution, where the width of + * the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_HPP + +#include +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Lookup class. The Lookup class is a particular + * convolution, where the width of the convolution is 1. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Lookup +{ + public: + /** + * Create the Lookup object using the specified number of input and output + * units. + * + * @param inSize The number of input units. + * @param outSize The number of output units. 
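+ *
+ * A rough usage sketch (sizes are illustrative only): an embedding table
+ * for 1000 symbols with 64-dimensional vectors would be constructed as
+ * Lookup<> embedding(1000, 64); the input later passed to Forward() is
+ * then expected to contain 1-based symbol indices.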
+ */ + Lookup(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize, inSize); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weights.cols(arma::conv_to::from(input) - 1); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) + { + g = gy; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) + { + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; + } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Lookup + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp new file mode 100644 index 00000000000..6cd9dc630fb --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -0,0 +1,516 @@ +/** + * @file lstm.hpp + * @author Marcus Edel + * + * Definition of the LSTM class, which implements a lstm network + * layer. 
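+ *
+ * Internally the layer combines a Linear and a LinearNoBias module (which
+ * produce the four gate pre-activations in one block) with sigmoid and tanh
+ * activation modules, and it caches the previous output and cell state so
+ * that up to rho time steps can be unrolled for backpropagation through
+ * time.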
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * An implementation of a lstm network layer. + * + * This class allows specification of the type of the activation functions used + * for the gates and cells and also of the type of the function used to + * initialize and update the peephole weights. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LSTM +{ + public: + //! Create the LSTM object. + LSTM() { /* Nothing to do here */ } + + /** + * Create the LSTM layer object using the specified parameters. + * + * @param inSize The number of input units. + * @param outSize The number of output units. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + LSTM(const size_t inSize, const size_t outSize, const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + { + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
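+ *
+ * In terms of the usual LSTM equations, with i, f and o denoting the
+ * input, forget and output gates, g the candidate activation and % the
+ * element-wise product, a single step computes
+ *
+ *   [i, g, f, o] = W_x * x_t + b + W_h * h_{t-1}
+ *   c_t = sigmoid(f) % c_{t-1} + sigmoid(i) % tanh(g)
+ *   h_t = sigmoid(o) % tanh(c_t)
+ *
+ * where W_x corresponds to input2GateModule and W_h to output2GateModule.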
+ */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(prevOutput), + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)) + ), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(0, 0, 1 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)) + ), + outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. + arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor( + ForwardVisitor( + std::move(nextCell), + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) + ), + cellModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)) + ), + cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. 
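+ *
+ * The cell-state error is accumulated over time: the error arriving
+ * through the output gate is pushed back through the cell tanh, the
+ * forget-gate contribution carried over from the previous call is added,
+ * and the result is distributed to the input gate, the candidate
+ * activation and the forget gate (the latter using the cell state stored
+ * during the forward pass).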
+ */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)), + std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, + cellActivationModule)) + ), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)), + std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)), + std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)), + std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)), + std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) + ), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) + ), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
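+ *
+ * Only the two linear modules carry trainable weights; their gradients
+ * are formed from the stored gate error (prevError) together with the
+ * current input and the output saved during the corresponding forward
+ * step.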
+ */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) + { + boost::apply_visitor( + GradientVisitor( + std::move(input), + std::move(prevError) + ), + input2GateModule); + + boost::apply_visitor( + GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError) + ), + output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } + } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the maximum number of steps to backpropagate through time (BPTT). + size_t Rho() const { return rho; } + //! Modify the maximum number of steps to backpropagate through time (BPTT). + size_t& Rho() { return rho; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return weights; } + //! Modify the parameters. + OutputDataType& Parameters() { return weights; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + //! Get the model modules. + std::vector& Model() { return network; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); + } + + private: + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored weight object. + OutputDataType weights; + + //! Locally-stored previous output. + arma::mat prevOutput; + + //! Locally-stored previous cell state. + arma::mat prevCell; + + //! Locally-stored input 2 gate module. + LayerTypes input2GateModule; + + //! Locally-stored output 2 gate module. + LayerTypes output2GateModule; + + //! Locally-stored input gate module. + LayerTypes inputGateModule; + + //! Locally-stored hidden state module. + LayerTypes hiddenStateModule; + + //! Locally-stored forget gate module. + LayerTypes forgetGateModule; + + //! Locally-stored output gate module. + LayerTypes outputGateModule; + + //! Locally-stored cell module. + LayerTypes cellModule; + + //! Locally-stored cell activation module. + LayerTypes cellActivationModule; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored list of network modules. + std::vector network; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. 
+ size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! Locally-stored cell parameters. + std::vector cellParameter; + + //! Locally-stored output parameters. + std::vector outParameter; + + //! Locally-stored previous error. + arma::mat prevError; + + //! Locally-stored cell activation error. + arma::mat cellActivationError; + + //! Locally-stored foget gate error. + arma::mat forgetGateError; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class LSTM + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp new file mode 100644 index 00000000000..e93077ebbad --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -0,0 +1,375 @@ +/** + * @file max_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_HPP + +#include +// #include "layer_types.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/* + * The max pooling rule for convolution neural networks. Take the maximum value + * within the receptive block. + */ +class MaxPoolingRule +{ + public: + /* + * Return the maximum value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + size_t Pooling(const MatType& input) + { + return arma::as_scalar(arma::find(input.max() == input, 1)); + } +}; + +/** + * Implementation of the MaxPooling layer. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MaxPooling +{ +public: + //! Create the PoolingLayer object. + MaxPooling() + { + /* Nothing to do here */ + } + + /** + * Create the MaxPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + * @param floor Rounding operator (floor or ceil). + */ + MaxPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. 
+ * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. 
+ bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void PoolingOperation(const arma::Mat& input, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dW) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dH) + { + arma::mat subInput = input(arma::span(rowidx, rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + const size_t idx = pooling.Pooling(subInput); + output(i, j) = subInput(idx); + + if (!deterministic) + { + arma::Mat subIndices = indices(arma::span(rowidx, + rowidx + kW - 1 - offset), + arma::span(colidx, colidx + kH - 1 - offset)); + + poolingIndices(i, j) = subIndices(idx); + } + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param error The backward error. + * @param output The pooled result. + * @param poolingIndices The pooled indices. + */ + template + void Unpooling(const arma::Mat& error, + arma::Mat& output, + arma::Mat& poolingIndices) + { + for (size_t i = 0; i < poolingIndices.n_elem; ++i) + { + output(poolingIndices(i)) += error(i); + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored pooling strategy. + MaxPoolingRule pooling; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored indices matrix parameter. + arma::Mat indices; + + //! Locally-stored indices column parameter. + arma::Col indicesCol; + + //! Locally-stored pooling indicies. 
+ std::vector poolingIndices; +}; // class MaxPooling + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp new file mode 100644 index 00000000000..e0c097f6ddc --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -0,0 +1,322 @@ +/** + * @file mean_pooling.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Definition of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the MeanPooling. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanPooling +{ +public: + //! Create the MeanPooling object. + MeanPooling() + { + /* Nothing to do here */ + } + + /** + * Create the MeanPooling object using the specified number of units. + * + * @param kW Width of the pooling window. + * @param kH Height of the pooling window. + * @param dW Width of the stride operation. + * @param dH Width of the stride operation. + */ + MeanPooling(const size_t kW, + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) + + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. 
+ * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the height. + size_t& InputHeight() { return inputHeight; } + + //! Get the width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the height. + size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } + + private: + + /** + * Apply pooling to the input and store the results. + * + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const arma::Mat& input, arma::Mat& output) + { + const size_t rStep = kW; + const size_t cStep = kH; + + for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += dH) + { + for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += dW) + { + arma::mat subInput = input( + arma::span(rowidx, rowidx + rStep - 1 - offset), + arma::span(colidx, colidx + cStep - 1 - offset)); + + output(i, j) = arma::mean(arma::mean(subInput)); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param output The pooled result. 
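+ *
+ * Every entry of the error matrix is spread uniformly over the input
+ * window it was pooled from, i.e. each element of a window receives
+ * error / (number of elements in the window).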
+ */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows - offset; + const size_t cStep = input.n_cols / error.n_cols - offset; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols - cStep; j += cStep) + { + for (size_t i = 0; i < input.n_rows - rStep; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + unpooledError = arma::Mat(inputArea.n_rows, inputArea.n_cols); + unpooledError.fill(error(i / rStep, j / cStep) / inputArea.n_elem); + + output(arma::span(i, i + rStep - 1 - offset), + arma::span(j, j + cStep - 1 - offset)) += unpooledError; + } + } + } + + //! Locally-stored number of input units. + size_t inSize; + + //! Locally-stored number of output units. + size_t outSize; + + //! Locally-stored width of the pooling window. + size_t kW; + + //! Locally-stored height of the pooling window. + size_t kH; + + //! Locally-stored width of the stride operation. + size_t dW; + + //! Locally-stored height of the stride operation. + size_t dH; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored reset parameter used to initialize the module once. + bool reset; + + //! Rounding operation used. + bool floor; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; + + //! Locally-stored stored rounding offset. + size_t offset; + + //! Locally-stored output parameter. + arma::cube outputTemp; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube gTemp; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanPooling + + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp new file mode 100644 index 00000000000..6abdc15c21c --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -0,0 +1,98 @@ +/** + * @file mean_squared_error.hpp + * @author Marcus Edel + * + * Definition and implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The mean squared error performance function measures the network's + * performance according to the mean of squared errors. + * + * @tparam ActivationFunction Activation function used for the embedding layer. + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). 
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MeanSquaredError +{ + public: + /** + * Create the MeanSquaredError object. + */ + MeanSquaredError() { /* Nothing to do here. */ } + + /* + * Computes the mean squared error function. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + return arma::mean(arma::mean(arma::square(input - target))); + } + + /** + * Ordinary feed backward pass of a neural network. + * + * @param input The propagated input activation. + * @param target The target vector. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + output = (input - target); + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MeanSquaredError + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp new file mode 100644 index 00000000000..2caa8a37a35 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -0,0 +1,108 @@ +/** + * @file multiply_constant.hpp + * @author Marcus Edel + * + * Definition of the MultiplyConstantLayer class, which multiplies the input by + * a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the multiply constant layer. The multiply constant layer + * multiplies the input by a (non-learnable) constant. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class MultiplyConstant +{ + public: + /** + * Create the MultiplyConstant object. + */ + MultiplyConstant(const double scalar) : scalar(scalar) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network. Multiply the input with the + * specified constant scalar value. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
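+ *
+ * For example, a layer constructed as MultiplyConstant<>(0.5) halves
+ * every element of its input; the backward pass scales the incoming
+ * error by the same factor.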
+ */ + template + void Forward(const InputType&& input, OutputType&& output) + { + output = input * scalar; + } + + /** + * Ordinary feed backward pass of a neural network. The backward pass + * multiplies the error with the specified constant scalar value. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) + { + g = gy * scalar; + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(scalar, "scalar"); + } + + private: + //! Locally-stored constant scalar value. + const double scalar; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class MultiplyConstant + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 6c08698f790..2f5a863fd8d 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -1,16 +1,16 @@ /** - * @file negative_log_likelihood_layer.hpp + * @file negative_log_likelihood.hpp * @author Marcus Edel * - * Definition of the NegativeLogLikelihoodLayer class. + * Definition of the NegativeLogLikelihood class. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP -#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_Layer_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_HPP #include @@ -19,11 +19,10 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of the negative log likelihood layer. The negative log - * likelihood layer expects that the input contains log-probabilities for each + * likelihood layer expectes that the input contains log-probabilities for each * class. The layer also expects a class index, in the range between 1 and the * number of classes, as target when calling the Forward function. * - * @tparam ActivationFunction Activation function used for the embedding layer. * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, * arma::sp_mat or arma::cube). 
* @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, @@ -33,26 +32,22 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class NegativeLogLikelihoodLayer +class NegativeLogLikelihood { public: /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihoodLayer() { /* Nothing to do here. */ } + NegativeLogLikelihood() { /* Nothing to do here. */ } - /** - * Ordinary feed forward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for - * each class. The layer also expects a class index, in the range between 1 - * and the number of classes, as target when calling the Forward function. + /* + * Computes the Negative log likelihood. * - * @param input Input data that contains the log-probabilities for each class. - * @param target The target vector, that contains the class index in the range - * between 1 and the number of classes. + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. */ template - double Forward(const arma::Mat& input, const arma::Mat& target) + double Forward(const arma::Mat&& input, arma::Mat&& target) { double output = 0; @@ -70,7 +65,7 @@ class NegativeLogLikelihoodLayer /** * Ordinary feed backward pass of a neural network. The negative log - * likelihood layer expects that the input contains log-probabilities for + * likelihood layer expectes that the input contains log-probabilities for * each class. The layer also expects a class index, in the range between 1 * and the number of classes, as target when calling the Forward function. * @@ -80,10 +75,13 @@ class NegativeLogLikelihoodLayer * @param output The calculated error. */ template - void Backward(const arma::Mat& input, - const arma::Mat& target, - arma::Mat& output) + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) { + // std::cout << "------------------------------------------------------\n"; + // std::cout << "NegativeLogLikelihood\n"; + output = arma::zeros >(input.n_rows, input.n_cols); for (size_t i = 0; i < input.n_cols; ++i) { @@ -93,6 +91,10 @@ class NegativeLogLikelihoodLayer output(currentTarget, i) = -1; } + + // std::cout << "output: \n" << output << std::endl; + + // std::cout << "------------------------------------------------------\n"; } //! Get the input parameter. @@ -119,7 +121,7 @@ class NegativeLogLikelihoodLayer //! Locally-stored output parameter object. OutputDataType outputParameter; -}; // class NegativeLogLikelihoodLayer +}; // class NegativeLogLikelihood }; // namespace ann }; // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp new file mode 100644 index 00000000000..5870071faef --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -0,0 +1,356 @@ +/** + * @file recurrent.hpp + * @author Marcus Edel + * + * Definition of the LinearLayer class also known as fully-connected layer or + * affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_HPP + +#include +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the RecurrentLayer class. Recurrent layers can be used + * similarly to feed-forward layers. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Recurrent +{ + public: + /** + * Create the Recurrent object using the specified modules. + * + * @param start The start module. + * @param start The input module. + * @param start The feedback module. + * @param start The transfer module. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + */ + template + Recurrent(const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + + { + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
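+ *
+ * On the first step of a sequence the initial module (the input, start
+ * and transfer modules chained together) is evaluated; on every later
+ * step the input module and the feedback module (applied to the previous
+ * transfer output) are merged and passed through the transfer module
+ * again, which is what makes the layer recurrent.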
+ */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. + if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. 
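+ *
+ * As in Forward(), the earliest step of the unrolled sequence is handled
+ * by the initial module, while all later steps reuse the feedback outputs
+ * stored during the forward pass.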
+ */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. + OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + } + + private: + //! Locally-stored start module. + LayerTypes startModule; + + //! Locally-stored input module. + LayerTypes inputModule; + + //! Locally-stored feedback module. + LayerTypes feedbackModule; + + //! Locally-stored transfer module. + LayerTypes transferModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! Locally-stored number of gradient steps. + size_t gradientStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! 
Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; +}; // class Recurrent + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp new file mode 100644 index 00000000000..15ecec36d01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -0,0 +1,127 @@ +/** + * @file select.hpp + * @author Marcus Edel + * + * Definition and implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The select module selects the specified column from a given input matrix. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Select +{ + public: + /** + * Create the Select object. + * + * @param index The column which should be extracted from the given input. + * @param index The number of elements that should be used. + */ + Select(const size_t index, const size_t elements = 0) : + index(index), + elements(elements) + { + /* Nothing to do here. */ + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } + } + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. 
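The two branches of Select::Forward() above are plain Armadillo slicing. A minimal standalone sketch (Armadillo only, outside the layer machinery, with toy sizes) of what each branch produces:

#include <armadillo>

int main()
{
  // A toy "input batch": 6 rows (elements) x 3 columns.
  arma::mat input(6, 3, arma::fill::randu);

  const size_t index = 1;    // column to extract
  const size_t elements = 4; // 0 would mean "keep the whole column"

  // elements == 0 branch: the complete column is forwarded.
  arma::mat fullColumn = input.col(index);

  // elements > 0 branch: only the first `elements` entries of that column,
  // i.e. input.submat(0, index, elements - 1, index).
  arma::mat truncated = input.submat(0, index, elements - 1, index);

  fullColumn.print("full column:");
  truncated.print("truncated column:");

  return 0;
}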
+ OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + private: + //! Locally-stored column index. + size_t index; + + //! Locally-stored number of elements selected. + size_t elements; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class Select + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp new file mode 100644 index 00000000000..277b3342dc2 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -0,0 +1,292 @@ +/** + * @file sequential.hpp + * @author Marcus Edel + * + * Definition of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_HPP + +#include + +#include + +#include "layer_types.hpp" +#include "add_merge.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the Sequential class. The sequential class works as a + * feed-forward fully connected network container which plugs various layers + * together. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Sequential +{ + public: + + /** + * Create the Sequential object using the specified parameters. + * + * @param model Expose the all network modules. + */ + Sequential(const bool model = true) : model(model), reset(false) + { + /* Nothing to do here. */ + } + + //! Destroy the Sequential object. + ~Sequential() + { + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(arma::Mat&& input, arma::Mat&& output) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. 
+ boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } + + output = boost::apply_visitor(outputParameterVisitor, network.back()); + } + + /** + * Ordinary feed backward pass of a neural network, using 3rd-order tensors as + * input, calculating the function f(x) by propagating x backwards through f. + * Using the results from the feed forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + //! Return the model modules. + std::vector& Model() + { + if (model) + { + return network; + } + + return empty; + } + + //! Return the initial point for the optimization. + const arma::mat& Parameters() const { return parameters; } + //! Modify the initial point for the optimization. + arma::mat& Parameters() { return parameters; } + + arma::mat const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + arma::mat& InputParameter() { return inputParameter; } + + //! Get the output parameter. 
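Sequential::Forward() above threads the data through the stored modules by handing each module the previous module's output parameter. A stripped-down sketch of that chaining idea, using a toy scaling module instead of the real mlpack layers and none of the visitor machinery:

#include <armadillo>
#include <vector>

// Toy stand-in for a module: scales its input by a fixed factor.  It only
// illustrates the chaining used by Sequential::Forward(), not the actual
// mlpack layer interface.
struct ScaleLayer
{
  double factor;
  arma::mat outputParameter;

  void Forward(const arma::mat& input, arma::mat& output)
  {
    output = factor * input;
  }
};

int main()
{
  std::vector<ScaleLayer> network;
  network.push_back({2.0, arma::mat()});
  network.push_back({0.5, arma::mat()});
  network.push_back({3.0, arma::mat()});

  arma::mat input(4, 1, arma::fill::ones);

  // The first module consumes the external input ...
  network.front().Forward(input, network.front().outputParameter);

  // ... every following module consumes the previous module's output,
  // mirroring the outputParameterVisitor chain in Sequential::Forward().
  for (size_t i = 1; i < network.size(); ++i)
  {
    network[i].Forward(network[i - 1].outputParameter,
                       network[i].outputParameter);
  }

  arma::mat output = network.back().outputParameter;
  output.print("chained output (2 * 0.5 * 3 = 3 for every element):");

  return 0;
}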
+ arma::mat const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + arma::mat& OutputParameter() { return outputParameter; } + + //! Get the delta.e + arma::mat const& Delta() const { return delta; } + //! Modify the delta. + arma::mat& Delta() { return delta; } + + //! Get the gradient. + arma::mat const& Gradient() const { return gradient; } + //! Modify the gradient. + arma::mat& Gradient() { return gradient; } + + private: + //! Parameter which indicates if the modules should be exposed. + bool model; + + //! Indicator if we already initialized the model. + bool reset; + + //! Locally-stored network modules. + std::vector network; + + //! Locally-stored model parameters. + arma::mat parameters; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! Locally-stored empty list of modules. + std::vector empty; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient object. + arma::mat gradient; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! The input width. + size_t width; + + //! The input height. + size_t height; +}; // class Sequential + + +} // namespace ann +} // namespace mlpack + +#endif From 2f3c44877d90e4fb79324409af2cf9e82b455b27 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:29:09 +0100 Subject: [PATCH 47/82] Remove the rmva model for the CmakeLists file. --- src/mlpack/methods/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index f292e9756c9..dde69de0b1b 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -51,7 +51,6 @@ set(DIRS randomized_svd range_search rann - rmva regularized_svd softmax_regression sparse_autoencoder From e99e0f4279d45c5a5916963504b50ac46fe38bb3 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 5 Dec 2016 01:01:25 +0100 Subject: [PATCH 48/82] Add visitor function set; which abstracts away the different types of layers. --- .../methods/ann/layer/layer_visitor.hpp | 1034 ++++++++++++++ .../methods/ann/layer/layer_visitor_impl.hpp | 1241 +++++++++++++++++ 2 files changed, 2275 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/layer_visitor.hpp create mode 100644 src/mlpack/methods/ann/layer/layer_visitor_impl.hpp diff --git a/src/mlpack/methods/ann/layer/layer_visitor.hpp b/src/mlpack/methods/ann/layer/layer_visitor.hpp new file mode 100644 index 00000000000..a311b0addbc --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor.hpp @@ -0,0 +1,1034 @@ +/** + * @file layer_visitor.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_HPP + +#include +#include + +#include + +namespace mlpack { +namespace ann { + +/** + * LoadOutputParameterVisitor restores the output parameter using the given + * parameter set. + */ +class LoadOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Restore the output parameter given a parameter set. + LoadOutputParameterVisitor(std::vector&& parameter); + + //! Restore the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Restore the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Restore the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * SaveOutputParameterVisitor saves the output parameter into the given + * parameter set. + */ +class SaveOutputParameterVisitor : public boost::static_visitor +{ + public: + //! Save the output parameter into the given parameter set. + SaveOutputParameterVisitor(std::vector&& parameter); + + //! Save the output parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The parameter set. + std::vector&& parameter; + + //! Save the output parameter for a module which doesn't implement the + //! Model() function. + template + typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; + + //! Save the output parameter for a module which implements the Model() + //! function. + template + typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type + OutputParameter(T* layer) const; +}; + +/** + * DeleteVisitor executes the destructor of the instantiated object. + */ +class DeleteVisitor : public boost::static_visitor +{ + public: + //! Execute the destructor. + template + void operator()(LayerType* layer) const; +}; + +/** + * ForwardOutputVisitor executes the Forward() function given the input and + * output parameter. + */ +class ForwardOutputVisitor : public boost::static_visitor +{ + public: + //! Execute the Foward() function given the input and output parameter. + ForwardOutputVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Foward() function. + template + double operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * ForwardVisitor executes the Forward() function given the input and output + * parameter. + */ +class ForwardVisitor : public boost::static_visitor +{ + public: + //! Execute the Foward() function given the input and output parameter. + ForwardVisitor(arma::mat&& input, arma::mat&& output); + + //! Execute the Foward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The output parameter set. + arma::mat&& output; +}; + +/** + * BackwardVisitor executes the Backward() function given the input, error and + * delta parameter. + */ +class BackwardVisitor : public boost::static_visitor +{ + public: + //! 
Execute the Backward() function given the input, error and delta + //! parameter. + BackwardVisitor(arma::mat&& input, arma::mat&& error, arma::mat&& delta); + + //! Execute the Backward() function. + template + void operator()(LayerType* layer) const; + + private: + //! The input parameter set. + arma::mat&& input; + + //! The error parameter. + arma::mat&& error; + + //! The delta parameter. + arma::mat&& delta; +}; + +/** + * ResetVisitor executes the Reset() function. + */ +class ResetVisitor : public boost::static_visitor +{ + public: + //! Execute the Reset() function. + template + void operator()(LayerType* layer) const; + + private: + //! Execute the Reset() function for a module which implements the Reset() + //! function. + template + typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Model() + //! function. + template + typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Execute the Reset() function for a module which implements the Reset() + //! and Model() function. + template + typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; + + //! Do not execute the Reset() function for a module which doesn't implement + // the Reset() or Model() function. + template + typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + ResetParameter(T* layer) const; +}; + +/** + * InputParameterVisitor exposes the input parameter of the given module. + */ +class InputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the input parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * OutputParameterVisitor exposes the output parameter of the given module. + */ +class OutputParameterVisitor : public boost::static_visitor +{ + public: + //! Return the output parameter set. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * DeltaVisitor exposes the delta parameter of the given module. + */ +class DeltaVisitor : public boost::static_visitor +{ + public: + //! Return the delta parameter. + template + arma::mat& operator()(LayerType* layer) const; +}; + +/** + * ParametersVisitor exposes the parameters set of the given module and stores + * the parameters set into the given matrix. + */ +class ParametersVisitor : public boost::static_visitor +{ + public: + //! Store the parameters set into the given parameters matrix. + ParametersVisitor(arma::mat&& parameters); + + //! Set the parameters set. + template + void operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not set the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Set the parameters set if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * ParametersSetVisitor update the parameters set using the given matrix. + */ +class ParametersSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters set given the parameters matrix. 
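All of the visitors declared here follow the same Boost pattern: LayerTypes (defined in layer_types.hpp, not shown here) is a boost::variant over pointers to the supported modules, and boost::apply_visitor instantiates the visitor's templated operator() for whichever concrete layer the variant currently holds. A self-contained sketch of that mechanism, with two toy layer types standing in for the real modules:

#include <boost/variant.hpp>
#include <iostream>
#include <vector>

// Two toy "layers" standing in for the real mlpack modules.
struct Linear  { void Forward() { std::cout << "Linear::Forward\n"; } };
struct Sigmoid { void Forward() { std::cout << "Sigmoid::Forward\n"; } };

// Plays the role of LayerTypes: one object that can hold a pointer to any
// supported layer type.
using LayerTypes = boost::variant<Linear*, Sigmoid*>;

// Plays the role of ForwardVisitor: its templated operator() is instantiated
// for whichever type the variant currently holds.
class ForwardVisitor : public boost::static_visitor<void>
{
 public:
  template<typename LayerType>
  void operator()(LayerType* layer) const { layer->Forward(); }
};

int main()
{
  Linear linear;
  Sigmoid sigmoid;

  std::vector<LayerTypes> network{ &linear, &sigmoid };

  for (LayerTypes& layer : network)
    boost::apply_visitor(ForwardVisitor(), layer);

  return 0;
}

This is why one visitor can drive any module without a common base class: the dispatch happens per variant alternative at compile time.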
+ ParametersSetVisitor(arma::mat&& parameters); + + //! Update the parameters set. + template + void operator()(LayerType *layer) const; + + private: + //! The parameters set. + arma::mat&& parameters; + + //! Do not update the parameters set if the module doesn't implement the + //! Parameters() function. + template + typename std::enable_if< + !HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; + + //! Update the parameters set if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value, void>::type + LayerParameters(T* layer, P& output) const; +}; + +/** + * WeightSizeVisitor returns the number of weights of the given module. + */ +class WeightSizeVisitor : public boost::static_visitor +{ + public: + //! Return the number of weights. + template + size_t operator()(LayerType* layer) const; + + private: + //! If the module doesn't implement the Parameters() or Model() function + //! return 0. + template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Model() + //! function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the number of parameters if the module implements the Parameters() + //! function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; + + //! Return the accumulated number of parameters if the module implements the + //! Parameters() and Model() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P& output) const; +}; + +/** + * SetInputWidthVisitor updates the input width parameter with the given input + * width. + */ +class SetInputWidthVisitor : public boost::static_visitor +{ + public: + //! Update the input width parameter with the given input width. + SetInputWidthVisitor(const size_t inputWidth = 0, const bool reset = false); + + //! Update the input width parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input width parameter. + size_t inputWidth; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; + + //! Update the input width if the module implements the InputWidth() or + //! Model() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputWidth(T* layer) const; +}; + +/** + * SetInputHeightVisitor updates the input height parameter with the given input + * height. 
+ */ +class SetInputHeightVisitor : public boost::static_visitor +{ + public: + //! Update the input height parameter with the given input height. + SetInputHeightVisitor(const size_t inputHeight = 0, const bool reset = false); + + //! Update the input height parameter. + template + bool operator()(LayerType* layer) const; + + private: + //! The input height parameter. + size_t inputHeight; + + //! If set reset the height parameter if already set. + bool reset; + + //! Do nothing if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; + + //! Update the input height if the module implements the InputHeight() or + //! Model() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type + LayerInputHeight(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class OutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * OutputWidthVisitor exposes the OutputHeight() method of the given module. + */ +class OutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. 
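The enable_if overloads above all hinge on detection traits such as HasModelCheck: the trait reports at compile time whether a layer type exposes a particular member, and std::enable_if then selects the matching helper overload. A condensed sketch of that idiom with a hand-rolled detector (the real HasModelCheck and related traits are defined elsewhere in mlpack):

#include <iostream>
#include <type_traits>

// Minimal "does T have a member named Model?" detector, standing in for the
// HasModelCheck trait used by the visitors above.
template<typename T>
class HasModel
{
 private:
  template<typename U> static std::true_type Check(decltype(&U::Model));
  template<typename U> static std::false_type Check(...);

 public:
  static const bool value = decltype(Check<T>(nullptr))::value;
};

struct Plain     { };                          // no Model()
struct Container { int Model() { return 7; } };

// Overload chosen when T has a Model() member.
template<typename T>
typename std::enable_if<HasModel<T>::value, void>::type
Describe(T& /* layer */) { std::cout << "has Model()\n"; }

// Overload chosen when it does not.
template<typename T>
typename std::enable_if<!HasModel<T>::value, void>::type
Describe(T& /* layer */) { std::cout << "no Model()\n"; }

int main()
{
  Plain p;
  Container c;
  Describe(p);  // prints "no Model()"
  Describe(c);  // prints "has Model()"
  return 0;
}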
+ template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * LastOutputWidthVisitor exposes the OutputWidth() method of the given module. + */ +class LastOutputWidthVisitor : public boost::static_visitor +{ + public: + //! Return the output width. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputWidth() or Model() + //! function. + template + typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the InputWidth() + //! function. + template + typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() function. + template + typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; + + //! Return the output width if the module implements the Model() or + //! InputWidth() function. + template + typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputWidth(T* layer) const; +}; + +/** + * LastOutputHeightVisitor exposes the OutputHeight() method of the given module. + */ +class LastOutputHeightVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerOutputHeight(T* layer) const; +}; + +/** + * WeightSetVisitor update the module parameters given the parameters set. + */ +class WeightSetVisitor : public boost::static_visitor +{ + public: + //! Update the parameters given the parameters set and offset. + WeightSetVisitor(arma::mat&& weight, const size_t offset = 0); + + //! Update the parameters set. + template + size_t operator()(LayerType* layer) const; + + private: + //! The parameters set. + arma::mat&& weight; + + //! The parameters offset. + const size_t offset; + + //! Do not update the parameters if the module doesn't implement the + //! Parameters() or Model() function. 
+ template + typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() function. + template + typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; + + //! Update the parameters if the module implements the Model() and + //! Parameters() function. + template + typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerSize(T* layer, P&& input) const; +}; + +/** + * RhoVisitor exposes the Rho() method of the given module. + */ +class RhoVisitor : public boost::static_visitor +{ + public: + //! Return the output height. + template + size_t operator()(LayerType* layer) const; + + private: + //! Return 0 if the module doesn't implement the InputHeight() or Model() + //! function. + template + typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the InputHeight() + //! function. + template + typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implements the Model() function. + template + typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; + + //! Return the output height if the module implement the Model() or + //! InputHeight() function. + template + typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerRho(T* layer) const; +}; + +/** + * DeterministicSetVisitor set the deterministic parameter given the + * deterministic value. + */ +class DeterministicSetVisitor : public boost::static_visitor +{ + public: + //! Set the deterministic parameter given the current deterministic value. + DeterministicSetVisitor(const bool deterministic = true); + + //! Set the deterministic parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The deterministic parameter. + const bool deterministic; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. 
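RhoVisitor and DeterministicSetVisitor above rely on the same recursion: when a module exposes Model(), the visitor re-applies itself to every submodule, so a single call on the outermost container reaches all nested layers. A simplified sketch of that recursive walk on a toy module tree, without the variant or SFINAE plumbing:

#include <iostream>
#include <vector>

// A toy module tree: every node carries the flag, containers hold children.
struct Module
{
  bool deterministic = false;
  std::vector<Module> children;   // plays the role of Model()
};

// Mirrors the visitor's behaviour: set the flag, then recurse into the
// submodules (if any).
void SetDeterministic(Module& module, const bool deterministic)
{
  module.deterministic = deterministic;
  for (Module& child : module.children)
    SetDeterministic(child, deterministic);
}

int main()
{
  Module network;
  network.children.resize(2);
  network.children[1].children.resize(3);   // a nested container

  SetDeterministic(network, true);          // one call reaches every layer

  std::cout << std::boolalpha
            << network.children[1].children[2].deterministic << "\n"; // true
  return 0;
}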
+ template + typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerDeterministic(T* layer) const; +}; + +/** + * AddVisitor exposes the Add() method of the given module. + */ +class AddVisitor : public boost::static_visitor +{ + public: + //! Exposes the Add() method of the given module. + template + AddVisitor(T newLayer); + + //! Exposes the Add() method. + template + void operator()(LayerType* layer) const; + + private: + //! The layer that should be added. + LayerTypes newLayer; + + //! Only add the layer if the module implements the Add() function. + template + typename std::enable_if< + HasAddCheck::value, void>::type + LayerAdd(T* layer) const; + + //! Do not add the layer if the module doesn't implement the Add() function. + template + typename std::enable_if< + !HasAddCheck::value, void>::type + LayerAdd(T* layer) const; +}; + +/** + * GradientSetVisitor update the gradient parameter given the gradient set. + */ +class GradientSetVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientSetVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. + template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + + +/** + * GradientUpdateVisitor update the gradient parameter given the gradient set. + */ +class GradientUpdateVisitor : public boost::static_visitor +{ + public: + //! Update the gradient parameter given the gradient set. + GradientUpdateVisitor(arma::mat&& gradient, size_t offset = 0); + + //! Update the gradient parameter. + template + size_t operator()(LayerType* layer) const; + + private: + //! The gradient set. + arma::mat&& gradient; + + //! The gradient offset. + size_t offset; + + //! Update the gradient if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Update the gradient if the module implements the Gradient() and Model() + //! function. 
+ template + typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not update the gradient parameter if the module doesn't implement the + //! Gradient() or Model() function. + template + typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type + LayerGradients(T* layer, P& input) const; +}; + +/* + * GradientZeroVisitor set the gradient to zero for the given module. + */ +class GradientZeroVisitor : public boost::static_visitor +{ + public: + //! Set the gradient to zero for the given module. + GradientZeroVisitor(); + + //! Set the gradient to zero. + template + void operator()(LayerType* layer) const; + + private: + //! Set the gradient to zero if the module implements the Gradient() function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not set the gradient to zero if the module doesn't implement the + //! Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * SearchModeVisitor executes the Gradient() method of the given module using + * the input and delta parameter. + */ +class GradientVisitor : public boost::static_visitor +{ + public: + //! Executes the Gradient() method of the given module using the input and + //! delta parameter. + GradientVisitor(arma::mat&& input, arma::mat&& delta); + + //! Executes the Gradient() method. + template + void operator()(LayerType* layer) const; + + private: + //! The input set. + arma::mat&& input; + + //! The delta parameter. + arma::mat&& delta; + + //! Execute the Gradient() function if the module implements the Gradient() + //! function. + template + typename std::enable_if< + HasGradientCheck::value, void>::type + LayerGradients(T* layer, arma::mat& input) const; + + //! Do not execute the Gradient() function if the module doesn't implement + //! the Gradient() function. + template + typename std::enable_if< + !HasGradientCheck::value, void>::type + LayerGradients(T* layer, P& input) const; +}; + +/** + * RewardSetVisitor set the reward parameter given the reward value. + */ +class RewardSetVisitor : public boost::static_visitor +{ + public: + //! Set the reward parameter given the reward value. + RewardSetVisitor(const double reward); + + //! Set the reward parameter. + template + void operator()(LayerType* layer) const; + + private: + //! The reward value. + const double reward; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() and Model() function. + template + typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Model() function. + template + typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Set the deterministic parameter if the module implements the + //! Deterministic() function. + template + typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; + + //! Do not set the deterministic parameter if the module doesn't implement the + //! Deterministic() or Model() function. 
+ template + typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type + LayerReward(T* layer) const; +}; + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "layer_visitor_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp new file mode 100644 index 00000000000..482b8e10ad3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_visitor_impl.hpp @@ -0,0 +1,1241 @@ +/** + * @file layer_visitor_impl.hpp + * @author Marcus Edel + * + * This file provides an easy way to serialize a layer, abstracts away the + * different types of layers, and also automatically directs any function to the + * right layer type. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_VISITOR_IMPL_HPP + +// In case it hasn't been included yet. +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann { + +//! LoadOutputParameterVisitor visitor class. +inline LoadOutputParameterVisitor::LoadOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void LoadOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +LoadOutputParameterVisitor::OutputParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(LoadOutputParameterVisitor(std::move(parameter)), + layer->Model()[layer->Model().size() - i - 1]); + } + + layer->OutputParameter() = parameter.back(); + parameter.pop_back(); +} + +//! SaveOutputParameterVisitor visitor class. +inline SaveOutputParameterVisitor::SaveOutputParameterVisitor( + std::vector&& parameter) : parameter(std::move(parameter)) +{ + /* Nothing to do here. */ +} + +template +inline void SaveOutputParameterVisitor::operator()(LayerType* layer) const +{ + OutputParameter(layer); +} + +template +inline typename std::enable_if< + !HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasModelCheck&(T::*)()>::value, void>::type +SaveOutputParameterVisitor::OutputParameter(T* layer) const +{ + parameter.push_back(layer->OutputParameter()); + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SaveOutputParameterVisitor(std::move(parameter)), + layer->Model()[i]); + } +} + +//! DeleteVisitor visitor class. +template +inline void DeleteVisitor::operator()(LayerType* layer) const +{ + if (layer) + delete layer; +} + +//! ForwardOutputVisitor visitor class. +inline ForwardOutputVisitor::ForwardOutputVisitor(arma::mat&& input, + arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. 
*/ +} + +template +inline double ForwardOutputVisitor::operator()(LayerType* layer) const +{ + return layer->Forward(std::move(input), std::move(output)); +} + +//! ForwardVisitor visitor class. +inline ForwardVisitor::ForwardVisitor(arma::mat&& input, arma::mat&& output) : + input(std::move(input)), + output(std::move(output)) +{ + /* Nothing to do here. */ +} + +template +inline void ForwardVisitor::operator()(LayerType* layer) const +{ + layer->Forward(std::move(input), std::move(output)); +} + +//! BackwardVisitor visitor class. +inline BackwardVisitor::BackwardVisitor(arma::mat&& input, + arma::mat&& error, + arma::mat&& delta) : + input(std::move(input)), + error(std::move(error)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void BackwardVisitor::operator()(LayerType* layer) const +{ + layer->Backward(std::move(input), std::move(error), std::move(delta)); +} + +//! ResetVisitor visitor class. +template +inline void ResetVisitor::operator()(LayerType* layer) const +{ + ResetParameter(layer); +} + +template +inline typename std::enable_if< + HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasResetCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(ResetVisitor(), layer->Model()[i]); + } + + layer->Reset(); +} + +template +inline typename std::enable_if< + !HasResetCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +ResetVisitor::ResetParameter(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! InputParameterVisitor visitor class. +template +inline arma::mat& InputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->InputParameter(); +} + +//! OutputParameterVisitor visitor class. +template +inline arma::mat& OutputParameterVisitor::operator()(LayerType *layer) const +{ + return layer->OutputParameter(); +} + +//! DeltaVisitor visitor class. +template +inline arma::mat& DeltaVisitor::operator()(LayerType *layer) const +{ + return layer->Delta(); +} + +//! ParametersVisitor visitor class. +inline ParametersVisitor::ParametersVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. */ +} + +template +inline void ParametersVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersVisitor::LayerParameters(T* layer, P& /* output */) const +{ + parameters = layer->Parameters(); +} + +//! ParametersSetVisitor visitor class. +inline ParametersSetVisitor::ParametersSetVisitor(arma::mat&& parameters) : + parameters(std::move(parameters)) +{ + /* Nothing to do here. 
*/ +} + +template +inline void ParametersSetVisitor::operator()(LayerType *layer) const +{ + LayerParameters(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* /* layer */, P& /* output */) const +{ + /* Nothing to do here. */ +} + +template +inline typename std::enable_if< + HasParametersCheck::value, void>::type +ParametersSetVisitor::LayerParameters(T* layer, P& /* output */) const +{ + layer->Parameters() = parameters; +} + +//! WeightSizeVisitor visitor class. +template +inline size_t WeightSizeVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* /* layer */, P& /* output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSizeVisitor::LayerSize(T* layer, P& /* output */) const +{ + size_t weights = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + weights += boost::apply_visitor(WeightSizeVisitor(), layer->Model()[i]); + } + + return weights; +} + +//! SetInputWidthVisitor visitor class. +inline SetInputWidthVisitor::SetInputWidthVisitor(const size_t inputWidth, + const bool reset) : + inputWidth(inputWidth), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerInputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputWidthVisitor::LayerInputWidth(T* layer) const +{ + if (layer->InputWidth() == 0 || reset) + { + layer->InputWidth() = inputWidth; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputWidthVisitor(inputWidth, reset), + layer->Model()[i]); + } + + return true; +} + +//! 
SetInputHeightVisitor visitor class. +inline SetInputHeightVisitor::SetInputHeightVisitor(const size_t inputHeight, + const bool reset) : + inputHeight(inputHeight), + reset(reset) +{ + /* Nothing to do here. */ +} + +template +inline bool SetInputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerInputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* /* layer */) const +{ + return false; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + return true; +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, bool>::type +SetInputHeightVisitor::LayerInputHeight(T* layer) const +{ + if (layer->InputHeight() == 0 || reset) + { + layer->InputHeight() = inputHeight; + } + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(SetInputHeightVisitor(inputHeight, reset), + layer->Model()[i]); + } + + return true; +} + +//! OutputWidthVisitor visitor class. +template +inline size_t OutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! OutputHeightVisitor visitor class. 
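For container modules, OutputWidthVisitor above scans the submodules starting from the last one and returns the first non-zero width; zero means no submodule has been given a width yet. The same scan in isolation, over a plain vector of widths instead of layers:

#include <cstddef>
#include <iostream>
#include <vector>

// Mirror of the loop in OutputWidthVisitor::LayerOutputWidth(): walk the
// submodule widths from the back and return the first one that has actually
// been set (non-zero); 0 means "no submodule knows its width yet".
size_t LastKnownWidth(const std::vector<size_t>& submoduleWidths)
{
  for (size_t i = 0; i < submoduleWidths.size(); ++i)
  {
    const size_t width = submoduleWidths[submoduleWidths.size() - 1 - i];
    if (width != 0)
      return width;
  }

  return 0;
}

int main()
{
  // Widths reported by the submodules of a container, in forward order.
  std::vector<size_t> widths = {28, 0, 24, 0, 0};

  std::cout << LastKnownWidth(widths) << "\n";  // prints 24
  return 0;
}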
+template +inline size_t OutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +OutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! LastOutputWidthVisitor visitor class. +template +inline size_t LastOutputWidthVisitor::operator()(LayerType* layer) const +{ + return LayerOutputWidth(layer); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + return layer->OutputWidth(); +} + +template +inline typename std::enable_if< + !HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputWidth = boost::apply_visitor(LastOutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputWidth::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputWidthVisitor::LayerOutputWidth(T* layer) const +{ + size_t outputWidth = layer->OutputWidth(); + + if (outputWidth == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputWidth = boost::apply_visitor(OutputWidthVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputWidth != 0) + { + return outputWidth; + } + } + } + + return outputWidth; +} + +//! LastOutputHeightVisitor visitor class. 
+template +inline size_t LastOutputHeightVisitor::operator()(LayerType* layer) const +{ + return LayerOutputHeight(layer); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + return layer->OutputHeight(); +} + +template +inline typename std::enable_if< + !HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + size_t outputHeight = boost::apply_visitor(LastOutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + + return 0; +} + +template +inline typename std::enable_if< + HasInputHeight::value && + HasModelCheck&(T::*)()>::value, size_t>::type +LastOutputHeightVisitor::LayerOutputHeight(T* layer) const +{ + size_t outputHeight = layer->OutputHeight(); + + if (outputHeight == 0) + { + for (size_t i = 0; i < layer->Model().size(); ++i) + { + outputHeight = boost::apply_visitor(OutputHeightVisitor(), + layer->Model()[layer->Model().size() - 1 - i]); + + if (outputHeight != 0) + { + return outputHeight; + } + } + } + + return outputHeight; +} + +//! WeightSetVisitor visitor class. +inline WeightSetVisitor::WeightSetVisitor(arma::mat&& weight, + const size_t offset) : + weight(std::move(weight)), + offset(offset) +{ + /* Nothing to do here. */ +} + +template +inline size_t WeightSetVisitor::operator()(LayerType* layer) const +{ + return LayerSize(layer, std::move(layer->OutputParameter())); +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* /* layer */, P&& /*output */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /*output */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + HasParametersCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +WeightSetVisitor::LayerSize(T* layer, P&& /* output */) const +{ + layer->Parameters() = arma::mat(weight.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(WeightSetVisitor( + std::move(weight), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +//! RhoVisitor visitor class. 
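Note: WeightSetVisitor above builds each layer's Parameters() as an Armadillo matrix that aliases a slice of one flat weight vector (the advanced constructor with copy_aux_mem = false), so the optimizer and the layers share a single buffer. A small standalone sketch of that aliasing behaviour (names are illustrative, not mlpack code):

#include <armadillo>

int main()
{
  // One flat buffer playing the role of the network's parameter matrix.
  arma::mat parameter = arma::zeros<arma::mat>(6, 1);

  // Two layer-sized views into it: a 2x2 weight block at offset 0 and a
  // 2x1 block at offset 4. copy_aux_mem = false means no copy is made, so
  // writing through the views updates the flat buffer and vice versa.
  arma::mat layer1(parameter.memptr(), 2, 2, false, false);
  arma::mat layer2(parameter.memptr() + 4, 2, 1, false, false);

  layer1.fill(1.0);
  layer2.fill(2.0);

  parameter.print("parameter");  // 1 1 1 1 2 2
  return 0;
}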
+template +inline size_t RhoVisitor::operator()(LayerType* layer) const +{ + return LayerRho(layer); +} + +template +inline typename std::enable_if< + !HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* /* layer */) const +{ + return 0; +} + +template +inline typename std::enable_if< + !HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + size_t moduleRho = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + moduleRho = boost::apply_visitor(RhoVisitor(), layer->Model()[i]); + if (moduleRho != 0) + { + return moduleRho; + } + } + + return moduleRho; +} + +template +inline typename std::enable_if< + HasRho::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +template +inline typename std::enable_if< + HasRho::value && + HasModelCheck&(T::*)()>::value, size_t>::type +RhoVisitor::LayerRho(T* layer) const +{ + return layer->Rho(); +} + +//! DeterministicSetVisitor visitor class. +inline DeterministicSetVisitor::DeterministicSetVisitor( + const bool deterministic) : deterministic(deterministic) +{ + /* Nothing to do here. */ +} + +template +inline void DeterministicSetVisitor::operator()(LayerType* layer) const +{ + LayerDeterministic(layer); +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(DeterministicSetVisitor(deterministic), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* layer) const +{ + layer->Deterministic() = deterministic; +} + +template +inline typename std::enable_if< + !HasDeterministicCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +DeterministicSetVisitor::LayerDeterministic(T* /* input */) const +{ + /* Nothing to do here. */ +} + +//! AddVisitor visitor class. +template +inline AddVisitor::AddVisitor(T newLayer) : + newLayer(std::move(newLayer)) +{ + /* Nothing to do here. */ +} + +template +inline void AddVisitor::operator()(LayerType* layer) const +{ + LayerAdd(layer); +} + +template +inline typename std::enable_if< + HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* layer) const +{ + layer->Add(newLayer); +} + +template +inline typename std::enable_if< + !HasAddCheck::value, void>::type +AddVisitor::LayerAdd(T* /* layer */) const +{ + /* Nothing to do here. */ +} + +//! GradientSetVisitor visitor class. +inline GradientSetVisitor::GradientSetVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientSetVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient() = arma::mat(gradient.memptr() + offset, + layer->Parameters().n_rows, layer->Parameters().n_cols, false, false); + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientSetVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientSetVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientUpdateVisitor visitor class. +inline GradientUpdateVisitor::GradientUpdateVisitor(arma::mat&& gradient, + size_t offset) : + gradient(std::move(gradient)), + offset(offset) +{ + /* Nothing to do here. 
*/ +} + +template +inline size_t GradientUpdateVisitor::operator()(LayerType* layer) const +{ + return LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + return layer->Parameters().n_elem; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + size_t modelOffset = 0; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + HasGradientCheck::value && + HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + if (layer->Parameters().n_elem != 0) + { + layer->Gradient() = gradient.submat(offset, 0, + offset + layer->Parameters().n_elem - 1, 0);; + } + + size_t modelOffset = layer->Parameters().n_elem; + for (size_t i = 0; i < layer->Model().size(); ++i) + { + modelOffset += boost::apply_visitor(GradientUpdateVisitor( + std::move(gradient), modelOffset + offset), layer->Model()[i]); + } + + return modelOffset; +} + +template +inline typename std::enable_if< + !HasGradientCheck::value && + !HasModelCheck&(T::*)()>::value, size_t>::type +GradientUpdateVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + return 0; +} + +//! GradientZeroVisitor visitor class. +inline GradientZeroVisitor::GradientZeroVisitor() +{ + /* Nothing to do here. */ +} + +template +inline void GradientZeroVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient().zeros(); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientZeroVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! GradientVisitor visitor class. +inline GradientVisitor::GradientVisitor(arma::mat&& input, arma::mat&& delta) : + input(std::move(input)), + delta(std::move(delta)) +{ + /* Nothing to do here. */ +} + +template +inline void GradientVisitor::operator()(LayerType* layer) const +{ + LayerGradients(layer, layer->OutputParameter()); +} + +template +inline typename std::enable_if< + HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* layer, arma::mat& /* input */) const +{ + layer->Gradient(std::move(input), std::move(delta), + std::move(layer->Gradient())); +} + +template +inline typename std::enable_if< + !HasGradientCheck::value, void>::type +GradientVisitor::LayerGradients(T* /* layer */, P& /* input */) const +{ + /* Nothing to do here. */ +} + +//! RewardSetVisitor visitor class. +inline RewardSetVisitor::RewardSetVisitor(const double reward) : reward(reward) +{ + /* Nothing to do here. 
*/ +} + +template +inline void RewardSetVisitor::operator()(LayerType* layer) const +{ + LayerReward(layer); +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; + + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + for (size_t i = 0; i < layer->Model().size(); ++i) + { + boost::apply_visitor(RewardSetVisitor(reward), + layer->Model()[i]); + } +} + +template +inline typename std::enable_if< + HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* layer) const +{ + layer->Reward() = reward; +} + +template +inline typename std::enable_if< + !HasRewardCheck::value && + !HasModelCheck&(T::*)()>::value, void>::type +RewardSetVisitor::LayerReward(T* /* input */) const +{ + /* Nothing to do here. */ +} + +} // namespace ann +} // namespace mlpack + +#endif From 1f95e03c3e5e8a9855171a928b7255f886adadb3 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 5 Dec 2016 16:54:23 +0100 Subject: [PATCH 49/82] Minor style fixes. --- src/mlpack/methods/ann/layer/dropout.hpp | 2 +- src/mlpack/methods/ann/layer/negative_log_likelihood.hpp | 7 ------- src/mlpack/methods/ann/layer/select.hpp | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index 57e76f6170d..b5bfa9a3e2c 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -62,7 +62,7 @@ class Dropout * @param rescale If true the input is rescaled when deterministic is False. */ Dropout(const double ratio = 0.5, - const bool rescale = true) : + const bool rescale = true) : ratio(ratio), scale(1.0 / (1.0 - ratio)), rescale(rescale) diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 2f5a863fd8d..0de8cb7cd5b 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -79,9 +79,6 @@ class NegativeLogLikelihood const arma::Mat&& target, arma::Mat&& output) { - // std::cout << "------------------------------------------------------\n"; - // std::cout << "NegativeLogLikelihood\n"; - output = arma::zeros >(input.n_rows, input.n_cols); for (size_t i = 0; i < input.n_cols; ++i) { @@ -91,10 +88,6 @@ class NegativeLogLikelihood output(currentTarget, i) = -1; } - - // std::cout << "output: \n" << output << std::endl; - - // std::cout << "------------------------------------------------------\n"; } //! Get the input parameter. diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index 15ecec36d01..d683830cc88 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -36,7 +36,7 @@ class Select * Create the Select object. * * @param index The column which should be extracted from the given input. - * @param index The number of elements that should be used. + * @param elements The number of elements that should be used. 
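Note on the Dropout change above: the layer keeps a factor scale = 1 / (1 - ratio) and, when rescale is true, multiplies the surviving units by it during training so the expected activation matches the deterministic (test-time) pass. A small numeric sketch of that invariant, purely illustrative:

#include <armadillo>
#include <iostream>

int main()
{
  const double ratio = 0.5;                  // dropout probability
  const double scale = 1.0 / (1.0 - ratio);  // the layer's rescaling factor

  arma::vec input = arma::ones<arma::vec>(10000);

  // Drop each unit with probability `ratio`, keep the rest and rescale.
  arma::vec mask = arma::conv_to<arma::vec>::from(
      arma::randu<arma::vec>(input.n_elem) > ratio);
  arma::vec output = (input % mask) * scale;

  // Both means are close to 1.0: rescaling keeps the expected activation
  // unchanged, so nothing extra has to happen at test time.
  std::cout << arma::mean(input) << " vs " << arma::mean(output) << std::endl;
  return 0;
}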
*/ Select(const size_t index, const size_t elements = 0) : index(index), From b09d22ba667d7a3d444d44e6dd0187899bd46c82 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 6 Dec 2016 22:29:46 +0100 Subject: [PATCH 50/82] Refactor recurrent network test. --- src/mlpack/tests/recurrent_network_test.cpp | 169 ++++++++++---------- 1 file changed, 81 insertions(+), 88 deletions(-) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index c49ae423ae2..ff5daae9ede 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,26 +3,13 @@ * @author Marcus Edel * * Tests the recurrent network. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include -#include -#include -#include -#include -#include - -#include -#include #include -#include -#include - #include +#include +#include +#include #include #include "test_tools.hpp" @@ -78,19 +65,29 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) // times, I'm fine with that. All I want to know is that the network is able // to escape from local minima and to solve the task. size_t successes = 0; + const size_t rho = 10; for (size_t trial = 0; trial < 5; ++trial) { - // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features. - arma::mat input, labels; - GenerateNoisySines(input, labels, 10, 6); + // Generate 12 (2 * 6) noisy sines. A single sine contains rho points/features. + arma::mat input, labelsTemp; + GenerateNoisySines(input, labelsTemp, rho, 6); + + arma::mat labels = arma::zeros(rho, labelsTemp.n_cols); + for (size_t i = 0; i < labelsTemp.n_cols; ++i) + { + const int value = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + labels.col(i).fill(value); + } /* - * Construct a network with 1 input unit, 4 hidden units and 2 output units. - * The hidden layer is connected to itself. The network structure looks like: + * Construct a network with 1 input unit, 4 hidden units and 10 output + * units. The hidden layer is connected to itself. The network structure + * looks like: * * Input Hidden Output - * Layer(1) Layer(4) Layer(2) + * Layer(1) Layer(4) Layer(10) * +-----+ +-----+ +-----+ * | | | | | | * | +------>| +------>| | @@ -100,38 +97,45 @@ BOOST_AUTO_TEST_CASE(SequenceClassificationTest) * . . * ....... 
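Note: the label conversion introduced above turns the one-hot columns produced by GenerateNoisySines into one 1-based class index per time step, which is the target format the refactored RNN/NegativeLogLikelihood pair expects. A concrete toy run of that loop (values illustrative):

#include <armadillo>

int main()
{
  // Two one-hot columns: column 0 encodes class 2, column 1 encodes class 1.
  arma::mat labelsTemp = arma::zeros(2, 2);
  labelsTemp(1, 0) = 1;
  labelsTemp(0, 1) = 1;

  const size_t rho = 3;
  arma::mat labels = arma::zeros(rho, labelsTemp.n_cols);
  for (size_t i = 0; i < labelsTemp.n_cols; ++i)
  {
    const int value = arma::as_scalar(arma::find(
        arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1;
    labels.col(i).fill(value);
  }

  // Column 0 is now (2, 2, 2) and column 1 is (1, 1, 1).
  labels.print("labels");
  return 0;
}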
*/ - LinearLayer<> linearLayer0(1, 4); - RecurrentLayer<> recurrentLayer0(4); - BaseLayer inputBaseLayer; + Add<> add(4); + Linear<> lookup(1, 4); + SigmoidLayer<> sigmoidLayer; + Linear<> linear(4, 4); + Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho); - LinearLayer<> hiddenLayer(4, 2); - BaseLayer hiddenBaseLayer; + RNN<> model(rho); + model.Add >(); + model.Add(recurrent); + model.Add >(4, 10); + model.Add >(); - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, inputBaseLayer, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 500 * input.n_cols, -100); - - net.Train(input, labels, opt); + SGD opt(model, 0.1, 500 * input.n_cols, -100); + model.Train(input, labels, opt); arma::mat prediction; - net.Predict(input, prediction); + model.Predict(input, prediction); size_t error = 0; - for (size_t i = 0; i < labels.n_cols; i++) + for (size_t i = 0; i < prediction.n_cols; ++i) { - if (arma::sum(arma::sum(arma::abs(prediction.col(i) - labels.col(i)))) == 0) + arma::mat singlePrediction = prediction.submat((rho - 1) * rho, i, + rho * rho - 1, i); + + const int predictionValue = arma::as_scalar(arma::find( + arma::max(singlePrediction.col(0)) == + singlePrediction.col(0), 1) + 1); + + const int targetValue = arma::as_scalar(arma::find( + arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1; + + if (predictionValue == targetValue) { error++; } } - double classificationError = 1 - double(error) / labels.n_cols; + double classificationError = 1 - double(error) / prediction.n_cols; + if (classificationError <= 0.2) { ++successes; @@ -279,9 +283,7 @@ void GenerateNextEmbeddedReber(const arma::Mat& transitions, /** * Train the specified network and the construct a Reber grammar dataset. */ -template -void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, - bool embedded = false) +void ReberGrammarTestNetwork(bool embedded = false) { // Reber state transition matrix. (The last two columns are the indices to the // next path). @@ -346,36 +348,34 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, * | | | | | | * | +------>| +------>| | * | | ..>| | | | - * +-----+ . +--+--+ +-----+ + * +-----+ . +--+--+ +-- ---+ * . . * . . * ....... */ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(7, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - LinearLayer<>hiddenLayer(10, 7); - BaseLayer hiddenBaseLayer; + RNN > model(rho); - BinaryClassificationLayer classOutputLayer; + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); - - RNN net(modules, classOutputLayer); - - SGD opt(net, 0.5, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 15; i++) + for (size_t i = 0; i < 40; i++) { for (size_t j = 0; j < trainReberGrammarCount; j++) { inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + + model.Train(inputTemp, labelsTemp, opt); } } @@ -384,10 +384,11 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, // Ask the network to predict the next Reber grammar in the given sequence. 
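Note: the angle-bracket template arguments were lost when this diff was captured, so for reference the sequence-classification network built in the test above reads roughly as follows. This is a hedged reconstruction, not the verbatim patch; the layer names (IdentityLayer, LogSoftMax) and include paths are assumed from the refactored ann module.

#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/sgd/sgd.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/rnn.hpp>

using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;

void TrainSequenceClassifier(arma::mat& input, arma::mat& labels,
                             const size_t rho)
{
  // Recurrent block: add -> lookup -> linear with a sigmoid transfer.
  Add<> add(4);
  Linear<> lookup(1, 4);
  SigmoidLayer<> sigmoidLayer;
  Linear<> linear(4, 4);
  Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho);

  RNN<> model(rho);
  model.Add<IdentityLayer<> >();
  model.Add(recurrent);
  model.Add<Linear<> >(4, 10);
  model.Add<LogSoftMax<> >();

  SGD<decltype(model)> opt(model, 0.1, 500 * input.n_cols, -100);
  model.Train(input, labels, opt);

  arma::mat prediction;
  model.Predict(input, prediction);
}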
for (size_t i = 0; i < testReberGrammarCount; i++) { - arma::mat output; + arma::mat output, prediction; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); const size_t reberGrammerSize = 7; std::string inputReber = ""; @@ -429,8 +430,7 @@ void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0, */ BOOST_AUTO_TEST_CASE(ReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM); + ReberGrammarTestNetwork(false); } /** @@ -438,8 +438,7 @@ BOOST_AUTO_TEST_CASE(ReberGrammarTest) */ BOOST_AUTO_TEST_CASE(EmbeddedReberGrammarTest) { - LSTMLayer<> hiddenLayerLSTM(10); - ReberGrammarTestNetwork(hiddenLayerLSTM, true); + ReberGrammarTestNetwork(true); } /* @@ -490,7 +489,6 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) for (size_t i = 2; i < 8; i++) input(2 + rand() % 6, index(i)) = 1; - // Set the prompts which direct the network to give an answer. input(8, 8) = 1; input(9, 9) = 1; @@ -503,8 +501,7 @@ void GenerateDistractedSequence(arma::mat& input, arma::mat& output) * Train the specified network and the construct distracted sequence recall * dataset. */ -template -void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) +void DistractedSequenceRecallTestNetwork() { const size_t trainDistractedSequenceCount = 1000; const size_t testDistractedSequenceCount = 1000; @@ -538,22 +535,18 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) * . . * ....... */ - const size_t lstmSize = 4 * 10; - LinearLayer<> linearLayer0(10, lstmSize); - RecurrentLayer<> recurrentLayer0(10, lstmSize); - - LinearLayer<> hiddenLayer(10, 3); - TanHLayer<> hiddenBaseLayer; - - BinaryClassificationLayer classOutputLayer; - - auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0, - hiddenLayer, hiddenBaseLayer); + const size_t outputSize = 3; + const size_t inputSize = 10; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN net(modules, classOutputLayer); + RNN > model(rho); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - SGD opt(net, 0.04, 2, -200); + SGD opt(model, 0.1, 2, -50000); arma::mat inputTemp, labelsTemp; for (size_t i = 0; i < 40; i++) @@ -563,7 +556,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) inputTemp = trainInput.at(0, j); labelsTemp = trainLabels.at(0, j); - net.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); } } @@ -576,7 +569,8 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) arma::mat output; arma::mat input = testInput.at(0, i); - net.Predict(input, output); + model.Predict(input, output); + data::Binarize(output, output, 0.5); if (arma::accu(arma::abs(testLabels.at(0, i) - output)) != 0) error += 1; @@ -597,8 +591,7 @@ void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0) */ BOOST_AUTO_TEST_CASE(DistractedSequenceRecallTest) { - LSTMLayer<> hiddenLayerLSTMPeephole(10, true); - DistractedSequenceRecallTestNetwork(hiddenLayerLSTMPeephole); + DistractedSequenceRecallTestNetwork(); } BOOST_AUTO_TEST_SUITE_END(); From 89dd57bec8d37aa7894e5b9eb6a267eb48043b7e Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 7 Dec 2016 19:21:41 +0100 Subject: [PATCH 51/82] Remove unused pooling test. 
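Note: data::Binarize(), used in the refactored tests above to turn the network's real-valued predictions into 0/1 targets before comparing them against the grammar labels, simply thresholds every element of a matrix. A tiny sketch (the header path is an assumption):

#include <mlpack/core.hpp>
#include <mlpack/core/data/binarize.hpp>

int main()
{
  arma::mat prediction(1, 4);
  prediction(0, 0) = 0.1;
  prediction(0, 1) = 0.7;
  prediction(0, 2) = 0.4;
  prediction(0, 3) = 0.9;

  arma::mat output;
  mlpack::data::Binarize(prediction, output, 0.5);

  output.print("binarized");  // 0 1 0 1
  return 0;
}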
--- src/mlpack/tests/pooling_rules_test.cpp | 80 ------------------------- 1 file changed, 80 deletions(-) delete mode 100644 src/mlpack/tests/pooling_rules_test.cpp diff --git a/src/mlpack/tests/pooling_rules_test.cpp b/src/mlpack/tests/pooling_rules_test.cpp deleted file mode 100644 index 0dd2c9dcb24..00000000000 --- a/src/mlpack/tests/pooling_rules_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file convolution_test.cpp - * @author Marcus Edel - * - * Tests for various convolution strategies. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#include - -#include -#include - -#include -#include "test_tools.hpp" - -using namespace mlpack; -using namespace mlpack::ann; - -BOOST_AUTO_TEST_SUITE(PoolingTest); - -/** - * Test the max pooling rule. - */ -BOOST_AUTO_TEST_CASE(MaxPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 36; - - MaxPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - BOOST_REQUIRE_EQUAL(arma::accu(output), input.max()); -} - -/** - * Test the mean pooling rule. - */ -BOOST_AUTO_TEST_CASE(MeanPoolingTest) -{ - // The data was generated by magic(6) in MATLAB. - arma::mat input, output; - input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr - << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr - << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr - << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr - << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr - << 4 << 36 << 29 << 13 << 18 << 11; - - // Expected output of the generated 6 x 6 matrix. - const double poolingOutput = 18.5; - - MeanPooling poolingRule; - - // Test the pooling function. - BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput); - - // Test the unpooling function. - poolingRule.Unpooling(input, input.max(), output); - bool b = arma::all(arma::vectorise(output) == (input.max() / input.n_elem)); - BOOST_REQUIRE_EQUAL(b, true); -} - -BOOST_AUTO_TEST_SUITE_END(); From 9d3d87896b7ab8ae7f942a95d06deb2399032b44 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 8 Dec 2016 23:19:17 +0100 Subject: [PATCH 52/82] Refactor FNN class; works for CNNs and FFNs --- src/mlpack/methods/ann/ffn.hpp | 398 +++++++----------------- src/mlpack/methods/ann/ffn_impl.hpp | 461 ++++++++++++++++------------ 2 files changed, 383 insertions(+), 476 deletions(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index f9bc4d53a8b..6a6f013683d 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -14,10 +14,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include namespace mlpack { @@ -26,25 +26,20 @@ namespace ann /** Artificial Neural Network. */ { /** * Implementation of a standard feed forward network. 
* - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. */ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> + +// NguyenWidrowInitialization +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class FFN { public: //! Convenience typedef for the internal model construction. - using NetworkType = FFN; + using NetworkType = FFN; /** * Create the FFN object with the given predictors and responses set (this is @@ -52,100 +47,32 @@ class FFN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + FFN(OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the FFN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Create the FNN object with an empty predictors and responses set and - * default optimizer. Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. + * @param responses Outputs results from input training variables. * @param outputLayer Output layer used to evaluate the network. 
* @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template - FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::SGD). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(const arma::mat& predictors, const arma::mat& responses); + FFN(const arma::mat& predictors, + const arma::mat& responses, + OutputLayerType&& outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Train the feedforward network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::RMSprop - > - void Train(OptimizerType& optimizer); + //! Destructor to release allocated memory. + ~FFN(); /** * Train the feedforward network on the given input data using the given @@ -170,7 +97,7 @@ class FFN /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -184,7 +111,7 @@ class FFN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ double Evaluate(const arma::mat& parameters, const size_t i, @@ -203,6 +130,21 @@ class FFN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -216,214 +158,61 @@ class FFN void Serialize(Archive& ar, const unsigned int /* version */); private: + // Helper functions. 
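Note: taken together, the public interface above boils down to construct, Add() the layers in order, Train() with an instantiated optimizer, then Predict(). A minimal usage sketch under those assumptions; the layer names and the RMSprop constructor arguments are illustrative, not part of this patch:

#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>

using namespace mlpack::ann;
using namespace mlpack::optimization;

void TrainSmallClassifier(arma::mat& trainData, arma::mat& trainLabels)
{
  // trainLabels holds one (1-based) class index per column.
  // NegativeLogLikelihood is the default output layer; written out for clarity.
  FFN<NegativeLogLikelihood<> > model;
  model.Add<Linear<> >(trainData.n_rows, 8);
  model.Add<SigmoidLayer<> >();
  model.Add<Linear<> >(8, 3);
  model.Add<LogSoftMax<> >();

  RMSprop<decltype(model)> opt(model, 0.01, 0.88, 1e-8, 10000, -1);
  model.Train(trainData, trainLabels, opt);

  arma::mat predictions;
  model.Predict(trainData, predictions);
}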
/** - * Reset the network by zeroing the layer activations and by setting the - * layer status. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. * - * enable_if (SFINAE) is used to iterate through the network. The general - * case peels off the first type and recurses, as usual with - * variadic function templates. + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetParameter(network); - } + void Forward(arma::mat&& input); /** - * Reset the layer status by setting the current deterministic parameter - * through all layer that implement the Deterministic function. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } + void Backward(); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - LinkParameter(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void Gradient(); /** - * Link the calculated activation with the connection layer. + * Reset the module infomration (weights/parameters). */ - template - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } - - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Measures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + void ResetParameters(); /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. 
*/ - template - typename std::enable_if::type - Backward(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, - std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + void ResetDeterministic(); /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& /* unused */) { /* Nothing to do here */ } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Update(T& layer, P& /* unused */, D& delta) - { - layer.Gradient(layer.InputParameter(), delta, layer.Gradient()); - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Update(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + void ResetGradients(arma::mat& gradient); - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } - - //! Instantiated feedforward network. - LayerTypes network; - - //! The output layer used to evaluate the network + //! Instantiated outputlayer used to evaluate the network. OutputLayerType outputLayer; - //! Performance strategy used to calculate the error. - PerformanceFunction performanceFunc; + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The input width. + size_t width; - //! Matrix of (trained) parameters. - arma::mat parameter; + //! The input height. + size_t height; + + //! Indicator if we already trained the model. + bool reset; + + //! Locally-stored model modules. + std::vector network; //! The matrix of data points (predictors). arma::mat predictors; @@ -431,11 +220,56 @@ class FFN //! The matrix of responses to the input data points. arma::mat responses; + //! Matrix of (trained) parameters. + arma::mat parameter; + //! The number of separable functions (the number of predictor points). size_t numFunctions; - //! Locally stored backward error. + //! The current error for the backward pass. arma::mat error; + + //! THe current input of the forward/backward pass. + arma::mat currentInput; + + //! THe current target of the forward/backward pass. + arma::mat currentTarget; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! 
Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored output width visitor. + OutputWidthVisitor outputWidthVisitor; + + //! Locally-stored output height visitor. + OutputHeightVisitor outputHeightVisitor; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; + + //! The current evaluation mode (training or testing). + bool deterministic; + + //! Locally-stored delta object. + arma::mat delta; + + //! Locally-stored input parameter object. + arma::mat inputParameter; + + //! Locally-stored output parameter object. + arma::mat outputParameter; + + //! Locally-stored gradient parameter. + arma::mat gradient; }; // class FFN } // namespace ann diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index 5b1cc611871..d640781fbb2 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -19,170 +19,76 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols) +template +FFN::FFN(OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, +template +FFN::FFN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) + OutputLayerType&& outputLayer, + InitializationRuleType initializeRule) : + outputLayer(std::move(outputLayer)), + initializeRule(initializeRule), + width(0), + height(0), + reset(false) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); + if (!reset) + { + ResetParameters(); + } } -template -template -FFN::FFN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)) +template +FFN::~FFN() { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deleteVisitor)); } -template +template template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) -{ - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); - - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} - -template -template class OptimizerType> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void FFN::Train(const arma::mat& predictors, const arma::mat& responses, OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("ffn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("ffn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "FFN::FFN(): final objective of trained model is " << out - << "." << std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } -template -template< - template class OptimizerType -> -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. 
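Note: Train() above simply hands the network to the optimizer, so the class only has to satisfy the decomposable-function contract mlpack optimizers expect: NumFunctions(), Evaluate(parameters, i) and Gradient(parameters, i, gradient). A stand-alone sketch of that contract for a least-squares objective (purely illustrative, not mlpack code):

#include <mlpack/core.hpp>

// One 0.5 * (w' x_i - y_i)^2 term per data point.
class LeastSquaresFunction
{
 public:
  LeastSquaresFunction(const arma::mat& data, const arma::rowvec& responses) :
      data(data), responses(responses) { }

  // Number of separable terms (one per column of data).
  size_t NumFunctions() const { return data.n_cols; }

  // Loss contributed by point i.
  double Evaluate(const arma::mat& w, const size_t i) const
  {
    const double error = arma::dot(w, data.col(i)) - responses(i);
    return 0.5 * error * error;
  }

  // Gradient of the i-th term with respect to w.
  void Gradient(const arma::mat& w, const size_t i, arma::mat& gradient) const
  {
    gradient = (arma::dot(w, data.col(i)) - responses(i)) * data.col(i);
  }

 private:
  const arma::mat& data;
  const arma::rowvec& responses;
};

// An SGD-style optimizer can then minimize it, e.g.
//   LeastSquaresFunction f(data, responses);
//   SGD<LeastSquaresFunction> optimizer(f, 0.01);
//   optimizer.Optimize(w);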
Timer::Start("ffn_optimization"); const double out = optimizer.Optimize(parameter); @@ -192,101 +98,268 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +template +void FFN::Predict(arma::mat& predictors, arma::mat& responses) { - deterministic = true; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } arma::mat responsesTemp; - ResetParameter(network); - Forward(arma::mat(predictors.colptr(0), predictors.n_rows, 1, false, true), - network); - OutputPrediction(responsesTemp, network); + Forward(std::move(arma::mat(predictors.colptr(0), + predictors.n_rows, 1, false, true))); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()).col(0); responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); responses.col(0) = responsesTemp.col(0); for (size_t i = 1; i < predictors.n_cols; i++) { - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true))); - responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true); - OutputPrediction(responsesTemp, network); + responsesTemp = boost::apply_visitor(outputParameterVisitor, + network.back()); responses.col(i) = responsesTemp.col(0); } } -template -double FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, +template +double FFN::Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } - ResetParameter(network); + currentInput = std::move(arma::mat(predictors.colptr(i), + predictors.n_rows, 1, false, true)); - Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), - network); + Forward(std::move(currentInput)); - return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false, - true), error, network); + currentTarget = arma::mat(responses.colptr(i), responses.n_rows, + 1, false, true); + + double res = outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); + + return res; } -template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, +template +void FFN::Gradient(const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } + else + { + gradient.zeros(); + } + + Evaluate(parameters, i, false); + outputLayer.Backward(std::move(boost::apply_visitor(outputParameterVisitor, + network.back())), std::move(currentTarget), std::move(error)); - Evaluate(parameter, i, false); + Backward(); + ResetGradients(gradient); + Gradient(); +} - NetworkGradients(gradient, network); +template +void FFN::ResetParameters() +{ + size_t weights = 0; + for (size_t i = 0; i < network.size(); ++i) + { + weights += boost::apply_visitor(weightSizeVisitor, network[i]); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, 
parameter.n_elem, 1); + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); - Backward<>(error, network); - UpdateGradients<>(network); + boost::apply_visitor(resetVisitor, network[i]); + } +} + +template +void FFN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} + +template +void FFN::ResetGradients(arma::mat& gradient) +{ + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), network[i]); + } } -template +template +void FFN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. 
+ if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } +} + +template +void FFN::Backward() +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(error), std::move( + boost::apply_visitor(deltaVisitor, network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } +} + +template +void FFN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - 2])), std::move(error)), + network[network.size() - 1]); +} + +template template -void FFN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void FFN::Serialize(Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(width, "width"); + ar & data::CreateNVP(height, "height"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (size_t i = 0; i < network.size(); ++i) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), network[i]); + + boost::apply_visitor(resetVisitor, network[i]); + } } } From e362608f74bb236b1afe99bea6888e364810bb11 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 9 Dec 2016 21:40:20 +0100 Subject: [PATCH 53/82] Refactor RNN class; works will all current modules including the updated recurrent module. --- src/mlpack/methods/ann/rnn.hpp | 749 +++++----------------------- src/mlpack/methods/ann/rnn_impl.hpp | 525 ++++++++++--------- 2 files changed, 415 insertions(+), 859 deletions(-) diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp index 6b9483cd831..6c5c69968d8 100644 --- a/src/mlpack/methods/ann/rnn.hpp +++ b/src/mlpack/methods/ann/rnn.hpp @@ -14,39 +14,29 @@ #include -#include - -#include -#include -#include -#include +#include +#include +#include #include namespace mlpack { namespace ann /** Artificial Neural Network. */ { /** - * Implementation of a standard recurrent neural network. + * Implementation of a standard recurrent neural network container. * - * @tparam LayerTypes Contains all layer modules used to construct the network. * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. - * @tparam PerformanceFunction Performance strategy used to calculate the error. 
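Note: Serialize() above stores only the flat parameter matrix plus the input width and height, so a trained model can be written to disk with the usual data::Save/Load helpers as long as the same layer structure is rebuilt with Add() before loading. A hedged sketch, assuming the standard mlpack serialization helpers and file naming:

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>

template<typename ModelType>
void SaveAndRestore(ModelType& trained, ModelType& rebuilt)
{
  // `rebuilt` must already contain the same sequence of Add() calls as
  // `trained`; only the weights, input width and input height are stored.
  mlpack::data::Save("model.xml", "model", trained, false);
  mlpack::data::Load("model.xml", "model", rebuilt, false);
}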
*/ -template < - typename LayerTypes, - typename OutputLayerType, - typename InitializationRuleType = NguyenWidrowInitialization, - class PerformanceFunction = CrossEntropyErrorFunction<> +template< + typename OutputLayerType = NegativeLogLikelihood<>, + typename InitializationRuleType = RandomInitialization > class RNN { public: //! Convenience typedef for the internal model construction. - using NetworkType = RNN; + using NetworkType = RNN; /** * Create the RNN object with the given predictors and responses set (this is @@ -54,100 +44,40 @@ class RNN * Optionally, specify which initialize rule and performance function should * be used. * - * @param network Network modules used to construct the network. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. * @param outputLayer Output layer used to evaluate the network. - * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. - * @param optimizer Instantiated optimizer used to train the model. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template class OptimizerType> - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + RNN(const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); /** * Create the RNN object with the given predictors and responses set (this is - * the set that is used to train the network). Optionally, specify which - * initialize rule and performance function should be used. + * the set that is used to train the network) and the given optimizer. + * Optionally, specify which initialize rule and performance function should + * be used. * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. * @param predictors Input training variables. - * @param responses Outputs resulting from input training variables. + * @param responses Outputs results from input training variables. + * @param rho Maximum number of steps to backpropagate through time (BPTT). + * @param single Predict only the last element of the input sequence. + * @param outputLayer Output layer used to evaluate the network. * @param initializeRule Optional instantiated InitializationRule object * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, + RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); + const size_t rho, + const bool single = false, + OutputLayerType outputLayer = OutputLayerType(), + InitializationRuleType initializeRule = InitializationRuleType()); - /** - * Create the RNN object with an empty predictors and responses set and - * default optimizer. 
Make sure to call Train(predictors, responses) when - * training. - * - * @param network Network modules used to construct the network. - * @param outputLayer Output layer used to evaluate the network. - * @param initializeRule Optional instantiated InitializationRule object - * for initializing the network parameter. - * @param performanceFunction Optional instantiated PerformanceFunction - * object used to calculate the error. - */ - template - RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule = InitializationRuleType(), - PerformanceFunction performanceFunction = PerformanceFunction()); - - /** - * Train the recurrent neural network on the given input data. By default, the - * SGD optimization algorithm is used, but others can be specified - * (such as mlpack::optimization::RMSprop). - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @tparam OptimizerType Type of optimizer to use to train the model. - * @param predictors Input training variables. - * @param responses Outputs results from input training variables. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(const arma::mat& predictors, const arma::mat& responses); - - /** - * Train the recurrent neural network with the given instantiated optimizer. - * Using this overload allows configuring the instantiated optimizer before - * training is performed. - * - * This will use the existing model parameters as a starting point for the - * optimization. If this is not what you want, then you should access the - * parameters vector directly with Parameters() and modify it as desired. - * - * @param optimizer Instantiated optimizer used to train the model. - */ - template< - template class OptimizerType = mlpack::optimization::SGD - > - void Train(OptimizerType& optimizer); + //! Destructor to release allocated memory. + ~RNN(); /** * Train the recurrent neural network on the given input data using the given @@ -172,7 +102,7 @@ class RNN /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the - * OutputClass() function. + * output layer function. * * @param predictors Input predictors. * @param responses Matrix to put output predictions of responses into. @@ -186,9 +116,9 @@ class RNN * @param parameters Matrix model parameters. * @param i Index of point to use for objective function evaluation. * @param deterministic Whether or not to train or test the model. Note some - * layer act differently in training or testing mode. + * layer act differently in training or testing mode. */ - double Evaluate(const arma::mat& parameters, + double Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic = true); @@ -206,6 +136,29 @@ class RNN const size_t i, arma::mat& gradient); + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + template + void Add(const LayerType& layer) { network.push_back(new LayerType(layer)); } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. 
+ */ + void Add(LayerTypes layer) { network.push_back(layer); } + //! Return the number of separable functions (the number of predictor points). size_t NumFunctions() const { return numFunctions; } @@ -219,575 +172,117 @@ class RNN void Serialize(Archive& ar, const unsigned int /* version */); private: - /* - * Predict the response of the given input matrix. - */ - template - void SinglePredict(const DataType& input, DataType& output) - { - deterministic = true; - seqLen = input.n_rows / inputSize; - ResetParameter(network); - - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) - { - // Perform the forward pass and save the activations. - Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); - - // Retrieve output of the subsequence. - if (seqOutput) - { - DataType seqOutput; - OutputPrediction(seqOutput, network); - output = arma::join_cols(output, seqOutput); - } - } - - // Retrieve output of the complete sequence. - if (!seqOutput) - OutputPrediction(output, network); - } - - /** - * Reset the network by clearing the layer activations and by setting the - * layer status. - */ - template - typename std::enable_if::type - ResetParameter(std::tuple& /* unused */) - { - activations.clear(); - } - - template - typename std::enable_if::type - ResetParameter(std::tuple& network) - { - ResetDeterministic(std::get(network)); - ResetSeqLen(std::get(network)); - ResetRecurrent(std::get(network), std::get(network).InputParameter()); - std::get(network).Delta().zeros(); - - ResetParameter(network); - } - - /** - * Reset the layer status by setting the current deterministic parameter - * for all layer that implement the Deterministic function. - */ - template - typename std::enable_if< - HasDeterministicCheck::value, void>::type - ResetDeterministic(T& layer) - { - layer.Deterministic() = deterministic; - } - - template - typename std::enable_if< - !HasDeterministicCheck::value, void>::type - ResetDeterministic(T& /* unused */) { /* Nothing to do here */ } - - /** - * Reset the layer sequence length by setting the current seqLen parameter - * for all layer that implement the SeqLen function. - */ - template - typename std::enable_if< - HasSeqLenCheck::value, void>::type - ResetSeqLen(T& layer) - { - layer.SeqLen() = seqLen; - } - - template - typename std::enable_if< - !HasSeqLenCheck::value, void>::type - ResetSeqLen(T& /* unused */) { /* Nothing to do here */ } - - /** - * Distinguish between recurrent layer and non-recurrent layer when resetting - * the recurrent parameter. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& layer, P& /* unused */) - { - layer.RecurrentParameter().zeros(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - ResetRecurrent(T& /* unused */, P& /* unused */) - { - /* Nothing to do here */ - } - + // Helper functions. /** - * Initialize the network by setting the input size and output size. - */ - template - typename std::enable_if::type - InitLayer(const InputDataType& /* unused */, - const TargetDataType& target, - std::tuple& /* unused */) - { - seqOutput = outputSize < target.n_elem ? 
true : false; - } - - template - typename std::enable_if::type - InitLayer(const InputDataType& input, - const TargetDataType& target, - std::tuple& network) - { - Init(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta()); - - InitLayer(input, target, - network); - } - - /** - * Retrieve the weight matrix for all layer that implement the Weights - * function to extract the input size and output size. + * The Forward algorithm (part of the Forward-Backward algorithm). Computes + * forward probabilities for each module. + * + * @param input Data sequence to compute probabilities for. */ - template - typename std::enable_if< - HasGradientCheck::value, void>::type - Init(T& layer, P& /* unused */, D& /* unused */) - { - // Initialize the input size only once. - if (!inputSize) - inputSize = layer.Weights().n_cols; - - outputSize = layer.Weights().n_rows; - } - - template - typename std::enable_if< - !HasGradientCheck::value, void>::type - Init(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + void Forward(arma::mat&& input); /** - * Save the network layer activations. + * The Backward algorithm (part of the Forward-Backward algorithm). Computes + * backward pass for module. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& /* unused */) - { - Save(I, std::get(network), std::get(network).InputParameter()); - LinkRecurrent(network); - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - SaveActivations(std::tuple& network) - { - Save(I, std::get(network), std::get(network).InputParameter()); - SaveActivations(network); - } + void Backward(); /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Save(const size_t layerNumber, T& layer, P& /* unused */) - { - if (activations.size() == layerNumber) - { - activations.push_back(new arma::mat(layer.OutputParameter().n_rows, - seqLen)); - } - - activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter(); - } - - /** - * Load the network layer activations. + * Iterate through all layer modules and update the the gradient using the + * layer defined optimizer. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - std::get<0>(network).InputParameter() = input; - } - - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename DataType, typename... 
Tp - > - typename std::enable_if::type - LoadActivations(DataType& input, std::tuple& network) - { - Load(I, std::get(network), std::get(network).InputParameter()); - LoadActivations(input, network); - } + void Gradient(); - /** - * Distinguish between recurrent layer and non-recurrent layer when storing - * the activations. + /* + * Predict the response of the given input sequence. + * + * @param predictors Input predictors. + * @param responses Vector to put output prediction of a response into. */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - Load(const size_t layerNumber, T& layer, P& /* unused */) - { - layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum); - } + void SinglePredict(const arma::mat& predictors, arma::mat& responses); /** - * Run a single iteration of the feed forward algorithm, using the given - * input and target vector, store the calculated error into the error - * vector. + * Reset the module infomration (weights/parameters). */ - template - void Forward(const DataType& input, std::tuple& network) - { - std::get(network).InputParameter() = input; - std::get(network).Forward(std::get(network).InputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } - - template - typename std::enable_if::type - ForwardTail(std::tuple& /* unused */) { /* Nothing to do here */ } - - template - typename std::enable_if::type - ForwardTail(std::tuple& network) - { - std::get(network).Forward(std::get(network).OutputParameter(), - std::get(network).OutputParameter()); - - ForwardTail(network); - } + void ResetParameters(); /** - * Link the calculated activation with the correct layer. + * Reset the module status by setting the current deterministic parameter + * for all modules that implement the Deterministic function. */ - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& /* unused */) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - } - - template< - size_t I = 1, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkParameter(std::tuple& network) - { - if (!LayerTraits(network))>::type>::IsBiasLayer) - { - std::get(network).InputParameter() = std::get( - network).OutputParameter(); - } - - LinkParameter(network); - } + void ResetDeterministic(); /** - * Link the calculated activation with the correct recurrent layer. + * Reset the gradient for all modules that implement the Gradient function. */ - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& /* unused */) { /* Nothing to do here */ } + void ResetGradients(arma::mat& gradient); - template< - size_t I = 0, - size_t Max = std::tuple_size::value - 1, - typename... Tp - > - typename std::enable_if::type - LinkRecurrent(std::tuple& network) - { - UpdateRecurrent(std::get(network), std::get(network).InputParameter(), - std::get(network).OutputParameter()); - LinkRecurrent(network); - } + //! Number of steps to backpropagate through time (BPTT). 
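+  //! (During training the network is unrolled for rho time steps and the
+  //! per-step gradients are accumulated before each parameter update.)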
+ size_t rho; - /** - * Distinguish between recurrent layer and non-recurrent layer when updating - * the recurrent activations. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& layer, P& /* unused */, D& output) - { - layer.RecurrentParameter() = output; - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! Instantiated outputlayer used to evaluate the network. + OutputLayerType outputLayer; - /* - * Calculate the output error and update the overall error. - */ - template - double OutputError(const DataType& target, - ErrorType& error, - const std::tuple& network) - { - // Calculate and store the output error. - outputLayer.CalculateError( - std::get(network).OutputParameter(), target, error); - - // Masures the network's performance with the specified performance - // function. - return performanceFunc.Error(network, target, error); - } + //! Instantiated InitializationRule object for initializing the network + //! parameter. + InitializationRuleType initializeRule; - /** - * Run a single iteration of the feed backward algorithm, using the given - * error of the output layer. Note that we iterate backward through the - * layer modules. - */ - template - void Backward(DataType& error, std::tuple& network) - { - std::get(network).Backward( - std::get(network).OutputParameter(), error, - std::get(network).Delta()); - - BackwardTail(error, network); - } - - template - typename std::enable_if::type - BackwardTail(const DataType& /* unused */, std::tuple& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if::type - BackwardTail(const DataType& error, std::tuple& network) - { - BackwardRecurrent(std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - std::get(network).Backward( - std::get(network).OutputParameter(), - std::get(network).Delta(), - std::get(network).Delta()); - - BackwardTail(error, network); - } + //! The input size. + size_t inputSize; - /* - * Update the delta of the recurrent layer. - */ - template - typename std::enable_if< - HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& layer, P& /* unused */, D& delta) - { - if (!layer.Delta().is_empty()) - delta += layer.Delta(); - } - - template - typename std::enable_if< - !HasRecurrentParameterCheck::value, void>::type - BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */) - { - /* Nothing to do here */ - } + //! The output size. + size_t outputSize; - /** - * Iterate through all layer modules and update the the gradient using the - * layer defined optimizer. - */ - template::value - 2, - typename... Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - } - - template::value - 2, - typename... 
Tp> - typename std::enable_if::type - UpdateGradients(std::tuple& network) - { - Update(std::get(network), std::get(network).OutputParameter(), - std::get(network).Delta(), std::get(network), - std::get(network).InputParameter(), - std::get(network).Delta()); - - UpdateGradients(network); - } - - template - typename std::enable_if< - HasGradientCheck::value && - HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& delta2) - { - layer.Gradient(layer.InputParameter(), delta2, layer.Gradient()); - } - - template - typename std::enable_if< - (!HasGradientCheck::value && - !HasRecurrentParameterCheck::value) || - (!HasGradientCheck::value && - HasRecurrentParameterCheck::value), void>::type - Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - /* Nothing to do here */ - } - - template - typename std::enable_if< - HasGradientCheck::value && - !HasRecurrentParameterCheck::value, void>::type - Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */, - P2& /* unused */, D2& /* unused */) - { - layer.Gradient(layer.InputParameter(), delta1, layer.Gradient()); - } + //! The target size. + size_t targetSize; - /* - * Calculate and store the output activation. - */ - template - void OutputPrediction(DataType& output, std::tuple& network) - { - // Calculate and store the output prediction. - outputLayer.OutputClass(std::get( - network).OutputParameter(), output); - } + //! Indicator if we already trained the model. + bool reset; - //! Instantiated recurrent neural network. - LayerTypes network; + //! Only predict the last element of the input sequence. + bool single; - //! The outputlayer used to evaluate the network - OutputLayerType& outputLayer; + //! Locally-stored model modules. + std::vector network; - //! Performance strategy used to claculate the error. - PerformanceFunction performanceFunc; + //! The matrix of data points (predictors). + arma::mat predictors; - //! The current evaluation mode (training or testing). - bool deterministic; + //! The matrix of responses to the input data points. + arma::mat responses; //! Matrix of (trained) parameters. arma::mat parameter; - //! The matrix of data points (predictors). - arma::mat predictors; + //! The number of separable functions (the number of predictor points). + size_t numFunctions; - //! The matrix of responses to the input data points. - arma::mat responses; + //! The current error for the backward pass. + arma::mat error; - //! Locally stored network input size. - size_t inputSize; + //! THe current input of the forward/backward pass. + arma::mat currentInput; - //! Locally stored network output size. - size_t outputSize; + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; - //! The index of the current sequence number. - size_t seqNum; + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; - //! Locally stored number of samples in one input sequence. - size_t seqLen; + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; - //! Locally stored parameter that indicates if the input is a sequence. - bool seqOutput; + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; - //! The activation storage we are using to perform the feed backward pass. - boost::ptr_vector activations; + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; - //! 
The number of separable functions (the number of predictor points). - size_t numFunctions; + //! Locally-stored delete visitor. + DeleteVisitor deleteVisitor; - //! Locally stored backward error. - arma::mat error; + //! The current evaluation mode (training or testing). + bool deterministic; }; // class RNN } // namespace ann diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index d8d2f07f0ac..a2abb2ce6c3 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -19,176 +19,91 @@ namespace mlpack { namespace ann /** Artificial Neural Network. */ { -template -template class OptimizerType -> -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - predictors(predictors), - responses(responses), - numFunctions(predictors.n_cols), +template +RNN::RNN(const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; + /* Nothing to do here */ } -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - const arma::mat& predictors, +template +RNN::RNN(const arma::mat& predictors, const arma::mat& responses, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : + rho(rho), + outputLayer(outputLayer), + initializeRule(initializeRule), inputSize(0), - outputSize(0) + outputSize(0), + targetSize(0), + reset(false), + single(single) { - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); + numFunctions = responses.n_cols; - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + this->deterministic = true; + ResetDeterministic(); - Train(predictors, responses); -} - -template -template -RNN::RNN(LayerType &&network, - OutputType &&outputLayer, - InitializationRuleType initializeRule, - PerformanceFunction performanceFunction) : - network(std::forward(network)), - outputLayer(std::forward(outputLayer)), - performanceFunc(std::move(performanceFunction)), - inputSize(0), - outputSize(0) -{ - static_assert(std::is_same::type, - LayerTypes>::value, - "The type of network must be LayerTypes."); - - static_assert(std::is_same::type, - OutputLayerType>::value, - "The type of outputLayer must be OutputLayerType."); - - initializeRule.Initialize(parameter, NetworkSize(this->network), 1); - NetworkWeights(parameter, this->network); + if (!reset) + { + ResetParameters(); + reset = true; + } } -template -template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(const arma::mat& predictors, const arma::mat& responses) +template +RNN::~RNN() { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; - - OptimizerType optimizer(*this); - - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); - - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." << std::endl; + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } } -template +template template class OptimizerType> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void RNN::Train(const arma::mat& predictors, const arma::mat& responses, OptimizerType& optimizer) { - numFunctions = predictors.n_cols; - this->predictors = predictors; - this->responses = responses; + numFunctions = responses.n_cols; - // Train the model. - Timer::Start("rnn_optimization"); - const double out = optimizer.Optimize(parameter); - Timer::Stop("rnn_optimization"); + this->predictors = std::move(predictors); + this->responses = std::move(responses); - Log::Info << "RNN::RNN(): final objective of trained model is " << out - << "." 
<< std::endl; -} + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } -template -template< - template class OptimizerType -> -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Train(OptimizerType& optimizer) -{ // Train the model. Timer::Start("rnn_optimization"); const double out = optimizer.Optimize(parameter); @@ -198,96 +113,128 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction << "." << std::endl; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +template +void RNN::Predict(arma::mat& predictors, arma::mat& responses) { - arma::mat responsesTemp; - SinglePredict(arma::mat(predictors.colptr(0), predictors.n_rows, - 1, false, true), responsesTemp); + if (parameter.is_empty()) + { + ResetParameters(); + } - responses = arma::mat(responsesTemp.n_elem, predictors.n_cols); - responses.col(0) = responsesTemp.col(0); + if (!deterministic) + { + deterministic = true; + ResetDeterministic(); + } + + responses = arma::zeros(outputSize * rho, predictors.n_cols); + arma::mat responsesTemp = responses.col(0); + + for (size_t i = 0; i < predictors.n_cols; i++) + { + SinglePredict( + arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true), + responsesTemp); + + responses.col(i) = responsesTemp; + } +} - for (size_t i = 1; i < predictors.n_cols; i++) +template +void RNN::SinglePredict(const arma::mat& predictors, arma::mat& responses) +{ + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - SinglePredict(arma::mat(predictors.colptr(i), predictors.n_rows, - 1, false, true), responsesTemp); - responses.col(i) = responsesTemp.col(0); + currentInput = predictors.rows(seqNum * inputSize, + (seqNum + 1) * inputSize - 1); + Forward(std::move(currentInput)); + + responses.rows(seqNum * outputSize, (seqNum + 1) * outputSize - 1) = + boost::apply_visitor(outputParameterVisitor, network.back()); } } -template -double RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Evaluate(const arma::mat& /* unused */, +template +double RNN::Evaluate(const arma::mat& /* parameters */, const size_t i, const bool deterministic) { - this->deterministic = deterministic; + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + + if (deterministic != this->deterministic) + { + this->deterministic = deterministic; + ResetDeterministic(); + } arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true); arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Initialize the activation storage only once. - if (activations.empty()) - InitLayer(input, target, network); - - double networkError = 0; - seqLen = input.n_rows / inputSize; - ResetParameter(network); + if (!inputSize) + { + inputSize = input.n_elem / rho; + targetSize = target.n_elem / rho; + } - error = arma::mat(outputSize, outputSize < target.n_elem ? seqLen : 1); + double performance = 0; - // Iterate through the input sequence and perform the feed forward pass. - for (seqNum = 0; seqNum < seqLen; seqNum++) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Perform the forward pass and save the activations. 
- Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1), - network); - SaveActivations(network); + currentInput = input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1); + arma::mat currentTarget = target.rows(seqNum * targetSize, + (seqNum + 1) * targetSize - 1); - // Retrieve output error of the subsequence. - if (seqOutput) + Forward(std::move(currentInput)); + + if (!deterministic) { - arma::mat seqError = error.unsafe_col(seqNum); - arma::mat seqTarget = target.submat(seqNum * outputSize, 0, - (seqNum + 1) * outputSize - 1, 0); - networkError += OutputError(seqTarget, seqError, network); + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } } + + performance += outputLayer.Forward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget)); } - // Retrieve output error of the complete sequence. - if (!seqOutput) - return OutputError(target, error, network); + if (!outputSize) + { + outputSize = boost::apply_visitor(outputParameterVisitor, + network.back()).n_elem; + } - return networkError; + return performance; } -template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction ->::Gradient(const arma::mat& /* unused */, +template +void RNN::Gradient(const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { + if (parameter.is_empty()) + { + ResetParameters(); + reset = true; + } + gradient = arma::zeros(parameter.n_rows, parameter.n_cols); } else @@ -295,59 +242,173 @@ LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction gradient.zeros(); } - Evaluate(parameter, i, false); + Evaluate(parameters, i, false); - arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols); - NetworkGradients(currentGradient, network); + arma::mat currentGradient = arma::zeros(parameter.n_rows, + parameter.n_cols); + ResetGradients(currentGradient); - const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows, + 1, false, true); + arma::mat target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true); - // Iterate through the input sequence and perform the feed backward pass. - for (seqNum = seqLen - 1; seqNum >= 0; seqNum--) + for (size_t seqNum = 0; seqNum < rho; ++seqNum) { - // Load the network activation for the upcoming backward pass. - LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) * - inputSize - 1), network); + currentGradient.zeros(); + + arma::mat currentTarget = target.rows((rho - seqNum - 1) * targetSize, + (rho - seqNum) * targetSize - 1); + currentInput = input.rows((rho - seqNum - 1) * inputSize, + (rho - seqNum) * inputSize - 1); - // Perform the backward pass. - if (seqOutput) + for (size_t l = 0; l < network.size(); ++l) { - arma::mat seqError = error.unsafe_col(seqNum); - Backward(seqError, network); + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (single && seqNum > 0) + { + error.zeros(); } else { - Backward(error, network); + outputLayer.Backward(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(currentTarget), + std::move(error)); } - // Link the parameters and update the gradients. - LinkParameter(network); - UpdateGradients<>(network); - - // Update the overall gradient. 
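+    // Backpropagate this step's error through the unrolled modules, compute
+    // the per-module gradients and add them to the overall gradient.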
+ Backward(); + Gradient(); gradient += currentGradient; + } +} + +template +void RNN::ResetParameters() +{ + size_t weights = 0; + for (LayerTypes& layer : network) + { + weights += boost::apply_visitor(weightSizeVisitor, layer); + } + + parameter.set_size(weights, 1); + initializeRule.Initialize(parameter, parameter.n_elem, 1); + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } +} + +template +void RNN::ResetDeterministic() +{ + DeterministicSetVisitor deterministicSetVisitor(deterministic); + std::for_each(network.begin(), network.end(), + boost::apply_visitor(deterministicSetVisitor)); +} - if (seqNum == 0) break; +template +void RNN::ResetGradients(arma::mat& gradient) +{ + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(GradientSetVisitor(std::move(gradient), + offset), layer); } } -template +template +void RNN::Forward(arma::mat&& input) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + } +} + +template +void RNN::Backward() +{ + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network.back())), + std::move(error), std::move(boost::apply_visitor(deltaVisitor, + network.back()))), network.back()); + + for (size_t i = 2; i < network.size(); ++i) + { + boost::apply_visitor(BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + network[network.size() - i])), std::move(boost::apply_visitor( + deltaVisitor, network[network.size() - i + 1])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i]))), + network[network.size() - i]); + } +} + +template +void RNN::Gradient() +{ + boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( + boost::apply_visitor(deltaVisitor, network[1]))), network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, network[i - 1])), + std::move(boost::apply_visitor(deltaVisitor, network[i + 1]))), + network[i]); + } +} + +template template -void RNN< -LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction +void RNN::Serialize(Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(single, "single"); + ar & data::CreateNVP(inputSize, "inputSize"); + ar & data::CreateNVP(outputSize, "outputSize"); + ar & data::CreateNVP(targetSize, "targetSize"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) { - NetworkWeights(parameter, network); + reset = false; + + size_t offset = 0; + for (LayerTypes& layer : network) + { + offset += boost::apply_visitor(WeightSetVisitor(std::move(parameter), + offset), layer); + + boost::apply_visitor(resetVisitor, layer); + } } } From f54949c561057291450bddfaef8bb35f073af56a Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 10 Dec 2016 21:46:54 +0100 Subject: [PATCH 54/82] Include all layer modules. 
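All layer modules are collected behind a single boost::variant of layer
pointers (LayerTypes) and driven through static visitors instead of a common
virtual base class. As background, the underlying Boost pattern looks roughly
like the following self-contained sketch; the two toy layers and the visitor
are made up for illustration and are not mlpack types:

  // Background sketch of the variant/visitor dispatch -- not mlpack code.
  #include <boost/variant.hpp>
  #include <iostream>
  #include <vector>

  struct Linear  { void Forward() { std::cout << "linear\n";  } };
  struct Sigmoid { void Forward() { std::cout << "sigmoid\n"; } };

  // The module list holds raw pointers wrapped in a variant.
  using Layer = boost::variant<Linear*, Sigmoid*>;

  // Every operation is dispatched through a static visitor.
  struct ForwardCaller : public boost::static_visitor<void>
  {
    template<typename LayerType>
    void operator()(LayerType* layer) const { layer->Forward(); }
  };

  int main()
  {
    std::vector<Layer> network{ new Linear(), new Sigmoid() };
    for (Layer& layer : network)
      boost::apply_visitor(ForwardCaller(), layer);

    // mlpack frees its modules through an analogous delete visitor; omitted.
    return 0;
  }

Dispatching through boost::apply_visitor keeps every call statically typed
while still allowing heterogeneous module lists such as the LayerTypes alias
below.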
--- src/mlpack/methods/ann/layer/layer.hpp | 30 +++++ src/mlpack/methods/ann/layer/layer_types.hpp | 117 +++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/layer.hpp create mode 100644 src/mlpack/methods/ann/layer/layer_types.hpp diff --git a/src/mlpack/methods/ann/layer/layer.hpp b/src/mlpack/methods/ann/layer/layer.hpp new file mode 100644 index 00000000000..c1de77741e9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer.hpp @@ -0,0 +1,30 @@ +/** + * @file layer.hpp + * @author Marcus Edel + * + * This includes various layers to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_HPP + +#include "add_merge.hpp" +#include "concat_performance.hpp" +#include "convolution.hpp" +#include "dropconnect.hpp" +#include "glimpse.hpp" +#include "layer_types.hpp" +#include "linear.hpp" +#include "linear_no_bias.hpp" +#include "lstm.hpp" +#include "recurrent.hpp" +#include "recurrent_attention.hpp" +#include "sequential.hpp" +#include "concat.hpp" +#include "vr_class_reward.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp new file mode 100644 index 00000000000..156616864db --- /dev/null +++ b/src/mlpack/methods/ann/layer/layer_types.hpp @@ -0,0 +1,117 @@ +/** + * @file layer_types.hpp + * @author Marcus Edel + * + * This provides a list of all modules that can be used to construct a model. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP +#define MLPACK_METHODS_ANN_LAYER_LAYER_TYPES_HPP + +#include + +// Layer modules. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Convolution modules. 
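+// (These rules are what the Convolution module below takes as its
+// ForwardConvolutionRule, BackwardConvolutionRule and GradientConvolutionRule
+// template arguments.)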
+#include +#include +#include + +namespace mlpack { +namespace ann { + +template class AddMerge; +template class Concat; +template class DropConnect; +template class Glimpse; +template class Linear; +template class LinearNoBias; +template class LSTM; +template class Recurrent; +template class Sequential; +template class VRClassReward; + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +class ConcatPerformance; + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +class Convolution; + +template< + typename InputDataType, + typename OutputDataType +> +class RecurrentAttention; + +using LayerTypes = boost::variant< + Add*, + AddMerge*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + BaseLayer*, + Concat*, + ConcatPerformance, + arma::mat, arma::mat>*, + Constant*, + Convolution, + NaiveConvolution, + NaiveConvolution, arma::mat, arma::mat>*, + DropConnect*, + Dropout*, + Glimpse*, + HardTanH*, + Join*, + LeakyReLU*, + Linear*, + LinearNoBias*, + LogSoftMax*, + Lookup*, + LSTM*, + MaxPooling*, + MeanPooling*, + MeanSquaredError*, + MultiplyConstant*, + NegativeLogLikelihood*, + Recurrent*, + RecurrentAttention*, + ReinforceNormal*, + Select*, + Sequential*, + VRClassReward* +>; + +} // namespace ann +} // namespace mlpack + +#endif From f5bfe204946514f6bd3b80e7de169f7026b97dde Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 13:52:50 +0100 Subject: [PATCH 55/82] Minor style fixes. --- src/mlpack/methods/ann/layer/convolution.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index be7fb7d6a5b..03477c6ec1e 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -99,7 +99,7 @@ class Convolution void Reset() { weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false,false); + outSize * inSize, false, false); bias = arma::mat(weights.memptr() + weight.n_elem, outSize, 1, false, false); } @@ -196,8 +196,6 @@ class Convolution { gTemp.slice(inMap) += output; } - - } } From 18fefb38a8e10c75fd0eece7688f43d50cc8eb19 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 13:55:03 +0100 Subject: [PATCH 56/82] Add layer traits to check for the input width, height and model function. --- src/mlpack/methods/ann/layer/layer_traits.hpp | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/ann/layer/layer_traits.hpp b/src/mlpack/methods/ann/layer/layer_traits.hpp index a8671d62f5d..ff4fbf2d387 100644 --- a/src/mlpack/methods/ann/layer/layer_traits.hpp +++ b/src/mlpack/methods/ann/layer/layer_traits.hpp @@ -64,27 +64,42 @@ HAS_MEM_FUNC(Gradient, HasGradientCheck); // function. HAS_MEM_FUNC(Deterministic, HasDeterministicCheck); -// This gives us a HasRecurrentParameterCheck type (where U is a function -// pointer) we can use with SFINAE to catch when a type has a -// RecurrentParameter() function. -HAS_MEM_FUNC(RecurrentParameter, HasRecurrentParameterCheck); +// This gives us a HasParametersCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. 
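+// (Illustrative note: a check like this is typically consumed with
+// std::enable_if, enabling one overload when
+// HasParametersCheck<T, P&(T::*)()>::value holds and a no-op overload
+// otherwise, so code can skip modules without trainable parameters.)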
+HAS_MEM_FUNC(Parameters, HasParametersCheck); -// This gives us a HasSeqLenCheck type (where U is a function pointer) we -// can use with SFINAE to catch when a type has a SeqLen() function. -HAS_MEM_FUNC(SeqLen, HasSeqLenCheck); +// This gives us a HasAddCheck type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Weights() function. +HAS_MEM_FUNC(Add, HasAddCheck); -// This gives us a HasWeightsCheck type (where U is a function pointer) we +// This gives us a HasModelCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Weights() function. -HAS_MEM_FUNC(Weights, HasWeightsCheck); +HAS_MEM_FUNC(Model, HasModelCheck); // This gives us a HasLocationCheck type (where U is a function pointer) // we can use with SFINAE to catch when a type has a Location() function. HAS_MEM_FUNC(Location, HasLocationCheck); +// This gives us a HasResetCheck type (where U is a function pointer) +// we can use with SFINAE to catch when a type has a Location() function. +HAS_MEM_FUNC(Reset, HasResetCheck); + // This gives us a HasRewardCheck type (where U is a function pointer) we // can use with SFINAE to catch when a type has a Reward() function. HAS_MEM_FUNC(Reward, HasRewardCheck); +// This gives us a HasInputWidth type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a InputWidth() function. +HAS_MEM_FUNC(InputWidth, HasInputWidth); + +// This gives us a HasInputHeight type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a InputHeight() function. +HAS_MEM_FUNC(InputHeight, HasInputHeight); + +// This gives us a HasRho type (where U is a function pointer) we +// can use with SFINAE to catch when a type has a Rho() function. +HAS_MEM_FUNC(InputHeight, HasRho); + } // namespace ann } // namespace mlpack From 919ee117a94a6bda00780ea38a5cfd910ce9ecd7 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:47:53 +0100 Subject: [PATCH 57/82] Refactor neural visual attention modules. --- src/mlpack/methods/ann/layer/glimpse.hpp | 592 ++++++++++++++++++ .../methods/ann/layer/recurrent_attention.hpp | 408 ++++++++++++ .../methods/ann/layer/reinforce_normal.hpp | 140 +++++ .../methods/ann/layer/vr_class_reward.hpp | 191 ++++++ 4 files changed, 1331 insertions(+) create mode 100644 src/mlpack/methods/ann/layer/glimpse.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_attention.hpp create mode 100644 src/mlpack/methods/ann/layer/reinforce_normal.hpp create mode 100644 src/mlpack/methods/ann/layer/vr_class_reward.hpp diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp new file mode 100644 index 00000000000..37db36b1edc --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -0,0 +1,592 @@ +/** + * @file glimpse.hpp + * @author Marcus Edel + * + * Definition of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + * + * For more information, see the following. 
+ * + * @code + * @article{CoRR2014, + * author = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu}, + * title = {Recurrent Models of Visual Attention}, + * journal = {CoRR}, + * volume = {abs/1406.6247}, + * year = {2014}, + * } + * @endcode + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_HPP + +#include + +#include "layer_types.hpp" +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + + +/* + * The mean pooling rule for convolution neural networks. Average all values + * within the receptive block. + */ +class MeanPoolingRule +{ + public: + /* + * Return the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + */ + template + double Pooling(const MatType& input) + { + return arma::mean(arma::mean(input)); + } + + /* + * Set the average value within the receptive block. + * + * @param input Input used to perform the pooling operation. + * @param value The unpooled value. + * @param output The unpooled output data. + */ + template + void Unpooling(const MatType& input, const double value, MatType& output) + { + output = arma::zeros(input.n_rows, input.n_cols); + const double mean = arma::mean(arma::mean(input)); + + output.elem(arma::find(mean == input, 1)).fill(value); + } +}; + +/** + * The glimpse layer returns a retina-like representation + * (down-scaled cropped images) of increasing scale around a given location in a + * given image. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class Glimpse +{ + public: + + /** + * Create the GlimpseLayer object using the specified ratio and rescale + * parameter. + * + * @param inSize The size of the input units. + * @param size The used glimpse size (height = width). + * @param depth The number of patches to crop per glimpse. + * @param scale The scaling factor used to create the increasing retina-like + * representation. + * @param inputWidth The input width of the given input data. + * @param inputHeight The input height of the given input data. + */ + Glimpse(const size_t inSize, + const size_t size, + const size_t depth = 3, + const size_t scale = 2, + const size_t inputWidth = 0, + const size_t inputHeight = 0) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of the glimpse layer. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
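+   *
+   * For example (values chosen only for illustration), with size = 8,
+   * depth = 3 and scale = 2 the layer crops 8x8, 16x16 and 32x32 patches
+   * around the given location and rescales each of them to 8x8, so the
+   * output contains depth * inSize slices of size x size.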
+ */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } + + /** + * Ordinary feed backward pass of the glimpse layer. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } + + //! Get the input parameter. + InputDataType& InputParameter() const {return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const {return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the detla. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Set the locationthe x and y coordinate of the center of the output + //! glimpse. + void Location(const arma::mat& location) + { + this->location = location; + } + + //! Get the input width. + size_t const& InputWidth() const { return inputWidth; } + //! Modify input the width. + size_t& InputWidth() { return inputWidth; } + + //! Get the input height. + size_t const& InputHeight() const { return inputHeight; } + //! Modify the input height. + size_t& InputHeight() { return inputHeight; } + + //! Get the output width. + size_t const& OutputWidth() const { return outputWidth; } + //! Modify the output width. + size_t& OutputWidth() { return outputWidth; } + + //! Get the output height. + size_t const& OutputHeight() const { return outputHeight; } + //! Modify the output height. 
+ size_t& OutputHeight() { return outputHeight; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + private: + /* + * Transform the given input by changing rows to columns. + * + * @param w The input matrix used to perform the transformation. + */ + void Transform(arma::mat& w) + { + arma::mat t = w; + + for (size_t i = 0, k = 0; i < w.n_elem; k++) + { + for (size_t j = 0; j < w.n_cols; j++, i++) + { + w(k, j) = t(i); + } + } + } + + /* + * Transform the given input by changing rows to columns. + * + * @param w The input matrix used to perform the transformation. + */ + void Transform(arma::cube& w) + { + for (size_t i = 0; i < w.n_slices; i++) + { + arma::mat t = w.slice(i); + Transform(t); + w.slice(i) = t; + } + } + + /** + * Apply pooling to the input and store the results to the output parameter. + * + * @param kSize the kernel size used to perform the pooling operation. + * @param input The input to be apply the pooling rule. + * @param output The pooled result. + */ + template + void Pooling(const size_t kSize, + const arma::Mat& input, + arma::Mat& output) + { + + const size_t rStep = kSize; + const size_t cStep = kSize; + + for (size_t j = 0; j < input.n_cols; j += cStep) + { + for (size_t i = 0; i < input.n_rows; i += rStep) + { + output(i / rStep, j / cStep) += pooling.Pooling( + input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1))); + } + } + } + + /** + * Apply unpooling to the input and store the results. + * + * @param input The input to be apply the unpooling rule. + * @param error The error used to perform the unpooling operation. + * @param output The pooled result. + */ + template + void Unpooling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + const size_t rStep = input.n_rows / error.n_rows; + const size_t cStep = input.n_cols / error.n_cols; + + arma::Mat unpooledError; + for (size_t j = 0; j < input.n_cols; j += cStep) + { + for (size_t i = 0; i < input.n_rows; i += rStep) + { + const arma::Mat& inputArea = input(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)); + + pooling.Unpooling(inputArea, error(i / rStep, j / cStep), + unpooledError); + + output(arma::span(i, i + rStep - 1), + arma::span(j, j + cStep - 1)) += unpooledError; + } + } + } + + /** + * Apply ReSampling to the input and store the results in the output + * parameter. + * + * @param input The input to be apply the ReSampling rule. + * @param output The pooled result. + */ + template + void ReSampling(const arma::Mat& input, arma::Mat& output) + { + double wRatio = (double) (input.n_rows - 1) / (size - 1); + double hRatio = (double) (input.n_cols - 1) / (size - 1); + + double iWidth = input.n_rows - 1; + double iHeight = input.n_cols - 1; + + for (size_t y = 0; y < size; y++) + { + for (size_t x = 0; x < size; x++) + { + double ix = wRatio * x; + double iy = hRatio * y; + + // Get the 4 nearest neighbors. + double ixNw = std::floor(ix); + double iyNw = std::floor(iy); + double ixNe = ixNw + 1; + double iySw = iyNw + 1; + + // Get surfaces to each neighbor. + double se = (ix - ixNw) * (iy - iyNw); + double sw = (ixNe - ix) * (iy - iyNw); + double ne = (ix - ixNw) * (iySw - iy); + double nw = (ixNe - ix) * (iySw - iy); + + // Calculate the weighted sum. 
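+ // Bilinear interpolation: the surface areas nw, ne, sw and se weight the
+ // four neighbouring input pixels, and std::min clamps the east and south
+ // neighbours to the image border.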
+ output(y, x) = input(iyNw, ixNw) * nw + + input(iyNw, std::min(ixNe, iWidth)) * ne + + input(std::min(iySw, iHeight), ixNw) * sw + + input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se; + } + } + } + + /** + * Apply DownwardReSampling to the input and store the results into the output + * parameter. + * + * @param input The input to be apply the DownwardReSampling rule. + * @param error The error used to perform the DownwardReSampling operation. + * @param output The DownwardReSampled result. + */ + template + void DownwardReSampling(const arma::Mat& input, + const arma::Mat& error, + arma::Mat& output) + { + double iWidth = input.n_rows - 1; + double iHeight = input.n_cols - 1; + + double wRatio = iWidth / (size - 1); + double hRatio = iHeight / (size - 1); + + for (size_t y = 0; y < size; y++) + { + for (size_t x = 0; x < size; x++) + { + double ix = wRatio * x; + double iy = hRatio * y; + + // Get the 4 nearest neighbors. + double ixNw = std::floor(ix); + double iyNw = std::floor(iy); + double ixNe = ixNw + 1; + double iySw = iyNw + 1; + + // Get surfaces to each neighbor. + double se = (ix - ixNw) * (iy - iyNw); + double sw = (ixNe - ix) * (iy - iyNw); + double ne = (ix - ixNw) * (iySw - iy); + double nw = (ixNe - ix) * (iySw - iy); + + double ograd = error(y, x); + + output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd; + output(iyNw, std::min(ixNe, iWidth)) = output(iyNw, + std::min(ixNe, iWidth)) + ne * ograd; + output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight), + ixNw) + sw * ograd; + output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output( + std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd; + } + } + } + + //! The size of the input units. + size_t inSize; + + //! The used glimpse size (height = width). + size_t size; + + //! The number of patches to crop per glimpse. + size_t depth; + + //! The scale fraction. + size_t scale; + + //! Locally-stored input width. + size_t inputWidth; + + //! Locally-stored input height. + size_t inputHeight; + + //! Locally-stored output width. + size_t outputWidth; + + //! Locally-stored output height. + size_t outputHeight; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored depth of the input. + size_t inputDepth; + + //! Locally-stored transformed input parameter. + arma::cube inputTemp; + + //! Locally-stored transformed output parameter. + arma::cube outputTemp; + + //! The x and y coordinate of the center of the output glimpse. + arma::mat location; + + //! Locally-stored object to perform the mean pooling operation. + MeanPoolingRule pooling; + + //! Location-stored module location parameter. + std::vector locationParameter; + + //! Location-stored transformed gradient paramter. + arma::cube gTemp; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; +}; // class GlimpseLayer + +}; // namespace ann +}; // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp new file mode 100644 index 00000000000..1d1405d8863 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -0,0 +1,408 @@ +/** + * @file recurrent_attention.hpp + * @author Marcus Edel + * + * Definition of the RecurrentAttention class. 
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP
+#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_HPP
+
+#include
+#include
+
+#include "layer_types.hpp"
+#include "add_merge.hpp"
+#include "sequential.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class implements the Recurrent Model for Visual Attention, using a
+ * variety of possible layer implementations.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @article{MnihHGK14,
+ * title={Recurrent Models of Visual Attention},
+ * author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu},
+ * journal={CoRR},
+ * volume={abs/1406.6247},
+ * year={2014}
+ * }
+ * @endcode
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ * arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ * arma::sp_mat or arma::cube).
+ */
+template <
+ typename InputDataType = arma::mat,
+ typename OutputDataType = arma::mat
+>
+class RecurrentAttention
+{
+ public:
+ /**
+ * Create the RecurrentAttention object using the specified modules.
+ *
+ * @param outSize The module output size.
+ * @param rnn The recurrent neural network module.
+ * @param action The action module.
+ * @param rho Maximum number of steps to backpropagate through time (BPTT).
+ */
+ template
+ RecurrentAttention(const size_t outSize,
+ const RNNModuleType& rnn,
+ const ActionModuleType& action,
+ const size_t rho) :
+ outSize(outSize),
+ rnnModule(new RNNModuleType(rnn)),
+ actionModule(new ActionModuleType(action)),
+ rho(rho),
+ forwardStep(0),
+ backwardStep(0),
+ deterministic(false)
+ {
+ network.push_back(rnnModule);
+ network.push_back(actionModule);
+ }
+
+ /**
+ * Ordinary feed forward pass of a neural network, evaluating the function
+ * f(x) by propagating the activity forward through f.
+ *
+ * @param input Input data used for evaluating the specified function.
+ * @param output Resulting output activation.
+ */
+ template
+ void Forward(arma::Mat&& input, arma::Mat&& output)
+ {
+ // Initialize the action input.
+ if (initialInput.is_empty())
+ {
+ initialInput = arma::zeros(outSize, input.n_cols);
+ }
+
+ // Propagate through the action and recurrent module.
+ for (forwardStep = 0; forwardStep < rho; ++forwardStep)
+ {
+ if (forwardStep == 0)
+ {
+ boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move(
+ boost::apply_visitor(outputParameterVisitor, actionModule))),
+ actionModule);
+ }
+ else
+ {
+ boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor(
+ outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor(
+ outputParameterVisitor, actionModule))), actionModule);
+ }
+
+ // Initialize the glimpse input.
+ arma::mat glimpseInput = arma::zeros(input.n_elem, 2);
+ glimpseInput.col(0) = input;
+ glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor,
+ actionModule).n_elem - 1, 1) = boost::apply_visitor(
+ outputParameterVisitor, actionModule);
+
+ boost::apply_visitor(ForwardVisitor(std::move(glimpseInput),
+ std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))),
+ rnnModule);
+
+ // Save the output parameter when training the module.
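+ // During training the per-step outputs of the rnn and action modules are
+ // pushed onto moduleOutputParameter, so that Backward() can restore them
+ // while unrolling the attention network through time.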
+ if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards trough f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) + { + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. + for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } + } + + /* + * Calculate the gradient using the output delta and the input activation. + * + * @param input The input parameter used for calculating the gradient. + * @param error The calculated error. + * @param gradient The calculated gradient. + */ + template + void Gradient(arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); + } + + //! Get the model modules. + std::vector& Model() { return network; } + + //! The value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the parameters. 
+ OutputDataType const& Parameters() const { return parameters; } + //! Modify the parameters. + OutputDataType& Parameters() { return parameters; } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the gradient. + OutputDataType const& Gradient() const { return gradient; } + //! Modify the gradient. + OutputDataType& Gradient() { return gradient; } + + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); + } + + private: + //! Calculate the gradient of the attention module. + void IntermediateGradient() + { + intermediateGradient.zeros(); + + // Gradient of the action module. + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(initialInput), + std::move(actionError)), actionModule); + } + else + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError)), + actionModule); + } + + // Gradient of the recurrent module. + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError)), + rnnModule); + + attentionGradient += intermediateGradient; + } + + //! Locally-stored module output size. + size_t outSize; + + //! Locally-stored start module. + LayerTypes rnnModule; + + //! Locally-stored input module. + LayerTypes actionModule; + + //! Number of steps to backpropagate through time (BPTT). + size_t rho; + + //! Locally-stored number of forward steps. + size_t forwardStep; + + //! Locally-stored number of backward steps. + size_t backwardStep; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored weight object. + OutputDataType parameters; + + //! Locally-stored initial module. + LayerTypes initialModule; + + //! Locally-stored recurrent module. + LayerTypes recurrentModule; + + //! Locally-stored model modules. + std::vector network; + + //! Locally-stored merge module. + LayerTypes mergeModule; + + //! Locally-stored weight size visitor. + WeightSizeVisitor weightSizeVisitor; + + //! Locally-stored delta visitor. + DeltaVisitor deltaVisitor; + + //! Locally-stored output parameter visitor. + OutputParameterVisitor outputParameterVisitor; + + //! Locally-stored feedback output parameters. + std::vector feedbackOutputParameter; + + //! List of all module parameters for the backward pass (BBTT). + std::vector moduleOutputParameter; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored gradient object. + OutputDataType gradient; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored recurrent error parameter. + arma::mat recurrentError; + + //! 
Locally-stored action error parameter. + arma::mat actionError; + + //! Locally-stored action delta. + arma::mat actionDelta; + + //! Locally-stored recurrent delta. + arma::mat rnnDelta; + + //! Locally-stored initial action input. + arma::mat initialInput; + + //! Locally-stored reset visitor. + ResetVisitor resetVisitor; + + //! Locally-stored attention gradient. + arma::mat attentionGradient; + + //! Locally-stored intermediate gradient for the attention module. + arma::mat intermediateGradient; +}; // class RecurrentAttention + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp new file mode 100644 index 00000000000..bc938d1a766 --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -0,0 +1,140 @@ +/** + * @file reinforce_normal.hpp + * @author Marcus Edel + * + * Definition of the ReinforceNormalLayer class, which implements the REINFORCE + * algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the reinforce normal layer. The reinforce normal layer + * implements the REINFORCE algorithm for the normal distribution. + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class ReinforceNormal +{ + public: + /** + * Create the ReinforceNormal object. + * + * @param stdev Standard deviation used during the forward and backward pass. + */ + ReinforceNormal(const double stdev) : stdev(stdev) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. + */ + template + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) + { + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); + } + + + //! Get the input parameter. + InputDataType& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. 
+ OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + //! Get the value of the reward parameter. + double Reward() const { return reward; } + //! Modify the value of the deterministic parameter. + double& Reward() { return reward; } + + private: + //! Standard deviation used during the forward and backward pass. + const double stdev; + + //! Locally-stored reward parameter. + double reward; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Locally-stored output module parameter parameters. + std::vector moduleInputParameter; + + //! If true use maximum a posteriori during the forward pass. + bool deterministic; +}; // class ReinforceNormal + +}; // namespace ann +}; // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp new file mode 100644 index 00000000000..d2802dacabc --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -0,0 +1,191 @@ +/** + * @file vr_class_reward.hpp + * @author Marcus Edel + * + * Definition of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_HPP + +#include + +#include "layer_types.hpp" +#include "layer_visitor.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * Implementation of the variance reduced classification reinforcement layer. + * This layer is meant to be used in combination with the reinforce normal layer + * (ReinforceNormalLayer), which expects that an reward: + * (1 for success, 0 otherwise). + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class VRClassReward +{ + public: + /** + * Create the VRClassReward object. + * + * @param scale Parameter used to scale the reward. + * @param sizeAverage Take the average over all batches. + */ + VRClassReward(const double scale = 1, const bool sizeAverage = true) : + scale(scale), + sizeAverage(sizeAverage) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data that contains the log-probabilities for each class. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. 
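+ * @return The negative log-likelihood of the target classes minus the
+ * (optionally averaged) reward.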
+ */ + template + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; + } + + /** + * Ordinary feed backward pass of a neural network. The negative log + * likelihood layer expectes that the input contains log-probabilities for + * each class. The layer also expects a class index, in the range between 1 + * and the number of classes, as target when calling the Forward function. + * + * @param input The propagated input activation. + * @param target The target vector, that contains the class index in the range + * between 1 and the number of classes. + * @param output The calculated error. + */ + template + void Backward(const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) + { + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); + } + + //! Get the input parameter. + InputDataType& InputParameter() const {return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType& OutputParameter() const {return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType& Delta() const {return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the value of the deterministic parameter. + bool Deterministic() const { return deterministic; } + //! Modify the value of the deterministic parameter. + bool& Deterministic() { return deterministic; } + + /* + * Add a new module to the model. + * + * @param args The layer parameter. + */ + template + void Add(Args... args) { network.push_back(new LayerType(args...)); } + + /* + * Add a new module to the model. + * + * @param layer The Layer to be added to the model. + */ + void Add(LayerTypes layer) { network.push_back(layer); } + + private: + //! Locally-stored value to scale the reward. + const double scale; + + //! If true take the average over all batches. + const bool sizeAverage; + + //! Locally stored reward parameter. + double reward; + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! If true dropout and scaling is disabled, see notes above. + bool deterministic; + + //! Locally-stored network modules. 
+ std::vector network; +}; // class VRClassReward + +}; // namespace ann +}; // namespace mlpack + +#endif From ca472a6f4ee516500b295efe738130261aa001e8 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:50:59 +0100 Subject: [PATCH 58/82] Use refactored rnn,ffn classes for the ann tests. --- src/mlpack/tests/CMakeLists.txt | 2 +- .../tests/activation_functions_test.cpp | 40 +- src/mlpack/tests/convolution_test.cpp | 3 +- .../tests/convolutional_network_test.cpp | 108 ++---- src/mlpack/tests/feedforward_network_test.cpp | 360 +++++++----------- src/mlpack/tests/recurrent_network_test.cpp | 5 + 6 files changed, 193 insertions(+), 325 deletions(-) diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 3b3ab0d30b1..b1dc438eaa2 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(mlpack_test ada_delta_test.cpp akfn_test.cpp aknn_test.cpp + ann_layer_test.cpp arma_extend_test.cpp armadillo_svd_test.cpp aug_lagrangian_test.cpp @@ -56,7 +57,6 @@ add_executable(mlpack_test mlpack_test.cpp nbc_test.cpp nca_test.cpp - network_util_test.cpp nmf_test.cpp nystroem_method_test.cpp octree_test.cpp diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp index bebca0de09b..94df3b59380 100644 --- a/src/mlpack/tests/activation_functions_test.cpp +++ b/src/mlpack/tests/activation_functions_test.cpp @@ -12,23 +12,13 @@ */ #include +#include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -125,7 +115,7 @@ void CheckInverseCorrect(const arma::colvec input) /* * Implementation of the HardTanH activation function test. The function is - * implemented as a HardTanH Layer in hard_tanh_layer.hpp + * implemented as a HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -133,11 +123,11 @@ void CheckInverseCorrect(const arma::colvec input) void CheckHardTanHActivationCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the activation function using the entire vector as input. arma::colvec activations; - htf.Forward(input, activations); + htf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -146,7 +136,7 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, /* * Implementation of the HardTanH activation function derivative test. The - * derivative is implemented as HardTanH Layer in hard_tanh_layer.hpp + * derivative is implemented as HardTanH Layer in hard_tanh.hpp * * @param input Input data used for evaluating the HardTanH activation function. * @param target Target data used to evaluate the HardTanH activation. @@ -154,14 +144,15 @@ void CheckHardTanHActivationCorrect(const arma::colvec input, void CheckHardTanHDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - HardTanHLayer<> htf; + HardTanH<> htf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. 
- arma::colvec error(input.n_elem); - htf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + htf.Backward(std::move(input), std::move(error), std::move(derivatives)); + for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); @@ -170,7 +161,7 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, /* * Implementation of the LeakyReLU activation function test. The function is - * implemented as LeakyReLU layer in the file leaky_relu_layer.hpp + * implemented as LeakyReLU layer in the file leaky_relu.hpp * * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. @@ -178,11 +169,11 @@ void CheckHardTanHDerivativeCorrect(const arma::colvec input, void CheckLeakyReLUActivationCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the activation function using the entire vector as input. arma::colvec activations; - lrf.Forward(input, activations); + lrf.Forward(std::move(input), std::move(activations)); for (size_t i = 0; i < activations.n_elem; i++) { BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3); @@ -197,18 +188,17 @@ void CheckLeakyReLUActivationCorrect(const arma::colvec input, * @param input Input data used for evaluating the LeakyReLU activation function. * @param target Target data used to evaluate the LeakyReLU activation. */ - void CheckLeakyReLUDerivativeCorrect(const arma::colvec input, const arma::colvec target) { - LeakyReLULayer<> lrf; + LeakyReLU<> lrf; // Test the calculation of the derivatives using the entire vector as input. arma::colvec derivatives; // This error vector will be set to 1 to get the derivatives. - arma::colvec error(input.n_elem); - lrf.Backward(input, (arma::colvec)error.ones(), derivatives); + arma::colvec error = arma::ones(input.n_elem); + lrf.Backward(std::move(input), std::move(error), std::move(derivatives)); for (size_t i = 0; i < derivatives.n_elem; i++) { BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3); diff --git a/src/mlpack/tests/convolution_test.cpp b/src/mlpack/tests/convolution_test.cpp index a277b9cb41b..180ca8bab36 100644 --- a/src/mlpack/tests/convolution_test.cpp +++ b/src/mlpack/tests/convolution_test.cpp @@ -3,12 +3,11 @@ * @author Shangtong Zhang * @author Marcus Edel * - * Tests for various convolution strategies. - * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. + * Tests for various convolution strategies. */ #include diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 52e1a6c6394..0c99722ca1c 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -11,21 +11,9 @@ */ #include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include #include - -#include -#include +#include +#include #include #include "test_tools.hpp" @@ -38,12 +26,9 @@ using namespace mlpack::optimization; BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** - * Train and evaluate a vanilla network with the specified structure. + * Train the vanilla network on a larger dataset. 
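+ * The model is trained on the MNIST subset of fours and nines, and the
+ * resulting classification error is required to stay below 20 percent.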
*/ -template< - typename PerformanceFunction -> -void BuildVanillaNetwork() +BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { arma::mat X; X.load("mnist_first250_training_4s_and_9s.arm"); @@ -56,23 +41,19 @@ void BuildVanillaNetwork() } // Build the target matrix. - arma::mat Y = arma::zeros(10, nPoints); + arma::mat Y = arma::zeros(1, nPoints); for (size_t i = 0; i < nPoints; i++) { if (i < nPoints / 2) { - Y.col(i)(5) = 1; + Y(i) = 4; } else { - Y.col(i)(8) = 1; + Y(i) = 9; } } - arma::cube input = arma::cube(28, 28, nPoints); - for (size_t i = 0; i < nPoints; i++) - input.slice(i) = arma::mat(X.colptr(i), 28, 28); - /* * Construct a convolutional neural network with a 28x28x1 input layer, * 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer @@ -90,57 +71,46 @@ void BuildVanillaNetwork() * | | +-+ | +-+ | +-+ | +-+ | | | * +---+ +---+ +---+ +---+ +---+ +---+ */ - - ConvLayer<> convLayer0(1, 8, 5, 5); - BiasLayer2D<> biasLayer0(8); - BaseLayer2D<> baseLayer0; - PoolingLayer<> poolingLayer0(2); - - ConvLayer<> convLayer1(8, 12, 5, 5); - BiasLayer2D<> biasLayer1(12); - BaseLayer2D<> baseLayer1; - PoolingLayer<> poolingLayer1(2); - - LinearMappingLayer<> linearLayer0(4608, 10); - BiasLayer<> biasLayer2(10); - SoftmaxLayer<> softmaxLayer0; - - OneHotLayer outputLayer; - - auto modules = std::tie(convLayer0, baseLayer0, linearLayer0, softmaxLayer0); - - CNN net(modules, outputLayer); - biasLayer0.Weights().zeros(); - biasLayer1.Weights().zeros(); - - RMSprop opt(net, 0.01, 0.88, 1e-8, 10 * input.n_slices, 0); - - net.Train(input, Y, opt); - - arma::mat prediction; - net.Predict(input, prediction); + FFN > model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 30); + model.Add >(); + model.Add >(30, 10); + model.Add >(); + + RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; - for (size_t i = 0; i < nPoints; i++) + for (size_t i = 0; i < X.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - Y.col(i)))) == 0) + if (prediction(i) == Y(i)) { error++; } } - double classificationError = 1 - double(error) / nPoints; - BOOST_REQUIRE_LE(classificationError, 0.6); -} - -/** - * Train the vanilla network on a larger dataset. - */ -BOOST_AUTO_TEST_CASE(VanillaNetworkTest) -{ - BuildVanillaNetwork(); + double classificationError = 1 - double(error) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 4477bf22568..883fe9e6b00 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -12,21 +12,9 @@ */ #include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include #include +#include +#include #include #include "test_tools.hpp" @@ -40,16 +28,12 @@ BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest); /** * Train and evaluate a vanilla network with the specified structure. 
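+ * Labels are expected as class indices in the range [1, outputSize];
+ * predictions are obtained by taking the arg max of the network output
+ * before the classification error is computed.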
*/ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildVanillaNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -76,35 +60,32 @@ void BuildVanillaNetwork(MatType& trainData, * +-----+ +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> inputBiasLayer(hiddenLayerSize); - BaseLayer inputBaseLayer; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows); - BaseLayer outputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - OutputLayerType classOutputLayer; + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer, - hiddenLayer1, hiddenBiasLayer1, outputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - FFN net(modules, classOutputLayer); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -125,23 +106,36 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildVanillaNetwork - (trainData, trainLabels, testData, testLabels, 8, 200, 0.1); + BuildVanillaNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -151,33 +145,22 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 30, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildVanillaNetwork - (dataset, labels, dataset, labels, 10, 30, 0.4); + BuildVanillaNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a Dropout network with the specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropoutNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -204,35 +187,33 @@ void BuildDropoutNetwork(MatType& trainData, * +-----+ */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - DropoutLayer<> dropoutLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, dropoutLayer0, - hiddenLayer1, outputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - FFN net(modules, classOutputLayer); + model.Train(std::move(trainData), std::move(trainLabels), opt); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); + for (size_t i = 0; i < predictionTemp.n_cols; ++i) + { + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; + } size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) { error++; } @@ -253,23 +234,36 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + 
+ arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. - BuildDropoutNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropoutNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -279,34 +273,23 @@ BOOST_AUTO_TEST_CASE(DropoutNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - // Vanilla neural net with tanh activation function. - BuildDropoutNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropoutNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } /** * Train and evaluate a DropConnect network(with a baselayer) with the * specified structure. */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> +template void BuildDropConnectNetwork(MatType& trainData, MatType& trainLabels, MatType& testData, MatType& testLabels, + const size_t outputSize, const size_t hiddenLayerSize, const size_t maxEpochs, const double classificationErrorThreshold) @@ -334,122 +317,42 @@ void BuildDropConnectNetwork(MatType& trainData, * * */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows); - DropConnectLayer dropConnectLayer0(hiddenLayer1); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); + FFN > model; + model.Add >(trainData.n_rows, hiddenLayerSize); + model.Add >(); + model.Add >(hiddenLayerSize, outputSize); + model.Add >(); - FFN net(modules, classOutputLayer); + RMSprop opt(model, 0.01, 0.88, 1e-8, + maxEpochs * trainData.n_cols, -1); - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); + model.Train(std::move(trainData), std::move(trainLabels), opt); - net.Train(trainData, trainLabels, opt); + MatType predictionTemp; + model.Predict(testData, predictionTemp); + MatType prediction = arma::zeros(1, predictionTemp.n_cols); - MatType prediction; - net.Predict(testData, prediction); - - size_t error = 0; - for (size_t i = 0; i < testData.n_cols; i++) + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + prediction(i) = arma::as_scalar(arma::find( + arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; } - double classificationError = 1 - double(error) / testData.n_cols; - BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); -} - -/** - * Train and evaluate a DropConnect network(with a linearlayer) with the - * specified structure. 
- */ -template< - typename PerformanceFunction, - typename OutputLayerType, - typename PerformanceFunctionType, - typename MatType = arma::mat -> -void BuildDropConnectNetworkLinear(MatType& trainData, - MatType& trainLabels, - MatType& testData, - MatType& testLabels, - const size_t hiddenLayerSize, - const size_t maxEpochs, - const double classificationErrorThreshold) -{ - /* - * Construct a feed forward network with trainData.n_rows input nodes, - * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The - * network struct that looks like: - * - * Input Hidden DropConnect Output - * Layer Layer Layer Layer - * +-----+ +-----+ +-----+ +-----+ - * | | | | | | | | - * | +------>| +------>| +------>| | - * | | +>| | | | | | - * +-----+ | +--+--+ +-----+ +-----+ - * | - * Bias | - * Layer | - * +-----+ | - * | | | - * | +-----+ - * | | - * +-----+ - * - * - */ - LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize); - BiasLayer<> biasLayer(hiddenLayerSize); - BaseLayer hiddenLayer0; - - DropConnectLayer<> dropConnectLayer0(hiddenLayerSize, trainLabels.n_rows); - - BaseLayer outputLayer; - - OutputLayerType classOutputLayer; - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, - dropConnectLayer0, outputLayer); - - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.01, 0.88, 1e-8, - maxEpochs * trainData.n_cols, 1e-18); - - net.Train(trainData, trainLabels, opt); - - MatType prediction; - net.Predict(testData, prediction); - size_t error = 0; for (size_t i = 0; i < testData.n_cols; i++) { - if (arma::sum(arma::sum( - arma::abs(prediction.col(i) - testLabels.col(i)))) == 0) - { - error++; - } + if (int(arma::as_scalar(prediction.col(i))) == + int(arma::as_scalar(testLabels.col(i)))) + { + error++; + } } double classificationError = 1 - double(error) / testData.n_cols; BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold); } + /** * Train the dropconnect network on a larger dataset. */ @@ -461,28 +364,36 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat trainLabels = arma::zeros(1, trainLabelsTemp.n_cols); + for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i) + { + trainLabels(i) = arma::as_scalar(arma::find( + arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1; + } data::Load("thyroid_test.csv", dataset, true); arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4, dataset.n_cols - 1); - arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0, + + arma::mat testLabelsTemp = dataset.submat(dataset.n_rows - 3, 0, dataset.n_rows - 1, dataset.n_cols - 1); + arma::mat testLabels = arma::zeros(1, testLabelsTemp.n_cols); + for (size_t i = 0; i < testLabels.n_cols; ++i) + { + testLabels(i) = arma::as_scalar(arma::find( + arma::max(testLabelsTemp.col(i)) == testLabelsTemp.col(i), 1)) + 1; + } + // Vanilla neural net with logistic activation function. // Because 92 percent of the patients are not hyperthyroid the neural // network must be significant better than 92%. 
- BuildDropConnectNetwork - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); - - BuildDropConnectNetworkLinear - (trainData, trainLabels, testData, testLabels, 4, 100, 0.1); + BuildDropConnectNetwork<> + (trainData, trainLabels, testData, testLabels, 3, 8, 70, 0.1); dataset.load("mnist_first250_training_4s_and_9s.arm"); @@ -492,18 +403,11 @@ BOOST_AUTO_TEST_CASE(DropConnectNetworkTest) arma::mat labels = arma::zeros(1, dataset.n_cols); labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1); + labels += 1; // Vanilla neural net with logistic activation function. - BuildDropConnectNetwork - (dataset, labels, dataset, labels, 8, 30, 0.4); - - - BuildDropConnectNetworkLinear - (dataset, labels, dataset, labels, 8, 30, 0.4); + BuildDropConnectNetwork<> + (dataset, labels, dataset, labels, 2, 10, 50, 0.2); } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index ff5daae9ede..f7546e83b50 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,6 +3,11 @@ * @author Marcus Edel * * Tests the recurrent network. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include From d178103c2fe824c67a09ddc475b2293985b216b3 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 13 Dec 2016 23:05:53 +0100 Subject: [PATCH 59/82] Add ann module test. --- src/mlpack/tests/ann_layer_test.cpp | 524 ++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 src/mlpack/tests/ann_layer_test.cpp diff --git a/src/mlpack/tests/ann_layer_test.cpp b/src/mlpack/tests/ann_layer_test.cpp new file mode 100644 index 00000000000..101a0170751 --- /dev/null +++ b/src/mlpack/tests/ann_layer_test.cpp @@ -0,0 +1,524 @@ +/** + * @file ann_layer_test.cpp + * @author Marcus Edel + * + * Tests the ann layer modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::ann; + +BOOST_AUTO_TEST_SUITE(ANNLayerTest); + +// Helper function whcih calls the Reset function of the given module. +template +void ResetFunction( + T& layer, + typename std::enable_if::value>::type* = 0) +{ + layer.Reset(); +} + +template +void ResetFunction( + T& /* layer */, + typename std::enable_if::value>::type* = 0) +{ + /* Nothing to do here */ +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianTest(ModuleType& module, + arma::mat& input, + const double minValue = -2, + const double maxValue = -1, + const double perturbation = 1e-6) +{ + arma::mat output, outputA, outputB, jacobianA, jacobianB; + + // Initialize the input matrix. + RandomInitialization init(minValue, maxValue); + init.Initialize(input, input.n_rows, input.n_cols); + + // Initialize the module parameters. 
+ ResetFunction(module); + + // Initialize the jacobian matrix. + module.Forward(std::move(input), std::move(output)); + jacobianA = arma::zeros(input.n_elem, output.n_elem); + + // Share the input paramter matrix. + arma::mat sin = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + double original = sin(i); + sin(i) = original - perturbation; + module.Forward(std::move(input), std::move(outputA)); + sin(i) = original + perturbation; + module.Forward(std::move(input), std::move(outputB)); + sin(i) = original; + + outputB -= outputA; + outputB /= 2 * perturbation; + jacobianA.row(i) = outputB.t(); + } + + // Initialize the derivative parameter. + arma::mat deriv = arma::zeros(output.n_rows, output.n_cols); + + // Share the derivative parameter. + arma::mat derivTemp = arma::mat(deriv.memptr(), deriv.n_rows, deriv.n_cols, + false, false); + + // Initialize the jacobian matrix. + jacobianB = arma::zeros(input.n_elem, output.n_elem); + + for (size_t i = 0; i < derivTemp.n_elem; ++i) + { + deriv.zeros(); + derivTemp(i) = 1; + + arma::mat delta; + module.Backward(std::move(input), std::move(deriv), std::move(delta)); + + jacobianB.col(i) = delta; + } + + return arma::max(arma::max(arma::abs(jacobianA - jacobianB))); +} + +// Approximate Jacobian and supposedly-true Jacobian, then compare them +// similarly to before. +template +double JacobianPerformanceTest(ModuleType& module, + arma::mat& input, + arma::mat& target, + const double eps = 1e-6) +{ + module.Forward(std::move(input), std::move(target)); + + arma::mat delta; + module.Backward(std::move(input), std::move(target), std::move(delta)); + + arma::mat centralDifference = arma::zeros(delta.n_rows, delta.n_cols); + arma::mat inputTemp = arma::mat(input.memptr(), input.n_rows, input.n_cols, + false, false); + + arma::mat centralDifferenceTemp = arma::mat(centralDifference.memptr(), + centralDifference.n_rows, centralDifference.n_cols, false, false); + + for (size_t i = 0; i < input.n_elem; ++i) + { + inputTemp(i) = inputTemp(i) + eps; + double outputA = module.Forward(std::move(input), std::move(target)); + inputTemp(i) = inputTemp(i) - (2 * eps); + double outputB = module.Forward(std::move(input), std::move(target)); + + centralDifferenceTemp(i) = (outputA - outputB) / ( 2 * eps); + inputTemp(i) = inputTemp(i) + eps; + } + + return arma::max(arma::max(arma::abs(centralDifference - delta))); +} + +/** + * Simple add module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddLayerTest) +{ + arma::mat output, input, delta; + Add<> module(10); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(module.Parameters()), arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(10 + arma::accu(module.Parameters()), + arma::accu(output), 1e-3); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_CLOSE(arma::accu(output), arma::accu(delta), 1e-3); +} + +/** + * Jacobian add module test. 
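+ * Builds Add layers of random size and checks that the finite-difference
+ * Jacobian matches the Jacobian implied by the layer's Backward() pass.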
+ */ +BOOST_AUTO_TEST_CASE(JacobianAddLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Add<> module(elements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple constant module test. + */ +BOOST_AUTO_TEST_CASE(SimpleConstantLayerTest) +{ + arma::mat output, input, delta; + Constant<> module(10, 3.0); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); + + // Test the forward function. + input = arma::ones(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(output), 30.0); + + // Test the backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian constant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t elements = math::RandInt(2, 1000); + arma::mat input; + input.set_size(elements, 1); + + Constant<> module(elements, 1.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple dropout module test. + */ +BOOST_AUTO_TEST_CASE(SimpleDropoutLayerTest) +{ + // Initialize the probability of setting a value to zero and the scale + // parameter. + const double p = 0.2; + const double scale = 1.0 / (1.0 - p); + + // Initialize the input parameter. + arma::mat input(1000, 1); + input.fill(1 - p); + + Dropout<> module(p); + module.Deterministic() = false; + + // Test the Forward function. + arma::mat output; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(output) - (1 - p))), 0.05); + + // Test the Backward function. + arma::mat delta; + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_LE( + arma::as_scalar(arma::abs(arma::mean(delta) - (1 - p))), 0.05); + + // Test the Forward function. + module.Deterministic() = true; + module.Rescale() = false; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(arma::accu(input), arma::accu(output)); + + // Test the Forward function. + module.Rescale() = true; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu(input) * scale, arma::accu(output), 1e-3); +} + +/** + * Simple linear module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearLayerTest) +{ + arma::mat output, input, delta; + Linear<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_CLOSE(arma::accu( + module.Parameters().submat(100, 0, module.Parameters().n_elem - 1, 0)), + arma::accu(output), 1e-3); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear module test. 
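+ *
+ * For a Linear module the analytic Jacobian with respect to the input is
+ * simply the weight matrix, so this effectively checks that Backward()
+ * propagates the error through the transposed weights.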
+ */ +BOOST_AUTO_TEST_CASE(JacobianLinearLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + Linear<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(SimpleLinearNoBiasLayerTest) +{ + arma::mat output, input, delta; + LinearNoBias<> module(10, 10); + module.Parameters().randu(); + module.Reset(); + + // Test the Forward function. + input = arma::zeros(10, 1); + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(0, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(input), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(delta), 0); +} + +/** + * Jacobian linear no bias module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLinearNoBiasLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + const size_t outputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LinearNoBias<> module(inputElements, outputElements); + module.Parameters().randu(); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian negative log likelihood module test. + */ +BOOST_AUTO_TEST_CASE(JacobianNegativeLogLikelihoodLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + NegativeLogLikelihood<> module; + const size_t inputElements = math::RandInt(5, 100); + arma::mat input; + RandomInitialization init(0, 1); + init.Initialize(input, inputElements, 1); + + arma::mat target(1, 1); + target(0) = math::RandInt(1, inputElements - 1); + + double error = JacobianPerformanceTest(module, input, target); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian LeakyReLU module test. + */ +BOOST_AUTO_TEST_CASE(JacobianLeakyReLULayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + LeakyReLU<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian MultiplyConstant module test. + */ +BOOST_AUTO_TEST_CASE(JacobianMultiplyConstantLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + MultiplyConstant<> module(3.0); + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Jacobian HardTanH module test. + */ +BOOST_AUTO_TEST_CASE(JacobianHardTanHLayerTest) +{ + for (size_t i = 0; i < 5; i++) + { + const size_t inputElements = math::RandInt(2, 1000); + + arma::mat input; + input.set_size(inputElements, 1); + + HardTanH<> module; + + double error = JacobianTest(module, input); + BOOST_REQUIRE_LE(error, 1e-5); + } +} + +/** + * Simple select module test. + */ +BOOST_AUTO_TEST_CASE(SimpleSelectLayerTest) +{ + arma::mat outputA, outputB, input, delta; + + input = arma::ones(10, 5); + for (size_t i = 0; i < input.n_cols; ++i) + { + input.col(i) *= i; + } + + // Test the Forward function. + Select<> moduleA(3); + moduleA.Forward(std::move(input), std::move(outputA)); + BOOST_REQUIRE_EQUAL(30, arma::accu(outputA)); + + // Test the Forward function. 
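+  // (Select<>(3, 5) presumably keeps only the first 5 elements of column 3,
+  // halving the column sum from 30 to 15.)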
+ Select<> moduleB(3, 5); + moduleB.Forward(std::move(input), std::move(outputB)); + BOOST_REQUIRE_EQUAL(15, arma::accu(outputB)); + + // Test the Backward function. + moduleA.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(30, arma::accu(delta)); + + // Test the Backward function. + moduleB.Backward(std::move(input), std::move(outputA), std::move(delta)); + BOOST_REQUIRE_EQUAL(15, arma::accu(delta)); +} + +/** + * Simple join module test. + */ +BOOST_AUTO_TEST_CASE(SimpleJoinLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 5); + + // Test the Forward function. + Join<> module; + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(50, arma::accu(output)); + + bool b = output.n_rows == 1 || output.n_cols == 1; + BOOST_REQUIRE_EQUAL(b, true); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(50, arma::accu(delta)); + + b = delta.n_rows == input.n_rows && input.n_cols; + BOOST_REQUIRE_EQUAL(b, true); +} + +/** + * Simple add merge module test. + */ +BOOST_AUTO_TEST_CASE(SimpleAddMergeLayerTest) +{ + arma::mat output, input, delta; + input = arma::ones(10, 1); + + for (size_t i = 0; i < 5; ++i) + { + AddMerge<> module; + const size_t numMergeModules = math::RandInt(2, 10); + for (size_t m = 0; m < numMergeModules; ++m) + { + IdentityLayer<> identityLayer; + identityLayer.Forward(std::move(input), + std::move(identityLayer.OutputParameter())); + + module.Add(identityLayer); + } + + // Test the Forward function. + module.Forward(std::move(input), std::move(output)); + BOOST_REQUIRE_EQUAL(10 * numMergeModules, arma::accu(output)); + + // Test the Backward function. + module.Backward(std::move(input), std::move(output), std::move(delta)); + BOOST_REQUIRE_EQUAL(arma::accu(output), arma::accu(delta)); + } +} + +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file From 4c565a4c75c1b172d4d10d788ecd159f7fdc4eab Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 14 Dec 2016 22:46:38 +0100 Subject: [PATCH 60/82] Split layer modules into definition and implementation. 
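Each layer header now only carries the class declaration and its
documentation; the member function bodies move into a matching *_impl.hpp
that the header includes at the bottom, so the templates stay header-only
and only the file layout changes. In sketch form, using the Add layer from
this patch as the example:

    // add.hpp -- declaration only.
    template<typename InputDataType, typename OutputDataType>
    class Add
    {
     public:
      Add(const size_t outSize);
      // ...
    };

    // Include implementation.
    #include "add_impl.hpp"

    // add_impl.hpp -- definitions.
    template<typename InputDataType, typename OutputDataType>
    Add<InputDataType, OutputDataType>::Add(const size_t outSize) :
        outSize(outSize)
    {
      weights.set_size(outSize, 1);
    }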
--- src/mlpack/methods/ann/CMakeLists.txt | 6 - src/mlpack/methods/ann/layer/CMakeLists.txt | 77 +++- src/mlpack/methods/ann/layer/add.hpp | 28 +- src/mlpack/methods/ann/layer/add_impl.hpp | 68 ++++ src/mlpack/methods/ann/layer/add_merge.hpp | 28 +- .../methods/ann/layer/add_merge_impl.hpp | 61 ++++ src/mlpack/methods/ann/layer/concat.hpp | 125 +------ src/mlpack/methods/ann/layer/concat_impl.hpp | 159 +++++++++ .../methods/ann/layer/concat_performance.hpp | 58 +--- .../ann/layer/concat_performance_impl.hpp | 118 +++++++ src/mlpack/methods/ann/layer/constant.hpp | 37 +- .../methods/ann/layer/constant_impl.hpp | 65 ++++ src/mlpack/methods/ann/layer/convolution.hpp | 198 +---------- .../methods/ann/layer/convolution_impl.hpp | 328 ++++++++++++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 84 +---- .../methods/ann/layer/dropconnect_impl.hpp | 118 +++++++ src/mlpack/methods/ann/layer/dropout.hpp | 48 +-- src/mlpack/methods/ann/layer/dropout_impl.hpp | 84 +++++ src/mlpack/methods/ann/layer/glimpse.hpp | 190 +--------- src/mlpack/methods/ann/layer/glimpse_impl.hpp | 224 ++++++++++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 93 +---- .../methods/ann/layer/hard_tanh_impl.hpp | 72 ++++ src/mlpack/methods/ann/layer/join.hpp | 26 +- src/mlpack/methods/ann/layer/join_impl.hpp | 60 ++++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 29 +- .../methods/ann/layer/leaky_relu_impl.hpp | 60 ++++ src/mlpack/methods/ann/layer/linear.hpp | 45 +-- src/mlpack/methods/ann/layer/linear_impl.hpp | 87 +++++ .../methods/ann/layer/linear_no_bias.hpp | 40 +-- .../methods/ann/layer/linear_no_bias_impl.hpp | 83 +++++ src/mlpack/methods/ann/layer/log_softmax.hpp | 53 +-- .../methods/ann/layer/log_softmax_impl.hpp | 85 +++++ src/mlpack/methods/ann/layer/lookup.hpp | 33 +- src/mlpack/methods/ann/layer/lookup_impl.hpp | 74 ++++ src/mlpack/methods/ann/layer/lstm.hpp | 297 +--------------- src/mlpack/methods/ann/layer/lstm_impl.hpp | 273 +++++++++++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 121 +------ .../methods/ann/layer/max_pooling_impl.hpp | 149 ++++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 94 +---- .../methods/ann/layer/mean_pooling_impl.hpp | 126 +++++++ .../methods/ann/layer/mean_squared_error.hpp | 28 +- .../ann/layer/mean_squared_error_impl.hpp | 57 +++ .../methods/ann/layer/multiply_constant.hpp | 27 +- .../ann/layer/multiply_constant_impl.hpp | 51 +++ .../ann/layer/negative_log_likelihood.hpp | 44 +-- .../layer/negative_log_likelihood_impl.hpp | 76 ++++ src/mlpack/methods/ann/layer/recurrent.hpp | 159 +-------- .../methods/ann/layer/recurrent_attention.hpp | 161 +-------- .../ann/layer/recurrent_attention_impl.hpp | 204 +++++++++++ .../methods/ann/layer/recurrent_impl.hpp | 206 +++++++++++ .../methods/ann/layer/reinforce_normal.hpp | 47 +-- .../ann/layer/reinforce_normal_impl.hpp | 69 ++++ src/mlpack/methods/ann/layer/select.hpp | 46 +-- src/mlpack/methods/ann/layer/select_impl.hpp | 75 ++++ src/mlpack/methods/ann/layer/sequential.hpp | 117 +------ .../methods/ann/layer/sequential_impl.hpp | 154 ++++++++ .../methods/ann/layer/vr_class_reward.hpp | 74 +--- .../ann/layer/vr_class_reward_impl.hpp | 101 ++++++ 58 files changed, 3658 insertions(+), 2042 deletions(-) create mode 100644 src/mlpack/methods/ann/layer/add_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/add_merge_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/concat_performance_impl.hpp create mode 100644 
src/mlpack/methods/ann/layer/constant_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/convolution_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/dropconnect_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/dropout_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/glimpse_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/hard_tanh_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/join_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/leaky_relu_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/log_softmax_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/lookup_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/lstm_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/max_pooling_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_pooling_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/multiply_constant_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/recurrent_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/select_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/sequential_impl.hpp create mode 100644 src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt index 6ff7011404b..46174b88578 100644 --- a/src/mlpack/methods/ann/CMakeLists.txt +++ b/src/mlpack/methods/ann/CMakeLists.txt @@ -1,12 +1,8 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. set(SOURCES - cnn.hpp - cnn_impl.hpp ffn.hpp ffn_impl.hpp - network_util.hpp - network_util_impl.hpp rnn.hpp rnn_impl.hpp ) @@ -23,6 +19,4 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) add_subdirectory(activation_functions) add_subdirectory(init_rules) add_subdirectory(layer) -add_subdirectory(performance_functions) -add_subdirectory(pooling_rules) add_subdirectory(convolution_rules) diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt index b639cdad785..4211aeaaaa7 100644 --- a/src/mlpack/methods/ann/layer/CMakeLists.txt +++ b/src/mlpack/methods/ann/layer/CMakeLists.txt @@ -1,23 +1,68 @@ # Define the files we need to compile # Anything not in this list will not be compiled into mlpack. 
set(SOURCES - layer_traits.hpp - binary_classification_layer.hpp + add.hpp + add_impl.hpp + add_merge.hpp + add_merge_impl.hpp base_layer.hpp - empty_layer.hpp - bias_layer.hpp - dropout_layer.hpp - dropconnect_layer.hpp - hard_tanh_layer.hpp - leaky_relu_layer.hpp - linear_layer.hpp - conv_layer.hpp - pooling_layer.hpp - recurrent_layer.hpp - lstm_layer.hpp - sparse_bias_layer.hpp - sparse_input_layer.hpp - sparse_output_layer.hpp + concat.hpp + concat_impl.hpp + concat_performance.hpp + concat_performance_impl.hpp + constant.hpp + constant_impl.hpp + convolution.hpp + convolution_impl.hpp + dropconnect.hpp + dropconnect_impl.hpp + dropout.hpp + dropout_impl.hpp + glimpse.hpp + glimpse_impl.hpp + hard_tanh.hpp + hard_tanh_impl.hpp + join.hpp + join_impl.hpp + layer.hpp + layer_traits.hpp + layer_visitor.hpp + layer_visitor_impl.hpp + layer_types.hpp + leaky_relu.hpp + leaky_relu_impl.hpp + linear.hpp + linear_impl.hpp + linear_no_bias.hpp + linear_no_bias_impl.hpp + log_softmax.hpp + log_softmax_impl.hpp + lookup.hpp + lookup_impl.hpp + lstm.hpp + lstm_impl.hpp + max_pooling.hpp + max_pooling_impl.hpp + mean_pooling.hpp + mean_pooling_impl.hpp + mean_squared_error.hpp + mean_squared_error_impl.hpp + multiply_constant.hpp + multiply_constant_impl.hpp + negative_log_likelihood.hpp + negative_log_likelihood_impl.hpp + recurrent.hpp + recurrent_impl.hpp + recurrent_attention.hpp + recurrent_attention_impl.hpp + reinforce_normal.hpp + reinforce_normal_impl.hpp + select.hpp + select_impl.hpp + sequential.hpp + sequential_impl.hpp + vr_class_reward_impl.hpp + vr_class_reward_impl.hpp ) # Add directory name to sources. diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp index be8fc60e31b..1afb1121300 100644 --- a/src/mlpack/methods/ann/layer/add.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -39,10 +39,7 @@ class Add * * @param outSize The number of output units. */ - Add(const size_t outSize) : outSize(outSize) - { - weights.set_size(outSize, 1); - } + Add(const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -52,10 +49,7 @@ class Add * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = input + weights; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -69,10 +63,7 @@ class Add template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -84,10 +75,7 @@ class Add template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -118,10 +106,7 @@ class Add * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of output units. @@ -146,4 +131,7 @@ class Add } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "add_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/add_impl.hpp b/src/mlpack/methods/ann/layer/add_impl.hpp new file mode 100644 index 00000000000..8e87078dcef --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_impl.hpp @@ -0,0 +1,68 @@ +/** + * @file add_impl.hpp + * @author Marcus Edel + * + * Implementation of the Add class that applies a bias term to the incoming + * data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_IMPL_HPP + +// In case it hasn't yet been included. +#include "add.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Add::Add(const size_t outSize) : + outSize(outSize) +{ + weights.set_size(outSize, 1); +} + +template +template +void Add::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = input + weights; +} + +template +template +void Add::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Add::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = error; +} + +template +template +void Add::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp index 7a01792d250..222c3ef1a2f 100644 --- a/src/mlpack/methods/ann/layer/add_merge.hpp +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -38,10 +38,7 @@ class AddMerge { public: //! Create the AddMerge object. - AddMerge() - { - // Nothing to do here. - } + AddMerge(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -51,15 +48,7 @@ class AddMerge * @param output Resulting output activation. */ template - void Forward(const InputType&& /* input */, OutputType&& output) - { - output = boost::apply_visitor(outputParameterVisitor, network.front()); - - for (size_t i = 1; i < network.size(); ++i) - { - output += boost::apply_visitor(outputParameterVisitor, network[i]); - } - } + void Forward(const InputType&& /* input */, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -73,10 +62,7 @@ class AddMerge template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Add a new module to the model. @@ -120,10 +106,7 @@ class AddMerge * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(network, "network"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: std::vector network; @@ -150,4 +133,7 @@ class AddMerge } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "add_merge_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp new file mode 100644 index 00000000000..0ef62c2d0ac --- /dev/null +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -0,0 +1,61 @@ +/** + * @file add_merge_impl.hpp + * @author Marcus Edel + * + * Definition of the AddMerge module which accumulates the output of the given + * modules. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP + +// In case it hasn't yet been included. +#include "add_merge_impl.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +AddMerge::AddMerge() +{ + // Nothing to do here. +} + +template +template +void AddMerge::Forward( + const InputType&& /* input */, OutputType&& output) +{ + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } +} + +template +template +void AddMerge::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = gy; +} + + +template +template +void AddMerge::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(network, "network"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp index bd836c7e26b..808309f7122 100644 --- a/src/mlpack/methods/ann/layer/concat.hpp +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -45,12 +45,7 @@ class Concat * @param model Expose all network modules. * @param same Merge the error in the backward pass. */ - Concat(const bool model = true, const bool same = true) : - model(model), - same(same) - { - parameters.set_size(0, 0); - } + Concat(const bool model = true, const bool same = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,42 +55,7 @@ class Concat * @param output Resulting output activation. 
*/ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - size_t outSize = 0; - - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (boost::apply_visitor( - outputParameterVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - } - } - - output = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - if (elements < outSize) - { - output.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(outputParameterVisitor, network[i])); - } - else - { - output.col(i) = arma::vectorise(boost::apply_visitor( - outputParameterVisitor, network[i])); - } - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -109,67 +69,7 @@ class Concat template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - size_t outSize = 0; - size_t elements = 0; - - for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) - { - elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - arma::mat delta; - if (gy.n_cols == 1) - { - delta = gy.submat(j, 0, j + elements - 1, 0); - } - else - { - delta = gy.submat(0, i, elements - 1, i); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(delta), std::move( - boost::apply_visitor(deltaVisitor, network[i]))), network[i]); - - if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - } - - if (same) - { - if (i == 0) - { - g = std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g += std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - - if (!same) - { - g = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - if (elements < outSize) - { - g.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g.col(i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -181,14 +81,7 @@ class Concat template void Gradient(arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(error)), network[i]); - } - } + arma::Mat&& /* gradient */); /* * Add a new module to the model. @@ -240,6 +133,12 @@ class Concat //! Modify the gradient. arma::mat& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -278,8 +177,10 @@ class Concat arma::mat gradient; }; // class Concat - } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "concat_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/concat_impl.hpp b/src/mlpack/methods/ann/layer/concat_impl.hpp new file mode 100644 index 00000000000..44efe499231 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_impl.hpp @@ -0,0 +1,159 @@ +/** + * @file concat_impl.hpp + * @author Marcus Edel + * + * Implementation of the Concat class, which acts as a concatenation contain. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Concat::Concat( + const bool model, const bool same) : model(model), same(same) +{ + parameters.set_size(0, 0); +} + +template +template +void Concat::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } +} + +template +template +void Concat::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } +} + +template +template +void Concat::Gradient( + arma::Mat&& /* input */, + 
arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } +} + +template +template +void Concat::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + + +#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp index 0f03cbc72e1..39b7e10f126 100644 --- a/src/mlpack/methods/ann/layer/concat_performance.hpp +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -47,12 +47,7 @@ class ConcatPerformance * @param outputLayer Output layer used to evaluate the network. */ ConcatPerformance(const size_t inSize, - OutputLayerType&& outputLayer = OutputLayerType()) : - inSize(inSize), - outputLayer(std::move(outputLayer)) - { - /* Nothing to do here. */ - } + OutputLayerType&& outputLayer = OutputLayerType()); /* * Computes the Negative log likelihood. @@ -61,20 +56,7 @@ class ConcatPerformance * @param output Resulting output activation. */ template - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - const size_t elements = input.n_elem / inSize; - - double output = 0; - for (size_t i = 0; i < input.n_elem; i+= elements) - { - arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); - output += outputLayer.Forward(std::move(subInput), std::move(target)); - } - - return output; - } - + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log * likelihood layer expectes that the input contains log-probabilities for @@ -89,28 +71,7 @@ class ConcatPerformance template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - const size_t elements = input.n_elem / inSize; - - arma::mat subInput = input.submat(0, 0, elements - 1, 0); - arma::mat subOutput; - - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output = arma::zeros(subOutput.n_elem, inSize); - output.col(0) = subOutput; - - for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) - { - subInput = input.submat(i, 0, i + elements - 1, 0); - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output.col(j) = subOutput; - } - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -127,6 +88,12 @@ class ConcatPerformance //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored number of inputs. size_t inSize; @@ -144,7 +111,10 @@ class ConcatPerformance OutputDataType outputParameter; }; // class ConcatPerformance -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "concat_performance_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/concat_performance_impl.hpp b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp new file mode 100644 index 00000000000..f014ac33f01 --- /dev/null +++ b/src/mlpack/methods/ann/layer/concat_performance_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file concat_performance_impl.hpp + * @author Marcus Edel + * + * Implementation of the ConcatPerformance class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONCAT_PERFORMANCE_IMPL_HPP + +// In case it hasn't yet been included. +#include "concat_performance.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::ConcatPerformance(const size_t inSize, OutputLayerType&& outputLayer) : + inSize(inSize), + outputLayer(std::move(outputLayer)) +{ + // Nothing to do here. +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +double ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& target) +{ + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } +} + +template< + typename OutputLayerType, + typename InputDataType, + typename OutputDataType +> +template +void ConcatPerformance< + OutputLayerType, + InputDataType, + OutputDataType +>::Serialize(Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_performance_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp index 58816acfbd3..b24b44aa802 100644 --- a/src/mlpack/methods/ann/layer/constant.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -41,13 +41,7 @@ class Constant * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. 
*/ - Constant(const size_t outSize, const double scalar) : - inSize(0), - outSize(outSize) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } + Constant(const size_t outSize, const double scalar); /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -57,15 +51,7 @@ class Constant * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - if (inSize == 0) - { - inSize = input.n_elem; - } - - output = constantOutput; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -76,10 +62,9 @@ class Constant * @param g The calculated gradient. */ template - void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) - { - g = arma::zeros(inSize, 1); - } + void Backward(const DataType&& /* input */, + DataType&& /* gy */, + DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -100,10 +85,7 @@ class Constant * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -125,7 +107,10 @@ class Constant OutputDataType outputParameter; }; // class ConstantLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "constant_impl.hpp" #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/constant_impl.hpp b/src/mlpack/methods/ann/layer/constant_impl.hpp new file mode 100644 index 00000000000..09e0624c6e0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/constant_impl.hpp @@ -0,0 +1,65 @@ +/** + * @file constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the Constant class, which outputs a constant value given + * any input. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Constant::Constant( + const size_t outSize, + const double scalar) : + inSize(0), + outSize(outSize) +{ + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); +} + +template +template +void Constant::Forward( + const InputType&& input, OutputType&& output) +{ + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; +} + +template +template +void Constant::Backward( + const DataType&& /* input */, DataType&& /* gy */, DataType&& g) +{ + g = arma::zeros(inSize, 1); +} + +template +template +void Constant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(constantOutput, "constantOutput"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index 03477c6ec1e..a7f647ed226 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -47,10 +47,7 @@ class Convolution { public: //! Create the Convolution object. - Convolution() - { - /* Nothing to do here. */ - } + Convolution(); /** * Create the Convolution object using the specified number of input maps, @@ -76,33 +73,12 @@ class Convolution const size_t padW = 0, const size_t padH = 0, const size_t inputWidth = 0, - const size_t inputHeight = 0) : - inSize(inSize), - outSize(outSize), - kW(kW), - kH(kH), - dW(dW), - dH(dH), - padW(padW), - padH(padH), - inputWidth(inputWidth), - inputHeight(inputHeight), - outputWidth(0), - outputHeight(0) - { - weights.set_size((outSize * inSize * kW * kH) + outSize, 1); - } + const size_t inputHeight = 0); /* * Set the weight and bias term. */ - void Reset() - { - weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -112,48 +88,7 @@ class Convolution * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); - - if (padW != 0 || padH != 0) - { - Pad(inputTemp, padW, padH, inputPaddedTemp); - } - - size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); - size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); - - outputTemp = arma::zeros >(wConv, hConv, outSize); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat convOutput; - - if (padW != 0 || padH != 0) - { - ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - else - { - ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - - outputTemp.slice(outMap) += convOutput; - } - - outputTemp.slice(outMap) += bias(outMap); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -167,40 +102,7 @@ class Convolution template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), - outputWidth, outputHeight, outSize); - gTemp = arma::zeros >(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weight.slice(outMapIdx), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(mappedError.slice(outMap), - rotatedFilter, output, dW, dH); - - if (padW != 0 || padH != 0) - { - gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, - rotatedFilter.n_cols / 2, - rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, - rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); - } - else - { - gTemp.slice(inMap) += output; - } - } - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -212,74 +114,7 @@ class Convolution template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, - arma::Mat&& gradient) - { - arma::cube mappedError; - if (padW != 0 && padH != 0) - { - mappedError = arma::cube(error.memptr(), outputWidth / padW, - outputHeight / padH, outSize); - } - else - { - mappedError = arma::cube(error.memptr(), outputWidth, - outputHeight, outSize); - } - - gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, - weight.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, - s += outSize) - { - arma::Cube inputSlices; - if (padW != 0 || padH != 0) - { - inputSlices = inputPaddedTemp.slices(inMap, inMap); - } - else - { - inputSlices = inputTemp.slices(inMap, inMap); - } - - arma::Cube deltaSlices = mappedError.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, - output, dW, dH); - - if ((padW != 0 || padH != 0) && - (gradientTemp.n_rows < output.n_rows && - gradientTemp.n_cols < output.n_cols)) - { - for (size_t i = 0; i < output.n_slices; i++) - { - arma::mat subOutput = output.slice(i); - - gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, - subOutput.n_cols / 2, - subOutput.n_rows / 2 + gradientTemp.n_rows - 1, - subOutput.n_cols / 2 + gradientTemp.n_cols - 1); - } - } - else - { - for (size_t i = 0; i < output.n_slices; i++) - { - gradientTemp.slice(s) += output.slice(i); - } - } - } - - gradient.submat(weight.n_elem + outMap, 0, - weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( - outMap, outMap)); - } - - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -330,22 +165,7 @@ class Convolution * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - ar & data::CreateNVP(padW, "padW"); - ar & data::CreateNVP(padH, "padH"); - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inputWidth, "inputWidth"); - ar & data::CreateNVP(inputHeight, "inputHeight"); - ar & data::CreateNVP(outputWidth, "outputWidth"); - ar & data::CreateNVP(outputHeight, "outputHeight"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -515,8 +335,10 @@ class Convolution OutputDataType outputParameter; }; // class Convolution - } // namespace ann } // namespace mlpack +// Include implementation. +#include "convolution_impl.hpp" + #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution_impl.hpp b/src/mlpack/methods/ann/layer/convolution_impl.hpp new file mode 100644 index 00000000000..99164d18166 --- /dev/null +++ b/src/mlpack/methods/ann/layer/convolution_impl.hpp @@ -0,0 +1,328 @@ +/** + * @file convolution_impl.hpp + * @author Marcus Edel + * + * Implementation of the Convolution module class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
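+ *
+ * Note: the spatial output sizes used below come from ConvOutSize(), which
+ * is assumed to follow the usual convolution arithmetic, e.g.
+ *   wConv = (inputWidth - kW + 2 * padW) / dW + 1.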
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_CONVOLUTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "convolution.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution() +{ + // Nothing to do here. +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Convolution( + const size_t inSize, + const size_t outSize, + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const size_t padW, + const size_t padH, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) +{ + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Reset() +{ + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Forward(const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + 
BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Gradient( + const arma::Mat&& /* input */, + arma::Mat&& error, + arma::Mat&& gradient) +{ + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); +} + +template< + typename ForwardConvolutionRule, + typename BackwardConvolutionRule, + typename GradientConvolutionRule, + typename InputDataType, + typename OutputDataType +> +template +void Convolution< + ForwardConvolutionRule, + BackwardConvolutionRule, + GradientConvolutionRule, + InputDataType, + OutputDataType +>::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + 
ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); +} + +} // namespace ann +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp index 6180c812572..f74c8e6aaac 100644 --- a/src/mlpack/methods/ann/layer/dropconnect.hpp +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -63,10 +63,7 @@ class DropConnect { public: //! Create the DropConnect object. - DropConnect() - { - /* Nothing to do here. */ - } + DropConnect(); /** * Creates the DropConnect Layer as a Linear Object that takes input size, @@ -78,18 +75,9 @@ class DropConnect */ DropConnect(const size_t inSize, const size_t outSize, - const double ratio = 0.5) : - ratio(ratio), - scale(1.0 / (1 - ratio)), - baseLayer(new Linear(inSize, outSize)) - { - network.push_back(baseLayer); - } + const double ratio = 0.5); - ~DropConnect() - { - boost::apply_visitor(DeleteVisitor(), baseLayer); - } + ~DropConnect(); /** * Ordinary feed forward pass of the DropConnect layer. @@ -98,42 +86,7 @@ class DropConnect * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - } - else - { - // Save weights for denoising. - boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); - - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. - mask = arma::randu >(denoise.n_rows, denoise.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), - baseLayer); - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - - output = output * scale; - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the DropConnect layer. @@ -145,16 +98,7 @@ class DropConnect template void Backward(arma::Mat&& input, arma::Mat&& gy, - arma::Mat&& g) - { - boost::apply_visitor( - BackwardVisitor( - std::move(input), - std::move(gy), - std::move(g) - ), - baseLayer); - } + arma::Mat&& g); /** * Calculate the gradient using the output delta and the input activation. @@ -166,14 +110,7 @@ class DropConnect template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - baseLayer); - - // Denoise the weights. - boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); - } + arma::Mat&& /* gradient */); //! Get the model modules. std::vector& Model() { return network; } @@ -219,6 +156,12 @@ class DropConnect scale = 1.0 / (1.0 - ratio); } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! The probability of setting a value to zero. 
double ratio; @@ -260,4 +203,7 @@ class DropConnect } // namespace ann } // namespace mlpack +// Include implementation. +#include "dropconnect_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropconnect_impl.hpp b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp new file mode 100644 index 00000000000..294952b9bcf --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropconnect_impl.hpp @@ -0,0 +1,118 @@ +/** + * @file dropconnect_impl.hpp + * @author Palash Ahuja + * @author Marcus Edel + * + * Implementation of the DropConnect class, which implements a regularizer + * that randomly sets connections to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropconnect.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +DropConnect::DropConnect() +{ + // Nothing to do here. +} + +template +DropConnect::DropConnect( + const size_t inSize, + const size_t outSize, + const double ratio) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) +{ + network.push_back(baseLayer); +} + +template +DropConnect::~DropConnect() +{ + boost::apply_visitor(DeleteVisitor(), baseLayer); +} + +template +template +void DropConnect::Forward( + arma::Mat&& input, + arma::Mat&& output) +{ + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + baseLayer); + + output = output * scale; + } +} + +template +template +void DropConnect::Backward( + arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy), + std::move(g)), baseLayer); +} + +template +template +void DropConnect::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. 
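DropConnect, unlike Dropout, zeroes individual weights of the wrapped Linear layer rather than activations. A minimal sketch of the masking step used in Forward() above, illustrative only and not from the patch (the helper name is made up); the layer then rescales its output by 1 / (1 - ratio) so expectations match the deterministic pass:

#include <armadillo>

// Zero each connection independently with probability 'ratio'.
arma::mat DropConnectWeights(const arma::mat& weights, const double ratio)
{
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([ratio](double v) { return (v > ratio) ? 1.0 : 0.0; });
  return weights % mask;
}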
+ boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); +} + +template +template +void DropConnect::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(scale, "scale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index b5bfa9a3e2c..4371f21dad3 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -61,14 +61,7 @@ class Dropout * @param ratio The probability of setting a value to zero. * @param rescale If true the input is rescaled when deterministic is False. */ - Dropout(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } + Dropout(const double ratio = 0.5, const bool rescale = true); /** * Ordinary feed forward pass of the dropout layer. @@ -77,30 +70,7 @@ class Dropout * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the dropout layer. @@ -112,10 +82,7 @@ class Dropout template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy % mask * scale; - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -156,11 +123,7 @@ class Dropout * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored delta object. @@ -191,4 +154,7 @@ class Dropout } // namespace ann } // namespace mlpack +// Include implementation. +#include "dropout_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/dropout_impl.hpp b/src/mlpack/methods/ann/layer/dropout_impl.hpp new file mode 100644 index 00000000000..b805a94d464 --- /dev/null +++ b/src/mlpack/methods/ann/layer/dropout_impl.hpp @@ -0,0 +1,84 @@ +/** + * @file dropout_impl.hpp + * @author Marcus Edel + * + * Implementation of the Dropout class, which implements a regularizer that + * randomly sets units to zero. Preventing units from co-adapting. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_DROPOUT_IMPL_HPP + +// In case it hasn't yet been included. +#include "dropout.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. 
*/ { + +template +Dropout::Dropout( + const double ratio, const bool rescale) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) +{ + // Nothing to do here. +} + +template +template +void Dropout::Forward( + const arma::Mat&& input, + arma::Mat&& output) +{ + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } +} + +template +template +void Dropout::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy % mask * scale; +} + +template +template +void Dropout::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp index 37db36b1edc..e007fae30b9 100644 --- a/src/mlpack/methods/ann/layer/glimpse.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -100,16 +100,7 @@ class Glimpse const size_t depth = 3, const size_t scale = 2, const size_t inputWidth = 0, - const size_t inputHeight = 0) : - inSize(inSize), - size(size), - depth(depth), - scale(scale), - inputWidth(inputWidth), - inputHeight(inputHeight) - { - // Nothing to do here. - } + const size_t inputHeight = 0); /** * Ordinary feed forward pass of the glimpse layer. @@ -118,86 +109,7 @@ class Glimpse * @param output Resulting output activation. 
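The Forward() implementation above is the usual "inverted" dropout: during training, activations are dropped with probability ratio and the survivors are scaled by 1 / (1 - ratio), so the deterministic (test) pass can simply copy the input when rescale is true. A self-contained sketch of the training branch, illustrative only and not from the patch:

#include <armadillo>

arma::mat DropoutForward(const arma::mat& input, const double ratio)
{
  // Keep each activation with probability (1 - ratio), then rescale.
  arma::mat mask = arma::randu<arma::mat>(input.n_rows, input.n_cols);
  mask.transform([ratio](double v) { return (v > ratio) ? 1.0 : 0.0; });
  return input % mask * (1.0 / (1.0 - ratio));
}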
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); - outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); - - location = input.submat(0, 1, 1, 1); - - if (!deterministic) - { - locationParameter.push_back(location); - } - - inputDepth = inputTemp.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, - inputTemp.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, - padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, - inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - outputTemp.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); - } - else - { - ReSampling(poolingInput, outputTemp.slice(j)); - } - } - } - } - } - - for (size_t i = 0; i < outputTemp.n_slices; ++i) - { - outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of the glimpse layer. @@ -209,90 +121,7 @@ class Glimpse template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(outputWidth, - outputHeight, 1); - - location = locationParameter.back(); - locationParameter.pop_back(); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - mappedError.slice(s + i) = arma::Mat(gy.memptr(), - outputWidth, outputHeight); - } - } - - gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, - inputTemp.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + - padSize * 2, inputTemp.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputTemp.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - gTemp += inputPadded.tube(padSize, padSize, padSize + - inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); - } - } - - Transform(gTemp); - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -341,6 +170,12 @@ class Glimpse //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: /* * Transform the given input by changing rows to columns. @@ -586,7 +421,10 @@ class Glimpse bool deterministic; }; // class GlimpseLayer -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "glimpse_impl.hpp" #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/glimpse_impl.hpp b/src/mlpack/methods/ann/layer/glimpse_impl.hpp new file mode 100644 index 00000000000..40ec37c932d --- /dev/null +++ b/src/mlpack/methods/ann/layer/glimpse_impl.hpp @@ -0,0 +1,224 @@ +/** + * @file glimpse_impl.hpp + * @author Marcus Edel + * + * Implementation of the GlimpseLayer class, which takes an input image and a + * location to extract a retina-like representation of the input image at + * different increasing scales. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_IMPL_HPP + +// In case it hasn't yet been included. 
+#include "glimpse.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Glimpse::Glimpse( + const size_t inSize, + const size_t size, + const size_t depth, + const size_t scale, + const size_t inputWidth, + const size_t inputHeight) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) +{ + // Nothing to do here. +} + +template +template +void Glimpse::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; +} + +template +template +void Glimpse::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void Glimpse::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(size, "size"); + ar & data::CreateNVP(depth, "depth"); + ar & data::CreateNVP(scale, "scale"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(location, "location"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index 76b19f964af..88c8ad2d853 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -57,11 +57,7 @@ class HardTanH * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanH(const double maxValue = 1, const double minValue = -1) : - maxValue(maxValue), minValue(minValue) - { - // Nothing to do here. - } + HardTanH(const double maxValue = 1, const double minValue = -1); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -71,15 +67,7 @@ class HardTanH * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - output = input; - for (size_t i = 0; i < input.n_elem; i++) - { - output(i) = (output(i) > maxValue ? 
maxValue : - (output(i) < minValue ? minValue : output(i))); - } - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -93,17 +81,7 @@ class HardTanH template void Backward(const DataType&& input, DataType&& gy, - DataType&& g) - { - g = gy; - for (size_t i = 0; i < input.n_elem; i++) - { - if (input(i) < minValue || input(i) > maxValue) - { - g(i) = 0; - } - } - } + DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -134,69 +112,9 @@ class HardTanH * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(maxValue, "maxValue"); - ar & data::CreateNVP(minValue, "minValue"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: - /** - * Computes the HardTanH function. - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - if (x > maxValue) - return maxValue; - else if (x < minValue) - return minValue; - return x; - } - - /** - * Computes the HardTanH function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = x; - y.transform( [&](eT val) { return std::min( - std::max( val, minValue ), maxValue ); } ); - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x > maxValue || x < minValue) ? 0 : 1; - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - template - void Deriv(const InputType&& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - y(i) = Deriv(x(i)); - } - //! Locally-stored delta object. OutputDataType delta; @@ -216,4 +134,7 @@ class HardTanH } // namespace ann } // namespace mlpack +// Include implementation. +#include "hard_tanh_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp new file mode 100644 index 00000000000..55b92812072 --- /dev/null +++ b/src/mlpack/methods/ann/layer/hard_tanh_impl.hpp @@ -0,0 +1,72 @@ +/** + * @file hard_tanh_impl.hpp + * @author Dhawal Arora + * + * Implementation and implementation of the HardTanH layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_IMPL_HPP + +// In case it hasn't yet been included. +#include "hard_tanh.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +HardTanH::HardTanH( + const double maxValue, + const double minValue) : + maxValue(maxValue), + minValue(minValue) +{ + // Nothing to do here. +} + +template +template +void HardTanH::Forward( + const InputType&& input, OutputType&& output) +{ + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? 
minValue : output(i))); + } +} + +template +template +void HardTanH::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + g = gy; + for (size_t i = 0; i < input.n_elem; i++) + { + if (input(i) < minValue || input(i) > maxValue) + { + g(i) = 0; + } + } +} + +template +template +void HardTanH::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(maxValue, "maxValue"); + ar & data::CreateNVP(minValue, "minValue"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp index 2933ef181ef..bda31d787ab 100644 --- a/src/mlpack/methods/ann/layer/join.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -34,10 +34,7 @@ class Join { public: //! Create the Join object. - Join() - { - // Nothing to do here. - } + Join(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -47,12 +44,7 @@ class Join * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - inSizeRows = input.n_rows; - inSizeCols = input.n_cols; - output = arma::vectorise(input); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -66,10 +58,7 @@ class Join template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -90,11 +79,7 @@ class Join * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSizeRows, "inSizeRows"); - ar & data::CreateNVP(inSizeCols, "inSizeCols"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input rows. @@ -116,4 +101,7 @@ class Join } // namespace ann } // namespace mlpack +// Include implementation. +#include "join_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/join_impl.hpp b/src/mlpack/methods/ann/layer/join_impl.hpp new file mode 100644 index 00000000000..47584324871 --- /dev/null +++ b/src/mlpack/methods/ann/layer/join_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file join_impl.hpp + * @author Marcus Edel + * + * Implementation of the Join module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_JOIN_IMPL_HPP + +// In case it hasn't yet been included. +#include "join.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Join::Join() +{ + // Nothing to do here. 
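HardTanH is an element-wise clamp to [minValue, maxValue]; its derivative is 1 inside the linear region and 0 outside, which is exactly what the Backward() above zeroes out. An equivalent one-liner using arma::clamp, shown only for comparison and not from the patch:

#include <armadillo>

arma::mat HardTanHForward(const arma::mat& input,
                          const double minValue = -1.0,
                          const double maxValue = 1.0)
{
  return arma::clamp(input, minValue, maxValue);
}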
+} + +template +template +void Join::Forward( + const InputType&& input, OutputType&& output) +{ + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); +} + +template +template +void Join::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); +} + +template +template +void Join::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp index 8e69712b7f9..d8160f1a50b 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -2,8 +2,8 @@ * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLU layer first introduced - * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * Definition of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * * mlpack is free software; you may redistribute it and/or modify it under the @@ -51,10 +51,7 @@ class LeakyReLU * * @param alpha Non zero gradient */ - LeakyReLU(const double alpha = 0.03) : alpha(alpha) - { - // Nothing to do here. - } + LeakyReLU(const double alpha = 0.03); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,10 +61,7 @@ class LeakyReLU * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - Fn(input, output); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,12 +73,7 @@ class LeakyReLU * @param g The calculated gradient. */ template - void Backward(const DataType&& input, DataType&& gy, DataType&& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } + void Backward(const DataType&& input, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -110,10 +99,7 @@ class LeakyReLU * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(alpha, "alpha"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: /** @@ -185,4 +171,7 @@ class LeakyReLU } // namespace ann } // namespace mlpack +// Include implementation. +#include "leaky_relu_impl.hpp" + #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp b/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp new file mode 100644 index 00000000000..24bc13d9769 --- /dev/null +++ b/src/mlpack/methods/ann/layer/leaky_relu_impl.hpp @@ -0,0 +1,60 @@ +/** + * @file leaky_relu_impl.hpp + * @author Dhawal Arora + * + * Implementation of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. 
If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_IMPL_HPP + +// In case it hasn't yet been included. +#include "leaky_relu.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LeakyReLU::LeakyReLU( + const double alpha) : alpha(alpha) +{ + // Nothing to do here. +} + +template +template +void LeakyReLU::Forward( + const InputType&& input, OutputType&& output) +{ + Fn(input, output); +} + +template +template +void LeakyReLU::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; +} + +template +template +void LeakyReLU::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(alpha, "alpha"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear.hpp b/src/mlpack/methods/ann/layer/linear.hpp index d7f8e6f9982..0f6be78b647 100644 --- a/src/mlpack/methods/ann/layer/linear.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -37,7 +37,7 @@ class Linear { public: //! Create the Linear object. - Linear() {} + Linear(); /** * Create the Linear layer object using the specified number of units. @@ -45,22 +45,12 @@ class Linear * @param inSize The number of input units. * @param outSize The number of output units. */ - Linear(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize + outSize, 1); - } + Linear(const size_t inSize, const size_t outSize);; /* * Reset the layer parameter. */ - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -70,10 +60,7 @@ class Linear * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = (weight * input) + bias; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -85,12 +72,9 @@ class Linear * @param g The calculated gradient. */ template - void Backward(const arma::Mat&& /* unused */, + void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = weight.t() * gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -102,12 +86,7 @@ class Linear template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -138,12 +117,7 @@ class Linear * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored number of input units. @@ -177,4 +151,7 @@ class Linear } // namespace ann } // namespace mlpack +// Include implementation. 
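LeakyReLU computes f(x) = x for x > 0 and alpha * x otherwise, so a small gradient still flows through negative inputs; Backward() multiplies the incoming error by that piecewise derivative (1 or alpha). A compact reference version, illustrative only and not from the patch:

#include <armadillo>

arma::mat LeakyReLUForward(arma::mat input, const double alpha = 0.03)
{
  // f(x) = x if x > 0, alpha * x otherwise.
  input.transform([alpha](double v) { return (v > 0.0) ? v : alpha * v; });
  return input;
}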
+#include "linear_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_impl.hpp b/src/mlpack/methods/ann/layer/linear_impl.hpp new file mode 100644 index 00000000000..8b726e5d255 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_impl.hpp @@ -0,0 +1,87 @@ +/** + * @file linear_impl.hpp + * @author Marcus Edel + * + * Implementation of the Linear layer class also known as fully-connected layer + * or affine transformation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Linear::Linear() +{ + // Nothing to do here. +} + +template +Linear::Linear( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize + outSize, 1); +} + +template +void Linear::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); +} + +template +template +void Linear::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = (weight * input) + bias; +} + +template +template +void Linear::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void Linear::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; +} + +template +template +void Linear::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp index 92064727822..972f72db8ec 100644 --- a/src/mlpack/methods/ann/layer/linear_no_bias.hpp +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -37,27 +37,19 @@ class LinearNoBias { public: //! Create the LinearNoBias object. - LinearNoBias() {} + LinearNoBias(); /** * Create the LinearNoBias object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ - LinearNoBias(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize, 1); - } + LinearNoBias(const size_t inSize, const size_t outSize); /* * Reset the layer parameter. */ - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - } + void Reset(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -67,10 +59,7 @@ class LinearNoBias * @param output Resulting output activation. 
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weight * input; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -84,10 +73,7 @@ class LinearNoBias template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - g = weight.t() * gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -99,11 +85,7 @@ class LinearNoBias template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -134,12 +116,7 @@ class LinearNoBias * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -171,4 +148,7 @@ class LinearNoBias } // namespace ann } // namespace mlpack +// Include implementation. +#include "linear_no_bias_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp new file mode 100644 index 00000000000..15f96cdb537 --- /dev/null +++ b/src/mlpack/methods/ann/layer/linear_no_bias_impl.hpp @@ -0,0 +1,83 @@ +/** + * @file linear_no_bias_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearNoBias class also known as fully-connected layer + * or affine transformation without the bias term. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LINEAR_NO_BIAS_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear_no_bias.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LinearNoBias::LinearNoBias() +{ + // Nothing to do here. 
+} + +template +LinearNoBias::LinearNoBias( + const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize * inSize, 1); +} + +template +void LinearNoBias::Reset() +{ + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); +} + +template +template +void LinearNoBias::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weight * input; +} + +template +template +void LinearNoBias::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + g = weight.t() * gy; +} + +template +template +void LinearNoBias::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); +} + +template +template +void LinearNoBias::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp index 95a79c97b91..df9872d6c8a 100644 --- a/src/mlpack/methods/ann/layer/log_softmax.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -39,7 +39,7 @@ class LogSoftMax /** * Create the LogSoftmax object. */ - LogSoftMax() { /* Nothing to do here. */ } + LogSoftMax(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -49,38 +49,7 @@ class LogSoftMax * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -94,10 +63,7 @@ class LogSoftMax template void Backward(const arma::Mat&& input, arma::Mat&& gy, - arma::Mat&& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -114,6 +80,12 @@ class LogSoftMax //! Modify the delta. InputDataType& Delta() { return delta; } + /** + * Serialize the layer. + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -125,7 +97,10 @@ class LogSoftMax OutputDataType outputParameter; }; // class LogSoftmax -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. 
+#include "log_softmax_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax_impl.hpp b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp new file mode 100644 index 00000000000..68fba8d8459 --- /dev/null +++ b/src/mlpack/methods/ann/layer/log_softmax_impl.hpp @@ -0,0 +1,85 @@ +/** + * @file log_softmax_impl.hpp + * @author Marcus Edel + * + * Implementation of the LogSoftmax class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_IMPL_HPP + +// In case it hasn't yet been included. +#include "log_softmax.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LogSoftMax::LogSoftMax() +{ + // Nothing to do here. +} + +template +template +void LogSoftMax::Forward( + const InputType&& input, OutputType&& output) +{ + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. + static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); +} + +template +template +void LogSoftMax::Backward( + const arma::Mat&& input, + arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy - arma::exp(input) * arma::accu(gy); +} + +template +template +void LogSoftMax::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp index 0f9f0b4978d..080d4b5746e 100644 --- a/src/mlpack/methods/ann/layer/lookup.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -42,12 +42,7 @@ class Lookup * @param inSize The number of input units. * @param outSize The number of output units. */ - Lookup(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } + Lookup(const size_t inSize, const size_t outSize); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -57,10 +52,7 @@ class Lookup * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weights.cols(arma::conv_to::from(input) - 1); - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -74,10 +66,7 @@ class Lookup template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, - arma::Mat&& g) - { - g = gy; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -89,11 +78,7 @@ class Lookup template void Gradient(const arma::Mat&& input, arma::Mat&& error, - arma::Mat&& gradient) - { - gradient = arma::zeros >(weights.n_rows, weights.n_cols); - gradient.cols(arma::conv_to::from(input) - 1) = error; - } + arma::Mat&& gradient); //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -124,12 +109,7 @@ class Lookup * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -158,4 +138,7 @@ class Lookup } // namespace ann } // namespace mlpack +// Include implementation. +#include "lookup_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lookup_impl.hpp b/src/mlpack/methods/ann/layer/lookup_impl.hpp new file mode 100644 index 00000000000..cbe24468ed3 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lookup_impl.hpp @@ -0,0 +1,74 @@ +/** + * @file lookup_impl.hpp + * @author Marcus Edel + * + * Implementation of the Lookup class a particular convolution, where the width + * of the convolution is 1. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LOOKUP_IMPL_HPP + +// In case it hasn't yet been included. +#include "lookup.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Lookup::Lookup( + const size_t inSize, + const size_t outSize) : + inSize(inSize), + outSize(outSize) +{ + weights.set_size(outSize, inSize); +} + +template +template +void Lookup::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + output = weights.cols(arma::conv_to::from(input) - 1); +} + +template +template +void Lookup::Backward( + const arma::Mat&& /* input */, + const arma::Mat&& gy, + arma::Mat&& g) +{ + g = gy; +} + +template +template +void Lookup::Gradient( + const arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& gradient) +{ + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; +} + +template +template +void Lookup::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp index 6cd9dc630fb..cf70e344d94 100644 --- a/src/mlpack/methods/ann/layer/lstm.hpp +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -44,7 +44,7 @@ class LSTM { public: //! Create the LSTM object. - LSTM() { /* Nothing to do here */ } + LSTM(); /** * Create the LSTM layer object using the specified parameters. @@ -53,42 +53,7 @@ class LSTM * @param outSize The number of output units. * @param rho Maximum number of steps to backpropagate through time (BPTT). 
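Lookup is an embedding table: Forward() selects columns of the (outSize x inSize) weight matrix using 1-based indices taken from the input, and Gradient() scatters the error back into exactly those columns. A sketch of the selection step, illustrative only and not from the patch; it assumes every index is at least 1:

#include <armadillo>

arma::mat EmbeddingLookup(const arma::mat& weights, const arma::mat& indices)
{
  // Convert the 1-based indices to 0-based unsigned column positions.
  const arma::uvec cols =
      arma::conv_to<arma::uvec>::from(arma::vectorise(indices)) - 1;
  return weights.cols(cols);
}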
*/ - LSTM(const size_t inSize, const size_t outSize, const size_t rho) : - inSize(inSize), - outSize(outSize), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - { - input2GateModule = new Linear<>(inSize, 4 * outSize); - output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); - - network.push_back(input2GateModule); - network.push_back(output2GateModule); - - inputGateModule = new SigmoidLayer<>(); - hiddenStateModule = new TanHLayer<>(); - forgetGateModule = new SigmoidLayer<>(); - outputGateModule = new SigmoidLayer<>(); - - network.push_back(inputGateModule); - network.push_back(hiddenStateModule); - network.push_back(forgetGateModule); - network.push_back(outputGateModule); - - cellModule = new IdentityLayer<>(); - cellActivationModule = new TanHLayer<>(); - - network.push_back(cellModule); - network.push_back(cellActivationModule); - - prevOutput = arma::zeros(outSize, 1); - prevCell = arma::zeros(outSize, 1); - prevError = arma::zeros(4 * outSize, 1); - cellActivationError = arma::zeros(outSize, 1); - } + LSTM(const size_t inSize, const size_t outSize, const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -98,112 +63,7 @@ class LSTM * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - cellParameter.push_back(prevCell); - outParameter.push_back(prevOutput); - } - - arma::mat output1; - arma::mat output2; - arma::mat output3; - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(prevOutput), - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)) - ), - output2GateModule); - - output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + - boost::apply_visitor(outputParameterVisitor, output2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(0, 0, 1 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)) - ), - outputGateModule); - - arma::mat cell = prevCell; - - // Input gate * hidden state. - arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % boost::apply_visitor(outputParameterVisitor, - hiddenStateModule); - - // Forget gate * cell. 
- arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cell; - - arma::mat nextCell = cmul1 + cmul2; - - boost::apply_visitor( - ForwardVisitor( - std::move(nextCell), - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) - ), - cellModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)) - ), - cellActivationModule); - - output = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % boost::apply_visitor(outputParameterVisitor, - outputGateModule); - - prevCell = nextCell; - prevOutput = output; - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - prevOutput.zeros(); - prevCell.zeros(); - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -217,121 +77,7 @@ class LSTM template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (backwardStep > 0) - { - gy += boost::apply_visitor(deltaVisitor, output2GateModule); - } - - arma::mat g1 = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % gy; - - arma::mat g2 = boost::apply_visitor(outputParameterVisitor, - outputGateModule) % gy; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)), - std::move(g2), - std::move(boost::apply_visitor(deltaVisitor, - cellActivationModule)) - ), - cellActivationModule); - - cellActivationError = boost::apply_visitor(deltaVisitor, - cellActivationModule); - - if (backwardStep > 0) - { - cellActivationError += forgetGateError; - } - - arma::mat g4 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % cellActivationError; - - arma::mat g5 = boost::apply_visitor(outputParameterVisitor, - hiddenStateModule) % cellActivationError; - - forgetGateError = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cellActivationError; - - arma::mat g7 = cellParameter[cellParameter.size() - - backwardStep - 1] % cellActivationError; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)), - std::move(g5), - std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)), - std::move(g4), - std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)), - std::move(g7), - std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)), - std::move(g1), - std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) - ), - outputGateModule); - - prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, inputGateModule); - prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, hiddenStateModule); - prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, forgetGateModule); - prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( - 
deltaVisitor, outputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) - ), - output2GateModule); - - backwardStep++; - if (backwardStep == rho) - { - backwardStep = 0; - cellParameter.clear(); - } - - g = boost::apply_visitor(deltaVisitor, input2GateModule); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -343,29 +89,7 @@ class LSTM template void Gradient(arma::Mat&& input, arma::Mat&& /* error */, - arma::Mat&& /* gradient */) - { - boost::apply_visitor( - GradientVisitor( - std::move(input), - std::move(prevError) - ), - input2GateModule); - - boost::apply_visitor( - GradientVisitor( - std::move(outParameter[outParameter.size() - gradientStep - 1]), - std::move(prevError) - ), - output2GateModule); - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - outParameter.clear(); - } - } + arma::Mat&& /* gradient */); //! The value of the deterministic parameter. bool Deterministic() const { return deterministic; } @@ -409,13 +133,7 @@ class LSTM * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(rho, "rho"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -513,4 +231,7 @@ class LSTM } // namespace ann } // namespace mlpack +// Include implementation. +#include "lstm_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/lstm_impl.hpp b/src/mlpack/methods/ann/layer/lstm_impl.hpp new file mode 100644 index 00000000000..6ebe89b2907 --- /dev/null +++ b/src/mlpack/methods/ann/layer/lstm_impl.hpp @@ -0,0 +1,273 @@ +/** + * @file lstm_impl.hpp + * @author Marcus Edel + * + * Implementation of the LSTM class, which implements a lstm network + * layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_LSTM_IMPL_HPP + +// In case it hasn't yet been included. +#include "linear.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +LSTM::LSTM() +{ + // Nothing to do here. 
+} + +template +LSTM::LSTM( + const size_t inSize, + const size_t outSize, + const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); +} + +template +template +void LSTM::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, input2GateModule))), + input2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(prevOutput), std::move( + boost::apply_visitor(outputParameterVisitor, output2GateModule))), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 0, 0, 1 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule))), inputGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 1 * outSize, 0, 2 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule))), hiddenStateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 2 * outSize, 0, 3 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule))), forgetGateModule); + + boost::apply_visitor(ForwardVisitor(std::move(output.submat( + 3 * outSize, 0, 4 * outSize - 1, 0)), std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule))), outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. 
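+  // Together with cmul1 above, this is the standard LSTM cell update:
+  // nextCell = inputGate % hiddenState + forgetGate % prevCell; further down
+  // the layer output becomes outputGate % tanh(nextCell).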
+ arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor(ForwardVisitor(std::move(nextCell), std::move( + boost::apply_visitor(outputParameterVisitor, cellModule))), cellModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellModule)), std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule))), cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } +} + +template +template +void LSTM::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, cellActivationModule)), std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, cellActivationModule))), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputGateModule)), std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule))), + inputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, hiddenStateModule)), std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule))), + hiddenStateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, forgetGateModule)), std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule))), + forgetGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, outputGateModule)), std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule))), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, input2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, 
input2GateModule))), + input2GateModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, output2GateModule)), std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule))), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); +} + +template +template +void LSTM::Gradient( + arma::Mat&& input, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(prevError)), + input2GateModule); + + boost::apply_visitor(GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError)), output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } +} + +template +template +void LSTM::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp index e93077ebbad..6bf179b7a5a 100644 --- a/src/mlpack/methods/ann/layer/max_pooling.hpp +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -53,11 +53,8 @@ template < class MaxPooling { public: - //! Create the PoolingLayer object. - MaxPooling() - { - /* Nothing to do here */ - } + //! Create the MaxPooling object. + MaxPooling(); /** * Create the MaxPooling object using the specified number of units. @@ -69,25 +66,10 @@ class MaxPooling * @param floor Rounding operator (floor or ceil). */ MaxPooling(const size_t kW, - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - reset(false), - floor(floor), - offset(0), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - deterministic(false) - { - /* Nothing to do here. */ - } + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -97,63 +79,7 @@ class MaxPooling * @param output Resulting output activation. 
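 *
 * The input is interpreted as a flattened cube of inputWidth x inputHeight x
 * slices; each slice is pooled independently, and the pooled width is
 * floor((inputWidth - kW) / dW + 1) (ceil instead of floor when the floor
 * flag is false), with the pooled height computed analogously.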
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - const size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - if (!deterministic) - { - poolingIndices.push_back(outputTemp); - } - - if (!reset) - { - size_t elements = inputWidth * inputHeight; - indicesCol = arma::linspace >(0, (elements - 1), - elements); - - indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); - - reset = true; - } - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - if (!deterministic) - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - poolingIndices.back().slice(s)); - } - else - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - inputTemp.slice(s)); - } - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -167,24 +93,7 @@ class MaxPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(mappedError.slice(s), gTemp.slice(s), - poolingIndices.back().slice(s)); - } - - poolingIndices.pop_back(); - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -230,13 +139,7 @@ class MaxPooling * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -368,8 +271,10 @@ class MaxPooling std::vector poolingIndices; }; // class MaxPooling - } // namespace ann } // namespace mlpack -#endif \ No newline at end of file +// Include implementation. +#include "max_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/max_pooling_impl.hpp b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp new file mode 100644 index 00000000000..95aeea86049 --- /dev/null +++ b/src/mlpack/methods/ann/layer/max_pooling_impl.hpp @@ -0,0 +1,149 @@ +/** + * @file max_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MaxPooling class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MAX_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "max_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MaxPooling::MaxPooling() +{ + // Nothing to do here. +} + +template +MaxPooling::MaxPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) +{ + // Nothing to do here. +} + +template +template +void MaxPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MaxPooling::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MaxPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp index e0c097f6ddc..70e061c7e2b 100644 --- a/src/mlpack/methods/ann/layer/mean_pooling.hpp +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -34,10 +34,7 @@ class MeanPooling { public: //! Create the MeanPooling object. - MeanPooling() - { - /* Nothing to do here */ - } + MeanPooling(); /** * Create the MeanPooling object using the specified number of units. @@ -48,26 +45,10 @@ class MeanPooling * @param dH Width of the stride operation. 
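 * @param floor Rounding operator (floor or ceil).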
*/ MeanPooling(const size_t kW, - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - reset(false), - floor(floor), - deterministic(false), - offset(0) - - { - /* Nothing to do here. */ - } + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -77,41 +58,7 @@ class MeanPooling * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - - Pooling(inputTemp.slice(s), outputTemp.slice(s)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -125,21 +72,7 @@ class MeanPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } + arma::Mat&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -185,13 +118,7 @@ class MeanPooling * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: @@ -319,4 +246,7 @@ class MeanPooling } // namespace ann } // namespace mlpack -#endif \ No newline at end of file +// Include implementation. +#include "mean_pooling_impl.hpp" + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp new file mode 100644 index 00000000000..5008763752b --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_pooling_impl.hpp @@ -0,0 +1,126 @@ +/** + * @file mean_pooling_impl.hpp + * @author Marcus Edel + * @author Nilay Jain + * + * Implementation of the MeanPooling layer class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_pooling.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanPooling::MeanPooling() +{ + // Nothing to do here. +} + +template +MeanPooling::MeanPooling( + const size_t kW, + const size_t kH, + const size_t dW, + const size_t dH, + const bool floor) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) +{ + // Nothing to do here. +} + +template +template +void MeanPooling::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; +} + +template +template +void MeanPooling::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); +} + +template +template +void MeanPooling::Serialize( + Archive& ar, + const unsigned int /* version */) +{ + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp index 6abdc15c21c..280b6e79b11 100644 --- a/src/mlpack/methods/ann/layer/mean_squared_error.hpp +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -2,7 +2,7 @@ * @file mean_squared_error.hpp * @author Marcus Edel * - * Definition and implementation of the mean squared error performance function. + * Definition of the mean squared error performance function. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -37,7 +37,7 @@ class MeanSquaredError /** * Create the MeanSquaredError object. */ - MeanSquaredError() { /* Nothing to do here. */ } + MeanSquaredError(); /* * Computes the mean squared error function. @@ -46,11 +46,7 @@ class MeanSquaredError * @param output Resulting output activation. 
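 *
 * For example, with input (1, 3) and target (0, 3) the returned error is
 * mean((1 - 0)^2, (3 - 3)^2) = 0.5.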
*/ template - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - return arma::mean(arma::mean(arma::square(input - target))); - } - + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. * @@ -61,10 +57,7 @@ class MeanSquaredError template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = (input - target); - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -81,6 +74,12 @@ class MeanSquaredError //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -92,7 +91,10 @@ class MeanSquaredError OutputDataType outputParameter; }; // class MeanSquaredError -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "mean_squared_error_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp new file mode 100644 index 00000000000..037a9445295 --- /dev/null +++ b/src/mlpack/methods/ann/layer/mean_squared_error_impl.hpp @@ -0,0 +1,57 @@ +/** + * @file mean_squared_error_impl.hpp + * @author Marcus Edel + * + * Implementation of the mean squared error performance function. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MEAN_SQUARED_ERROR_IMPL_HPP + +// In case it hasn't yet been included. +#include "mean_squared_error.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MeanSquaredError::MeanSquaredError() +{ + // Nothing to do here. +} + +template +template +double MeanSquaredError::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + return arma::mean(arma::mean(arma::square(input - target))); +} + +template +template +void MeanSquaredError::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = (input - target); +} + +template +template +void MeanSquaredError::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp index 2caa8a37a35..b2985b27b83 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -32,10 +32,7 @@ class MultiplyConstant /** * Create the MultiplyConstant object. */ - MultiplyConstant(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } + MultiplyConstant(const double scalar); /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -45,10 +42,7 @@ class MultiplyConstant * @param output Resulting output activation. 
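 *
 * For example, with scalar = 2 an input of (1, -3) is mapped to (2, -6); the
 * backward pass scales the incoming gradient by the same constant.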
*/ template - void Forward(const InputType&& input, OutputType&& output) - { - output = input * scalar; - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network. The backward pass @@ -59,10 +53,7 @@ class MultiplyConstant * @param g The calculated gradient. */ template - void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) - { - g = gy * scalar; - } + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -83,10 +74,7 @@ class MultiplyConstant * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored constant scalar value. @@ -102,7 +90,10 @@ class MultiplyConstant OutputDataType outputParameter; }; // class MultiplyConstant -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "multiply_constant_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp new file mode 100644 index 00000000000..07dc4799779 --- /dev/null +++ b/src/mlpack/methods/ann/layer/multiply_constant_impl.hpp @@ -0,0 +1,51 @@ +/** + * @file multiply_constant_impl.hpp + * @author Marcus Edel + * + * Implementation of the MultiplyConstantLayer class, which multiplies the + * input by a (non-learnable) constant. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_IMPL_HPP + +// In case it hasn't yet been included. +#include "multiply_constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +MultiplyConstant::MultiplyConstant( + const double scalar) : scalar(scalar) +{ + // Nothing to do here. +} + +template +template +void MultiplyConstant::Forward( + const InputType&& input, OutputType&& output) +{ + output = input * scalar; +} + +template +template +void MultiplyConstant::Backward( + const DataType&& /* input */, DataType&& gy, DataType&& g) +{ + g = gy * scalar; +} + +template +template +void MultiplyConstant::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(scalar, "scalar"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 0de8cb7cd5b..84c6a2d7904 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -38,7 +38,7 @@ class NegativeLogLikelihood /** * Create the NegativeLogLikelihoodLayer object. */ - NegativeLogLikelihood() { /* Nothing to do here. */ } + NegativeLogLikelihood(); /* * Computes the Negative log likelihood. @@ -47,21 +47,7 @@ class NegativeLogLikelihood * @param output Resulting output activation. 
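 *
 * The target holds 1-based class indices; the returned loss is the sum of
 * -input(target(i) - 1, i) over the columns of the input, so the input is
 * typically a matrix of log-probabilities with one column per data point.
 * For a single column (-0.1; -2.3) with target 1 the loss is 0.1.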
*/ template - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - return output; - } + double Forward(const arma::Mat&& input, arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log @@ -77,18 +63,7 @@ class NegativeLogLikelihood template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -105,6 +80,12 @@ class NegativeLogLikelihood //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored delta object. OutputDataType delta; @@ -116,7 +97,10 @@ class NegativeLogLikelihood OutputDataType outputParameter; }; // class NegativeLogLikelihood -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "negative_log_likelihood_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp new file mode 100644 index 00000000000..2ec7799efe0 --- /dev/null +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp @@ -0,0 +1,76 @@ +/** + * @file negative_log_likelihood_impl.hpp + * @author Marcus Edel + * + * Implementation of the NegativeLogLikelihood class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_IMPL_HPP + +// In case it hasn't yet been included. +#include "negative_log_likelihood.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +NegativeLogLikelihood::NegativeLogLikelihood() +{ + // Nothing to do here. 
+} + +template +template +double NegativeLogLikelihood::Forward( + const arma::Mat&& input, arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + return output; +} + +template +template +void NegativeLogLikelihood::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } +} + +template +template +void NegativeLogLikelihood::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp index 5870071faef..297127a6ae8 100644 --- a/src/mlpack/methods/ann/layer/recurrent.hpp +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -56,41 +56,7 @@ class Recurrent const InputModuleType& input, const FeedbackModuleType& feedback, const TransferModuleType& transfer, - const size_t rho) : - startModule(new StartModuleType(start)), - inputModule(new InputModuleType(input)), - feedbackModule(new FeedbackModuleType(feedback)), - transferModule(new TransferModuleType(transfer)), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - - { - initialModule = new Sequential<>(); - mergeModule = new AddMerge<>(); - recurrentModule = new Sequential<>(false); - - boost::apply_visitor(AddVisitor(inputModule), initialModule); - boost::apply_visitor(AddVisitor(startModule), initialModule); - boost::apply_visitor(AddVisitor(transferModule), initialModule); - - boost::apply_visitor(weightSizeVisitor, startModule); - boost::apply_visitor(weightSizeVisitor, inputModule); - boost::apply_visitor(weightSizeVisitor, feedbackModule); - boost::apply_visitor(weightSizeVisitor, transferModule); - - boost::apply_visitor(AddVisitor(inputModule), mergeModule); - boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); - boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); - boost::apply_visitor(AddVisitor(transferModule), recurrentModule); - - network.push_back(initialModule); - network.push_back(mergeModule); - network.push_back(feedbackModule); - network.push_back(recurrentModule); - } + const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -100,48 +66,7 @@ class Recurrent * @param output Resulting output activation. 
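 *
 * On the first call of a sequence the input is passed through the initial
 * module (input, start and transfer modules); on later calls the input and
 * the fed-back output of the previous step are merged and passed through the
 * recurrent module. The transfer module's output is returned, and the step
 * counters reset after rho calls.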
*/ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - initialModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, inputModule))), - inputModule); - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, transferModule)), std::move( - boost::apply_visitor(outputParameterVisitor, feedbackModule))), - feedbackModule); - - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - recurrentModule); - } - - output = boost::apply_visitor(outputParameterVisitor, transferModule); - - // Save the feedback output parameter when training the module. - if (!deterministic) - { - feedbackOutputParameter.push_back(output); - } - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - backwardStep = 0; - - if (!recurrentError.is_empty()) - { - recurrentError.zeros(); - } - } - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -155,44 +80,7 @@ class Recurrent template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (!recurrentError.is_empty()) - { - recurrentError += gy; - } - else - { - recurrentError = gy; - } - - if (backwardStep < (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, recurrentModule)), std::move(recurrentError), - std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), - recurrentModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, inputModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), - inputModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, feedbackModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( - boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, initialModule)), std::move(recurrentError), - std::move(g)), initialModule); - } - - recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); - backwardStep++; - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. 
@@ -204,38 +92,7 @@ class Recurrent template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - if (gradientStep < (rho - 1)) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - recurrentModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); - - boost::apply_visitor(GradientVisitor(std::move( - feedbackOutputParameter[feedbackOutputParameter.size() - 2 - - gradientStep]), std::move(boost::apply_visitor(deltaVisitor, - mergeModule))), feedbackModule); - } - else - { - boost::apply_visitor(GradientZeroVisitor(), recurrentModule); - boost::apply_visitor(GradientZeroVisitor(), inputModule); - boost::apply_visitor(GradientZeroVisitor(), feedbackModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, startModule))), initialModule); - } - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - feedbackOutputParameter.clear(); - } - } + arma::Mat&& /* gradient */); //! Get the model modules. std::vector& Model() { return network; } @@ -274,10 +131,7 @@ class Recurrent * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Locally-stored start module. @@ -353,4 +207,7 @@ class Recurrent } // namespace ann } // namespace mlpack +// Include implementation. +#include "recurrent_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp index 1d1405d8863..ffb7320b232 100644 --- a/src/mlpack/methods/ann/layer/recurrent_attention.hpp +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -62,18 +62,7 @@ class RecurrentAttention RecurrentAttention(const size_t outSize, const RNNModuleType& rnn, const ActionModuleType& action, - const size_t rho) : - outSize(outSize), - rnnModule(new RNNModuleType(rnn)), - actionModule(new ActionModuleType(action)), - rho(rho), - forwardStep(0), - backwardStep(0), - deterministic(false) - { - network.push_back(rnnModule); - network.push_back(actionModule); - } + const size_t rho); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -83,57 +72,7 @@ class RecurrentAttention * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // Initialize the action input. - if (initialInput.is_empty()) - { - initialInput = arma::zeros(outSize, input.n_cols); - } - - // Propagate through the action and recurrent module. - for (forwardStep = 0; forwardStep < rho; ++forwardStep) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( - boost::apply_visitor(outputParameterVisitor, actionModule))), - actionModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( - outputParameterVisitor, actionModule))), actionModule); - } - - // Initialize the glimpse input. 
- arma::mat glimpseInput = arma::zeros(input.n_elem, 2); - glimpseInput.col(0) = input; - glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, - actionModule).n_elem - 1, 1) = boost::apply_visitor( - outputParameterVisitor, actionModule); - - boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), - std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), - rnnModule); - - // Save the output parameter when training the module. - if (!deterministic) - { - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(SaveOutputParameterVisitor( - std::move(moduleOutputParameter)), network[l]); - } - } - } - - output = boost::apply_visitor(outputParameterVisitor, rnnModule); - - forwardStep = 0; - backwardStep = 0; - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -147,81 +86,7 @@ class RecurrentAttention template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (intermediateGradient.is_empty() && backwardStep == 0) - { - // Initialize the attention gradients. - size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + - boost::apply_visitor(weightSizeVisitor, actionModule); - - intermediateGradient = arma::zeros(weights, 1); - attentionGradient = arma::zeros(weights, 1); - - // Initialize the action error. - actionError = arma::zeros( - boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, - boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); - } - - // Propagate the attention gradients. - if (backwardStep == 0) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), rnnModule); - boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), actionModule); - - attentionGradient.zeros(); - } - - // Back-propagate through time. - for (; backwardStep < rho; backwardStep++) - { - if (backwardStep == 0) - { - recurrentError = gy; - } - else - { - recurrentError = actionDelta; - } - - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(LoadOutputParameterVisitor( - std::move(moduleOutputParameter)), network[network.size() - 1 - l]); - } - - if (backwardStep == (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, actionModule)), std::move(actionError), - std::move(actionDelta)), actionModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(initialInput), - std::move(actionError), std::move(actionDelta)), actionModule); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(recurrentError), - std::move(rnnDelta)), rnnModule); - - if (backwardStep == 0) - { - g = rnnDelta.col(1); - } - else - { - g += rnnDelta.col(1); - } - - IntermediateGradient(); - } - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -233,14 +98,7 @@ class RecurrentAttention template void Gradient(arma::Mat&& /* input */, arma::Mat&& /* error */, - arma::Mat&& /* gradient */) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), rnnModule); - boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), actionModule); - } + arma::Mat&& /* gradient */); //! Get the model modules. 
std::vector& Model() { return network; } @@ -279,13 +137,7 @@ class RecurrentAttention * Serialize the layer */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(forwardStep, "forwardStep"); - ar & data::CreateNVP(backwardStep, "backwardStep"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: //! Calculate the gradient of the attention module. @@ -405,4 +257,7 @@ class RecurrentAttention } // namespace ann } // namespace mlpack +// Include implementation. +#include "recurrent_attention_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp new file mode 100644 index 00000000000..6642894fa88 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_attention_impl.hpp @@ -0,0 +1,204 @@ +/** + * @file recurrent_attention_impl.hpp + * @author Marcus Edel + * + * Implementation of the RecurrentAttention class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_ATTENTION_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent_attention.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +template +RecurrentAttention::RecurrentAttention( + const size_t outSize, + const RNNModuleType& rnn, + const ActionModuleType& action, + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) +{ + network.push_back(rnnModule); + network.push_back(actionModule); +} + +template +template +void RecurrentAttention::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. 
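+    // (Backward() reloads these stored parameters step by step so that the
+    // rnn and action modules can be unrolled over all rho glimpse steps.)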
+ if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; +} + +template +template +void RecurrentAttention::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. + if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. + for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } +} + +template +template +void RecurrentAttention::Gradient( + arma::Mat&& /* input */, + arma::Mat&& /* error */, + arma::Mat&& /* gradient */) +{ + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); +} + +template +template +void RecurrentAttention::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/recurrent_impl.hpp b/src/mlpack/methods/ann/layer/recurrent_impl.hpp new file mode 100644 index 00000000000..8f1525c4551 --- /dev/null +++ b/src/mlpack/methods/ann/layer/recurrent_impl.hpp @@ -0,0 +1,206 @@ +/** + * @file recurrent_impl.hpp + * @author Marcus Edel + * + * Implementation of the LinearLayer class also known as fully-connected layer + * or affine transformation. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_RECURRENT_IMPL_HPP + +// In case it hasn't yet been included. +#include "recurrent.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +template< + typename StartModuleType, + typename InputModuleType, + typename FeedbackModuleType, + typename TransferModuleType +> +Recurrent::Recurrent( + const StartModuleType& start, + const InputModuleType& input, + const FeedbackModuleType& feedback, + const TransferModuleType& transfer, + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) +{ + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); +} + +template +template +void Recurrent::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. 
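+  // (Gradient() later indexes into feedbackOutputParameter to unroll the
+  // feedback module over the stored time steps.)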
+ if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } +} + +template +template +void Recurrent::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; +} + +template +template +void Recurrent::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } +} + +template +template +void Recurrent::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(rho, "rho"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp index bc938d1a766..fd192f0a1f2 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -34,10 +34,7 @@ class ReinforceNormal * * @param stdev Standard deviation used during the forward and backward pass. */ - ReinforceNormal(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } + ReinforceNormal(const double stdev); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -47,22 +44,7 @@ class ReinforceNormal * @param output Resulting output activation. 
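 *
 * During training the output is a sample drawn around the input, i.e.
 * input + stdev * (standard normal noise), and the input is stored for the
 * backward pass; in deterministic mode the input (the mean) is returned
 * unchanged.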
*/ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - - moduleInputParameter.push_back(input); - } - else - { - // Use maximum a posteriori. - output = input; - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -74,17 +56,7 @@ class ReinforceNormal * @param g The calculated gradient. */ template - void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) - { - g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= reward; - g *= -1; - - moduleInputParameter.pop_back(); - } - + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -111,6 +83,12 @@ class ReinforceNormal //! Modify the value of the deterministic parameter. double& Reward() { return reward; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -134,7 +112,10 @@ class ReinforceNormal bool deterministic; }; // class ReinforceNormal -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "reinforce_normal_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp new file mode 100644 index 00000000000..1eaa25d8fae --- /dev/null +++ b/src/mlpack/methods/ann/layer/reinforce_normal_impl.hpp @@ -0,0 +1,69 @@ +/** + * @file reinforce_normal_impl.hpp + * @author Marcus Edel + * + * Implementation of the ReinforceNormalLayer class, which implements the + * REINFORCE algorithm for the normal distribution. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "reinforce_normal.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +ReinforceNormal::ReinforceNormal( + const double stdev) : stdev(stdev) +{ + // Nothing to do here. +} + +template +template +void ReinforceNormal::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } +} + +template +template +void ReinforceNormal::Backward( + const DataType&& input, DataType&& /* gy */, DataType&& g) +{ + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); +} + +template +template +void ReinforceNormal::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. 
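// Illustrative sketch (not the mlpack API): the forward and backward rules
// implemented above, written out with plain Armadillo matrices. Forward
// perturbs the input means with Gaussian noise; Backward returns
// -reward * (sample - mean) / stdev^2, matching ReinforceNormal::Backward().
#include <armadillo>
#include <cmath>

void ReinforceNormalSketch(const arma::mat& mean,  // layer input: the means
                           const double stdev,
                           const double reward,
                           arma::mat& sample,      // stochastic forward output
                           arma::mat& gradient)    // gradient w.r.t. the means
{
  // Forward (training mode): sample ~ N(mean, stdev^2), element-wise.
  sample = arma::randn<arma::mat>(mean.n_rows, mean.n_cols) * stdev + mean;

  // Backward: REINFORCE gradient of the Gaussian log-density w.r.t. the mean,
  // scaled by the reward and negated.
  gradient = -reward * (sample - mean) / std::pow(stdev, 2.0);
}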
+} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index d683830cc88..d3c42a008c9 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -2,7 +2,7 @@ * @file select.hpp * @author Marcus Edel * - * Definition and implementation of the Select module. + * Definition of the Select module. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -38,12 +38,7 @@ class Select * @param index The column which should be extracted from the given input. * @param elements The number of elements that should be used. */ - Select(const size_t index, const size_t elements = 0) : - index(index), - elements(elements) - { - /* Nothing to do here. */ - } + Select(const size_t index, const size_t elements = 0); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -53,17 +48,7 @@ class Select * @param output Resulting output activation. */ template - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (elements == 0) - { - output = input.col(index); - } - else - { - output = input.submat(0, index, elements - 1, index); - } - } + void Forward(const arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,17 +62,7 @@ class Select template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - if (elements == 0) - { - g = gy; - } - else - { - g = gy.submat(0, 0, elements - 1, 0); - } - } + arma::Mat&& g); //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -104,6 +79,12 @@ class Select //! Modify the delta. OutputDataType& Delta() { return delta; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& ar, const unsigned int /* version */); + private: //! Locally-stored column index. size_t index; @@ -121,7 +102,10 @@ class Select OutputDataType outputParameter; }; // class Select -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "select_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/select_impl.hpp b/src/mlpack/methods/ann/layer/select_impl.hpp new file mode 100644 index 00000000000..a40cb968461 --- /dev/null +++ b/src/mlpack/methods/ann/layer/select_impl.hpp @@ -0,0 +1,75 @@ +/** + * @file select_impl.hpp + * @author Marcus Edel + * + * Implementation of the Select module. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SELECT_IMPL_HPP + +// In case it hasn't yet been included. +#include "constant.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Select::Select( + const size_t index, + const size_t elements) : + index(index), + elements(elements) + { + // Nothing to do here. 
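// Illustrative sketch (not the mlpack API) of what the Select module computes:
// Forward() picks one column of the input, optionally truncated to the first
// `elements` rows, exactly as in the inline code removed above.
#include <armadillo>

arma::mat SelectForwardSketch(const arma::mat& input,
                              const size_t index,
                              const size_t elements)
{
  if (elements == 0)
    return input.col(index);                            // whole column

  return input.submat(0, index, elements - 1, index);   // first `elements` rows only
}

// For a 4x3 input with index = 1 and elements = 2, the result holds the first
// two entries of column 1.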
+ } + +template +template +void Select::Forward( + const arma::Mat&& input, arma::Mat&& output) +{ + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } +} + +template +template +void Select::Backward( + const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g) +{ + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } +} + +template +template +void Select::Serialize( + Archive& ar, const unsigned int /* version */) +{ + ar & data::CreateNVP(index, "index"); + ar & data::CreateNVP(elements, "elements"); +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp index 277b3342dc2..ca729c9da13 100644 --- a/src/mlpack/methods/ann/layer/sequential.hpp +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -47,22 +47,10 @@ class Sequential * * @param model Expose the all network modules. */ - Sequential(const bool model = true) : model(model), reset(false) - { - /* Nothing to do here. */ - } + Sequential(const bool model = true); //! Destroy the Sequential object. - ~Sequential() - { - if (!model) - { - for (LayerTypes& layer : network) - { - boost::apply_visitor(deleteVisitor, layer); - } - } - } + ~Sequential(); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -72,64 +60,7 @@ class Sequential * @param output Resulting output activation. */ template - void Forward(arma::Mat&& input, arma::Mat&& output) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network.front()))), - network.front()); - - if (!reset) - { - if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network.front()); - } - - if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network.front()); - } - } - - for (size_t i = 1; i < network.size(); ++i) - { - if (!reset) - { - // Set the input width. - boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); - - // Set the input height. - boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); - } - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (!reset) - { - // Get the output width. - if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network[i]); - } - - // Get the output height. 
- if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network[i]); - } - } - } - - if (!reset) - { - reset = true; - } - - output = boost::apply_visitor(outputParameterVisitor, network.back()); - } + void Forward(arma::Mat&& input, arma::Mat&& output); /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -143,24 +74,7 @@ class Sequential template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, - arma::Mat&& g) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network.back())), std::move(gy), - std::move(boost::apply_visitor(deltaVisitor, network.back()))), - network.back()); - - for (size_t i = 2; i < network.size() + 1; ++i) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[network.size() - i])), std::move( - boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), - std::move(boost::apply_visitor(deltaVisitor, - network[network.size() - i]))), network[network.size() - i]); - } - - g = boost::apply_visitor(deltaVisitor, network.front()); - } + arma::Mat&& g); /* * Calculate the gradient using the output delta and the input activation. @@ -172,18 +86,7 @@ class Sequential template void Gradient(arma::Mat&& input, arma::Mat&& error, - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - network.front()); - - for (size_t i = 1; i < network.size() - 1; ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); - } - } + arma::Mat&& /* gradient */); /* * Add a new module to the model. @@ -235,6 +138,12 @@ class Sequential //! Modify the gradient. arma::mat& Gradient() { return gradient; } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -285,8 +194,10 @@ class Sequential size_t height; }; // class Sequential - } // namespace ann } // namespace mlpack +// Include implementation. +#include "sequential_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/sequential_impl.hpp b/src/mlpack/methods/ann/layer/sequential_impl.hpp new file mode 100644 index 00000000000..df20a7a7668 --- /dev/null +++ b/src/mlpack/methods/ann/layer/sequential_impl.hpp @@ -0,0 +1,154 @@ +/** + * @file sequential_impl.hpp + * @author Marcus Edel + * + * Implementation of the Sequential class, which acts as a feed-forward fully + * connected network container. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_SEQUENTIAL_IMPL_HPP + +// In case it hasn't yet been included. +#include "sequential.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +Sequential::Sequential( + const bool model) : model(model), reset(false) +{ + // Nothing to do here. 
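// Illustrative sketch (not the mlpack API): the chaining Sequential::Forward()
// performs, with std::function standing in for the visitor machinery and the
// width/height bookkeeping omitted. Each module consumes the previous module's
// output, and the container's output is the last module's output.
#include <armadillo>
#include <functional>
#include <vector>

arma::mat SequentialForwardSketch(
    const arma::mat& input,
    const std::vector<std::function<arma::mat(const arma::mat&)>>& modules)
{
  arma::mat current = input;
  for (const auto& module : modules)
    current = module(current);  // output of module i feeds module i + 1
  return current;
}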
+} + +template +Sequential::~Sequential() +{ + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } +} + +template +template +void Sequential::Forward( + arma::Mat&& input, arma::Mat&& output) +{ + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + +if (!reset) +{ + reset = true; +} + + output = boost::apply_visitor(outputParameterVisitor, network.back()); +} + +template +template +void Sequential::Backward( + const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g) +{ + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); +} + +template +template +void Sequential::Gradient( + arma::Mat&& input, + arma::Mat&& error, + arma::Mat&& /* gradient */) +{ + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } +} + +template +template +void Sequential::Serialize( + Archive& /* ar */, const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp index d2802dacabc..f820e351aa8 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -40,12 +40,7 @@ class VRClassReward * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. 
*/ - VRClassReward(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } + VRClassReward(const double scale = 1, const bool sizeAverage = true); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -56,35 +51,7 @@ class VRClassReward * between 1 and the number of classes. */ template - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols - 1; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols - 1; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return output - reward / (input.n_cols - 1); - } - - return output - reward; - } + double Forward(const arma::Mat&& input, const arma::Mat&& target); /** * Ordinary feed backward pass of a neural network. The negative log @@ -100,29 +67,7 @@ class VRClassReward template void Backward(const arma::Mat&& input, const arma::Mat&& target, - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < (input.n_cols - 1); ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - - double vrReward = reward - input(0, 1); - if (sizeAverage) - { - vrReward /= input.n_cols - 1; - } - - const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; - - output(0, 1) = norm * (input(0, 1) - reward); - boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); - } + arma::Mat&& output); //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -159,6 +104,12 @@ class VRClassReward */ void Add(LayerTypes layer) { network.push_back(layer); } + /** + * Serialize the layer + */ + template + void Serialize(Archive& /* ar */, const unsigned int /* version */); + private: //! Locally-stored value to scale the reward. const double scale; @@ -185,7 +136,10 @@ class VRClassReward std::vector network; }; // class VRClassReward -}; // namespace ann -}; // namespace mlpack +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "vr_class_reward_impl.hpp" #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp new file mode 100644 index 00000000000..a4a2703545b --- /dev/null +++ b/src/mlpack/methods/ann/layer/vr_class_reward_impl.hpp @@ -0,0 +1,101 @@ +/** + * @file vr_class_reward_impl.hpp + * @author Marcus Edel + * + * Implementation of the VRClassReward class, which implements the variance + * reduced classification reinforcement layer. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_IMPL_HPP + +// In case it hasn't yet been included. +#include "vr_class_reward.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +VRClassReward::VRClassReward( + const double scale, + const bool sizeAverage) : + scale(scale), + sizeAverage(sizeAverage) +{ + // Nothing to do here. 
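// Illustrative sketch (not the mlpack API): the reward bookkeeping used by the
// layer above for a single prediction column. The classification reward is
// scale * [argmax == target]; the variance-reduced reward handed to the
// stochastic modules is reward - baseline; and the baseline entry is trained
// towards the reward with a squared-error term, matching Backward() above.
// Reading input(0, 1) as the baseline prediction is an interpretation of the
// removed inline expressions.
#include <armadillo>

void VRClassRewardSketch(const arma::vec& classScores, // one column of the input
                         const size_t target,          // 1-based class label
                         const double baseline,        // predicted reward
                         const double scale,
                         double& vrReward,
                         double& baselineGradient)
{
  arma::uword predicted = 0;
  classScores.max(predicted);  // index of the most likely class

  const double reward = ((predicted + 1) == target) * scale;

  vrReward = reward - baseline;                  // passed to the REINFORCE modules
  baselineGradient = 2.0 * (baseline - reward);  // d/d(baseline) of (baseline - reward)^2
}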
+} + +template +template +double VRClassReward::Forward( + const arma::Mat&& input, const arma::Mat&& target) +{ + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; +} + +template +template +void VRClassReward::Backward( + const arma::Mat&& input, + const arma::Mat&& target, + arma::Mat&& output) +{ + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); +} + +template +template +void VRClassReward::Serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. +} + +} // namespace ann +} // namespace mlpack + +#endif From 36b47f46a876e72bb714ef60e4f1b36cff9b9a60 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 9 Nov 2016 01:24:04 +0100 Subject: [PATCH 61/82] Increase the number of template arguments for the boost list class. --- src/mlpack/prereqs.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index 69d396ba443..178341ead5a 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -66,7 +66,7 @@ using enable_if_t = typename enable_if::type; #undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS #undef BOOST_MPL_LIMIT_LIST_SIZE #define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS -#define BOOST_MPL_LIMIT_LIST_SIZE 30 +#define BOOST_MPL_LIMIT_LIST_SIZE 40 // We'll need the necessary boost::serialization features, as well as what we // use with mlpack. In Boost 1.59 and newer, the BOOST_PFTO code is no longer From e45c115665b95d10ca0e05ba193bb5a7c7493fe7 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 8 Nov 2016 22:46:11 +0100 Subject: [PATCH 62/82] Remove unused ann layer. --- src/mlpack/methods/ann/layer/hard_tanh.hpp | 151 ++++++++++++++++++--- 1 file changed, 135 insertions(+), 16 deletions(-) diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index 88c8ad2d853..c707017fcbf 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -1,16 +1,16 @@ /** - * @file hard_tanh.hpp + * @file hard_tanh_layer.hpp * @author Dhawal Arora * - * Definition and implementation of the HardTanH layer. + * Definition and implementation of the HardTanHLayer layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
*/ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP #include @@ -46,18 +46,22 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class HardTanH +class HardTanHLayer { public: /** - * Create the HardTanH object using the specified parameters. The range + * Create the HardTanHLayer object using the specified parameters. The range * of the linear region can be adjusted by specifying the maxValue and * minValue. Default (maxValue = 1, minValue = -1). * * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanH(const double maxValue = 1, const double minValue = -1); + HardTanHLayer(const double maxValue = 1, const double minValue = -1) : + maxValue(maxValue), minValue(minValue) + { + // Nothing to do here. + } /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -67,7 +71,10 @@ class HardTanH * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output); + void Forward(const InputType& input, OutputType& output) + { + Fn(input, output); + } /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,9 +86,50 @@ class HardTanH * @param g The calculated gradient. */ template - void Backward(const DataType&& input, - DataType&& gy, - DataType&& g); + void Backward(const DataType& input, + const DataType& gy, + DataType& g) + { + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const arma::Cube& input, + const arma::Mat& gy, + arma::Cube& g) + { + // Generate a cube using the backpropagated error matrix. + arma::Cube mappedError = arma::zeros(input.n_rows, + input.n_cols, input.n_slices); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + arma::Col temp = gy.col(i).subvec( + j * input.n_rows * input.n_cols, + (j + 1) * input.n_rows * input.n_cols - 1); + + mappedError.slice(s + i) = arma::Mat(temp.memptr(), + input.n_rows, input.n_cols); + } + } + + arma::Cube derivative; + Deriv(input, derivative); + g = mappedError % derivative; + } //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -112,9 +160,83 @@ class HardTanH * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */); + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(maxValue, "maxValue"); + ar & data::CreateNVP(minValue, "minValue"); + } private: + /** + * Computes the HardTanH function. + * + * @param x Input data. + * @return f(x). + */ + double Fn(const double x) + { + if (x > maxValue) + return maxValue; + else if (x < minValue) + return minValue; + return x; + } + + /** + * Computes the HardTanH function using a dense matrix as input. + * + * @param x Input data. + * @param y The resulting output activation. 
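// Illustrative sketch (not the mlpack API): HardTanH forward and backward for
// dense matrices, using arma::clamp instead of the element-wise transform
// above; the behaviour is the same. The derivative is 1 inside the linear
// region (boundaries included, as in Deriv() above) and 0 outside it.
#include <armadillo>

void HardTanHSketch(const arma::mat& input,
                    const arma::mat& gy,   // error from the next layer
                    const double minValue,
                    const double maxValue,
                    arma::mat& output,
                    arma::mat& g)
{
  // Forward: clip every element into [minValue, maxValue].
  output = arma::clamp(input, minValue, maxValue);

  // Backward: mask the incoming error with the indicator of the linear region.
  arma::mat derivative = arma::conv_to<arma::mat>::from(
      (input >= minValue) % (input <= maxValue));
  g = gy % derivative;
}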
+ */ + + template + void Fn(const arma::Mat& x, arma::Mat& y) + { + y = x; + y.transform( [&](eT val) { return std::min( + std::max( val, minValue ), maxValue ); } ); + } + + /** + * Computes the HardTanH function using a 3rd-order tensor as input. + * + * @param x Input data. + * @param y The resulting output activation. + */ + template + void Fn(const arma::Cube& x, arma::Cube& y) + { + y = x; + for (size_t s = 0; s < x.n_slices; s++) + Fn(x.slice(s), y.slice(s)); + } + + /** + * Computes the first derivative of the HardTanH function. + * + * @param x Input data. + * @return f'(x) + */ + double Deriv(const double x) + { + return (x > maxValue || x < minValue) ? 0 : 1; + } + + /** + * Computes the first derivative of the HardTanH function. + * + * @param y Input activations. + * @param x The resulting derivatives. + */ + template + void Deriv(const InputType& x, OutputType& y) + { + y = x; + + for (size_t i = 0; i < x.n_elem; i++) + y(i) = Deriv(x(i)); + } + //! Locally-stored delta object. OutputDataType delta; @@ -129,12 +251,9 @@ class HardTanH //! Minimum value for the HardTanH function. double minValue; -}; // class HardTanH +}; // class HardTanHLayer } // namespace ann } // namespace mlpack -// Include implementation. -#include "hard_tanh_impl.hpp" - #endif From 96fbde2f7e63a853626a1cfbfd1ac4bbcf52d92d Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 26 Nov 2016 20:48:26 +0100 Subject: [PATCH 63/82] Use the stride parameter inside the convolution function. --- .../ann/convolution_rules/fft_convolution.hpp | 20 ++++++++++++++----- .../convolution_rules/naive_convolution.hpp | 4 ++++ .../ann/convolution_rules/svd_convolution.hpp | 16 +++++++++++---- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index 225626e34b8..af1b6f5c9e9 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -55,7 +55,9 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -92,7 +94,9 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -140,7 +144,9 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -174,7 +180,9 @@ class FFTConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -205,7 +213,9 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; 
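// Illustrative sketch (not the mlpack implementation): how a stride (dW, dH)
// enters a naive "valid" filtering pass, and the output size it produces,
// floor((in - k) / stride) + 1 per dimension with no padding. The filter is
// applied without flipping (correlation form), and the input is assumed to be
// at least as large as the filter.
#include <armadillo>

void StridedValidFilterSketch(const arma::mat& input,
                              const arma::mat& filter,
                              const size_t dW,
                              const size_t dH,
                              arma::mat& output)
{
  const size_t outRows = (input.n_rows - filter.n_rows) / dW + 1;
  const size_t outCols = (input.n_cols - filter.n_cols) / dH + 1;
  output = arma::zeros<arma::mat>(outRows, outCols);

  for (size_t j = 0; j < outCols; ++j)
  {
    for (size_t i = 0; i < outRows; ++i)
    {
      // Each output element looks at one filter-sized window whose top-left
      // corner advances by the stride.
      output(i, j) = arma::accu(
          input.submat(i * dW, j * dH,
                       i * dW + filter.n_rows - 1,
                       j * dH + filter.n_cols - 1) % filter);
    }
  }
}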
FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index c90574293ba..d65ca499d61 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -104,7 +104,11 @@ class NaiveConvolution filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, +<<<<<<< HEAD output, 1, 1); +======= + output, dW, dH); +>>>>>>> Use the stride parameter inside the convolution function. } /* diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index 5206ec1996f..e61b735bf8d 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -55,7 +55,9 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output) + arma::Mat& output, + const size_t dW = 1, + const size_t dH = 1) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -121,7 +123,9 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -152,7 +156,9 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -182,7 +188,9 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output) + arma::Cube& output, + const size_t dW = 1, + const size_t dH = 1) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From d5a5b3a14aac73efacdedf4b56f6e4e5e9d2e0b6 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 3 Dec 2016 22:56:33 +0100 Subject: [PATCH 64/82] Increase the number of template arguments for the boost list class. --- src/mlpack/methods/ann/layer/leaky_relu.hpp | 177 -------------------- 1 file changed, 177 deletions(-) delete mode 100644 src/mlpack/methods/ann/layer/leaky_relu.hpp diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp deleted file mode 100644 index d8160f1a50b..00000000000 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * @file leaky_relu.hpp - * @author Dhawal Arora - * - * Definition of LeakyReLU layer first introduced in the acoustic model, - * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, - * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP -#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP - -#include - -namespace mlpack { -namespace ann /** Artificial Neural Network. 
*/ { - -/** - * The LeakyReLU activation function, defined by - * - * @f{eqnarray*}{ - * f(x) &=& \max(x, alpha*x) \\ - * f'(x) &=& \left\{ - * \begin{array}{lr} - * 1 & : x > 0 \\ - * alpha & : x \le 0 - * \end{array} - * \right. - * @f} - * - * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, - * arma::sp_mat or arma::cube). - */ -template < - typename InputDataType = arma::mat, - typename OutputDataType = arma::mat -> -class LeakyReLU -{ - public: - /** - * Create the LeakyReLU object using the specified parameters. - * The non zero gradient can be adjusted by specifying tha parameter - * alpha in the range 0 to 1. Default (alpha = 0.03) - * - * @param alpha Non zero gradient - */ - LeakyReLU(const double alpha = 0.03); - - /** - * Ordinary feed forward pass of a neural network, evaluating the function - * f(x) by propagating the activity forward through f. - * - * @param input Input data used for evaluating the specified function. - * @param output Resulting output activation. - */ - template - void Forward(const InputType&& input, OutputType&& output); - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const DataType&& input, DataType&& gy, DataType&& g); - - //! Get the input parameter. - InputDataType const& InputParameter() const { return inputParameter; } - //! Modify the input parameter. - InputDataType& InputParameter() { return inputParameter; } - - //! Get the output parameter. - OutputDataType const& OutputParameter() const { return outputParameter; } - //! Modify the output parameter. - OutputDataType& OutputParameter() { return outputParameter; } - - //! Get the delta. - OutputDataType const& Delta() const { return delta; } - //! Modify the delta. - OutputDataType& Delta() { return delta; } - - //! Get the non zero gradient. - double const& Alpha() const { return alpha; } - //! Modify the non zero gradient. - double& Alpha() { return alpha; } - - /** - * Serialize the layer. - */ - template - void Serialize(Archive& ar, const unsigned int /* version */); - - private: - /** - * Computes the LeakReLU function - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - return std::max(x, alpha * x); - } - - /** - * Computes the Leaky ReLU function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = arma::max(x, alpha * x); - } - - /** - * Computes the first derivative of the LeakyReLU function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x >= 0) ? 1 : alpha; - } - - /** - * Computes the first derivative of the LeakyReLU function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - - template - void Deriv(const InputType& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - { - y(i) = Deriv(x(i)); - } - } - - //! Locally-stored delta object. - OutputDataType delta; - - //! Locally-stored input parameter object. - InputDataType inputParameter; - - //! Locally-stored output parameter object. 
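// Illustrative sketch (not the mlpack API): the LeakyReLU forward and backward
// rules declared above, written out for dense matrices. alpha is the small
// slope applied to negative inputs.
#include <armadillo>

void LeakyReLUSketch(const arma::mat& input,
                     const arma::mat& gy,  // error from the next layer
                     const double alpha,
                     arma::mat& output,
                     arma::mat& g)
{
  // Forward: f(x) = max(x, alpha * x).
  output = arma::max(input, alpha * input);

  // Backward: f'(x) is 1 for x >= 0 and alpha otherwise, applied element-wise
  // to the incoming error.
  arma::mat derivative(input.n_rows, input.n_cols);
  derivative.fill(alpha);
  derivative.elem(arma::find(input >= 0)).fill(1.0);
  g = gy % derivative;
}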
- OutputDataType outputParameter; - - //! Leakyness Parameter in the range 0 Date: Sun, 4 Dec 2016 00:03:20 +0100 Subject: [PATCH 65/82] Remove stride paramater from svd and fft convolution rule. --- .../ann/convolution_rules/fft_convolution.hpp | 20 +++++-------------- .../convolution_rules/naive_convolution.hpp | 4 ++++ .../ann/convolution_rules/svd_convolution.hpp | 16 ++++----------- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index af1b6f5c9e9..225626e34b8 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -55,9 +55,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { arma::Mat inputPadded = input; arma::Mat filterPadded = filter; @@ -94,9 +92,7 @@ class FFTConvolution std::is_same::value, void>::type Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // In case of the full convolution outputRows and outputCols doesn't // represent the true output size when the padLastDim parameter is set, @@ -144,9 +140,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter.slice(0), @@ -180,9 +174,7 @@ class FFTConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input, filter.slice(0), @@ -213,9 +205,7 @@ class FFTConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; FFTConvolution::Convolution(input.slice(0), filter, diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index d65ca499d61..c1f49ab1494 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -104,11 +104,15 @@ class NaiveConvolution filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, +<<<<<<< HEAD <<<<<<< HEAD output, 1, 1); ======= output, dW, dH); >>>>>>> Use the stride parameter inside the convolution function. +======= + output, 1, 1); +>>>>>>> Remove stride paramater from svd and fft convolution rule. 
} /* diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp index e61b735bf8d..5206ec1996f 100644 --- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp @@ -55,9 +55,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Mat& filter, - arma::Mat& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Mat& output) { // Use the naive convolution in case the filter isn't two dimensional or the // filter is bigger than the input. @@ -123,9 +121,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter.slice(0), @@ -156,9 +152,7 @@ class SVDConvolution template static void Convolution(const arma::Mat& input, const arma::Cube& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input, filter.slice(0), convOutput); @@ -188,9 +182,7 @@ class SVDConvolution template static void Convolution(const arma::Cube& input, const arma::Mat& filter, - arma::Cube& output, - const size_t dW = 1, - const size_t dH = 1) + arma::Cube& output) { arma::Mat convOutput; SVDConvolution::Convolution(input.slice(0), filter, convOutput); From f0258865058f8322d4f937f501bf8c2f92d1655f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 4 Dec 2016 00:27:30 +0100 Subject: [PATCH 66/82] Refactor ann layer. --- src/mlpack/methods/ann/layer/add.hpp | 38 +++ src/mlpack/methods/ann/layer/add_merge.hpp | 36 ++ src/mlpack/methods/ann/layer/concat.hpp | 133 ++++++++ .../methods/ann/layer/concat_performance.hpp | 59 ++++ src/mlpack/methods/ann/layer/constant.hpp | 41 +++ src/mlpack/methods/ann/layer/convolution.hpp | 217 ++++++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 91 ++++++ src/mlpack/methods/ann/layer/dropout.hpp | 56 ++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 86 ++--- src/mlpack/methods/ann/layer/join.hpp | 34 ++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 188 +++++++++++ src/mlpack/methods/ann/layer/linear.hpp | 59 ++++ .../methods/ann/layer/linear_no_bias.hpp | 54 +++ src/mlpack/methods/ann/layer/log_softmax.hpp | 54 +++ src/mlpack/methods/ann/layer/lookup.hpp | 43 +++ src/mlpack/methods/ann/layer/lstm.hpp | 309 ++++++++++++++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 129 ++++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 100 ++++++ .../methods/ann/layer/mean_squared_error.hpp | 31 ++ .../methods/ann/layer/multiply_constant.hpp | 33 ++ .../ann/layer/negative_log_likelihood.hpp | 47 +++ src/mlpack/methods/ann/layer/recurrent.hpp | 169 ++++++++++ src/mlpack/methods/ann/layer/select.hpp | 51 +++ src/mlpack/methods/ann/layer/sequential.hpp | 126 +++++++ 24 files changed, 2121 insertions(+), 63 deletions(-) create mode 100644 src/mlpack/methods/ann/layer/leaky_relu.hpp diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp index 1afb1121300..af975fb9afe 100644 --- a/src/mlpack/methods/ann/layer/add.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -39,7 +39,14 @@ class Add * * @param outSize The number of output units. 
*/ +<<<<<<< HEAD Add(const size_t outSize); +======= + Add(const size_t outSize) : outSize(outSize) + { + weights.set_size(outSize, 1); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -49,7 +56,14 @@ class Add * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = input + weights; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -63,7 +77,14 @@ class Add template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = gy; + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -75,7 +96,14 @@ class Add template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& gradient); +======= + arma::Mat&& gradient) + { + gradient = error; + } +>>>>>>> Refactor ann layer. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -106,7 +134,14 @@ class Add * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored number of output units. @@ -131,7 +166,10 @@ class Add } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "add_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp index 222c3ef1a2f..401a4c4b3bd 100644 --- a/src/mlpack/methods/ann/layer/add_merge.hpp +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -38,7 +38,14 @@ class AddMerge { public: //! Create the AddMerge object. +<<<<<<< HEAD AddMerge(); +======= + AddMerge() + { + // Nothing to do here. + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,7 +55,19 @@ class AddMerge * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const InputType&& /* input */, OutputType&& output); +======= + void Forward(const InputType&& /* input */, OutputType&& output) + { + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + for (size_t i = 1; i < network.size(); ++i) + { + output += boost::apply_visitor(outputParameterVisitor, network[i]); + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -62,7 +81,14 @@ class AddMerge template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = gy; + } +>>>>>>> Refactor ann layer. /* * Add a new module to the model. @@ -106,7 +132,14 @@ class AddMerge * Serialize the layer. */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(network, "network"); + } +>>>>>>> Refactor ann layer. private: std::vector network; @@ -133,7 +166,10 @@ class AddMerge } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. 
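// Illustrative sketch (not the mlpack API): the three rules the Add module
// implements above, i.e. a per-element bias. Forward adds the learned vector,
// Backward passes the error through unchanged, and the gradient of the bias is
// the error itself.
#include <armadillo>

void AddLayerSketch(const arma::mat& input,
                    const arma::mat& weights,  // the bias, one entry per output unit
                    const arma::mat& gy,       // error from the next layer
                    arma::mat& output,
                    arma::mat& g,
                    arma::mat& gradient)
{
  output = input + weights;  // Forward()
  g = gy;                    // Backward()
  gradient = gy;             // Gradient(): d(output)/d(weights) is the identity
}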
#include "add_merge_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp index 808309f7122..b124916cb56 100644 --- a/src/mlpack/methods/ann/layer/concat.hpp +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -45,7 +45,16 @@ class Concat * @param model Expose all network modules. * @param same Merge the error in the backward pass. */ +<<<<<<< HEAD Concat(const bool model = true, const bool same = true); +======= + Concat(const bool model = true, const bool same = true) : + model(model), + same(same) + { + parameters.set_size(0, 0); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -55,7 +64,46 @@ class Concat * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + size_t outSize = 0; + + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (boost::apply_visitor( + outputParameterVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + } + } + + output = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + if (elements < outSize) + { + output.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(outputParameterVisitor, network[i])); + } + else + { + output.col(i) = arma::vectorise(boost::apply_visitor( + outputParameterVisitor, network[i])); + } + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -69,7 +117,71 @@ class Concat template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + size_t outSize = 0; + size_t elements = 0; + + for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) + { + elements = boost::apply_visitor(outputParameterVisitor, + network[i]).n_elem; + + arma::mat delta; + if (gy.n_cols == 1) + { + delta = gy.submat(j, 0, j + elements - 1, 0); + } + else + { + delta = gy.submat(0, i, elements - 1, i); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(delta), std::move( + boost::apply_visitor(deltaVisitor, network[i]))), network[i]); + + if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) + { + outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + } + + if (same) + { + if (i == 0) + { + g = std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g += std::move(boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + + if (!same) + { + g = arma::zeros(outSize, network.size()); + for (size_t i = 0; i < network.size(); ++i) + { + size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; + if (elements < outSize) + { + g.submat(0, i, elements - 1, i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + else + { + g.col(i) = arma::vectorise( + boost::apply_visitor(deltaVisitor, network[i])); + } + } + } + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. 
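// Illustrative sketch (not the mlpack API): the column-wise packing that
// Concat::Forward() performs above. Each child module's output is vectorised
// into its own column of a zero matrix sized for the largest child, so shorter
// outputs are zero-padded at the bottom. Children are assumed to be non-empty.
#include <algorithm>
#include <armadillo>
#include <vector>

arma::mat ConcatPackSketch(const std::vector<arma::mat>& childOutputs)
{
  size_t outSize = 0;
  for (const arma::mat& out : childOutputs)
    outSize = std::max(outSize, static_cast<size_t>(out.n_elem));

  arma::mat packed = arma::zeros<arma::mat>(outSize, childOutputs.size());
  for (size_t i = 0; i < childOutputs.size(); ++i)
  {
    packed.submat(0, i, childOutputs[i].n_elem - 1, i) =
        arma::vectorise(childOutputs[i]);
  }

  return packed;
}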
@@ -81,7 +193,18 @@ class Concat template void Gradient(arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + for (size_t i = 0; i < network.size(); ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i])), std::move(error)), network[i]); + } + } +>>>>>>> Refactor ann layer. /* * Add a new module to the model. @@ -133,12 +256,15 @@ class Concat //! Modify the gradient. arma::mat& Gradient() { return gradient; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -177,10 +303,17 @@ class Concat arma::mat gradient; }; // class Concat +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "concat_impl.hpp" +======= + +} // namespace ann +} // namespace mlpack + +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp index 39b7e10f126..d26fada8716 100644 --- a/src/mlpack/methods/ann/layer/concat_performance.hpp +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -47,7 +47,16 @@ class ConcatPerformance * @param outputLayer Output layer used to evaluate the network. */ ConcatPerformance(const size_t inSize, +<<<<<<< HEAD OutputLayerType&& outputLayer = OutputLayerType()); +======= + OutputLayerType&& outputLayer = OutputLayerType()) : + inSize(inSize), + outputLayer(std::move(outputLayer)) + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. /* * Computes the Negative log likelihood. @@ -56,7 +65,24 @@ class ConcatPerformance * @param output Resulting output activation. */ template +<<<<<<< HEAD double Forward(const arma::Mat&& input, arma::Mat&& target); +======= + double Forward(const arma::Mat&& input, arma::Mat&& target) + { + const size_t elements = input.n_elem / inSize; + + double output = 0; + for (size_t i = 0; i < input.n_elem; i+= elements) + { + arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); + output += outputLayer.Forward(std::move(subInput), std::move(target)); + } + + return output; + } + +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network. The negative log * likelihood layer expectes that the input contains log-probabilities for @@ -71,7 +97,32 @@ class ConcatPerformance template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD arma::Mat&& output); +======= + arma::Mat&& output) + { + const size_t elements = input.n_elem / inSize; + + arma::mat subInput = input.submat(0, 0, elements - 1, 0); + arma::mat subOutput; + + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output = arma::zeros(subOutput.n_elem, inSize); + output.col(0) = subOutput; + + for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) + { + subInput = input.submat(i, 0, i + elements - 1, 0); + outputLayer.Backward(std::move(subInput), std::move(target), + std::move(subOutput)); + + output.col(j) = subOutput; + } + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -88,12 +139,15 @@ class ConcatPerformance //! Modify the delta. 
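// Illustrative sketch (not the mlpack API): what ConcatPerformance::Forward()
// computes above. The wrapped criterion is evaluated once per equal-sized
// block of the input and the per-block losses are summed; std::function stands
// in for the wrapped output layer.
#include <armadillo>
#include <functional>

double ConcatPerformanceSketch(
    const arma::vec& input,
    const arma::vec& target,
    const size_t inSize,  // number of concatenated blocks in the input
    const std::function<double(const arma::vec&, const arma::vec&)>& loss)
{
  const size_t elements = input.n_elem / inSize;

  double total = 0;
  for (size_t i = 0; i < input.n_elem; i += elements)
    total += loss(input.subvec(i, i + elements - 1), target);

  return total;
}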
OutputDataType& Delta() { return delta; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! Locally-stored number of inputs. size_t inSize; @@ -111,10 +165,15 @@ class ConcatPerformance OutputDataType outputParameter; }; // class ConcatPerformance +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "concat_performance_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp index b24b44aa802..6a561268c7d 100644 --- a/src/mlpack/methods/ann/layer/constant.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -41,7 +41,17 @@ class Constant * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. */ +<<<<<<< HEAD Constant(const size_t outSize, const double scalar); +======= + Constant(const size_t outSize, const double scalar) : + inSize(0), + outSize(outSize) + { + constantOutput = OutputDataType(outSize, 1); + constantOutput.fill(scalar); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -51,7 +61,19 @@ class Constant * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); +======= + void Forward(const InputType&& input, OutputType&& output) + { + if (inSize == 0) + { + inSize = input.n_elem; + } + + output = constantOutput; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -62,9 +84,16 @@ class Constant * @param g The calculated gradient. */ template +<<<<<<< HEAD void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g); +======= + void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) + { + g = arma::zeros(inSize, 1); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -85,7 +114,14 @@ class Constant * Serialize the layer. */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(constantOutput, "constantOutput"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored number of input units. @@ -107,10 +143,15 @@ class Constant OutputDataType outputParameter; }; // class ConstantLayer +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "constant_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index a7f647ed226..ccb1d0752f6 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -47,7 +47,14 @@ class Convolution { public: //! Create the Convolution object. +<<<<<<< HEAD Convolution(); +======= + Convolution() + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. 
/** * Create the Convolution object using the specified number of input maps, @@ -73,12 +80,41 @@ class Convolution const size_t padW = 0, const size_t padH = 0, const size_t inputWidth = 0, +<<<<<<< HEAD const size_t inputHeight = 0); +======= + const size_t inputHeight = 0) : + inSize(inSize), + outSize(outSize), + kW(kW), + kH(kH), + dW(dW), + dH(dH), + padW(padW), + padH(padH), + inputWidth(inputWidth), + inputHeight(inputHeight), + outputWidth(0), + outputHeight(0) + { + weights.set_size((outSize * inSize * kW * kH) + outSize, 1); + } +>>>>>>> Refactor ann layer. /* * Set the weight and bias term. */ +<<<<<<< HEAD void Reset(); +======= + void Reset() + { + weight = arma::cube(weights.memptr(), kW, kH, + outSize * inSize, false,false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -88,7 +124,52 @@ class Convolution * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); + + if (padW != 0 || padH != 0) + { + Pad(inputTemp, padW, padH, inputPaddedTemp); + } + + size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); + size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); + + outputTemp = arma::zeros >(wConv, hConv, outSize); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat convOutput; + + if (padW != 0 || padH != 0) + { + ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + else + { + ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), + weight.slice(outMapIdx), convOutput, dW, dH); + } + + outputTemp.slice(outMap) += convOutput; + } + + outputTemp.slice(outMap) += bias(outMap); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -102,7 +183,46 @@ class Convolution template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), + outputWidth, outputHeight, outSize); + gTemp = arma::zeros >(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) + { + arma::Mat rotatedFilter; + Rotate180(weight.slice(outMapIdx), rotatedFilter); + + arma::Mat output; + BackwardConvolutionRule::Convolution(mappedError.slice(outMap), + rotatedFilter, output, dW, dH); + + if (padW != 0 || padH != 0) + { + gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, + rotatedFilter.n_cols / 2, + rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, + rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); + } + else + { + gTemp.slice(inMap) += output; + } + + + } + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. 
@@ -114,7 +234,78 @@ class Convolution template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& gradient); +======= + arma::Mat&& gradient) + { + arma::cube mappedError; + if (padW != 0 && padH != 0) + { + mappedError = arma::cube(error.memptr(), outputWidth / padW, + outputHeight / padH, outSize); + } + else + { + mappedError = arma::cube(error.memptr(), outputWidth, + outputHeight, outSize); + } + + gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, + weight.n_slices); + + for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) + { + for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, + s += outSize) + { + arma::Cube inputSlices; + if (padW != 0 || padH != 0) + { + inputSlices = inputPaddedTemp.slices(inMap, inMap); + } + else + { + inputSlices = inputTemp.slices(inMap, inMap); + } + + arma::Cube deltaSlices = mappedError.slices(outMap, outMap); + + arma::Cube output; + GradientConvolutionRule::Convolution(inputSlices, deltaSlices, + output, dW, dH); + + if ((padW != 0 || padH != 0) && + (gradientTemp.n_rows < output.n_rows && + gradientTemp.n_cols < output.n_cols)) + { + for (size_t i = 0; i < output.n_slices; i++) + { + arma::mat subOutput = output.slice(i); + + gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, + subOutput.n_cols / 2, + subOutput.n_rows / 2 + gradientTemp.n_rows - 1, + subOutput.n_cols / 2 + gradientTemp.n_cols - 1); + } + } + else + { + for (size_t i = 0; i < output.n_slices; i++) + { + gradientTemp.slice(s) += output.slice(i); + } + } + } + + gradient.submat(weight.n_elem + outMap, 0, + weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( + outMap, outMap)); + } + + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + } +>>>>>>> Refactor ann layer. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -165,7 +356,26 @@ class Convolution * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + ar & data::CreateNVP(padW, "padW"); + ar & data::CreateNVP(padH, "padH"); + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inputWidth, "inputWidth"); + ar & data::CreateNVP(inputHeight, "inputHeight"); + ar & data::CreateNVP(outputWidth, "outputWidth"); + ar & data::CreateNVP(outputHeight, "outputHeight"); + } +>>>>>>> Refactor ann layer. private: @@ -335,10 +545,17 @@ class Convolution OutputDataType outputParameter; }; // class Convolution +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "convolution_impl.hpp" +======= + +} // namespace ann +} // namespace mlpack + +>>>>>>> Refactor ann layer. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp index f74c8e6aaac..3fdd09a9115 100644 --- a/src/mlpack/methods/ann/layer/dropconnect.hpp +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -63,7 +63,14 @@ class DropConnect { public: //! Create the DropConnect object. +<<<<<<< HEAD DropConnect(); +======= + DropConnect() + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. 
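// Illustrative sketch (not part of the patch, and not from the mlpack
// sources): the spatial output size that Convolution::Forward() above
// allocates for each output map.  The ConvOutSize() helper itself is not shown
// in this patch; the standard convolution arithmetic
//   out = (in - kernel + 2 * pad) / stride + 1
// is assumed here.
#include <cstddef>
#include <iostream>

// Assumed equivalent of the ConvOutSize() helper used by Convolution::Forward().
std::size_t ConvOutSize(const std::size_t in,
                        const std::size_t kernel,
                        const std::size_t stride,
                        const std::size_t pad)
{
  return (in - kernel + 2 * pad) / stride + 1;
}

int main()
{
  // A 28x28 input map, 5x5 kernel, stride 1, no padding -> 24x24 output maps.
  std::cout << ConvOutSize(28, 5, 1, 0) << " x "
            << ConvOutSize(28, 5, 1, 0) << std::endl;
  return 0;
}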
/** * Creates the DropConnect Layer as a Linear Object that takes input size, @@ -75,9 +82,24 @@ class DropConnect */ DropConnect(const size_t inSize, const size_t outSize, +<<<<<<< HEAD const double ratio = 0.5); ~DropConnect(); +======= + const double ratio = 0.5) : + ratio(ratio), + scale(1.0 / (1 - ratio)), + baseLayer(new Linear(inSize, outSize)) + { + network.push_back(baseLayer); + } + + ~DropConnect() + { + boost::apply_visitor(DeleteVisitor(), baseLayer); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of the DropConnect layer. @@ -86,7 +108,46 @@ class DropConnect * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + // The DropConnect mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + } + else + { + // Save weights for denoising. + boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); + + // Scale with input / (1 - ratio) and set values to zero with + // probability ratio. + mask = arma::randu >(denoise.n_rows, denoise.n_cols); + mask.transform([&](double val) { return (val > ratio); }); + + boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), + baseLayer); + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(output) + ), + baseLayer); + + output = output * scale; + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of the DropConnect layer. @@ -98,7 +159,20 @@ class DropConnect template void Backward(arma::Mat&& input, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + boost::apply_visitor( + BackwardVisitor( + std::move(input), + std::move(gy), + std::move(g) + ), + baseLayer); + } +>>>>>>> Refactor ann layer. /** * Calculate the gradient using the output delta and the input activation. @@ -110,7 +184,18 @@ class DropConnect template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + baseLayer); + + // Denoise the weights. + boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); + } +>>>>>>> Refactor ann layer. //! Get the model modules. std::vector& Model() { return network; } @@ -156,12 +241,15 @@ class DropConnect scale = 1.0 / (1.0 - ratio); } +<<<<<<< HEAD /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! The probability of setting a value to zero. double ratio; @@ -203,7 +291,10 @@ class DropConnect } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "dropconnect_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index 4371f21dad3..57db378d85e 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -61,7 +61,18 @@ class Dropout * @param ratio The probability of setting a value to zero. * @param rescale If true the input is rescaled when deterministic is False. 
*/ +<<<<<<< HEAD Dropout(const double ratio = 0.5, const bool rescale = true); +======= + Dropout(const double ratio = 0.5, + const bool rescale = true) : + ratio(ratio), + scale(1.0 / (1.0 - ratio)), + rescale(rescale) + { + // Nothing to do here. + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of the dropout layer. @@ -70,7 +81,34 @@ class Dropout * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + // The dropout mask will not be multiplied in the deterministic mode + // (during testing). + if (deterministic) + { + if (!rescale) + { + output = input; + } + else + { + output = input * scale; + } + } + else + { + // Scale with input / (1 - ratio) and set values to zero with probability + // ratio. + mask = arma::randu >(input.n_rows, input.n_cols); + mask.transform( [&](double val) { return (val > ratio); } ); + output = input % mask * scale; + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of the dropout layer. @@ -82,7 +120,14 @@ class Dropout template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = gy % mask * scale; + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -123,7 +168,15 @@ class Dropout * Serialize the layer. */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(ratio, "ratio"); + ar & data::CreateNVP(rescale, "rescale"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored delta object. @@ -154,7 +207,10 @@ class Dropout } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "dropout_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index c707017fcbf..76b19f964af 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -1,16 +1,16 @@ /** - * @file hard_tanh_layer.hpp + * @file hard_tanh.hpp * @author Dhawal Arora * - * Definition and implementation of the HardTanHLayer layer. + * Definition and implementation of the HardTanH layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP -#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP +#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP +#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_HPP #include @@ -46,18 +46,18 @@ template < typename InputDataType = arma::mat, typename OutputDataType = arma::mat > -class HardTanHLayer +class HardTanH { public: /** - * Create the HardTanHLayer object using the specified parameters. The range + * Create the HardTanH object using the specified parameters. The range * of the linear region can be adjusted by specifying the maxValue and * minValue. Default (maxValue = 1, minValue = -1). * * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. 
*/ - HardTanHLayer(const double maxValue = 1, const double minValue = -1) : + HardTanH(const double maxValue = 1, const double minValue = -1) : maxValue(maxValue), minValue(minValue) { // Nothing to do here. @@ -71,9 +71,14 @@ class HardTanHLayer * @param output Resulting output activation. */ template - void Forward(const InputType& input, OutputType& output) + void Forward(const InputType&& input, OutputType&& output) { - Fn(input, output); + output = input; + for (size_t i = 0; i < input.n_elem; i++) + { + output(i) = (output(i) > maxValue ? maxValue : + (output(i) < minValue ? minValue : output(i))); + } } /** @@ -86,49 +91,18 @@ class HardTanHLayer * @param g The calculated gradient. */ template - void Backward(const DataType& input, - const DataType& gy, - DataType& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } - - /** - * Ordinary feed backward pass of a neural network, calculating the function - * f(x) by propagating x backwards through f. Using the results from the feed - * forward pass. - * - * @param input The propagated input activation. - * @param gy The backpropagated error. - * @param g The calculated gradient. - */ - template - void Backward(const arma::Cube& input, - const arma::Mat& gy, - arma::Cube& g) + void Backward(const DataType&& input, + DataType&& gy, + DataType&& g) { - // Generate a cube using the backpropagated error matrix. - arma::Cube mappedError = arma::zeros(input.n_rows, - input.n_cols, input.n_slices); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + g = gy; + for (size_t i = 0; i < input.n_elem; i++) { - for (size_t i = 0; i < gy.n_cols; i++) + if (input(i) < minValue || input(i) > maxValue) { - arma::Col temp = gy.col(i).subvec( - j * input.n_rows * input.n_cols, - (j + 1) * input.n_rows * input.n_cols - 1); - - mappedError.slice(s + i) = arma::Mat(temp.memptr(), - input.n_rows, input.n_cols); + g(i) = 0; } } - - arma::Cube derivative; - Deriv(input, derivative); - g = mappedError % derivative; } //! Get the input parameter. @@ -197,20 +171,6 @@ class HardTanHLayer std::max( val, minValue ), maxValue ); } ); } - /** - * Computes the HardTanH function using a 3rd-order tensor as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - template - void Fn(const arma::Cube& x, arma::Cube& y) - { - y = x; - for (size_t s = 0; s < x.n_slices; s++) - Fn(x.slice(s), y.slice(s)); - } - /** * Computes the first derivative of the HardTanH function. * @@ -229,7 +189,7 @@ class HardTanHLayer * @param x The resulting derivatives. */ template - void Deriv(const InputType& x, OutputType& y) + void Deriv(const InputType&& x, OutputType& y) { y = x; @@ -251,7 +211,7 @@ class HardTanHLayer //! Minimum value for the HardTanH function. double minValue; -}; // class HardTanHLayer +}; // class HardTanH } // namespace ann } // namespace mlpack diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp index bda31d787ab..abd7ba9c978 100644 --- a/src/mlpack/methods/ann/layer/join.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -34,7 +34,14 @@ class Join { public: //! Create the Join object. +<<<<<<< HEAD Join(); +======= + Join() + { + // Nothing to do here. + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -44,7 +51,16 @@ class Join * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); +======= + void Forward(const InputType&& input, OutputType&& output) + { + inSizeRows = input.n_rows; + inSizeCols = input.n_cols; + output = arma::vectorise(input); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -58,7 +74,14 @@ class Join template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -79,7 +102,15 @@ class Join * Serialize the layer. */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(inSizeRows, "inSizeRows"); + ar & data::CreateNVP(inSizeCols, "inSizeCols"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored number of input rows. @@ -101,7 +132,10 @@ class Join } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "join_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp new file mode 100644 index 00000000000..8e69712b7f9 --- /dev/null +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -0,0 +1,188 @@ +/** + * @file leaky_relu.hpp + * @author Dhawal Arora + * + * Definition and implementation of LeakyReLU layer first introduced + * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP +#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_HPP + +#include + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +/** + * The LeakyReLU activation function, defined by + * + * @f{eqnarray*}{ + * f(x) &=& \max(x, alpha*x) \\ + * f'(x) &=& \left\{ + * \begin{array}{lr} + * 1 & : x > 0 \\ + * alpha & : x \le 0 + * \end{array} + * \right. + * @f} + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class LeakyReLU +{ + public: + /** + * Create the LeakyReLU object using the specified parameters. + * The non zero gradient can be adjusted by specifying tha parameter + * alpha in the range 0 to 1. Default (alpha = 0.03) + * + * @param alpha Non zero gradient + */ + LeakyReLU(const double alpha = 0.03) : alpha(alpha) + { + // Nothing to do here. + } + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * + * @param input Input data used for evaluating the specified function. + * @param output Resulting output activation. 
+ */ + template + void Forward(const InputType&& input, OutputType&& output) + { + Fn(input, output); + } + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, DataType&& gy, DataType&& g) + { + DataType derivative; + Deriv(input, derivative); + g = gy % derivative; + } + + //! Get the input parameter. + InputDataType const& InputParameter() const { return inputParameter; } + //! Modify the input parameter. + InputDataType& InputParameter() { return inputParameter; } + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + //! Get the non zero gradient. + double const& Alpha() const { return alpha; } + //! Modify the non zero gradient. + double& Alpha() { return alpha; } + + /** + * Serialize the layer. + */ + template + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(alpha, "alpha"); + } + + private: + /** + * Computes the LeakReLU function + * + * @param x Input data. + * @return f(x). + */ + double Fn(const double x) + { + return std::max(x, alpha * x); + } + + /** + * Computes the Leaky ReLU function using a dense matrix as input. + * + * @param x Input data. + * @param y The resulting output activation. + */ + template + void Fn(const arma::Mat& x, arma::Mat& y) + { + y = arma::max(x, alpha * x); + } + + /** + * Computes the first derivative of the LeakyReLU function. + * + * @param x Input data. + * @return f'(x) + */ + double Deriv(const double x) + { + return (x >= 0) ? 1 : alpha; + } + + /** + * Computes the first derivative of the LeakyReLU function. + * + * @param y Input activations. + * @param x The resulting derivatives. + */ + + template + void Deriv(const InputType& x, OutputType& y) + { + y = x; + + for (size_t i = 0; i < x.n_elem; i++) + { + y(i) = Deriv(x(i)); + } + } + + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored input parameter object. + InputDataType inputParameter; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; + + //! Leakyness Parameter in the range 0 >>>>>> Refactor ann layer. /** * Create the Linear layer object using the specified number of units. @@ -45,12 +49,30 @@ class Linear * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD Linear(const size_t inSize, const size_t outSize);; +======= + Linear(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize + outSize, 1); + } +>>>>>>> Refactor ann layer. /* * Reset the layer parameter. */ +<<<<<<< HEAD void Reset(); +======= + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + bias = arma::mat(weights.memptr() + weight.n_elem, + outSize, 1, false, false); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,7 +82,14 @@ class Linear * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = (weight * input) + bias; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -72,9 +101,18 @@ class Linear * @param g The calculated gradient. */ template +<<<<<<< HEAD void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g); +======= + void Backward(const arma::Mat&& /* unused */, + arma::Mat&& gy, + arma::Mat&& g) + { + g = weight.t() * gy; + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -86,7 +124,16 @@ class Linear template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& gradient); +======= + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; + } +>>>>>>> Refactor ann layer. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -117,7 +164,16 @@ class Linear * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored number of input units. @@ -151,7 +207,10 @@ class Linear } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "linear_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp index 972f72db8ec..4e1db7966a7 100644 --- a/src/mlpack/methods/ann/layer/linear_no_bias.hpp +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -37,19 +37,39 @@ class LinearNoBias { public: //! Create the LinearNoBias object. +<<<<<<< HEAD LinearNoBias(); +======= + LinearNoBias() {} +>>>>>>> Refactor ann layer. /** * Create the LinearNoBias object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD LinearNoBias(const size_t inSize, const size_t outSize); +======= + LinearNoBias(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize * inSize, 1); + } +>>>>>>> Refactor ann layer. /* * Reset the layer parameter. */ +<<<<<<< HEAD void Reset(); +======= + void Reset() + { + weight = arma::mat(weights.memptr(), outSize, inSize, false, false); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -59,7 +79,14 @@ class LinearNoBias * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weight * input; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -73,7 +100,14 @@ class LinearNoBias template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = weight.t() * gy; + } +>>>>>>> Refactor ann layer. 
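// Illustrative sketch (not part of the patch): the flat parameter layout that
// Linear::Reset() and Linear::Gradient() above agree on.  All parameters live
// in one (outSize * inSize + outSize) x 1 column; the weight matrix aliases
// the first outSize * inSize entries and the bias aliases the rest, so the
// gradient is packed in exactly the same order.
#include <armadillo>
#include <iostream>

int main()
{
  const size_t inSize = 4, outSize = 3;
  arma::mat weights(outSize * inSize + outSize, 1, arma::fill::randn);

  // Aliases into the flat column, as in Reset().
  arma::mat weight(weights.memptr(), outSize, inSize, false, false);
  arma::mat bias(weights.memptr() + weight.n_elem, outSize, 1, false, false);

  arma::mat input = arma::randu<arma::mat>(inSize, 1);
  arma::mat output = weight * input + bias;              // Forward()

  arma::mat error = arma::randu<arma::mat>(outSize, 1);  // delta from the next layer
  arma::mat g = weight.t() * error;                      // Backward()

  // Gradient(): packed in the same order as `weights`.
  arma::mat gradient(weights.n_rows, 1);
  gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(error * input.t());
  gradient.submat(weight.n_elem, 0, gradient.n_rows - 1, 0) = error;

  std::cout << gradient.n_rows << " packed gradient entries" << std::endl;
  return 0;
}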
/* * Calculate the gradient using the output delta and the input activation. @@ -85,7 +119,15 @@ class LinearNoBias template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& gradient); +======= + arma::Mat&& gradient) + { + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( + error * input.t()); + } +>>>>>>> Refactor ann layer. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -116,7 +158,16 @@ class LinearNoBias * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } +>>>>>>> Refactor ann layer. private: @@ -148,7 +199,10 @@ class LinearNoBias } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "linear_no_bias_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp index df9872d6c8a..20c641fbb39 100644 --- a/src/mlpack/methods/ann/layer/log_softmax.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -39,7 +39,11 @@ class LogSoftMax /** * Create the LogSoftmax object. */ +<<<<<<< HEAD LogSoftMax(); +======= + LogSoftMax() { /* Nothing to do here. */ } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -49,7 +53,42 @@ class LogSoftMax * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); +======= + void Forward(const InputType&& input, OutputType&& output) + { + arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); + output = (maxInput - input); + + // Approximation of the hyperbolic tangent. The acuracy however is + // about 0.00001 lower as using tanh. Credits go to Leon Bottou. + output.transform( [](double x) + { + //! Fast approximation of exp(-x) for x positive. + static constexpr double A0 = 1.0; + static constexpr double A1 = 0.125; + static constexpr double A2 = 0.0078125; + static constexpr double A3 = 0.00032552083; + static constexpr double A4 = 1.0172526e-5; + + if (x < 13.0) + { + double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); + y *= y; + y *= y; + y *= y; + y = 1 / y; + + return y; + } + + return 0.0; + } ); + + output = input - (maxInput + std::log(arma::accu(output))); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -63,7 +102,14 @@ class LogSoftMax template void Backward(const arma::Mat&& input, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = gy - arma::exp(input) * arma::accu(gy); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -80,12 +126,15 @@ class LogSoftMax //! Modify the delta. InputDataType& Delta() { return delta; } +<<<<<<< HEAD /** * Serialize the layer. */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! Locally-stored delta object. OutputDataType delta; @@ -97,10 +146,15 @@ class LogSoftMax OutputDataType outputParameter; }; // class LogSoftmax +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. 
#include "log_softmax_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp index 080d4b5746e..8c7c628a704 100644 --- a/src/mlpack/methods/ann/layer/lookup.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -42,7 +42,16 @@ class Lookup * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD Lookup(const size_t inSize, const size_t outSize); +======= + Lookup(const size_t inSize, const size_t outSize) : + inSize(inSize), + outSize(outSize) + { + weights.set_size(outSize, inSize); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -52,7 +61,14 @@ class Lookup * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + output = weights.cols(arma::conv_to::from(input) - 1); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -66,7 +82,14 @@ class Lookup template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + g = gy; + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -78,7 +101,15 @@ class Lookup template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& gradient); +======= + arma::Mat&& gradient) + { + gradient = arma::zeros >(weights.n_rows, weights.n_cols); + gradient.cols(arma::conv_to::from(input) - 1) = error; + } +>>>>>>> Refactor ann layer. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -109,7 +140,16 @@ class Lookup * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + } +>>>>>>> Refactor ann layer. private: @@ -138,7 +178,10 @@ class Lookup } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "lookup_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp index cf70e344d94..bdecd1ac9f0 100644 --- a/src/mlpack/methods/ann/layer/lstm.hpp +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -44,7 +44,11 @@ class LSTM { public: //! Create the LSTM object. +<<<<<<< HEAD LSTM(); +======= + LSTM() { /* Nothing to do here */ } +>>>>>>> Refactor ann layer. /** * Create the LSTM layer object using the specified parameters. @@ -53,7 +57,46 @@ class LSTM * @param outSize The number of output units. * @param rho Maximum number of steps to backpropagate through time (BPTT). 
*/ +<<<<<<< HEAD LSTM(const size_t inSize, const size_t outSize, const size_t rho); +======= + LSTM(const size_t inSize, const size_t outSize, const size_t rho) : + inSize(inSize), + outSize(outSize), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + { + input2GateModule = new Linear<>(inSize, 4 * outSize); + output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); + + network.push_back(input2GateModule); + network.push_back(output2GateModule); + + inputGateModule = new SigmoidLayer<>(); + hiddenStateModule = new TanHLayer<>(); + forgetGateModule = new SigmoidLayer<>(); + outputGateModule = new SigmoidLayer<>(); + + network.push_back(inputGateModule); + network.push_back(hiddenStateModule); + network.push_back(forgetGateModule); + network.push_back(outputGateModule); + + cellModule = new IdentityLayer<>(); + cellActivationModule = new TanHLayer<>(); + + network.push_back(cellModule); + network.push_back(cellActivationModule); + + prevOutput = arma::zeros(outSize, 1); + prevCell = arma::zeros(outSize, 1); + prevError = arma::zeros(4 * outSize, 1); + cellActivationError = arma::zeros(outSize, 1); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -63,7 +106,116 @@ class LSTM * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + cellParameter.push_back(prevCell); + outParameter.push_back(prevOutput); + } + + arma::mat output1; + arma::mat output2; + arma::mat output3; + + boost::apply_visitor( + ForwardVisitor( + std::move(input), + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(prevOutput), + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)) + ), + output2GateModule); + + output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + + boost::apply_visitor(outputParameterVisitor, output2GateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(0, 0, 1 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)) + ), + outputGateModule); + + arma::mat cell = prevCell; + + // Input gate * hidden state. + arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % boost::apply_visitor(outputParameterVisitor, + hiddenStateModule); + + // Forget gate * cell. 
+ arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cell; + + arma::mat nextCell = cmul1 + cmul2; + + boost::apply_visitor( + ForwardVisitor( + std::move(nextCell), + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) + ), + cellModule); + + boost::apply_visitor( + ForwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)) + ), + cellActivationModule); + + output = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % boost::apply_visitor(outputParameterVisitor, + outputGateModule); + + prevCell = nextCell; + prevOutput = output; + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + prevOutput.zeros(); + prevCell.zeros(); + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,7 +229,125 @@ class LSTM template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + if (backwardStep > 0) + { + gy += boost::apply_visitor(deltaVisitor, output2GateModule); + } + + arma::mat g1 = boost::apply_visitor(outputParameterVisitor, + cellActivationModule) % gy; + + arma::mat g2 = boost::apply_visitor(outputParameterVisitor, + outputGateModule) % gy; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + cellActivationModule)), + std::move(g2), + std::move(boost::apply_visitor(deltaVisitor, + cellActivationModule)) + ), + cellActivationModule); + + cellActivationError = boost::apply_visitor(deltaVisitor, + cellActivationModule); + + if (backwardStep > 0) + { + cellActivationError += forgetGateError; + } + + arma::mat g4 = boost::apply_visitor(outputParameterVisitor, + inputGateModule) % cellActivationError; + + arma::mat g5 = boost::apply_visitor(outputParameterVisitor, + hiddenStateModule) % cellActivationError; + + forgetGateError = boost::apply_visitor(outputParameterVisitor, + forgetGateModule) % cellActivationError; + + arma::mat g7 = cellParameter[cellParameter.size() - + backwardStep - 1] % cellActivationError; + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + inputGateModule)), + std::move(g5), + std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) + ), + inputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + hiddenStateModule)), + std::move(g4), + std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) + ), + hiddenStateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + forgetGateModule)), + std::move(g7), + std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) + ), + forgetGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + outputGateModule)), + std::move(g1), + std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) + ), + outputGateModule); + + prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, inputGateModule); + prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, hiddenStateModule); + prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( + deltaVisitor, forgetGateModule); + prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = 
boost::apply_visitor( + deltaVisitor, outputGateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + input2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) + ), + input2GateModule); + + boost::apply_visitor( + BackwardVisitor( + std::move(boost::apply_visitor(outputParameterVisitor, + output2GateModule)), + std::move(prevError), + std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) + ), + output2GateModule); + + backwardStep++; + if (backwardStep == rho) + { + backwardStep = 0; + cellParameter.clear(); + } + + g = boost::apply_visitor(deltaVisitor, input2GateModule); + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -89,7 +359,33 @@ class LSTM template void Gradient(arma::Mat&& input, arma::Mat&& /* error */, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + boost::apply_visitor( + GradientVisitor( + std::move(input), + std::move(prevError) + ), + input2GateModule); + + boost::apply_visitor( + GradientVisitor( + std::move(outParameter[outParameter.size() - gradientStep - 1]), + std::move(prevError) + ), + output2GateModule); + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + outParameter.clear(); + } + } +>>>>>>> Refactor ann layer. //! The value of the deterministic parameter. bool Deterministic() const { return deterministic; } @@ -133,7 +429,17 @@ class LSTM * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(weights, "weights"); + ar & data::CreateNVP(inSize, "inSize"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(rho, "rho"); + } +>>>>>>> Refactor ann layer. private: @@ -231,7 +537,10 @@ class LSTM } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "lstm_impl.hpp" +======= +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp index 6bf179b7a5a..459b438c582 100644 --- a/src/mlpack/methods/ann/layer/max_pooling.hpp +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -53,8 +53,16 @@ template < class MaxPooling { public: +<<<<<<< HEAD //! Create the MaxPooling object. MaxPooling(); +======= + //! Create the PoolingLayer object. + MaxPooling() + { + /* Nothing to do here */ + } +>>>>>>> Refactor ann layer. /** * Create the MaxPooling object using the specified number of units. @@ -66,10 +74,32 @@ class MaxPooling * @param floor Rounding operator (floor or ceil). */ MaxPooling(const size_t kW, +<<<<<<< HEAD const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); +======= + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + reset(false), + floor(floor), + offset(0), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + deterministic(false) + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -79,7 +109,67 @@ class MaxPooling * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + const size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + if (!deterministic) + { + poolingIndices.push_back(outputTemp); + } + + if (!reset) + { + size_t elements = inputWidth * inputHeight; + indicesCol = arma::linspace >(0, (elements - 1), + elements); + + indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); + + reset = true; + } + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + if (!deterministic) + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + poolingIndices.back().slice(s)); + } + else + { + PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), + inputTemp.slice(s)); + } + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -93,7 +183,28 @@ class MaxPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(mappedError.slice(s), gTemp.slice(s), + poolingIndices.back().slice(s)); + } + + poolingIndices.pop_back(); + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -139,7 +250,17 @@ class MaxPooling * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } +>>>>>>> Refactor ann layer. private: @@ -271,6 +392,7 @@ class MaxPooling std::vector poolingIndices; }; // class MaxPooling +<<<<<<< HEAD } // namespace ann } // namespace mlpack @@ -278,3 +400,10 @@ class MaxPooling #include "max_pooling_impl.hpp" #endif +======= + +} // namespace ann +} // namespace mlpack + +#endif +>>>>>>> Refactor ann layer. diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp index 70e061c7e2b..bc465bacf56 100644 --- a/src/mlpack/methods/ann/layer/mean_pooling.hpp +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -34,7 +34,14 @@ class MeanPooling { public: //! Create the MeanPooling object. +<<<<<<< HEAD MeanPooling(); +======= + MeanPooling() + { + /* Nothing to do here */ + } +>>>>>>> Refactor ann layer. /** * Create the MeanPooling object using the specified number of units. 
@@ -45,10 +52,33 @@ class MeanPooling * @param dH Width of the stride operation. */ MeanPooling(const size_t kW, +<<<<<<< HEAD const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); +======= + const size_t kH, + const size_t dW = 1, + const size_t dH = 1, + const bool floor = true) : + kW(kW), + kH(kH), + dW(dW), + dH(dH), + inputWidth(0), + inputHeight(0), + outputWidth(0), + outputHeight(0), + reset(false), + floor(floor), + deterministic(false), + offset(0) + + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -58,7 +88,45 @@ class MeanPooling * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + size_t slices = input.n_elem / (inputWidth * inputHeight); + inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); + + if (floor) + { + outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); + + offset = 0; + } + else + { + outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); + outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); + + offset = 1; + } + + outputTemp = arma::zeros >(outputWidth, outputHeight, + slices); + + for (size_t s = 0; s < inputTemp.n_slices; s++) + { + + Pooling(inputTemp.slice(s), outputTemp.slice(s)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + outSize = slices; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -72,7 +140,25 @@ class MeanPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, + outputHeight, outSize); + + gTemp = arma::zeros(inputTemp.n_rows, + inputTemp.n_cols, inputTemp.n_slices); + + for (size_t s = 0; s < mappedError.n_slices; s++) + { + Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); + } + + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -118,7 +204,17 @@ class MeanPooling * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(kW, "kW"); + ar & data::CreateNVP(kH, "kH"); + ar & data::CreateNVP(dW, "dW"); + ar & data::CreateNVP(dH, "dH"); + } +>>>>>>> Refactor ann layer. private: @@ -246,7 +342,11 @@ class MeanPooling } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "mean_pooling_impl.hpp" #endif +======= +#endif +>>>>>>> Refactor ann layer. diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp index 280b6e79b11..5bd8a17d37f 100644 --- a/src/mlpack/methods/ann/layer/mean_squared_error.hpp +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -2,7 +2,11 @@ * @file mean_squared_error.hpp * @author Marcus Edel * +<<<<<<< HEAD * Definition of the mean squared error performance function. 
+======= + * Definition and implementation of the mean squared error performance function. +>>>>>>> Refactor ann layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -37,7 +41,11 @@ class MeanSquaredError /** * Create the MeanSquaredError object. */ +<<<<<<< HEAD MeanSquaredError(); +======= + MeanSquaredError() { /* Nothing to do here. */ } +>>>>>>> Refactor ann layer. /* * Computes the mean squared error function. @@ -46,7 +54,15 @@ class MeanSquaredError * @param output Resulting output activation. */ template +<<<<<<< HEAD double Forward(const arma::Mat&& input, const arma::Mat&& target); +======= + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + return arma::mean(arma::mean(arma::square(input - target))); + } + +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network. * @@ -57,7 +73,14 @@ class MeanSquaredError template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD arma::Mat&& output); +======= + arma::Mat&& output) + { + output = (input - target); + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -74,12 +97,15 @@ class MeanSquaredError //! Modify the delta. OutputDataType& Delta() { return delta; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! Locally-stored delta object. OutputDataType delta; @@ -91,10 +117,15 @@ class MeanSquaredError OutputDataType outputParameter; }; // class MeanSquaredError +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "mean_squared_error_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp index b2985b27b83..338d6adff7b 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -32,7 +32,14 @@ class MultiplyConstant /** * Create the MultiplyConstant object. */ +<<<<<<< HEAD MultiplyConstant(const double scalar); +======= + MultiplyConstant(const double scalar) : scalar(scalar) + { + // Nothing to do here. + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -42,7 +49,14 @@ class MultiplyConstant * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); +======= + void Forward(const InputType&& input, OutputType&& output) + { + output = input * scalar; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network. The backward pass @@ -53,7 +67,14 @@ class MultiplyConstant * @param g The calculated gradient. */ template +<<<<<<< HEAD void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); +======= + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) + { + g = gy * scalar; + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -74,7 +95,14 @@ class MultiplyConstant * Serialize the layer. 
*/ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(scalar, "scalar"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored constant scalar value. @@ -90,10 +118,15 @@ class MultiplyConstant OutputDataType outputParameter; }; // class MultiplyConstant +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "multiply_constant_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 84c6a2d7904..2334381f96f 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -38,7 +38,11 @@ class NegativeLogLikelihood /** * Create the NegativeLogLikelihoodLayer object. */ +<<<<<<< HEAD NegativeLogLikelihood(); +======= + NegativeLogLikelihood() { /* Nothing to do here. */ } +>>>>>>> Refactor ann layer. /* * Computes the Negative log likelihood. @@ -47,7 +51,25 @@ class NegativeLogLikelihood * @param output Resulting output activation. */ template +<<<<<<< HEAD double Forward(const arma::Mat&& input, arma::Mat&& target); +======= + double Forward(const arma::Mat&& input, arma::Mat&& target) + { + double output = 0; + + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + return output; + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network. The negative log @@ -63,7 +85,29 @@ class NegativeLogLikelihood template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD arma::Mat&& output); +======= + arma::Mat&& output) + { + // std::cout << "------------------------------------------------------\n"; + // std::cout << "NegativeLogLikelihood\n"; + + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < input.n_cols; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + // std::cout << "output: \n" << output << std::endl; + + // std::cout << "------------------------------------------------------\n"; + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -96,9 +140,12 @@ class NegativeLogLikelihood //! Locally-stored output parameter object. OutputDataType outputParameter; }; // class NegativeLogLikelihood +<<<<<<< HEAD } // namespace ann } // namespace mlpack +======= +>>>>>>> Refactor ann layer. // Include implementation. 
#include "negative_log_likelihood_impl.hpp" diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp index 297127a6ae8..938a90ed122 100644 --- a/src/mlpack/methods/ann/layer/recurrent.hpp +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -56,7 +56,45 @@ class Recurrent const InputModuleType& input, const FeedbackModuleType& feedback, const TransferModuleType& transfer, +<<<<<<< HEAD const size_t rho); +======= + const size_t rho) : + startModule(new StartModuleType(start)), + inputModule(new InputModuleType(input)), + feedbackModule(new FeedbackModuleType(feedback)), + transferModule(new TransferModuleType(transfer)), + rho(rho), + forwardStep(0), + backwardStep(0), + gradientStep(0), + deterministic(false) + + { + initialModule = new Sequential<>(); + mergeModule = new AddMerge<>(); + recurrentModule = new Sequential<>(false); + + boost::apply_visitor(AddVisitor(inputModule), initialModule); + boost::apply_visitor(AddVisitor(startModule), initialModule); + boost::apply_visitor(AddVisitor(transferModule), initialModule); + + boost::apply_visitor(weightSizeVisitor, startModule); + boost::apply_visitor(weightSizeVisitor, inputModule); + boost::apply_visitor(weightSizeVisitor, feedbackModule); + boost::apply_visitor(weightSizeVisitor, transferModule); + + boost::apply_visitor(AddVisitor(inputModule), mergeModule); + boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); + boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); + boost::apply_visitor(AddVisitor(transferModule), recurrentModule); + + network.push_back(initialModule); + network.push_back(mergeModule); + network.push_back(feedbackModule); + network.push_back(recurrentModule); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -66,7 +104,52 @@ class Recurrent * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + initialModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, inputModule))), + inputModule); + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, transferModule)), std::move( + boost::apply_visitor(outputParameterVisitor, feedbackModule))), + feedbackModule); + + boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), + recurrentModule); + } + + output = boost::apply_visitor(outputParameterVisitor, transferModule); + + // Save the feedback output parameter when training the module. + if (!deterministic) + { + feedbackOutputParameter.push_back(output); + } + + forwardStep++; + if (forwardStep == rho) + { + forwardStep = 0; + backwardStep = 0; + + if (!recurrentError.is_empty()) + { + recurrentError.zeros(); + } + } + } +>>>>>>> Refactor ann layer. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -80,7 +163,48 @@ class Recurrent template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + if (!recurrentError.is_empty()) + { + recurrentError += gy; + } + else + { + recurrentError = gy; + } + + if (backwardStep < (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, recurrentModule)), std::move(recurrentError), + std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), + recurrentModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, inputModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), + inputModule); + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, feedbackModule)), std::move( + boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( + boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, initialModule)), std::move(recurrentError), + std::move(g)), initialModule); + } + + recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); + backwardStep++; + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -92,7 +216,42 @@ class Recurrent template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + if (gradientStep < (rho - 1)) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + recurrentModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); + + boost::apply_visitor(GradientVisitor(std::move( + feedbackOutputParameter[feedbackOutputParameter.size() - 2 - + gradientStep]), std::move(boost::apply_visitor(deltaVisitor, + mergeModule))), feedbackModule); + } + else + { + boost::apply_visitor(GradientZeroVisitor(), recurrentModule); + boost::apply_visitor(GradientZeroVisitor(), inputModule); + boost::apply_visitor(GradientZeroVisitor(), feedbackModule); + + boost::apply_visitor(GradientVisitor(std::move(input), std::move( + boost::apply_visitor(deltaVisitor, startModule))), initialModule); + } + + gradientStep++; + if (gradientStep == rho) + { + gradientStep = 0; + feedbackOutputParameter.clear(); + } + } +>>>>>>> Refactor ann layer. //! Get the model modules. std::vector& Model() { return network; } @@ -131,7 +290,14 @@ class Recurrent * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + } +>>>>>>> Refactor ann layer. private: //! Locally-stored start module. @@ -207,7 +373,10 @@ class Recurrent } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "recurrent_impl.hpp" +======= +>>>>>>> Refactor ann layer. 
#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index d3c42a008c9..6d07ebbad5a 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -2,7 +2,11 @@ * @file select.hpp * @author Marcus Edel * +<<<<<<< HEAD * Definition of the Select module. +======= + * Definition and implementation of the Select module. +>>>>>>> Refactor ann layer. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -36,9 +40,20 @@ class Select * Create the Select object. * * @param index The column which should be extracted from the given input. +<<<<<<< HEAD * @param elements The number of elements that should be used. */ Select(const size_t index, const size_t elements = 0); +======= + * @param index The number of elements that should be used. + */ + Select(const size_t index, const size_t elements = 0) : + index(index), + elements(elements) + { + /* Nothing to do here. */ + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -48,7 +63,21 @@ class Select * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (elements == 0) + { + output = input.col(index); + } + else + { + output = input.submat(0, index, elements - 1, index); + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -62,7 +91,21 @@ class Select template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + if (elements == 0) + { + g = gy; + } + else + { + g = gy.submat(0, 0, elements - 1, 0); + } + } +>>>>>>> Refactor ann layer. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -79,12 +122,15 @@ class Select //! Modify the delta. OutputDataType& Delta() { return delta; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! Locally-stored column index. size_t index; @@ -102,10 +148,15 @@ class Select OutputDataType outputParameter; }; // class Select +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "select_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor ann layer. #endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp index ca729c9da13..8164d623414 100644 --- a/src/mlpack/methods/ann/layer/sequential.hpp +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -47,10 +47,29 @@ class Sequential * * @param model Expose the all network modules. */ +<<<<<<< HEAD Sequential(const bool model = true); //! Destroy the Sequential object. ~Sequential(); +======= + Sequential(const bool model = true) : model(model), reset(false) + { + /* Nothing to do here. */ + } + + //! Destroy the Sequential object. + ~Sequential() + { + if (!model) + { + for (LayerTypes& layer : network) + { + boost::apply_visitor(deleteVisitor, layer); + } + } + } +>>>>>>> Refactor ann layer. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,7 +79,68 @@ class Sequential * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + boost::apply_visitor(ForwardVisitor(std::move(input), std::move( + boost::apply_visitor(outputParameterVisitor, network.front()))), + network.front()); + + if (!reset) + { + if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network.front()); + } + + if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network.front()); + } + } + + for (size_t i = 1; i < network.size(); ++i) + { + if (!reset) + { + // Set the input width. + boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); + + // Set the input height. + boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); + } + + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(outputParameterVisitor, network[i]))), + network[i]); + + if (!reset) + { + // Get the output width. + if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) + { + width = boost::apply_visitor(outputWidthVisitor, network[i]); + } + + // Get the output height. + if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) + { + height = boost::apply_visitor(outputHeightVisitor, network[i]); + } + } + } + + if (!reset) + { + reset = true; + } + + output = boost::apply_visitor(outputParameterVisitor, network.back()); + } +>>>>>>> Refactor ann layer. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -74,7 +154,28 @@ class Sequential template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network.back())), std::move(gy), + std::move(boost::apply_visitor(deltaVisitor, network.back()))), + network.back()); + + for (size_t i = 2; i < network.size() + 1; ++i) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[network.size() - i])), std::move( + boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), + std::move(boost::apply_visitor(deltaVisitor, + network[network.size() - i]))), network[network.size() - i]); + } + + g = boost::apply_visitor(deltaVisitor, network.front()); + } +>>>>>>> Refactor ann layer. /* * Calculate the gradient using the output delta and the input activation. @@ -86,7 +187,22 @@ class Sequential template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), + network.front()); + + for (size_t i = 1; i < network.size() - 1; ++i) + { + boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, network[i - 1])), std::move( + boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); + } + } +>>>>>>> Refactor ann layer. /* * Add a new module to the model. @@ -138,12 +254,15 @@ class Sequential //! Modify the gradient. arma::mat& Gradient() { return gradient; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor ann layer. private: //! 
Parameter which indicates if the modules should be exposed. bool model; @@ -194,10 +313,17 @@ class Sequential size_t height; }; // class Sequential +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "sequential_impl.hpp" +======= + +} // namespace ann +} // namespace mlpack + +>>>>>>> Refactor ann layer. #endif From 8d9de82475d4f1dc52dfc657a883ef3b2623bb80 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 5 Dec 2016 16:54:23 +0100 Subject: [PATCH 67/82] Minor style fixes. --- src/mlpack/methods/ann/layer/dropout.hpp | 2 +- src/mlpack/methods/ann/layer/negative_log_likelihood.hpp | 7 ------- src/mlpack/methods/ann/layer/select.hpp | 4 ++++ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index 57db378d85e..4919189ce5b 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -65,7 +65,7 @@ class Dropout Dropout(const double ratio = 0.5, const bool rescale = true); ======= Dropout(const double ratio = 0.5, - const bool rescale = true) : + const bool rescale = true) : ratio(ratio), scale(1.0 / (1.0 - ratio)), rescale(rescale) diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 2334381f96f..9bc3fb4a86d 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -90,9 +90,6 @@ class NegativeLogLikelihood ======= arma::Mat&& output) { - // std::cout << "------------------------------------------------------\n"; - // std::cout << "NegativeLogLikelihood\n"; - output = arma::zeros >(input.n_rows, input.n_cols); for (size_t i = 0; i < input.n_cols; ++i) { @@ -102,10 +99,6 @@ class NegativeLogLikelihood output(currentTarget, i) = -1; } - - // std::cout << "output: \n" << output << std::endl; - - // std::cout << "------------------------------------------------------\n"; } >>>>>>> Refactor ann layer. diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index 6d07ebbad5a..debb96d291d 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -40,12 +40,16 @@ class Select * Create the Select object. * * @param index The column which should be extracted from the given input. +<<<<<<< HEAD <<<<<<< HEAD * @param elements The number of elements that should be used. */ Select(const size_t index, const size_t elements = 0); ======= * @param index The number of elements that should be used. +======= + * @param elements The number of elements that should be used. +>>>>>>> Minor style fixes. */ Select(const size_t index, const size_t elements = 0) : index(index), From 1dfe0c6a93057e0bdbd4f7df9bc871cb60bd630d Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Tue, 6 Dec 2016 22:29:46 +0100 Subject: [PATCH 68/82] Refactor recurrent network test. --- src/mlpack/tests/recurrent_network_test.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index f7546e83b50..ff5daae9ede 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,11 +3,6 @@ * @author Marcus Edel * * Tests the recurrent network. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. 
You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include From 24748b08488955e8747dce8be7bd2ceb636618d5 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 13:52:50 +0100 Subject: [PATCH 69/82] Minor style fixes. --- src/mlpack/methods/ann/layer/convolution.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index ccb1d0752f6..17fcc6dcd95 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -110,7 +110,7 @@ class Convolution void Reset() { weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false,false); + outSize * inSize, false, false); bias = arma::mat(weights.memptr() + weight.n_elem, outSize, 1, false, false); } @@ -215,8 +215,6 @@ class Convolution { gTemp.slice(inMap) += output; } - - } } From f3d48b88996551a5e071d49efc31bf1529a063fe Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:47:53 +0100 Subject: [PATCH 70/82] Refactor neural visual attention modules. --- src/mlpack/methods/ann/layer/glimpse.hpp | 191 ++++++++++++++++++ .../methods/ann/layer/recurrent_attention.hpp | 171 ++++++++++++++++ .../methods/ann/layer/reinforce_normal.hpp | 48 +++++ .../methods/ann/layer/vr_class_reward.hpp | 75 +++++++ 4 files changed, 485 insertions(+) diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp index e007fae30b9..45e2ab98617 100644 --- a/src/mlpack/methods/ann/layer/glimpse.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -100,7 +100,20 @@ class Glimpse const size_t depth = 3, const size_t scale = 2, const size_t inputWidth = 0, +<<<<<<< HEAD const size_t inputHeight = 0); +======= + const size_t inputHeight = 0) : + inSize(inSize), + size(size), + depth(depth), + scale(scale), + inputWidth(inputWidth), + inputHeight(inputHeight) + { + // Nothing to do here. + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed forward pass of the glimpse layer. @@ -109,7 +122,90 @@ class Glimpse * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); + outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); + + location = input.submat(0, 1, 1, 1); + + if (!deterministic) + { + locationParameter.push_back(location); + } + + inputDepth = inputTemp.n_slices / inSize; + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, + inputTemp.n_slices / inSize); + + inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, + padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, + inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, + (inputIdx + 1) * inputDepth - 1); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + outputTemp.slice(j) = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingInput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); + } + else + { + ReSampling(poolingInput, outputTemp.slice(j)); + } + } + } + } + } + + for (size_t i = 0; i < outputTemp.n_slices; ++i) + { + outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); + } + + output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); + + outputWidth = outputTemp.n_rows; + outputHeight = outputTemp.n_cols; + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed backward pass of the glimpse layer. @@ -121,7 +217,94 @@ class Glimpse template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + // Generate a cube using the backpropagated error matrix. 
+ arma::Cube mappedError = arma::zeros(outputWidth, + outputHeight, 1); + + location = locationParameter.back(); + locationParameter.pop_back(); + + for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) + { + for (size_t i = 0; i < gy.n_cols; i++) + { + mappedError.slice(s + i) = arma::Mat(gy.memptr(), + outputWidth, outputHeight); + } + } + + gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, + inputTemp.n_slices); + + for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) + { + for (size_t depthIdx = 0, glimpseSize = size; + depthIdx < depth; depthIdx++, glimpseSize *= scale) + { + size_t padSize = std::floor((glimpseSize - 1) / 2); + + arma::Cube inputPadded = arma::zeros >( + inputTemp.n_rows + padSize * 2, inputTemp.n_cols + + padSize * 2, inputTemp.n_slices / inSize); + + size_t h = inputPadded.n_rows - glimpseSize; + size_t w = inputPadded.n_cols - glimpseSize; + + size_t x = std::min(h, (size_t) std::max(0.0, + (location(0, inputIdx) + 1) / 2.0 * h)); + size_t y = std::min(w, (size_t) std::max(0.0, + (location(1, inputIdx) + 1) / 2.0 * w)); + + if (depthIdx == 0) + { + for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = mappedError.slice(j); + } + } + else + { + for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; + j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) + { + arma::Mat poolingOutput = inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice); + + if (scale == 2) + { + Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), + poolingOutput); + } + else + { + DownwardReSampling(inputTemp.slice(paddedSlice), + mappedError.slice(j), poolingOutput); + } + + inputPadded.subcube(x, y, + paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, + paddedSlice) = poolingOutput; + } + } + + gTemp += inputPadded.tube(padSize, padSize, padSize + + inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); + } + } + + Transform(gTemp); + g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); + } +>>>>>>> Refactor neural visual attention modules. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -170,12 +353,15 @@ class Glimpse //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } +<<<<<<< HEAD /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); +======= +>>>>>>> Refactor neural visual attention modules. private: /* * Transform the given input by changing rows to columns. @@ -421,10 +607,15 @@ class Glimpse bool deterministic; }; // class GlimpseLayer +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "glimpse_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor neural visual attention modules. 
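// ---------------------------------------------------------------------------
// A standalone sketch (plain Armadillo; CropGlimpse is an illustrative helper,
// not part of mlpack) of the single-scale crop that Forward() above repeats
// for every depth/scale: the location lives in [-1, 1]^2, the image is
// zero-padded, and the patch offset is obtained from the rescaled location.

#include <armadillo>
#include <algorithm>

arma::mat CropGlimpse(const arma::mat& image,
                      const double lx,
                      const double ly,
                      const size_t glimpseSize)
{
  const size_t pad = (glimpseSize - 1) / 2;
  arma::mat padded = arma::zeros(image.n_rows + 2 * pad, image.n_cols + 2 * pad);
  padded.submat(pad, pad, pad + image.n_rows - 1, pad + image.n_cols - 1) = image;

  const size_t h = padded.n_rows - glimpseSize;
  const size_t w = padded.n_cols - glimpseSize;
  const size_t x = std::min(h, (size_t) std::max(0.0, (lx + 1) / 2.0 * h));
  const size_t y = std::min(w, (size_t) std::max(0.0, (ly + 1) / 2.0 * w));

  return padded.submat(x, y, x + glimpseSize - 1, y + glimpseSize - 1);
}

int main()
{
  arma::mat image(32, 32, arma::fill::randu);
  arma::mat patch = CropGlimpse(image, 0.0, 0.0, 8);  // Centred 8x8 glimpse.
  patch.print("patch:");
  return 0;
}
// ---------------------------------------------------------------------------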
#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp index ffb7320b232..43fe7f5ba26 100644 --- a/src/mlpack/methods/ann/layer/recurrent_attention.hpp +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -62,7 +62,22 @@ class RecurrentAttention RecurrentAttention(const size_t outSize, const RNNModuleType& rnn, const ActionModuleType& action, +<<<<<<< HEAD const size_t rho); +======= + const size_t rho) : + outSize(outSize), + rnnModule(new RNNModuleType(rnn)), + actionModule(new ActionModuleType(action)), + rho(rho), + forwardStep(0), + backwardStep(0), + deterministic(false) + { + network.push_back(rnnModule); + network.push_back(actionModule); + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -72,7 +87,61 @@ class RecurrentAttention * @param output Resulting output activation. */ template +<<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); +======= + void Forward(arma::Mat&& input, arma::Mat&& output) + { + // Initialize the action input. + if (initialInput.is_empty()) + { + initialInput = arma::zeros(outSize, input.n_cols); + } + + // Propagate through the action and recurrent module. + for (forwardStep = 0; forwardStep < rho; ++forwardStep) + { + if (forwardStep == 0) + { + boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( + boost::apply_visitor(outputParameterVisitor, actionModule))), + actionModule); + } + else + { + boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( + outputParameterVisitor, actionModule))), actionModule); + } + + // Initialize the glimpse input. + arma::mat glimpseInput = arma::zeros(input.n_elem, 2); + glimpseInput.col(0) = input; + glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, + actionModule).n_elem - 1, 1) = boost::apply_visitor( + outputParameterVisitor, actionModule); + + boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), + std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), + rnnModule); + + // Save the output parameter when training the module. + if (!deterministic) + { + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(SaveOutputParameterVisitor( + std::move(moduleOutputParameter)), network[l]); + } + } + } + + output = boost::apply_visitor(outputParameterVisitor, rnnModule); + + forwardStep = 0; + backwardStep = 0; + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -86,7 +155,85 @@ class RecurrentAttention template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD arma::Mat&& g); +======= + arma::Mat&& g) + { + if (intermediateGradient.is_empty() && backwardStep == 0) + { + // Initialize the attention gradients. + size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + + boost::apply_visitor(weightSizeVisitor, actionModule); + + intermediateGradient = arma::zeros(weights, 1); + attentionGradient = arma::zeros(weights, 1); + + // Initialize the action error. + actionError = arma::zeros( + boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, + boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); + } + + // Propagate the attention gradients. 
+ if (backwardStep == 0) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), rnnModule); + boost::apply_visitor(GradientSetVisitor( + std::move(intermediateGradient), offset), actionModule); + + attentionGradient.zeros(); + } + + // Back-propagate through time. + for (; backwardStep < rho; backwardStep++) + { + if (backwardStep == 0) + { + recurrentError = gy; + } + else + { + recurrentError = actionDelta; + } + + for (size_t l = 0; l < network.size(); ++l) + { + boost::apply_visitor(LoadOutputParameterVisitor( + std::move(moduleOutputParameter)), network[network.size() - 1 - l]); + } + + if (backwardStep == (rho - 1)) + { + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, actionModule)), std::move(actionError), + std::move(actionDelta)), actionModule); + } + else + { + boost::apply_visitor(BackwardVisitor(std::move(initialInput), + std::move(actionError), std::move(actionDelta)), actionModule); + } + + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( + outputParameterVisitor, rnnModule)), std::move(recurrentError), + std::move(rnnDelta)), rnnModule); + + if (backwardStep == 0) + { + g = rnnDelta.col(1); + } + else + { + g += rnnDelta.col(1); + } + + IntermediateGradient(); + } + } +>>>>>>> Refactor neural visual attention modules. /* * Calculate the gradient using the output delta and the input activation. @@ -98,7 +245,18 @@ class RecurrentAttention template void Gradient(arma::Mat&& /* input */, arma::Mat&& /* error */, +<<<<<<< HEAD arma::Mat&& /* gradient */); +======= + arma::Mat&& /* gradient */) + { + size_t offset = 0; + offset += boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), rnnModule); + boost::apply_visitor(GradientUpdateVisitor( + std::move(attentionGradient), offset), actionModule); + } +>>>>>>> Refactor neural visual attention modules. //! Get the model modules. std::vector& Model() { return network; } @@ -137,7 +295,17 @@ class RecurrentAttention * Serialize the layer */ template +<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); +======= + void Serialize(Archive& ar, const unsigned int /* version */) + { + ar & data::CreateNVP(rho, "rho"); + ar & data::CreateNVP(outSize, "outSize"); + ar & data::CreateNVP(forwardStep, "forwardStep"); + ar & data::CreateNVP(backwardStep, "backwardStep"); + } +>>>>>>> Refactor neural visual attention modules. private: //! Calculate the gradient of the attention module. @@ -257,7 +425,10 @@ class RecurrentAttention } // namespace ann } // namespace mlpack +<<<<<<< HEAD // Include implementation. #include "recurrent_attention_impl.hpp" +======= +>>>>>>> Refactor neural visual attention modules. #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp index fd192f0a1f2..0442ada4a4c 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -34,7 +34,14 @@ class ReinforceNormal * * @param stdev Standard deviation used during the forward and backward pass. */ +<<<<<<< HEAD ReinforceNormal(const double stdev); +======= + ReinforceNormal(const double stdev) : stdev(stdev) + { + // Nothing to do here. + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -44,7 +51,26 @@ class ReinforceNormal * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); +======= + void Forward(const arma::Mat&& input, arma::Mat&& output) + { + if (!deterministic) + { + // Multiply by standard deviations and re-center the means to the mean. + output = arma::randn >(input.n_rows, input.n_cols) * + stdev + input; + + moduleInputParameter.push_back(input); + } + else + { + // Use maximum a posteriori. + output = input; + } + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -56,7 +82,21 @@ class ReinforceNormal * @param g The calculated gradient. */ template +<<<<<<< HEAD void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); +======= + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) + { + g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); + + // Multiply by reward and multiply by -1. + g *= reward; + g *= -1; + + moduleInputParameter.pop_back(); + } + +>>>>>>> Refactor neural visual attention modules. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -83,12 +123,15 @@ class ReinforceNormal //! Modify the value of the deterministic parameter. double& Reward() { return reward; } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor neural visual attention modules. private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -112,10 +155,15 @@ class ReinforceNormal bool deterministic; }; // class ReinforceNormal +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "reinforce_normal_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor neural visual attention modules. #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp index f820e351aa8..a057b80e4a2 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -40,7 +40,16 @@ class VRClassReward * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. */ +<<<<<<< HEAD VRClassReward(const double scale = 1, const bool sizeAverage = true); +======= + VRClassReward(const double scale = 1, const bool sizeAverage = true) : + scale(scale), + sizeAverage(sizeAverage) + { + // Nothing to do here. + } +>>>>>>> Refactor neural visual attention modules. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -51,7 +60,39 @@ class VRClassReward * between 1 and the number of classes. */ template +<<<<<<< HEAD double Forward(const arma::Mat&& input, const arma::Mat&& target); +======= + double Forward(const arma::Mat&& input, const arma::Mat&& target) + { + double output = 0; + + for (size_t i = 0; i < input.n_cols - 1; ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output -= input(currentTarget, i); + } + + reward = 0; + arma::uword index = 0; + + for (size_t i = 0; i < input.n_cols - 1; i++) + { + input.unsafe_col(i).max(index); + reward = ((index + 1) == target(i)) * scale; + } + + if (sizeAverage) + { + return output - reward / (input.n_cols - 1); + } + + return output - reward; + } +>>>>>>> Refactor neural visual attention modules. 
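// ---------------------------------------------------------------------------
// ReinforceNormal and VRClassReward together form the REINFORCE part of the
// recurrent attention model: during training ReinforceNormal samples around
// its input (the mean of a Gaussian policy) and, in the backward pass, emits
// the score-function gradient scaled by the reward that VRClassReward sets
// (scale when the prediction matches the target). A standalone sketch of that
// gradient (plain Armadillo, made-up values; not the mlpack API):

#include <armadillo>
#include <cmath>

int main()
{
  const double stdev = 0.1;
  arma::vec mean = { 0.2, -0.5 };    // Module input: the mean of the policy.

  // Forward (stochastic/training) pass: sample an action around the mean.
  arma::vec action = arma::randn<arma::vec>(2) * stdev + mean;

  // Backward pass: -reward * d/dmean log N(action; mean, stdev^2),
  // matching g = -(action - mean) / stdev^2 * reward above.
  const double reward = 1.0;         // E.g. 1 when the classification is right.
  arma::vec g = -reward * (action - mean) / std::pow(stdev, 2.0);

  g.print("REINFORCE gradient:");
  return 0;
}
// ---------------------------------------------------------------------------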
/** * Ordinary feed backward pass of a neural network. The negative log @@ -67,7 +108,33 @@ class VRClassReward template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD arma::Mat&& output); +======= + arma::Mat&& output) + { + output = arma::zeros >(input.n_rows, input.n_cols); + for (size_t i = 0; i < (input.n_cols - 1); ++i) + { + size_t currentTarget = target(i) - 1; + Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, + "Target class out of range."); + + output(currentTarget, i) = -1; + } + + double vrReward = reward - input(0, 1); + if (sizeAverage) + { + vrReward /= input.n_cols - 1; + } + + const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; + + output(0, 1) = norm * (input(0, 1) - reward); + boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); + } +>>>>>>> Refactor neural visual attention modules. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -104,12 +171,15 @@ class VRClassReward */ void Add(LayerTypes layer) { network.push_back(layer); } +<<<<<<< HEAD /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +======= +>>>>>>> Refactor neural visual attention modules. private: //! Locally-stored value to scale the reward. const double scale; @@ -136,10 +206,15 @@ class VRClassReward std::vector network; }; // class VRClassReward +<<<<<<< HEAD } // namespace ann } // namespace mlpack // Include implementation. #include "vr_class_reward_impl.hpp" +======= +}; // namespace ann +}; // namespace mlpack +>>>>>>> Refactor neural visual attention modules. #endif From e4e73e6ca7e3433024a6b1fdfc5554f0d0f0ab01 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Mon, 12 Dec 2016 14:50:59 +0100 Subject: [PATCH 71/82] Use refactored rnn,ffn classes for the ann tests. --- src/mlpack/tests/recurrent_network_test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index ff5daae9ede..f7546e83b50 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -3,6 +3,11 @@ * @author Marcus Edel * * Tests the recurrent network. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #include From 4ed0e6fdbc6fb66495e041ed45caf391a028ff51 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 14 Dec 2016 22:46:38 +0100 Subject: [PATCH 72/82] Split layer modules into definition and implementation. 
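The pattern applied throughout this patch is to leave only declarations in each
layer header and move the inline bodies into a companion *_impl.hpp that the
header includes at the bottom. A minimal sketch of the resulting shape
(ExampleLayer is a hypothetical name used for illustration, not one of the
mlpack modules):

    // example_layer.hpp -- declarations only.
    template<typename InputDataType, typename OutputDataType>
    class ExampleLayer
    {
     public:
      ExampleLayer(const size_t outSize);

      template<typename eT>
      void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output);

     private:
      size_t outSize;
    };

    // Include implementation.
    #include "example_layer_impl.hpp"

    // example_layer_impl.hpp -- out-of-line definitions.
    template<typename InputDataType, typename OutputDataType>
    ExampleLayer<InputDataType, OutputDataType>::ExampleLayer(
        const size_t outSize) : outSize(outSize)
    {
      // Nothing to do here.
    }

    template<typename InputDataType, typename OutputDataType>
    template<typename eT>
    void ExampleLayer<InputDataType, OutputDataType>::Forward(
        const arma::Mat<eT>&& input, arma::Mat<eT>&& output)
    {
      output = input;
    }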
--- src/mlpack/methods/ann/layer/add.hpp | 26 ++++++ src/mlpack/methods/ann/layer/add_merge.hpp | 22 +++++ src/mlpack/methods/ann/layer/concat.hpp | 31 +++++++ .../methods/ann/layer/concat_performance.hpp | 24 +++++ src/mlpack/methods/ann/layer/constant.hpp | 24 +++++ src/mlpack/methods/ann/layer/convolution.hpp | 37 ++++++++ src/mlpack/methods/ann/layer/dropconnect.hpp | 34 +++++++ src/mlpack/methods/ann/layer/dropout.hpp | 22 +++++ src/mlpack/methods/ann/layer/glimpse.hpp | 24 +++++ src/mlpack/methods/ann/layer/hard_tanh.hpp | 93 ++----------------- src/mlpack/methods/ann/layer/join.hpp | 22 +++++ src/mlpack/methods/ann/layer/leaky_relu.hpp | 29 ++---- src/mlpack/methods/ann/layer/linear.hpp | 36 +++++++ .../methods/ann/layer/linear_no_bias.hpp | 34 +++++++ src/mlpack/methods/ann/layer/log_softmax.hpp | 24 +++++ src/mlpack/methods/ann/layer/lookup.hpp | 26 ++++++ src/mlpack/methods/ann/layer/lstm.hpp | 30 ++++++ src/mlpack/methods/ann/layer/max_pooling.hpp | 33 +++++++ src/mlpack/methods/ann/layer/mean_pooling.hpp | 28 ++++++ .../methods/ann/layer/mean_squared_error.hpp | 28 ++++++ .../methods/ann/layer/multiply_constant.hpp | 22 +++++ .../ann/layer/negative_log_likelihood.hpp | 18 ++++ src/mlpack/methods/ann/layer/recurrent.hpp | 26 ++++++ .../methods/ann/layer/recurrent_attention.hpp | 26 ++++++ .../methods/ann/layer/reinforce_normal.hpp | 24 +++++ src/mlpack/methods/ann/layer/select.hpp | 27 ++++++ src/mlpack/methods/ann/layer/sequential.hpp | 34 +++++++ .../methods/ann/layer/vr_class_reward.hpp | 24 +++++ 28 files changed, 722 insertions(+), 106 deletions(-) diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp index af975fb9afe..88167abc244 100644 --- a/src/mlpack/methods/ann/layer/add.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -39,6 +39,7 @@ class Add * * @param outSize The number of output units. */ +<<<<<<< HEAD <<<<<<< HEAD Add(const size_t outSize); ======= @@ -47,6 +48,9 @@ class Add weights.set_size(outSize, 1); } >>>>>>> Refactor ann layer. +======= + Add(const size_t outSize); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -56,6 +60,7 @@ class Add * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -64,6 +69,9 @@ class Add output = input + weights; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -77,6 +85,7 @@ class Add template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -85,6 +94,9 @@ class Add g = gy; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -96,6 +108,7 @@ class Add template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& gradient); ======= @@ -104,6 +117,9 @@ class Add gradient = error; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& gradient); +>>>>>>> Split layer modules into definition and implementation. //! Get the parameters. 
OutputDataType const& Parameters() const { return weights; } @@ -134,6 +150,7 @@ class Add * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -142,6 +159,9 @@ class Add ar & data::CreateNVP(weights, "weights"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of output units. @@ -166,10 +186,16 @@ class Add } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "add_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "add_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp index 401a4c4b3bd..94ee6d36814 100644 --- a/src/mlpack/methods/ann/layer/add_merge.hpp +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -38,6 +38,7 @@ class AddMerge { public: //! Create the AddMerge object. +<<<<<<< HEAD <<<<<<< HEAD AddMerge(); ======= @@ -46,6 +47,9 @@ class AddMerge // Nothing to do here. } >>>>>>> Refactor ann layer. +======= + AddMerge(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -55,6 +59,7 @@ class AddMerge * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const InputType&& /* input */, OutputType&& output); ======= @@ -68,6 +73,9 @@ class AddMerge } } >>>>>>> Refactor ann layer. +======= + void Forward(const InputType&& /* input */, OutputType&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -81,6 +89,7 @@ class AddMerge template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -89,6 +98,9 @@ class AddMerge g = gy; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -132,6 +144,7 @@ class AddMerge * Serialize the layer. */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -140,6 +153,9 @@ class AddMerge ar & data::CreateNVP(network, "network"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: std::vector network; @@ -166,10 +182,16 @@ class AddMerge } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "add_merge_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "add_merge_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp index b124916cb56..d92e11c8f10 100644 --- a/src/mlpack/methods/ann/layer/concat.hpp +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -45,6 +45,7 @@ class Concat * @param model Expose all network modules. * @param same Merge the error in the backward pass. 
*/ +<<<<<<< HEAD <<<<<<< HEAD Concat(const bool model = true, const bool same = true); ======= @@ -55,6 +56,9 @@ class Concat parameters.set_size(0, 0); } >>>>>>> Refactor ann layer. +======= + Concat(const bool model = true, const bool same = true); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,6 +68,7 @@ class Concat * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -104,6 +109,9 @@ class Concat } } >>>>>>> Refactor ann layer. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -117,6 +125,7 @@ class Concat template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -182,6 +191,9 @@ class Concat } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -193,6 +205,7 @@ class Concat template void Gradient(arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -205,6 +218,9 @@ class Concat } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -257,14 +273,20 @@ class Concat arma::mat& Gradient() { return gradient; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -303,6 +325,7 @@ class Concat arma::mat gradient; }; // class Concat +<<<<<<< HEAD <<<<<<< HEAD } // namespace ann } // namespace mlpack @@ -316,4 +339,12 @@ class Concat } // namespace mlpack >>>>>>> Refactor ann layer. +======= +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "concat_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp index d26fada8716..f663b12791b 100644 --- a/src/mlpack/methods/ann/layer/concat_performance.hpp +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -47,6 +47,7 @@ class ConcatPerformance * @param outputLayer Output layer used to evaluate the network. */ ConcatPerformance(const size_t inSize, +<<<<<<< HEAD <<<<<<< HEAD OutputLayerType&& outputLayer = OutputLayerType()); ======= @@ -57,6 +58,9 @@ class ConcatPerformance /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + OutputLayerType&& outputLayer = OutputLayerType()); +>>>>>>> Split layer modules into definition and implementation. /* * Computes the Negative log likelihood. @@ -65,6 +69,7 @@ class ConcatPerformance * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD double Forward(const arma::Mat&& input, arma::Mat&& target); ======= @@ -83,6 +88,9 @@ class ConcatPerformance } >>>>>>> Refactor ann layer. 
+======= + double Forward(const arma::Mat&& input, arma::Mat&& target); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log * likelihood layer expectes that the input contains log-probabilities for @@ -97,6 +105,7 @@ class ConcatPerformance template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& output); ======= @@ -123,6 +132,9 @@ class ConcatPerformance } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -140,14 +152,20 @@ class ConcatPerformance OutputDataType& Delta() { return delta; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of inputs. size_t inSize; @@ -166,14 +184,20 @@ class ConcatPerformance }; // class ConcatPerformance <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "concat_performance_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp index 6a561268c7d..6390e30ef0b 100644 --- a/src/mlpack/methods/ann/layer/constant.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -41,6 +41,7 @@ class Constant * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. */ +<<<<<<< HEAD <<<<<<< HEAD Constant(const size_t outSize, const double scalar); ======= @@ -52,6 +53,9 @@ class Constant constantOutput.fill(scalar); } >>>>>>> Refactor ann layer. +======= + Constant(const size_t outSize, const double scalar); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -61,6 +65,7 @@ class Constant * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); ======= @@ -74,6 +79,9 @@ class Constant output = constantOutput; } >>>>>>> Refactor ann layer. +======= + void Forward(const InputType&& input, OutputType&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -84,6 +92,7 @@ class Constant * @param g The calculated gradient. */ template +<<<<<<< HEAD <<<<<<< HEAD void Backward(const DataType&& /* input */, DataType&& /* gy */, @@ -94,6 +103,11 @@ class Constant g = arma::zeros(inSize, 1); } >>>>>>> Refactor ann layer. +======= + void Backward(const DataType&& /* input */, + DataType&& /* gy */, + DataType&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -114,6 +128,7 @@ class Constant * Serialize the layer. 
*/ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -122,6 +137,9 @@ class Constant ar & data::CreateNVP(constantOutput, "constantOutput"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input units. @@ -144,14 +162,20 @@ class Constant }; // class ConstantLayer <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "constant_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index 17fcc6dcd95..df7affe61e5 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -47,6 +47,7 @@ class Convolution { public: //! Create the Convolution object. +<<<<<<< HEAD <<<<<<< HEAD Convolution(); ======= @@ -55,6 +56,9 @@ class Convolution /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + Convolution(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the Convolution object using the specified number of input maps, @@ -80,6 +84,7 @@ class Convolution const size_t padW = 0, const size_t padH = 0, const size_t inputWidth = 0, +<<<<<<< HEAD <<<<<<< HEAD const size_t inputHeight = 0); ======= @@ -100,10 +105,14 @@ class Convolution weights.set_size((outSize * inSize * kW * kH) + outSize, 1); } >>>>>>> Refactor ann layer. +======= + const size_t inputHeight = 0); +>>>>>>> Split layer modules into definition and implementation. /* * Set the weight and bias term. */ +<<<<<<< HEAD <<<<<<< HEAD void Reset(); ======= @@ -115,6 +124,9 @@ class Convolution outSize, 1, false, false); } >>>>>>> Refactor ann layer. +======= + void Reset(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -124,6 +136,7 @@ class Convolution * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -170,6 +183,9 @@ class Convolution outputHeight = outputTemp.n_cols; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -183,6 +199,7 @@ class Convolution template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -221,6 +238,9 @@ class Convolution g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -232,6 +252,7 @@ class Convolution template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& gradient); ======= @@ -304,6 +325,9 @@ class Convolution gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); } >>>>>>> Refactor ann layer. 
+======= + arma::Mat&& gradient); +>>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -354,6 +378,7 @@ class Convolution * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -374,6 +399,9 @@ class Convolution ar & data::CreateNVP(outputHeight, "outputHeight"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -543,6 +571,7 @@ class Convolution OutputDataType outputParameter; }; // class Convolution +<<<<<<< HEAD <<<<<<< HEAD } // namespace ann } // namespace mlpack @@ -556,4 +585,12 @@ class Convolution } // namespace mlpack >>>>>>> Refactor ann layer. +======= +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "convolution_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp index 3fdd09a9115..cab53be645b 100644 --- a/src/mlpack/methods/ann/layer/dropconnect.hpp +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -63,6 +63,7 @@ class DropConnect { public: //! Create the DropConnect object. +<<<<<<< HEAD <<<<<<< HEAD DropConnect(); ======= @@ -71,6 +72,9 @@ class DropConnect /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + DropConnect(); +>>>>>>> Split layer modules into definition and implementation. /** * Creates the DropConnect Layer as a Linear Object that takes input size, @@ -82,6 +86,7 @@ class DropConnect */ DropConnect(const size_t inSize, const size_t outSize, +<<<<<<< HEAD <<<<<<< HEAD const double ratio = 0.5); @@ -100,6 +105,11 @@ class DropConnect boost::apply_visitor(DeleteVisitor(), baseLayer); } >>>>>>> Refactor ann layer. +======= + const double ratio = 0.5); + + ~DropConnect(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the DropConnect layer. @@ -108,6 +118,7 @@ class DropConnect * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -148,6 +159,9 @@ class DropConnect } } >>>>>>> Refactor ann layer. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the DropConnect layer. @@ -159,6 +173,7 @@ class DropConnect template void Backward(arma::Mat&& input, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -173,6 +188,9 @@ class DropConnect baseLayer); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /** * Calculate the gradient using the output delta and the input activation. @@ -184,6 +202,7 @@ class DropConnect template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -196,6 +215,9 @@ class DropConnect boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. //! Get the model modules. 
std::vector& Model() { return network; } @@ -242,14 +264,20 @@ class DropConnect } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! The probability of setting a value to zero. double ratio; @@ -291,10 +319,16 @@ class DropConnect } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "dropconnect_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "dropconnect_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index 4919189ce5b..dda2b8192ee 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -61,6 +61,7 @@ class Dropout * @param ratio The probability of setting a value to zero. * @param rescale If true the input is rescaled when deterministic is False. */ +<<<<<<< HEAD <<<<<<< HEAD Dropout(const double ratio = 0.5, const bool rescale = true); ======= @@ -73,6 +74,9 @@ class Dropout // Nothing to do here. } >>>>>>> Refactor ann layer. +======= + Dropout(const double ratio = 0.5, const bool rescale = true); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the dropout layer. @@ -81,6 +85,7 @@ class Dropout * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -109,6 +114,9 @@ class Dropout } } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the dropout layer. @@ -120,6 +128,7 @@ class Dropout template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -128,6 +137,9 @@ class Dropout g = gy % mask * scale; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -168,6 +180,7 @@ class Dropout * Serialize the layer. */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -177,6 +190,9 @@ class Dropout ar & data::CreateNVP(rescale, "rescale"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. @@ -207,10 +223,16 @@ class Dropout } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "dropout_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "dropout_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. 
#endif diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp index 45e2ab98617..c86215e3a28 100644 --- a/src/mlpack/methods/ann/layer/glimpse.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -100,6 +100,7 @@ class Glimpse const size_t depth = 3, const size_t scale = 2, const size_t inputWidth = 0, +<<<<<<< HEAD <<<<<<< HEAD const size_t inputHeight = 0); ======= @@ -114,6 +115,9 @@ class Glimpse // Nothing to do here. } >>>>>>> Refactor neural visual attention modules. +======= + const size_t inputHeight = 0); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the glimpse layer. @@ -122,6 +126,7 @@ class Glimpse * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -206,6 +211,9 @@ class Glimpse outputHeight = outputTemp.n_cols; } >>>>>>> Refactor neural visual attention modules. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the glimpse layer. @@ -217,6 +225,7 @@ class Glimpse template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -305,6 +314,9 @@ class Glimpse g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); } >>>>>>> Refactor neural visual attention modules. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -354,14 +366,20 @@ class Glimpse bool& Deterministic() { return deterministic; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. private: /* * Transform the given input by changing rows to columns. @@ -608,14 +626,20 @@ class Glimpse }; // class GlimpseLayer <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "glimpse_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/hard_tanh.hpp b/src/mlpack/methods/ann/layer/hard_tanh.hpp index 76b19f964af..88c8ad2d853 100644 --- a/src/mlpack/methods/ann/layer/hard_tanh.hpp +++ b/src/mlpack/methods/ann/layer/hard_tanh.hpp @@ -57,11 +57,7 @@ class HardTanH * @param maxValue Range of the linear region maximum value. * @param minValue Range of the linear region minimum value. */ - HardTanH(const double maxValue = 1, const double minValue = -1) : - maxValue(maxValue), minValue(minValue) - { - // Nothing to do here. - } + HardTanH(const double maxValue = 1, const double minValue = -1); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -71,15 +67,7 @@ class HardTanH * @param output Resulting output activation. 
*/ template - void Forward(const InputType&& input, OutputType&& output) - { - output = input; - for (size_t i = 0; i < input.n_elem; i++) - { - output(i) = (output(i) > maxValue ? maxValue : - (output(i) < minValue ? minValue : output(i))); - } - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -93,17 +81,7 @@ class HardTanH template void Backward(const DataType&& input, DataType&& gy, - DataType&& g) - { - g = gy; - for (size_t i = 0; i < input.n_elem; i++) - { - if (input(i) < minValue || input(i) > maxValue) - { - g(i) = 0; - } - } - } + DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -134,69 +112,9 @@ class HardTanH * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(maxValue, "maxValue"); - ar & data::CreateNVP(minValue, "minValue"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: - /** - * Computes the HardTanH function. - * - * @param x Input data. - * @return f(x). - */ - double Fn(const double x) - { - if (x > maxValue) - return maxValue; - else if (x < minValue) - return minValue; - return x; - } - - /** - * Computes the HardTanH function using a dense matrix as input. - * - * @param x Input data. - * @param y The resulting output activation. - */ - - template - void Fn(const arma::Mat& x, arma::Mat& y) - { - y = x; - y.transform( [&](eT val) { return std::min( - std::max( val, minValue ), maxValue ); } ); - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param x Input data. - * @return f'(x) - */ - double Deriv(const double x) - { - return (x > maxValue || x < minValue) ? 0 : 1; - } - - /** - * Computes the first derivative of the HardTanH function. - * - * @param y Input activations. - * @param x The resulting derivatives. - */ - template - void Deriv(const InputType&& x, OutputType& y) - { - y = x; - - for (size_t i = 0; i < x.n_elem; i++) - y(i) = Deriv(x(i)); - } - //! Locally-stored delta object. OutputDataType delta; @@ -216,4 +134,7 @@ class HardTanH } // namespace ann } // namespace mlpack +// Include implementation. +#include "hard_tanh_impl.hpp" + #endif diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp index abd7ba9c978..f874dc27334 100644 --- a/src/mlpack/methods/ann/layer/join.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -34,6 +34,7 @@ class Join { public: //! Create the Join object. +<<<<<<< HEAD <<<<<<< HEAD Join(); ======= @@ -42,6 +43,9 @@ class Join // Nothing to do here. } >>>>>>> Refactor ann layer. +======= + Join(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -51,6 +55,7 @@ class Join * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); ======= @@ -61,6 +66,9 @@ class Join output = arma::vectorise(input); } >>>>>>> Refactor ann layer. +======= + void Forward(const InputType&& input, OutputType&& output); +>>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -74,6 +82,7 @@ class Join template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -82,6 +91,9 @@ class Join g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -102,6 +114,7 @@ class Join * Serialize the layer. */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -111,6 +124,9 @@ class Join ar & data::CreateNVP(inSizeCols, "inSizeCols"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input rows. @@ -132,10 +148,16 @@ class Join } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "join_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "join_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/leaky_relu.hpp b/src/mlpack/methods/ann/layer/leaky_relu.hpp index 8e69712b7f9..d8160f1a50b 100644 --- a/src/mlpack/methods/ann/layer/leaky_relu.hpp +++ b/src/mlpack/methods/ann/layer/leaky_relu.hpp @@ -2,8 +2,8 @@ * @file leaky_relu.hpp * @author Dhawal Arora * - * Definition and implementation of LeakyReLU layer first introduced - * in the acoustic model, Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, + * Definition of LeakyReLU layer first introduced in the acoustic model, + * Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng, * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014 * * mlpack is free software; you may redistribute it and/or modify it under the @@ -51,10 +51,7 @@ class LeakyReLU * * @param alpha Non zero gradient */ - LeakyReLU(const double alpha = 0.03) : alpha(alpha) - { - // Nothing to do here. - } + LeakyReLU(const double alpha = 0.03); /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,10 +61,7 @@ class LeakyReLU * @param output Resulting output activation. */ template - void Forward(const InputType&& input, OutputType&& output) - { - Fn(input, output); - } + void Forward(const InputType&& input, OutputType&& output); /** * Ordinary feed backward pass of a neural network, calculating the function @@ -79,12 +73,7 @@ class LeakyReLU * @param g The calculated gradient. */ template - void Backward(const DataType&& input, DataType&& gy, DataType&& g) - { - DataType derivative; - Deriv(input, derivative); - g = gy % derivative; - } + void Backward(const DataType&& input, DataType&& gy, DataType&& g); //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -110,10 +99,7 @@ class LeakyReLU * Serialize the layer. */ template - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(alpha, "alpha"); - } + void Serialize(Archive& ar, const unsigned int /* version */); private: /** @@ -185,4 +171,7 @@ class LeakyReLU } // namespace ann } // namespace mlpack +// Include implementation. 
+#include "leaky_relu_impl.hpp" + #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/linear.hpp b/src/mlpack/methods/ann/layer/linear.hpp index 95e651c8c56..01320871248 100644 --- a/src/mlpack/methods/ann/layer/linear.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -37,11 +37,15 @@ class Linear { public: //! Create the Linear object. +<<<<<<< HEAD <<<<<<< HEAD Linear(); ======= Linear() {} >>>>>>> Refactor ann layer. +======= + Linear(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the Linear layer object using the specified number of units. @@ -49,6 +53,7 @@ class Linear * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD <<<<<<< HEAD Linear(const size_t inSize, const size_t outSize);; ======= @@ -59,10 +64,14 @@ class Linear weights.set_size(outSize * inSize + outSize, 1); } >>>>>>> Refactor ann layer. +======= + Linear(const size_t inSize, const size_t outSize);; +>>>>>>> Split layer modules into definition and implementation. /* * Reset the layer parameter. */ +<<<<<<< HEAD <<<<<<< HEAD void Reset(); ======= @@ -73,6 +82,9 @@ class Linear outSize, 1, false, false); } >>>>>>> Refactor ann layer. +======= + void Reset(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -82,6 +94,7 @@ class Linear * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -90,6 +103,9 @@ class Linear output = (weight * input) + bias; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -101,6 +117,7 @@ class Linear * @param g The calculated gradient. */ template +<<<<<<< HEAD <<<<<<< HEAD void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, @@ -113,6 +130,11 @@ class Linear g = weight.t() * gy; } >>>>>>> Refactor ann layer. +======= + void Backward(const arma::Mat&& /* input */, + arma::Mat&& gy, + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -124,6 +146,7 @@ class Linear template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& gradient); ======= @@ -134,6 +157,9 @@ class Linear gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& gradient); +>>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -164,6 +190,7 @@ class Linear * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -174,6 +201,9 @@ class Linear ar & data::CreateNVP(outSize, "outSize"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input units. @@ -207,10 +237,16 @@ class Linear } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "linear_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. 
+#include "linear_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp index 4e1db7966a7..17b6aba8a7c 100644 --- a/src/mlpack/methods/ann/layer/linear_no_bias.hpp +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -37,17 +37,22 @@ class LinearNoBias { public: //! Create the LinearNoBias object. +<<<<<<< HEAD <<<<<<< HEAD LinearNoBias(); ======= LinearNoBias() {} >>>>>>> Refactor ann layer. +======= + LinearNoBias(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the LinearNoBias object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD <<<<<<< HEAD LinearNoBias(const size_t inSize, const size_t outSize); ======= @@ -58,10 +63,14 @@ class LinearNoBias weights.set_size(outSize * inSize, 1); } >>>>>>> Refactor ann layer. +======= + LinearNoBias(const size_t inSize, const size_t outSize); +>>>>>>> Split layer modules into definition and implementation. /* * Reset the layer parameter. */ +<<<<<<< HEAD <<<<<<< HEAD void Reset(); ======= @@ -70,6 +79,9 @@ class LinearNoBias weight = arma::mat(weights.memptr(), outSize, inSize, false, false); } >>>>>>> Refactor ann layer. +======= + void Reset(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -79,6 +91,7 @@ class LinearNoBias * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -87,6 +100,9 @@ class LinearNoBias output = weight * input; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -100,6 +116,7 @@ class LinearNoBias template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -108,6 +125,9 @@ class LinearNoBias g = weight.t() * gy; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -119,6 +139,7 @@ class LinearNoBias template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& gradient); ======= @@ -128,6 +149,9 @@ class LinearNoBias error * input.t()); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& gradient); +>>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -158,6 +182,7 @@ class LinearNoBias * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -168,6 +193,9 @@ class LinearNoBias ar & data::CreateNVP(outSize, "outSize"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -199,10 +227,16 @@ class LinearNoBias } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "linear_no_bias_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. 
+#include "linear_no_bias_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp index 20c641fbb39..5c5f9bb72ce 100644 --- a/src/mlpack/methods/ann/layer/log_softmax.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -39,11 +39,15 @@ class LogSoftMax /** * Create the LogSoftmax object. */ +<<<<<<< HEAD <<<<<<< HEAD LogSoftMax(); ======= LogSoftMax() { /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + LogSoftMax(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -53,6 +57,7 @@ class LogSoftMax * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); ======= @@ -89,6 +94,9 @@ class LogSoftMax output = input - (maxInput + std::log(arma::accu(output))); } >>>>>>> Refactor ann layer. +======= + void Forward(const InputType&& input, OutputType&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -102,6 +110,7 @@ class LogSoftMax template void Backward(const arma::Mat&& input, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -110,6 +119,9 @@ class LogSoftMax g = gy - arma::exp(input) * arma::accu(gy); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -127,14 +139,20 @@ class LogSoftMax InputDataType& Delta() { return delta; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. OutputDataType delta; @@ -147,14 +165,20 @@ class LogSoftMax }; // class LogSoftmax <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "log_softmax_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp index 8c7c628a704..07d39161909 100644 --- a/src/mlpack/methods/ann/layer/lookup.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -42,6 +42,7 @@ class Lookup * @param inSize The number of input units. * @param outSize The number of output units. */ +<<<<<<< HEAD <<<<<<< HEAD Lookup(const size_t inSize, const size_t outSize); ======= @@ -52,6 +53,9 @@ class Lookup weights.set_size(outSize, inSize); } >>>>>>> Refactor ann layer. +======= + Lookup(const size_t inSize, const size_t outSize); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -61,6 +65,7 @@ class Lookup * @param output Resulting output activation. 
*/ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -69,6 +74,9 @@ class Lookup output = weights.cols(arma::conv_to::from(input) - 1); } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -82,6 +90,7 @@ class Lookup template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -90,6 +99,9 @@ class Lookup g = gy; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -101,6 +113,7 @@ class Lookup template void Gradient(const arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& gradient); ======= @@ -110,6 +123,9 @@ class Lookup gradient.cols(arma::conv_to::from(input) - 1) = error; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& gradient); +>>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -140,6 +156,7 @@ class Lookup * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -150,6 +167,9 @@ class Lookup ar & data::CreateNVP(outSize, "outSize"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -178,10 +198,16 @@ class Lookup } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "lookup_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "lookup_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp index bdecd1ac9f0..bc976105973 100644 --- a/src/mlpack/methods/ann/layer/lstm.hpp +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -44,11 +44,15 @@ class LSTM { public: //! Create the LSTM object. +<<<<<<< HEAD <<<<<<< HEAD LSTM(); ======= LSTM() { /* Nothing to do here */ } >>>>>>> Refactor ann layer. +======= + LSTM(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the LSTM layer object using the specified parameters. @@ -57,6 +61,7 @@ class LSTM * @param outSize The number of output units. * @param rho Maximum number of steps to backpropagate through time (BPTT). */ +<<<<<<< HEAD <<<<<<< HEAD LSTM(const size_t inSize, const size_t outSize, const size_t rho); ======= @@ -97,6 +102,9 @@ class LSTM cellActivationError = arma::zeros(outSize, 1); } >>>>>>> Refactor ann layer. +======= + LSTM(const size_t inSize, const size_t outSize, const size_t rho); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -106,6 +114,7 @@ class LSTM * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -216,6 +225,9 @@ class LSTM } } >>>>>>> Refactor ann layer. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -229,6 +241,7 @@ class LSTM template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -348,6 +361,9 @@ class LSTM g = boost::apply_visitor(deltaVisitor, input2GateModule); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -359,6 +375,7 @@ class LSTM template void Gradient(arma::Mat&& input, arma::Mat&& /* error */, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -386,6 +403,9 @@ class LSTM } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. //! The value of the deterministic parameter. bool Deterministic() const { return deterministic; } @@ -429,6 +449,7 @@ class LSTM * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -440,6 +461,9 @@ class LSTM ar & data::CreateNVP(rho, "rho"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -537,10 +561,16 @@ class LSTM } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "lstm_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "lstm_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp index 459b438c582..7d509a568cb 100644 --- a/src/mlpack/methods/ann/layer/max_pooling.hpp +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -53,6 +53,7 @@ template < class MaxPooling { public: +<<<<<<< HEAD <<<<<<< HEAD //! Create the MaxPooling object. MaxPooling(); @@ -63,6 +64,10 @@ class MaxPooling /* Nothing to do here */ } >>>>>>> Refactor ann layer. +======= + //! Create the MaxPooling object. + MaxPooling(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the MaxPooling object using the specified number of units. @@ -75,10 +80,14 @@ class MaxPooling */ MaxPooling(const size_t kW, <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); +<<<<<<< HEAD ======= const size_t kH, const size_t dW = 1, @@ -100,6 +109,8 @@ class MaxPooling /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -109,6 +120,7 @@ class MaxPooling * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -170,6 +182,9 @@ class MaxPooling outSize = slices; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -183,6 +198,7 @@ class MaxPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -205,6 +221,9 @@ class MaxPooling g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -250,6 +269,7 @@ class MaxPooling * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -261,6 +281,9 @@ class MaxPooling ar & data::CreateNVP(dH, "dH"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -392,6 +415,7 @@ class MaxPooling std::vector poolingIndices; }; // class MaxPooling +<<<<<<< HEAD <<<<<<< HEAD } // namespace ann } // namespace mlpack @@ -407,3 +431,12 @@ class MaxPooling #endif >>>>>>> Refactor ann layer. +======= +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "max_pooling_impl.hpp" + +#endif +>>>>>>> Split layer modules into definition and implementation. diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp index bc465bacf56..13ee58e3687 100644 --- a/src/mlpack/methods/ann/layer/mean_pooling.hpp +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -34,6 +34,7 @@ class MeanPooling { public: //! Create the MeanPooling object. +<<<<<<< HEAD <<<<<<< HEAD MeanPooling(); ======= @@ -42,6 +43,9 @@ class MeanPooling /* Nothing to do here */ } >>>>>>> Refactor ann layer. +======= + MeanPooling(); +>>>>>>> Split layer modules into definition and implementation. /** * Create the MeanPooling object using the specified number of units. @@ -53,10 +57,14 @@ class MeanPooling */ MeanPooling(const size_t kW, <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); +<<<<<<< HEAD ======= const size_t kH, const size_t dW = 1, @@ -79,6 +87,8 @@ class MeanPooling /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -88,6 +98,7 @@ class MeanPooling * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -127,6 +138,9 @@ class MeanPooling outSize = slices; } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -140,6 +154,7 @@ class MeanPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -159,6 +174,9 @@ class MeanPooling g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. 
InputDataType const& InputParameter() const { return inputParameter; } @@ -204,6 +222,7 @@ class MeanPooling * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -215,6 +234,9 @@ class MeanPooling ar & data::CreateNVP(dH, "dH"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: @@ -343,10 +365,16 @@ class MeanPooling } // namespace mlpack <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. // Include implementation. #include "mean_pooling_impl.hpp" #endif +<<<<<<< HEAD ======= #endif >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp index 5bd8a17d37f..b98bbd8db67 100644 --- a/src/mlpack/methods/ann/layer/mean_squared_error.hpp +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -2,11 +2,15 @@ * @file mean_squared_error.hpp * @author Marcus Edel * +<<<<<<< HEAD <<<<<<< HEAD * Definition of the mean squared error performance function. ======= * Definition and implementation of the mean squared error performance function. >>>>>>> Refactor ann layer. +======= + * Definition of the mean squared error performance function. +>>>>>>> Split layer modules into definition and implementation. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -41,11 +45,15 @@ class MeanSquaredError /** * Create the MeanSquaredError object. */ +<<<<<<< HEAD <<<<<<< HEAD MeanSquaredError(); ======= MeanSquaredError() { /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + MeanSquaredError(); +>>>>>>> Split layer modules into definition and implementation. /* * Computes the mean squared error function. @@ -54,6 +62,7 @@ class MeanSquaredError * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD double Forward(const arma::Mat&& input, const arma::Mat&& target); ======= @@ -63,6 +72,9 @@ class MeanSquaredError } >>>>>>> Refactor ann layer. +======= + double Forward(const arma::Mat&& input, const arma::Mat&& target); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. * @@ -73,6 +85,7 @@ class MeanSquaredError template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& output); ======= @@ -81,6 +94,9 @@ class MeanSquaredError output = (input - target); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -98,14 +114,20 @@ class MeanSquaredError OutputDataType& Delta() { return delta; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. 
OutputDataType delta; @@ -118,14 +140,20 @@ class MeanSquaredError }; // class MeanSquaredError <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "mean_squared_error_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp index 338d6adff7b..e4a9e83c636 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -32,6 +32,7 @@ class MultiplyConstant /** * Create the MultiplyConstant object. */ +<<<<<<< HEAD <<<<<<< HEAD MultiplyConstant(const double scalar); ======= @@ -40,6 +41,9 @@ class MultiplyConstant // Nothing to do here. } >>>>>>> Refactor ann layer. +======= + MultiplyConstant(const double scalar); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -49,6 +53,7 @@ class MultiplyConstant * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const InputType&& input, OutputType&& output); ======= @@ -57,6 +62,9 @@ class MultiplyConstant output = input * scalar; } >>>>>>> Refactor ann layer. +======= + void Forward(const InputType&& input, OutputType&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The backward pass @@ -67,6 +75,7 @@ class MultiplyConstant * @param g The calculated gradient. */ template +<<<<<<< HEAD <<<<<<< HEAD void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); ======= @@ -75,6 +84,9 @@ class MultiplyConstant g = gy * scalar; } >>>>>>> Refactor ann layer. +======= + void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -95,6 +107,7 @@ class MultiplyConstant * Serialize the layer. */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -103,6 +116,9 @@ class MultiplyConstant ar & data::CreateNVP(scalar, "scalar"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored constant scalar value. @@ -119,14 +135,20 @@ class MultiplyConstant }; // class MultiplyConstant <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "multiply_constant_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index 9bc3fb4a86d..bb662fde8aa 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -38,11 +38,15 @@ class NegativeLogLikelihood /** * Create the NegativeLogLikelihoodLayer object. 
*/ +<<<<<<< HEAD <<<<<<< HEAD NegativeLogLikelihood(); ======= NegativeLogLikelihood() { /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= + NegativeLogLikelihood(); +>>>>>>> Split layer modules into definition and implementation. /* * Computes the Negative log likelihood. @@ -51,6 +55,7 @@ class NegativeLogLikelihood * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD double Forward(const arma::Mat&& input, arma::Mat&& target); ======= @@ -70,6 +75,9 @@ class NegativeLogLikelihood return output; } >>>>>>> Refactor ann layer. +======= + double Forward(const arma::Mat&& input, arma::Mat&& target); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log @@ -85,6 +93,7 @@ class NegativeLogLikelihood template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& output); ======= @@ -101,6 +110,9 @@ class NegativeLogLikelihood } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -140,6 +152,12 @@ class NegativeLogLikelihood ======= >>>>>>> Refactor ann layer. +<<<<<<< HEAD +======= +} // namespace ann +} // namespace mlpack + +>>>>>>> Split layer modules into definition and implementation. // Include implementation. #include "negative_log_likelihood_impl.hpp" diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp index 938a90ed122..2709ab36eb7 100644 --- a/src/mlpack/methods/ann/layer/recurrent.hpp +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -56,6 +56,7 @@ class Recurrent const InputModuleType& input, const FeedbackModuleType& feedback, const TransferModuleType& transfer, +<<<<<<< HEAD <<<<<<< HEAD const size_t rho); ======= @@ -95,6 +96,9 @@ class Recurrent network.push_back(recurrentModule); } >>>>>>> Refactor ann layer. +======= + const size_t rho); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -104,6 +108,7 @@ class Recurrent * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -150,6 +155,9 @@ class Recurrent } } >>>>>>> Refactor ann layer. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -163,6 +171,7 @@ class Recurrent template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -205,6 +214,9 @@ class Recurrent backwardStep++; } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -216,6 +228,7 @@ class Recurrent template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -252,6 +265,9 @@ class Recurrent } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. //! Get the model modules. 
std::vector& Model() { return network; } @@ -290,6 +306,7 @@ class Recurrent * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -298,6 +315,9 @@ class Recurrent ar & data::CreateNVP(rho, "rho"); } >>>>>>> Refactor ann layer. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored start module. @@ -373,10 +393,16 @@ class Recurrent } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "recurrent_impl.hpp" ======= >>>>>>> Refactor ann layer. +======= +// Include implementation. +#include "recurrent_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp index 43fe7f5ba26..7bd506328d8 100644 --- a/src/mlpack/methods/ann/layer/recurrent_attention.hpp +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -62,6 +62,7 @@ class RecurrentAttention RecurrentAttention(const size_t outSize, const RNNModuleType& rnn, const ActionModuleType& action, +<<<<<<< HEAD <<<<<<< HEAD const size_t rho); ======= @@ -78,6 +79,9 @@ class RecurrentAttention network.push_back(actionModule); } >>>>>>> Refactor neural visual attention modules. +======= + const size_t rho); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -87,6 +91,7 @@ class RecurrentAttention * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -142,6 +147,9 @@ class RecurrentAttention backwardStep = 0; } >>>>>>> Refactor neural visual attention modules. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -155,6 +163,7 @@ class RecurrentAttention template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -234,6 +243,9 @@ class RecurrentAttention } } >>>>>>> Refactor neural visual attention modules. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -245,6 +257,7 @@ class RecurrentAttention template void Gradient(arma::Mat&& /* input */, arma::Mat&& /* error */, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -257,6 +270,9 @@ class RecurrentAttention std::move(attentionGradient), offset), actionModule); } >>>>>>> Refactor neural visual attention modules. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. //! Get the model modules. std::vector& Model() { return network; } @@ -295,6 +311,7 @@ class RecurrentAttention * Serialize the layer */ template +<<<<<<< HEAD <<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); ======= @@ -306,6 +323,9 @@ class RecurrentAttention ar & data::CreateNVP(backwardStep, "backwardStep"); } >>>>>>> Refactor neural visual attention modules. +======= + void Serialize(Archive& ar, const unsigned int /* version */); +>>>>>>> Split layer modules into definition and implementation. private: //! 
Calculate the gradient of the attention module. @@ -425,10 +445,16 @@ class RecurrentAttention } // namespace ann } // namespace mlpack +<<<<<<< HEAD <<<<<<< HEAD // Include implementation. #include "recurrent_attention_impl.hpp" ======= >>>>>>> Refactor neural visual attention modules. +======= +// Include implementation. +#include "recurrent_attention_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp index 0442ada4a4c..0ec6315a331 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -34,6 +34,7 @@ class ReinforceNormal * * @param stdev Standard deviation used during the forward and backward pass. */ +<<<<<<< HEAD <<<<<<< HEAD ReinforceNormal(const double stdev); ======= @@ -42,6 +43,9 @@ class ReinforceNormal // Nothing to do here. } >>>>>>> Refactor neural visual attention modules. +======= + ReinforceNormal(const double stdev); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -51,6 +55,7 @@ class ReinforceNormal * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -71,6 +76,9 @@ class ReinforceNormal } } >>>>>>> Refactor neural visual attention modules. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -82,6 +90,7 @@ class ReinforceNormal * @param g The calculated gradient. */ template +<<<<<<< HEAD <<<<<<< HEAD void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); ======= @@ -97,6 +106,9 @@ class ReinforceNormal } >>>>>>> Refactor neural visual attention modules. +======= + void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -124,14 +136,20 @@ class ReinforceNormal double& Reward() { return reward; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -156,14 +174,20 @@ class ReinforceNormal }; // class ReinforceNormal <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "reinforce_normal_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. 
#endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index debb96d291d..4edac84578e 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -2,11 +2,15 @@ * @file select.hpp * @author Marcus Edel * +<<<<<<< HEAD <<<<<<< HEAD * Definition of the Select module. ======= * Definition and implementation of the Select module. >>>>>>> Refactor ann layer. +======= + * Definition of the Select module. +>>>>>>> Split layer modules into definition and implementation. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -45,6 +49,7 @@ class Select * @param elements The number of elements that should be used. */ Select(const size_t index, const size_t elements = 0); +<<<<<<< HEAD ======= * @param index The number of elements that should be used. ======= @@ -58,6 +63,8 @@ class Select /* Nothing to do here. */ } >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -67,6 +74,7 @@ class Select * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); ======= @@ -82,6 +90,9 @@ class Select } } >>>>>>> Refactor ann layer. +======= + void Forward(const arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -95,6 +106,7 @@ class Select template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -110,6 +122,9 @@ class Select } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -127,14 +142,20 @@ class Select OutputDataType& Delta() { return delta; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored column index. size_t index; @@ -153,14 +174,20 @@ class Select }; // class Select <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "select_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp index 8164d623414..e5b81519138 100644 --- a/src/mlpack/methods/ann/layer/sequential.hpp +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -47,6 +47,7 @@ class Sequential * * @param model Expose the all network modules. */ +<<<<<<< HEAD <<<<<<< HEAD Sequential(const bool model = true); @@ -70,6 +71,12 @@ class Sequential } } >>>>>>> Refactor ann layer. +======= + Sequential(const bool model = true); + + //! Destroy the Sequential object. 
+ ~Sequential(); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -79,6 +86,7 @@ class Sequential * @param output Resulting output activation. */ template +<<<<<<< HEAD <<<<<<< HEAD void Forward(arma::Mat&& input, arma::Mat&& output); ======= @@ -141,6 +149,9 @@ class Sequential output = boost::apply_visitor(outputParameterVisitor, network.back()); } >>>>>>> Refactor ann layer. +======= + void Forward(arma::Mat&& input, arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -154,6 +165,7 @@ class Sequential template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& g); ======= @@ -176,6 +188,9 @@ class Sequential g = boost::apply_visitor(deltaVisitor, network.front()); } >>>>>>> Refactor ann layer. +======= + arma::Mat&& g); +>>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -187,6 +202,7 @@ class Sequential template void Gradient(arma::Mat&& input, arma::Mat&& error, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& /* gradient */); ======= @@ -203,6 +219,9 @@ class Sequential } } >>>>>>> Refactor ann layer. +======= + arma::Mat&& /* gradient */); +>>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -255,14 +274,20 @@ class Sequential arma::mat& Gradient() { return gradient; } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor ann layer. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -313,6 +338,7 @@ class Sequential size_t height; }; // class Sequential +<<<<<<< HEAD <<<<<<< HEAD } // namespace ann } // namespace mlpack @@ -326,4 +352,12 @@ class Sequential } // namespace mlpack >>>>>>> Refactor ann layer. +======= +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "sequential_impl.hpp" + +>>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp index a057b80e4a2..5aa9f73d382 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -40,6 +40,7 @@ class VRClassReward * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. */ +<<<<<<< HEAD <<<<<<< HEAD VRClassReward(const double scale = 1, const bool sizeAverage = true); ======= @@ -50,6 +51,9 @@ class VRClassReward // Nothing to do here. } >>>>>>> Refactor neural visual attention modules. +======= + VRClassReward(const double scale = 1, const bool sizeAverage = true); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,6 +64,7 @@ class VRClassReward * between 1 and the number of classes. 
*/ template +<<<<<<< HEAD <<<<<<< HEAD double Forward(const arma::Mat&& input, const arma::Mat&& target); ======= @@ -93,6 +98,9 @@ class VRClassReward return output - reward; } >>>>>>> Refactor neural visual attention modules. +======= + double Forward(const arma::Mat&& input, const arma::Mat&& target); +>>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log @@ -108,6 +116,7 @@ class VRClassReward template void Backward(const arma::Mat&& input, const arma::Mat&& target, +<<<<<<< HEAD <<<<<<< HEAD arma::Mat&& output); ======= @@ -135,6 +144,9 @@ class VRClassReward boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); } >>>>>>> Refactor neural visual attention modules. +======= + arma::Mat&& output); +>>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -172,14 +184,20 @@ class VRClassReward void Add(LayerTypes layer) { network.push_back(layer); } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); +<<<<<<< HEAD ======= >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored value to scale the reward. const double scale; @@ -207,14 +225,20 @@ class VRClassReward }; // class VRClassReward <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "vr_class_reward_impl.hpp" +<<<<<<< HEAD ======= }; // namespace ann }; // namespace mlpack >>>>>>> Refactor neural visual attention modules. +======= +>>>>>>> Split layer modules into definition and implementation. #endif From 552e74c3393d1c29447e37d0fb8386556230514f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 15 Dec 2016 19:52:26 +0100 Subject: [PATCH 73/82] Remove merge relics. 
--- .../convolution_rules/naive_convolution.hpp | 8 - src/mlpack/methods/ann/layer/add.hpp | 64 ---- src/mlpack/methods/ann/layer/add_merge.hpp | 58 --- src/mlpack/methods/ann/layer/concat.hpp | 164 --------- .../methods/ann/layer/concat_performance.hpp | 83 ----- src/mlpack/methods/ann/layer/constant.hpp | 65 ---- src/mlpack/methods/ann/layer/convolution.hpp | 252 ------------- src/mlpack/methods/ann/layer/dropconnect.hpp | 125 ------- src/mlpack/methods/ann/layer/dropout.hpp | 78 ---- src/mlpack/methods/ann/layer/glimpse.hpp | 215 ----------- src/mlpack/methods/ann/layer/join.hpp | 56 --- src/mlpack/methods/ann/layer/linear.hpp | 95 ----- .../methods/ann/layer/linear_no_bias.hpp | 88 ----- src/mlpack/methods/ann/layer/log_softmax.hpp | 78 ---- src/mlpack/methods/ann/layer/lookup.hpp | 69 ---- src/mlpack/methods/ann/layer/lstm.hpp | 339 ------------------ src/mlpack/methods/ann/layer/max_pooling.hpp | 162 --------- src/mlpack/methods/ann/layer/mean_pooling.hpp | 128 ------- .../methods/ann/layer/mean_squared_error.hpp | 59 --- .../methods/ann/layer/multiply_constant.hpp | 55 --- .../ann/layer/negative_log_likelihood.hpp | 58 --- src/mlpack/methods/ann/layer/recurrent.hpp | 195 ---------- .../methods/ann/layer/recurrent_attention.hpp | 197 ---------- .../methods/ann/layer/reinforce_normal.hpp | 72 ---- src/mlpack/methods/ann/layer/select.hpp | 82 ----- src/mlpack/methods/ann/layer/sequential.hpp | 160 --------- .../methods/ann/layer/vr_class_reward.hpp | 99 ----- src/mlpack/tests/rmsprop_test.cpp | 53 --- 28 files changed, 3157 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp index c1f49ab1494..c90574293ba 100644 --- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp @@ -104,15 +104,7 @@ class NaiveConvolution filter.n_cols - 1 + input.n_cols - 1) = input; NaiveConvolution::Convolution(inputPadded, filter, -<<<<<<< HEAD -<<<<<<< HEAD output, 1, 1); -======= - output, dW, dH); ->>>>>>> Use the stride parameter inside the convolution function. -======= - output, 1, 1); ->>>>>>> Remove stride paramater from svd and fft convolution rule. } /* diff --git a/src/mlpack/methods/ann/layer/add.hpp b/src/mlpack/methods/ann/layer/add.hpp index 88167abc244..1afb1121300 100644 --- a/src/mlpack/methods/ann/layer/add.hpp +++ b/src/mlpack/methods/ann/layer/add.hpp @@ -39,18 +39,7 @@ class Add * * @param outSize The number of output units. */ -<<<<<<< HEAD -<<<<<<< HEAD Add(const size_t outSize); -======= - Add(const size_t outSize) : outSize(outSize) - { - weights.set_size(outSize, 1); - } ->>>>>>> Refactor ann layer. -======= - Add(const size_t outSize); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -60,18 +49,7 @@ class Add * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = input + weights; - } ->>>>>>> Refactor ann layer. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -85,18 +63,7 @@ class Add template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - g = gy; - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -108,18 +75,7 @@ class Add template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& gradient); -======= - arma::Mat&& gradient) - { - gradient = error; - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& gradient); ->>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -150,18 +106,7 @@ class Add * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of output units. @@ -186,16 +131,7 @@ class Add } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "add_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "add_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/add_merge.hpp b/src/mlpack/methods/ann/layer/add_merge.hpp index 94ee6d36814..222c3ef1a2f 100644 --- a/src/mlpack/methods/ann/layer/add_merge.hpp +++ b/src/mlpack/methods/ann/layer/add_merge.hpp @@ -38,18 +38,7 @@ class AddMerge { public: //! Create the AddMerge object. -<<<<<<< HEAD -<<<<<<< HEAD AddMerge(); -======= - AddMerge() - { - // Nothing to do here. - } ->>>>>>> Refactor ann layer. -======= - AddMerge(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -59,23 +48,7 @@ class AddMerge * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const InputType&& /* input */, OutputType&& output); -======= - void Forward(const InputType&& /* input */, OutputType&& output) - { - output = boost::apply_visitor(outputParameterVisitor, network.front()); - - for (size_t i = 1; i < network.size(); ++i) - { - output += boost::apply_visitor(outputParameterVisitor, network[i]); - } - } ->>>>>>> Refactor ann layer. -======= void Forward(const InputType&& /* input */, OutputType&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -89,18 +62,7 @@ class AddMerge template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - g = gy; - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -144,18 +106,7 @@ class AddMerge * Serialize the layer. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(network, "network"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: std::vector network; @@ -182,16 +133,7 @@ class AddMerge } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "add_merge_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "add_merge_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/concat.hpp b/src/mlpack/methods/ann/layer/concat.hpp index d92e11c8f10..808309f7122 100644 --- a/src/mlpack/methods/ann/layer/concat.hpp +++ b/src/mlpack/methods/ann/layer/concat.hpp @@ -45,20 +45,7 @@ class Concat * @param model Expose all network modules. * @param same Merge the error in the backward pass. */ -<<<<<<< HEAD -<<<<<<< HEAD Concat(const bool model = true, const bool same = true); -======= - Concat(const bool model = true, const bool same = true) : - model(model), - same(same) - { - parameters.set_size(0, 0); - } ->>>>>>> Refactor ann layer. -======= - Concat(const bool model = true, const bool same = true); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -68,50 +55,7 @@ class Concat * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - size_t outSize = 0; - - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (boost::apply_visitor( - outputParameterVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - } - } - - output = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - if (elements < outSize) - { - output.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(outputParameterVisitor, network[i])); - } - else - { - output.col(i) = arma::vectorise(boost::apply_visitor( - outputParameterVisitor, network[i])); - } - } - } ->>>>>>> Refactor ann layer. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -125,75 +69,7 @@ class Concat template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - size_t outSize = 0; - size_t elements = 0; - - for (size_t i = 0, j = 0; i < network.size(); ++i, j += elements) - { - elements = boost::apply_visitor(outputParameterVisitor, - network[i]).n_elem; - - arma::mat delta; - if (gy.n_cols == 1) - { - delta = gy.submat(j, 0, j + elements - 1, 0); - } - else - { - delta = gy.submat(0, i, elements - 1, i); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(delta), std::move( - boost::apply_visitor(deltaVisitor, network[i]))), network[i]); - - if (boost::apply_visitor(deltaVisitor, network[i]).n_elem > outSize) - { - outSize = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - } - - if (same) - { - if (i == 0) - { - g = std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g += std::move(boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - - if (!same) - { - g = arma::zeros(outSize, network.size()); - for (size_t i = 0; i < network.size(); ++i) - { - size_t elements = boost::apply_visitor(deltaVisitor, network[i]).n_elem; - if (elements < outSize) - { - g.submat(0, i, elements - 1, i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - else - { - g.col(i) = arma::vectorise( - boost::apply_visitor(deltaVisitor, network[i])); - } - } - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -205,22 +81,7 @@ class Concat template void Gradient(arma::Mat&& /* input */, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - for (size_t i = 0; i < network.size(); ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i])), std::move(error)), network[i]); - } - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -272,21 +133,12 @@ class Concat //! Modify the gradient. arma::mat& Gradient() { return gradient; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -325,26 +177,10 @@ class Concat arma::mat gradient; }; // class Concat -<<<<<<< HEAD -<<<<<<< HEAD -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "concat_impl.hpp" - -======= - -} // namespace ann -} // namespace mlpack - ->>>>>>> Refactor ann layer. -======= } // namespace ann } // namespace mlpack // Include implementation. #include "concat_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. 
#endif diff --git a/src/mlpack/methods/ann/layer/concat_performance.hpp b/src/mlpack/methods/ann/layer/concat_performance.hpp index f663b12791b..39b7e10f126 100644 --- a/src/mlpack/methods/ann/layer/concat_performance.hpp +++ b/src/mlpack/methods/ann/layer/concat_performance.hpp @@ -47,20 +47,7 @@ class ConcatPerformance * @param outputLayer Output layer used to evaluate the network. */ ConcatPerformance(const size_t inSize, -<<<<<<< HEAD -<<<<<<< HEAD OutputLayerType&& outputLayer = OutputLayerType()); -======= - OutputLayerType&& outputLayer = OutputLayerType()) : - inSize(inSize), - outputLayer(std::move(outputLayer)) - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= - OutputLayerType&& outputLayer = OutputLayerType()); ->>>>>>> Split layer modules into definition and implementation. /* * Computes the Negative log likelihood. @@ -69,28 +56,7 @@ class ConcatPerformance * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - double Forward(const arma::Mat&& input, arma::Mat&& target); -======= - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - const size_t elements = input.n_elem / inSize; - - double output = 0; - for (size_t i = 0; i < input.n_elem; i+= elements) - { - arma::mat subInput = input.submat(i, 0, i + elements - 1, 0); - output += outputLayer.Forward(std::move(subInput), std::move(target)); - } - - return output; - } - ->>>>>>> Refactor ann layer. -======= double Forward(const arma::Mat&& input, arma::Mat&& target); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log * likelihood layer expectes that the input contains log-probabilities for @@ -105,36 +71,7 @@ class ConcatPerformance template void Backward(const arma::Mat&& input, const arma::Mat&& target, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& output); -======= - arma::Mat&& output) - { - const size_t elements = input.n_elem / inSize; - - arma::mat subInput = input.submat(0, 0, elements - 1, 0); - arma::mat subOutput; - - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output = arma::zeros(subOutput.n_elem, inSize); - output.col(0) = subOutput; - - for (size_t i = elements, j = 0; i < input.n_elem; i+= elements, j++) - { - subInput = input.submat(i, 0, i + elements - 1, 0); - outputLayer.Backward(std::move(subInput), std::move(target), - std::move(subOutput)); - - output.col(j) = subOutput; - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -151,21 +88,12 @@ class ConcatPerformance //! Modify the delta. OutputDataType& Delta() { return delta; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of inputs. size_t inSize; @@ -183,21 +111,10 @@ class ConcatPerformance OutputDataType outputParameter; }; // class ConcatPerformance -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. 
#include "concat_performance_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/constant.hpp b/src/mlpack/methods/ann/layer/constant.hpp index 6390e30ef0b..b24b44aa802 100644 --- a/src/mlpack/methods/ann/layer/constant.hpp +++ b/src/mlpack/methods/ann/layer/constant.hpp @@ -41,21 +41,7 @@ class Constant * @param outSize The number of output units. * @param scalar The constant value used to create the constant output. */ -<<<<<<< HEAD -<<<<<<< HEAD Constant(const size_t outSize, const double scalar); -======= - Constant(const size_t outSize, const double scalar) : - inSize(0), - outSize(outSize) - { - constantOutput = OutputDataType(outSize, 1); - constantOutput.fill(scalar); - } ->>>>>>> Refactor ann layer. -======= - Constant(const size_t outSize, const double scalar); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network. The forward pass fills the @@ -65,23 +51,7 @@ class Constant * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const InputType&& input, OutputType&& output); -======= - void Forward(const InputType&& input, OutputType&& output) - { - if (inSize == 0) - { - inSize = input.n_elem; - } - - output = constantOutput; - } ->>>>>>> Refactor ann layer. -======= void Forward(const InputType&& input, OutputType&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The backward pass of the @@ -92,22 +62,9 @@ class Constant * @param g The calculated gradient. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Backward(const DataType&& /* input */, - DataType&& /* gy */, - DataType&& g); -======= - void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g) - { - g = arma::zeros(inSize, 1); - } ->>>>>>> Refactor ann layer. -======= void Backward(const DataType&& /* input */, DataType&& /* gy */, DataType&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -128,18 +85,7 @@ class Constant * Serialize the layer. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(constantOutput, "constantOutput"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input units. @@ -161,21 +107,10 @@ class Constant OutputDataType outputParameter; }; // class ConstantLayer -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "constant_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. 
#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/convolution.hpp b/src/mlpack/methods/ann/layer/convolution.hpp index df7affe61e5..a7f647ed226 100644 --- a/src/mlpack/methods/ann/layer/convolution.hpp +++ b/src/mlpack/methods/ann/layer/convolution.hpp @@ -47,18 +47,7 @@ class Convolution { public: //! Create the Convolution object. -<<<<<<< HEAD -<<<<<<< HEAD Convolution(); -======= - Convolution() - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= - Convolution(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the Convolution object using the specified number of input maps, @@ -84,49 +73,12 @@ class Convolution const size_t padW = 0, const size_t padH = 0, const size_t inputWidth = 0, -<<<<<<< HEAD -<<<<<<< HEAD const size_t inputHeight = 0); -======= - const size_t inputHeight = 0) : - inSize(inSize), - outSize(outSize), - kW(kW), - kH(kH), - dW(dW), - dH(dH), - padW(padW), - padH(padH), - inputWidth(inputWidth), - inputHeight(inputHeight), - outputWidth(0), - outputHeight(0) - { - weights.set_size((outSize * inSize * kW * kH) + outSize, 1); - } ->>>>>>> Refactor ann layer. -======= - const size_t inputHeight = 0); ->>>>>>> Split layer modules into definition and implementation. /* * Set the weight and bias term. */ -<<<<<<< HEAD -<<<<<<< HEAD - void Reset(); -======= - void Reset() - { - weight = arma::cube(weights.memptr(), kW, kH, - outSize * inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } ->>>>>>> Refactor ann layer. -======= void Reset(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -136,56 +88,7 @@ class Convolution * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, inSize); - - if (padW != 0 || padH != 0) - { - Pad(inputTemp, padW, padH, inputPaddedTemp); - } - - size_t wConv = ConvOutSize(inputWidth, kW, dW, padW); - size_t hConv = ConvOutSize(inputHeight, kH, dH, padH); - - outputTemp = arma::zeros >(wConv, hConv, outSize); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat convOutput; - - if (padW != 0 || padH != 0) - { - ForwardConvolutionRule::Convolution(inputPaddedTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - else - { - ForwardConvolutionRule::Convolution(inputTemp.slice(inMap), - weight.slice(outMapIdx), convOutput, dW, dH); - } - - outputTemp.slice(outMap) += convOutput; - } - - outputTemp.slice(outMap) += bias(outMap); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } ->>>>>>> Refactor ann layer. -======= - void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -199,48 +102,7 @@ class Convolution template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), - outputWidth, outputHeight, outSize); - gTemp = arma::zeros >(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0; inMap < inSize; inMap++, outMapIdx++) - { - arma::Mat rotatedFilter; - Rotate180(weight.slice(outMapIdx), rotatedFilter); - - arma::Mat output; - BackwardConvolutionRule::Convolution(mappedError.slice(outMap), - rotatedFilter, output, dW, dH); - - if (padW != 0 || padH != 0) - { - gTemp.slice(inMap) += output.submat(rotatedFilter.n_rows / 2, - rotatedFilter.n_cols / 2, - rotatedFilter.n_rows / 2 + gTemp.n_rows - 1, - rotatedFilter.n_cols / 2 + gTemp.n_cols - 1); - } - else - { - gTemp.slice(inMap) += output; - } - } - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -252,82 +114,7 @@ class Convolution template void Gradient(const arma::Mat&& /* input */, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& gradient); -======= - arma::Mat&& gradient) - { - arma::cube mappedError; - if (padW != 0 && padH != 0) - { - mappedError = arma::cube(error.memptr(), outputWidth / padW, - outputHeight / padH, outSize); - } - else - { - mappedError = arma::cube(error.memptr(), outputWidth, - outputHeight, outSize); - } - - gradientTemp = arma::zeros >(weight.n_rows, weight.n_cols, - weight.n_slices); - - for (size_t outMap = 0, outMapIdx = 0; outMap < outSize; outMap++) - { - for (size_t inMap = 0, s = outMap; inMap < inSize; inMap++, outMapIdx++, - s += outSize) - { - arma::Cube inputSlices; - if (padW != 0 || padH != 0) - { - inputSlices = inputPaddedTemp.slices(inMap, inMap); - } - else - { - inputSlices = inputTemp.slices(inMap, inMap); - } - - arma::Cube deltaSlices = mappedError.slices(outMap, outMap); - - arma::Cube output; - GradientConvolutionRule::Convolution(inputSlices, deltaSlices, - output, dW, dH); - - if ((padW != 0 || padH != 0) && - (gradientTemp.n_rows < output.n_rows && - gradientTemp.n_cols < output.n_cols)) - { - for (size_t i = 0; i < output.n_slices; i++) - { - arma::mat subOutput = output.slice(i); - - gradientTemp.slice(s) += subOutput.submat(subOutput.n_rows / 2, - subOutput.n_cols / 2, - subOutput.n_rows / 2 + gradientTemp.n_rows - 1, - subOutput.n_cols / 2 + gradientTemp.n_cols - 1); - } - } - else - { - for (size_t i = 0; i < output.n_slices; i++) - { - gradientTemp.slice(s) += output.slice(i); - } - } - } - - gradient.submat(weight.n_elem + outMap, 0, - weight.n_elem + outMap, 0) = arma::accu(mappedError.slices( - outMap, outMap)); - } - - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& gradient); ->>>>>>> Split layer modules into definition and implementation. //! Get the parameters. 
OutputDataType const& Parameters() const { return weights; } @@ -378,30 +165,7 @@ class Convolution * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - ar & data::CreateNVP(padW, "padW"); - ar & data::CreateNVP(padH, "padH"); - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inputWidth, "inputWidth"); - ar & data::CreateNVP(inputHeight, "inputHeight"); - ar & data::CreateNVP(outputWidth, "outputWidth"); - ar & data::CreateNVP(outputHeight, "outputHeight"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -571,26 +335,10 @@ class Convolution OutputDataType outputParameter; }; // class Convolution -<<<<<<< HEAD -<<<<<<< HEAD -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "convolution_impl.hpp" - -======= - -} // namespace ann -} // namespace mlpack - ->>>>>>> Refactor ann layer. -======= } // namespace ann } // namespace mlpack // Include implementation. #include "convolution_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/dropconnect.hpp b/src/mlpack/methods/ann/layer/dropconnect.hpp index cab53be645b..f74c8e6aaac 100644 --- a/src/mlpack/methods/ann/layer/dropconnect.hpp +++ b/src/mlpack/methods/ann/layer/dropconnect.hpp @@ -63,18 +63,7 @@ class DropConnect { public: //! Create the DropConnect object. -<<<<<<< HEAD -<<<<<<< HEAD DropConnect(); -======= - DropConnect() - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= - DropConnect(); ->>>>>>> Split layer modules into definition and implementation. /** * Creates the DropConnect Layer as a Linear Object that takes input size, @@ -86,30 +75,9 @@ class DropConnect */ DropConnect(const size_t inSize, const size_t outSize, -<<<<<<< HEAD -<<<<<<< HEAD - const double ratio = 0.5); - - ~DropConnect(); -======= - const double ratio = 0.5) : - ratio(ratio), - scale(1.0 / (1 - ratio)), - baseLayer(new Linear(inSize, outSize)) - { - network.push_back(baseLayer); - } - - ~DropConnect() - { - boost::apply_visitor(DeleteVisitor(), baseLayer); - } ->>>>>>> Refactor ann layer. -======= const double ratio = 0.5); ~DropConnect(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the DropConnect layer. @@ -118,50 +86,7 @@ class DropConnect * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // The DropConnect mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - } - else - { - // Save weights for denoising. - boost::apply_visitor(ParametersVisitor(std::move(denoise)), baseLayer); - - // Scale with input / (1 - ratio) and set values to zero with - // probability ratio. 
- mask = arma::randu >(denoise.n_rows, denoise.n_cols); - mask.transform([&](double val) { return (val > ratio); }); - - boost::apply_visitor(ParametersSetVisitor(std::move(denoise % mask)), - baseLayer); - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(output) - ), - baseLayer); - - output = output * scale; - } - } ->>>>>>> Refactor ann layer. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the DropConnect layer. @@ -173,24 +98,7 @@ class DropConnect template void Backward(arma::Mat&& input, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - boost::apply_visitor( - BackwardVisitor( - std::move(input), - std::move(gy), - std::move(g) - ), - baseLayer); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /** * Calculate the gradient using the output delta and the input activation. @@ -202,22 +110,7 @@ class DropConnect template void Gradient(arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - baseLayer); - - // Denoise the weights. - boost::apply_visitor(ParametersSetVisitor(std::move(denoise)), baseLayer); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. //! Get the model modules. std::vector& Model() { return network; } @@ -263,21 +156,12 @@ class DropConnect scale = 1.0 / (1.0 - ratio); } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! The probability of setting a value to zero. double ratio; @@ -319,16 +203,7 @@ class DropConnect } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "dropconnect_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "dropconnect_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/dropout.hpp b/src/mlpack/methods/ann/layer/dropout.hpp index dda2b8192ee..4371f21dad3 100644 --- a/src/mlpack/methods/ann/layer/dropout.hpp +++ b/src/mlpack/methods/ann/layer/dropout.hpp @@ -61,22 +61,7 @@ class Dropout * @param ratio The probability of setting a value to zero. * @param rescale If true the input is rescaled when deterministic is False. */ -<<<<<<< HEAD -<<<<<<< HEAD Dropout(const double ratio = 0.5, const bool rescale = true); -======= - Dropout(const double ratio = 0.5, - const bool rescale = true) : - ratio(ratio), - scale(1.0 / (1.0 - ratio)), - rescale(rescale) - { - // Nothing to do here. - } ->>>>>>> Refactor ann layer. -======= - Dropout(const double ratio = 0.5, const bool rescale = true); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the dropout layer. @@ -85,38 +70,7 @@ class Dropout * @param output Resulting output activation. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - // The dropout mask will not be multiplied in the deterministic mode - // (during testing). - if (deterministic) - { - if (!rescale) - { - output = input; - } - else - { - output = input * scale; - } - } - else - { - // Scale with input / (1 - ratio) and set values to zero with probability - // ratio. - mask = arma::randu >(input.n_rows, input.n_cols); - mask.transform( [&](double val) { return (val > ratio); } ); - output = input % mask * scale; - } - } ->>>>>>> Refactor ann layer. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the dropout layer. @@ -128,18 +82,7 @@ class Dropout template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - g = gy % mask * scale; - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -180,19 +123,7 @@ class Dropout * Serialize the layer. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(ratio, "ratio"); - ar & data::CreateNVP(rescale, "rescale"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. @@ -223,16 +154,7 @@ class Dropout } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "dropout_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "dropout_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/glimpse.hpp b/src/mlpack/methods/ann/layer/glimpse.hpp index c86215e3a28..e007fae30b9 100644 --- a/src/mlpack/methods/ann/layer/glimpse.hpp +++ b/src/mlpack/methods/ann/layer/glimpse.hpp @@ -100,24 +100,7 @@ class Glimpse const size_t depth = 3, const size_t scale = 2, const size_t inputWidth = 0, -<<<<<<< HEAD -<<<<<<< HEAD const size_t inputHeight = 0); -======= - const size_t inputHeight = 0) : - inSize(inSize), - size(size), - depth(depth), - scale(scale), - inputWidth(inputWidth), - inputHeight(inputHeight) - { - // Nothing to do here. - } ->>>>>>> Refactor neural visual attention modules. -======= - const size_t inputHeight = 0); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of the glimpse layer. @@ -126,94 +109,7 @@ class Glimpse * @param output Resulting output activation. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - inputTemp = arma::cube(input.colptr(0), inputWidth, inputHeight, inSize); - outputTemp = arma::Cube(size, size, depth * inputTemp.n_slices); - - location = input.submat(0, 1, 1, 1); - - if (!deterministic) - { - locationParameter.push_back(location); - } - - inputDepth = inputTemp.n_slices / inSize; - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + padSize * 2, - inputTemp.n_slices / inSize); - - inputPadded.tube(padSize, padSize, padSize + inputTemp.n_rows - 1, - padSize + inputTemp.n_cols - 1) = inputTemp.subcube(0, 0, - inputIdx * inputDepth, inputTemp.n_rows - 1, inputTemp.n_cols - 1, - (inputIdx + 1) * inputDepth - 1); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - outputTemp.slice(j) = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < outputTemp.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingInput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Pooling(glimpseSize / size, poolingInput, outputTemp.slice(j)); - } - else - { - ReSampling(poolingInput, outputTemp.slice(j)); - } - } - } - } - } - - for (size_t i = 0; i < outputTemp.n_slices; ++i) - { - outputTemp.slice(i) = arma::trans(outputTemp.slice(i)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - } ->>>>>>> Refactor neural visual attention modules. -======= - void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of the glimpse layer. @@ -225,98 +121,7 @@ class Glimpse template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - // Generate a cube using the backpropagated error matrix. 
- arma::Cube mappedError = arma::zeros(outputWidth, - outputHeight, 1); - - location = locationParameter.back(); - locationParameter.pop_back(); - - for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++) - { - for (size_t i = 0; i < gy.n_cols; i++) - { - mappedError.slice(s + i) = arma::Mat(gy.memptr(), - outputWidth, outputHeight); - } - } - - gTemp = arma::zeros(inputTemp.n_rows, inputTemp.n_cols, - inputTemp.n_slices); - - for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++) - { - for (size_t depthIdx = 0, glimpseSize = size; - depthIdx < depth; depthIdx++, glimpseSize *= scale) - { - size_t padSize = std::floor((glimpseSize - 1) / 2); - - arma::Cube inputPadded = arma::zeros >( - inputTemp.n_rows + padSize * 2, inputTemp.n_cols + - padSize * 2, inputTemp.n_slices / inSize); - - size_t h = inputPadded.n_rows - glimpseSize; - size_t w = inputPadded.n_cols - glimpseSize; - - size_t x = std::min(h, (size_t) std::max(0.0, - (location(0, inputIdx) + 1) / 2.0 * h)); - size_t y = std::min(w, (size_t) std::max(0.0, - (location(1, inputIdx) + 1) / 2.0 * w)); - - if (depthIdx == 0) - { - for (size_t j = (inputIdx + depthIdx), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = mappedError.slice(j); - } - } - else - { - for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0; - j < mappedError.n_slices; j += (inSize * depth), paddedSlice++) - { - arma::Mat poolingOutput = inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice); - - if (scale == 2) - { - Unpooling(inputTemp.slice(paddedSlice), mappedError.slice(j), - poolingOutput); - } - else - { - DownwardReSampling(inputTemp.slice(paddedSlice), - mappedError.slice(j), poolingOutput); - } - - inputPadded.subcube(x, y, - paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1, - paddedSlice) = poolingOutput; - } - } - - gTemp += inputPadded.tube(padSize, padSize, padSize + - inputTemp.n_rows - 1, padSize + inputTemp.n_cols - 1); - } - } - - Transform(gTemp); - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } ->>>>>>> Refactor neural visual attention modules. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -365,21 +170,12 @@ class Glimpse //! Modify the value of the deterministic parameter. bool& Deterministic() { return deterministic; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& ar, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. private: /* * Transform the given input by changing rows to columns. @@ -625,21 +421,10 @@ class Glimpse bool deterministic; }; // class GlimpseLayer -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "glimpse_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. 
#endif \ No newline at end of file diff --git a/src/mlpack/methods/ann/layer/join.hpp b/src/mlpack/methods/ann/layer/join.hpp index f874dc27334..bda31d787ab 100644 --- a/src/mlpack/methods/ann/layer/join.hpp +++ b/src/mlpack/methods/ann/layer/join.hpp @@ -34,18 +34,7 @@ class Join { public: //! Create the Join object. -<<<<<<< HEAD -<<<<<<< HEAD Join(); -======= - Join() - { - // Nothing to do here. - } ->>>>>>> Refactor ann layer. -======= - Join(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -55,20 +44,7 @@ class Join * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const InputType&& input, OutputType&& output); -======= - void Forward(const InputType&& input, OutputType&& output) - { - inSizeRows = input.n_rows; - inSizeCols = input.n_cols; - output = arma::vectorise(input); - } ->>>>>>> Refactor ann layer. -======= void Forward(const InputType&& input, OutputType&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -82,18 +58,7 @@ class Join template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - g = arma::mat(gy.memptr(), inSizeRows, inSizeCols, false, false); - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -114,19 +79,7 @@ class Join * Serialize the layer. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(inSizeRows, "inSizeRows"); - ar & data::CreateNVP(inSizeCols, "inSizeCols"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input rows. @@ -148,16 +101,7 @@ class Join } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "join_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "join_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/linear.hpp b/src/mlpack/methods/ann/layer/linear.hpp index 01320871248..0f6be78b647 100644 --- a/src/mlpack/methods/ann/layer/linear.hpp +++ b/src/mlpack/methods/ann/layer/linear.hpp @@ -37,15 +37,7 @@ class Linear { public: //! Create the Linear object. -<<<<<<< HEAD -<<<<<<< HEAD Linear(); -======= - Linear() {} ->>>>>>> Refactor ann layer. -======= - Linear(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the Linear layer object using the specified number of units. @@ -53,38 +45,12 @@ class Linear * @param inSize The number of input units. * @param outSize The number of output units. */ -<<<<<<< HEAD -<<<<<<< HEAD - Linear(const size_t inSize, const size_t outSize);; -======= - Linear(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize + outSize, 1); - } ->>>>>>> Refactor ann layer. 
-======= Linear(const size_t inSize, const size_t outSize);; ->>>>>>> Split layer modules into definition and implementation. /* * Reset the layer parameter. */ -<<<<<<< HEAD -<<<<<<< HEAD void Reset(); -======= - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - bias = arma::mat(weights.memptr() + weight.n_elem, - outSize, 1, false, false); - } ->>>>>>> Refactor ann layer. -======= - void Reset(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -94,18 +60,7 @@ class Linear * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = (weight * input) + bias; - } ->>>>>>> Refactor ann layer. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -117,24 +72,9 @@ class Linear * @param g The calculated gradient. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, arma::Mat&& g); -======= - void Backward(const arma::Mat&& /* unused */, - arma::Mat&& gy, - arma::Mat&& g) - { - g = weight.t() * gy; - } ->>>>>>> Refactor ann layer. -======= - void Backward(const arma::Mat&& /* input */, - arma::Mat&& gy, - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -146,20 +86,7 @@ class Linear template void Gradient(const arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& gradient); -======= - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - gradient.submat(weight.n_elem, 0, gradient.n_elem - 1, 0) = error; - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& gradient); ->>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -190,20 +117,7 @@ class Linear * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored number of input units. @@ -237,16 +151,7 @@ class Linear } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "linear_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "linear_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/linear_no_bias.hpp b/src/mlpack/methods/ann/layer/linear_no_bias.hpp index 17b6aba8a7c..972f72db8ec 100644 --- a/src/mlpack/methods/ann/layer/linear_no_bias.hpp +++ b/src/mlpack/methods/ann/layer/linear_no_bias.hpp @@ -37,51 +37,19 @@ class LinearNoBias { public: //! Create the LinearNoBias object. 
-<<<<<<< HEAD -<<<<<<< HEAD LinearNoBias(); -======= - LinearNoBias() {} ->>>>>>> Refactor ann layer. -======= - LinearNoBias(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the LinearNoBias object using the specified number of units. * * @param inSize The number of input units. * @param outSize The number of output units. */ -<<<<<<< HEAD -<<<<<<< HEAD - LinearNoBias(const size_t inSize, const size_t outSize); -======= - LinearNoBias(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize * inSize, 1); - } ->>>>>>> Refactor ann layer. -======= LinearNoBias(const size_t inSize, const size_t outSize); ->>>>>>> Split layer modules into definition and implementation. /* * Reset the layer parameter. */ -<<<<<<< HEAD -<<<<<<< HEAD - void Reset(); -======= - void Reset() - { - weight = arma::mat(weights.memptr(), outSize, inSize, false, false); - } ->>>>>>> Refactor ann layer. -======= void Reset(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -91,18 +59,7 @@ class LinearNoBias * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weight * input; - } ->>>>>>> Refactor ann layer. -======= - void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -116,18 +73,7 @@ class LinearNoBias template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - g = weight.t() * gy; - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -139,19 +85,7 @@ class LinearNoBias template void Gradient(const arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& gradient); -======= - arma::Mat&& gradient) - { - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise( - error * input.t()); - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& gradient); ->>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -182,20 +116,7 @@ class LinearNoBias * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -227,16 +148,7 @@ class LinearNoBias } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "linear_no_bias_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "linear_no_bias_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. 
#endif diff --git a/src/mlpack/methods/ann/layer/log_softmax.hpp b/src/mlpack/methods/ann/layer/log_softmax.hpp index 5c5f9bb72ce..df9872d6c8a 100644 --- a/src/mlpack/methods/ann/layer/log_softmax.hpp +++ b/src/mlpack/methods/ann/layer/log_softmax.hpp @@ -39,15 +39,7 @@ class LogSoftMax /** * Create the LogSoftmax object. */ -<<<<<<< HEAD -<<<<<<< HEAD LogSoftMax(); -======= - LogSoftMax() { /* Nothing to do here. */ } ->>>>>>> Refactor ann layer. -======= - LogSoftMax(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -57,46 +49,7 @@ class LogSoftMax * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const InputType&& input, OutputType&& output); -======= - void Forward(const InputType&& input, OutputType&& output) - { - arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1); - output = (maxInput - input); - - // Approximation of the hyperbolic tangent. The acuracy however is - // about 0.00001 lower as using tanh. Credits go to Leon Bottou. - output.transform( [](double x) - { - //! Fast approximation of exp(-x) for x positive. - static constexpr double A0 = 1.0; - static constexpr double A1 = 0.125; - static constexpr double A2 = 0.0078125; - static constexpr double A3 = 0.00032552083; - static constexpr double A4 = 1.0172526e-5; - - if (x < 13.0) - { - double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4))); - y *= y; - y *= y; - y *= y; - y = 1 / y; - - return y; - } - - return 0.0; - } ); - - output = input - (maxInput + std::log(arma::accu(output))); - } ->>>>>>> Refactor ann layer. -======= void Forward(const InputType&& input, OutputType&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -110,18 +63,7 @@ class LogSoftMax template void Backward(const arma::Mat&& input, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - g = gy - arma::exp(input) * arma::accu(gy); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -138,21 +80,12 @@ class LogSoftMax //! Modify the delta. InputDataType& Delta() { return delta; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer. */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. OutputDataType delta; @@ -164,21 +97,10 @@ class LogSoftMax OutputDataType outputParameter; }; // class LogSoftmax -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "log_softmax_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. 
#endif diff --git a/src/mlpack/methods/ann/layer/lookup.hpp b/src/mlpack/methods/ann/layer/lookup.hpp index 07d39161909..080d4b5746e 100644 --- a/src/mlpack/methods/ann/layer/lookup.hpp +++ b/src/mlpack/methods/ann/layer/lookup.hpp @@ -42,20 +42,7 @@ class Lookup * @param inSize The number of input units. * @param outSize The number of output units. */ -<<<<<<< HEAD -<<<<<<< HEAD Lookup(const size_t inSize, const size_t outSize); -======= - Lookup(const size_t inSize, const size_t outSize) : - inSize(inSize), - outSize(outSize) - { - weights.set_size(outSize, inSize); - } ->>>>>>> Refactor ann layer. -======= - Lookup(const size_t inSize, const size_t outSize); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -65,18 +52,7 @@ class Lookup * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - output = weights.cols(arma::conv_to::from(input) - 1); - } ->>>>>>> Refactor ann layer. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -90,18 +66,7 @@ class Lookup template void Backward(const arma::Mat&& /* input */, const arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - g = gy; - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -113,19 +78,7 @@ class Lookup template void Gradient(const arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& gradient); -======= - arma::Mat&& gradient) - { - gradient = arma::zeros >(weights.n_rows, weights.n_cols); - gradient.cols(arma::conv_to::from(input) - 1) = error; - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& gradient); ->>>>>>> Split layer modules into definition and implementation. //! Get the parameters. OutputDataType const& Parameters() const { return weights; } @@ -156,20 +109,7 @@ class Lookup * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -198,16 +138,7 @@ class Lookup } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "lookup_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "lookup_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/lstm.hpp b/src/mlpack/methods/ann/layer/lstm.hpp index bc976105973..cf70e344d94 100644 --- a/src/mlpack/methods/ann/layer/lstm.hpp +++ b/src/mlpack/methods/ann/layer/lstm.hpp @@ -44,15 +44,7 @@ class LSTM { public: //! Create the LSTM object. 
-<<<<<<< HEAD -<<<<<<< HEAD LSTM(); -======= - LSTM() { /* Nothing to do here */ } ->>>>>>> Refactor ann layer. -======= - LSTM(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the LSTM layer object using the specified parameters. @@ -61,50 +53,7 @@ class LSTM * @param outSize The number of output units. * @param rho Maximum number of steps to backpropagate through time (BPTT). */ -<<<<<<< HEAD -<<<<<<< HEAD - LSTM(const size_t inSize, const size_t outSize, const size_t rho); -======= - LSTM(const size_t inSize, const size_t outSize, const size_t rho) : - inSize(inSize), - outSize(outSize), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - { - input2GateModule = new Linear<>(inSize, 4 * outSize); - output2GateModule = new LinearNoBias<>(outSize, 4 * outSize); - - network.push_back(input2GateModule); - network.push_back(output2GateModule); - - inputGateModule = new SigmoidLayer<>(); - hiddenStateModule = new TanHLayer<>(); - forgetGateModule = new SigmoidLayer<>(); - outputGateModule = new SigmoidLayer<>(); - - network.push_back(inputGateModule); - network.push_back(hiddenStateModule); - network.push_back(forgetGateModule); - network.push_back(outputGateModule); - - cellModule = new IdentityLayer<>(); - cellActivationModule = new TanHLayer<>(); - - network.push_back(cellModule); - network.push_back(cellActivationModule); - - prevOutput = arma::zeros(outSize, 1); - prevCell = arma::zeros(outSize, 1); - prevError = arma::zeros(4 * outSize, 1); - cellActivationError = arma::zeros(outSize, 1); - } ->>>>>>> Refactor ann layer. -======= LSTM(const size_t inSize, const size_t outSize, const size_t rho); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -114,120 +63,7 @@ class LSTM * @param output Resulting output activation. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - cellParameter.push_back(prevCell); - outParameter.push_back(prevOutput); - } - - arma::mat output1; - arma::mat output2; - arma::mat output3; - - boost::apply_visitor( - ForwardVisitor( - std::move(input), - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(prevOutput), - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)) - ), - output2GateModule); - - output = boost::apply_visitor(outputParameterVisitor, input2GateModule) + - boost::apply_visitor(outputParameterVisitor, output2GateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(0, 0, 1 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(1 * outSize, 0, 2 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(2 * outSize, 0, 3 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(output.submat(3 * outSize, 0, 4 * outSize - 1, 0)), - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)) - ), - outputGateModule); - - arma::mat cell = prevCell; - - // Input gate * hidden state. - arma::mat cmul1 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % boost::apply_visitor(outputParameterVisitor, - hiddenStateModule); - - // Forget gate * cell. - arma::mat cmul2 = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cell; - - arma::mat nextCell = cmul1 + cmul2; - - boost::apply_visitor( - ForwardVisitor( - std::move(nextCell), - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)) - ), - cellModule); - - boost::apply_visitor( - ForwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, cellModule)), - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)) - ), - cellActivationModule); - - output = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % boost::apply_visitor(outputParameterVisitor, - outputGateModule); - - prevCell = nextCell; - prevOutput = output; - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - prevOutput.zeros(); - prevCell.zeros(); - } - } ->>>>>>> Refactor ann layer. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
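For readers following the refactoring, the inline Forward() removed above implements the standard LSTM cell update through the gate sub-modules created in the constructor. A compact restatement of that update, using only names local to this sketch, is:

// gates = Linear(x_t) + LinearNoBias(h_{t-1})                // 4 * outSize rows
// i_t   = sigmoid(gates.rows(0,           1 * outSize - 1)); // input gate
// g_t   = tanh   (gates.rows(1 * outSize, 2 * outSize - 1)); // candidate state
// f_t   = sigmoid(gates.rows(2 * outSize, 3 * outSize - 1)); // forget gate
// o_t   = sigmoid(gates.rows(3 * outSize, 4 * outSize - 1)); // output gate
// c_t   = i_t % g_t + f_t % c_{t-1};   // % is the element-wise product
// h_t   = tanh(c_t) % o_t;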
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -241,129 +77,7 @@ class LSTM template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - if (backwardStep > 0) - { - gy += boost::apply_visitor(deltaVisitor, output2GateModule); - } - - arma::mat g1 = boost::apply_visitor(outputParameterVisitor, - cellActivationModule) % gy; - - arma::mat g2 = boost::apply_visitor(outputParameterVisitor, - outputGateModule) % gy; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - cellActivationModule)), - std::move(g2), - std::move(boost::apply_visitor(deltaVisitor, - cellActivationModule)) - ), - cellActivationModule); - - cellActivationError = boost::apply_visitor(deltaVisitor, - cellActivationModule); - - if (backwardStep > 0) - { - cellActivationError += forgetGateError; - } - - arma::mat g4 = boost::apply_visitor(outputParameterVisitor, - inputGateModule) % cellActivationError; - - arma::mat g5 = boost::apply_visitor(outputParameterVisitor, - hiddenStateModule) % cellActivationError; - - forgetGateError = boost::apply_visitor(outputParameterVisitor, - forgetGateModule) % cellActivationError; - - arma::mat g7 = cellParameter[cellParameter.size() - - backwardStep - 1] % cellActivationError; - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - inputGateModule)), - std::move(g5), - std::move(boost::apply_visitor(deltaVisitor, inputGateModule)) - ), - inputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - hiddenStateModule)), - std::move(g4), - std::move(boost::apply_visitor(deltaVisitor, hiddenStateModule)) - ), - hiddenStateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - forgetGateModule)), - std::move(g7), - std::move(boost::apply_visitor(deltaVisitor, forgetGateModule)) - ), - forgetGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - outputGateModule)), - std::move(g1), - std::move(boost::apply_visitor(deltaVisitor, outputGateModule)) - ), - outputGateModule); - - prevError.submat(0, 0, 1 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, inputGateModule); - prevError.submat(1 * outSize, 0, 2 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, hiddenStateModule); - prevError.submat(2 * outSize, 0, 3 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, forgetGateModule); - prevError.submat(3 * outSize, 0, 4 * outSize - 1, 0) = boost::apply_visitor( - deltaVisitor, outputGateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - input2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, input2GateModule)) - ), - input2GateModule); - - boost::apply_visitor( - BackwardVisitor( - std::move(boost::apply_visitor(outputParameterVisitor, - output2GateModule)), - std::move(prevError), - std::move(boost::apply_visitor(deltaVisitor, output2GateModule)) - ), - output2GateModule); - - backwardStep++; - if (backwardStep == rho) - { - backwardStep = 0; - cellParameter.clear(); - } - - g = boost::apply_visitor(deltaVisitor, input2GateModule); - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. 
/* * Calculate the gradient using the output delta and the input activation. @@ -375,37 +89,7 @@ class LSTM template void Gradient(arma::Mat&& input, arma::Mat&& /* error */, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - boost::apply_visitor( - GradientVisitor( - std::move(input), - std::move(prevError) - ), - input2GateModule); - - boost::apply_visitor( - GradientVisitor( - std::move(outParameter[outParameter.size() - gradientStep - 1]), - std::move(prevError) - ), - output2GateModule); - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - outParameter.clear(); - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. //! The value of the deterministic parameter. bool Deterministic() const { return deterministic; } @@ -449,21 +133,7 @@ class LSTM * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(weights, "weights"); - ar & data::CreateNVP(inSize, "inSize"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(rho, "rho"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -561,16 +231,7 @@ class LSTM } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "lstm_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "lstm_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/max_pooling.hpp b/src/mlpack/methods/ann/layer/max_pooling.hpp index 7d509a568cb..6bf179b7a5a 100644 --- a/src/mlpack/methods/ann/layer/max_pooling.hpp +++ b/src/mlpack/methods/ann/layer/max_pooling.hpp @@ -53,21 +53,8 @@ template < class MaxPooling { public: -<<<<<<< HEAD -<<<<<<< HEAD //! Create the MaxPooling object. MaxPooling(); -======= - //! Create the PoolingLayer object. - MaxPooling() - { - /* Nothing to do here */ - } ->>>>>>> Refactor ann layer. -======= - //! Create the MaxPooling object. - MaxPooling(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the MaxPooling object using the specified number of units. @@ -79,38 +66,10 @@ class MaxPooling * @param floor Rounding operator (floor or ceil). */ MaxPooling(const size_t kW, -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); -<<<<<<< HEAD -======= - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - reset(false), - floor(floor), - offset(0), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - deterministic(false) - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -120,71 +79,7 @@ class MaxPooling * @param output Resulting output activation. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - const size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - if (!deterministic) - { - poolingIndices.push_back(outputTemp); - } - - if (!reset) - { - size_t elements = inputWidth * inputHeight; - indicesCol = arma::linspace >(0, (elements - 1), - elements); - - indices = arma::Mat(indicesCol.memptr(), inputWidth, inputHeight); - - reset = true; - } - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - if (!deterministic) - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - poolingIndices.back().slice(s)); - } - else - { - PoolingOperation(inputTemp.slice(s), outputTemp.slice(s), - inputTemp.slice(s)); - } - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } ->>>>>>> Refactor ann layer. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -198,32 +93,7 @@ class MaxPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(mappedError.slice(s), gTemp.slice(s), - poolingIndices.back().slice(s)); - } - - poolingIndices.pop_back(); - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -269,21 +139,7 @@ class MaxPooling * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -415,23 +271,6 @@ class MaxPooling std::vector poolingIndices; }; // class MaxPooling -<<<<<<< HEAD -<<<<<<< HEAD -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "max_pooling_impl.hpp" - -#endif -======= - -} // namespace ann -} // namespace mlpack - -#endif ->>>>>>> Refactor ann layer. 
-======= } // namespace ann } // namespace mlpack @@ -439,4 +278,3 @@ class MaxPooling #include "max_pooling_impl.hpp" #endif ->>>>>>> Split layer modules into definition and implementation. diff --git a/src/mlpack/methods/ann/layer/mean_pooling.hpp b/src/mlpack/methods/ann/layer/mean_pooling.hpp index 13ee58e3687..70e061c7e2b 100644 --- a/src/mlpack/methods/ann/layer/mean_pooling.hpp +++ b/src/mlpack/methods/ann/layer/mean_pooling.hpp @@ -34,18 +34,7 @@ class MeanPooling { public: //! Create the MeanPooling object. -<<<<<<< HEAD -<<<<<<< HEAD MeanPooling(); -======= - MeanPooling() - { - /* Nothing to do here */ - } ->>>>>>> Refactor ann layer. -======= - MeanPooling(); ->>>>>>> Split layer modules into definition and implementation. /** * Create the MeanPooling object using the specified number of units. @@ -56,39 +45,10 @@ class MeanPooling * @param dH Width of the stride operation. */ MeanPooling(const size_t kW, -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. const size_t kH, const size_t dW = 1, const size_t dH = 1, const bool floor = true); -<<<<<<< HEAD -======= - const size_t kH, - const size_t dW = 1, - const size_t dH = 1, - const bool floor = true) : - kW(kW), - kH(kH), - dW(dW), - dH(dH), - inputWidth(0), - inputHeight(0), - outputWidth(0), - outputHeight(0), - reset(false), - floor(floor), - deterministic(false), - offset(0) - - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -98,49 +58,7 @@ class MeanPooling * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - size_t slices = input.n_elem / (inputWidth * inputHeight); - inputTemp = arma::cube(input.memptr(), inputWidth, inputHeight, slices); - - if (floor) - { - outputWidth = std::floor((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1); - - offset = 0; - } - else - { - outputWidth = std::ceil((inputWidth - (double) kW) / (double) dW + 1); - outputHeight = std::ceil((inputHeight - (double) kH) / (double) dH + 1); - - offset = 1; - } - - outputTemp = arma::zeros >(outputWidth, outputHeight, - slices); - - for (size_t s = 0; s < inputTemp.n_slices; s++) - { - - Pooling(inputTemp.slice(s), outputTemp.slice(s)); - } - - output = arma::Mat(outputTemp.memptr(), outputTemp.n_elem, 1); - - outputWidth = outputTemp.n_rows; - outputHeight = outputTemp.n_cols; - outSize = slices; - } ->>>>>>> Refactor ann layer. -======= - void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
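Both MaxPooling and MeanPooling share the output-size computation that appears in the inline Forward() bodies removed in this patch; it is restated here as a short sketch for reference (floor == true is the default):

// outputWidth  = std::floor((inputWidth  - (double) kW) / (double) dW + 1);
// outputHeight = std::floor((inputHeight - (double) kH) / (double) dH + 1);
// With floor == false, std::ceil is used instead and an offset of one is
// applied so that partially covered border regions still produce an output.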
/** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -154,29 +72,7 @@ class MeanPooling template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - arma::cube mappedError = arma::cube(gy.memptr(), outputWidth, - outputHeight, outSize); - - gTemp = arma::zeros(inputTemp.n_rows, - inputTemp.n_cols, inputTemp.n_slices); - - for (size_t s = 0; s < mappedError.n_slices; s++) - { - Unpooling(inputTemp.slice(s), mappedError.slice(s), gTemp.slice(s)); - } - - g = arma::mat(gTemp.memptr(), gTemp.n_elem, 1); - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType const& InputParameter() const { return inputParameter; } @@ -222,21 +118,7 @@ class MeanPooling * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(kW, "kW"); - ar & data::CreateNVP(kH, "kH"); - ar & data::CreateNVP(dW, "dW"); - ar & data::CreateNVP(dH, "dH"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: @@ -364,17 +246,7 @@ class MeanPooling } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. // Include implementation. #include "mean_pooling_impl.hpp" #endif -<<<<<<< HEAD -======= -#endif ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. diff --git a/src/mlpack/methods/ann/layer/mean_squared_error.hpp b/src/mlpack/methods/ann/layer/mean_squared_error.hpp index b98bbd8db67..280b6e79b11 100644 --- a/src/mlpack/methods/ann/layer/mean_squared_error.hpp +++ b/src/mlpack/methods/ann/layer/mean_squared_error.hpp @@ -2,15 +2,7 @@ * @file mean_squared_error.hpp * @author Marcus Edel * -<<<<<<< HEAD -<<<<<<< HEAD * Definition of the mean squared error performance function. -======= - * Definition and implementation of the mean squared error performance function. ->>>>>>> Refactor ann layer. -======= - * Definition of the mean squared error performance function. ->>>>>>> Split layer modules into definition and implementation. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -45,15 +37,7 @@ class MeanSquaredError /** * Create the MeanSquaredError object. */ -<<<<<<< HEAD -<<<<<<< HEAD - MeanSquaredError(); -======= - MeanSquaredError() { /* Nothing to do here. */ } ->>>>>>> Refactor ann layer. -======= MeanSquaredError(); ->>>>>>> Split layer modules into definition and implementation. /* * Computes the mean squared error function. @@ -62,19 +46,7 @@ class MeanSquaredError * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD double Forward(const arma::Mat&& input, const arma::Mat&& target); -======= - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - return arma::mean(arma::mean(arma::square(input - target))); - } - ->>>>>>> Refactor ann layer. -======= - double Forward(const arma::Mat&& input, const arma::Mat&& target); ->>>>>>> Split layer modules into definition and implementation. 
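The removed inline bodies of MeanSquaredError are small enough to restate. A minimal free-standing sketch of the error value (the function name is invented for illustration) and its gradient is:

double MeanSquaredErrorValue(const arma::mat& input, const arma::mat& target)
{
  // Mean over all elements of the squared difference.
  return arma::mean(arma::mean(arma::square(input - target)));
}

// The backward pass simply returns the difference: output = input - target.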
/** * Ordinary feed backward pass of a neural network. * @@ -85,18 +57,7 @@ class MeanSquaredError template void Backward(const arma::Mat&& input, const arma::Mat&& target, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& output); -======= - arma::Mat&& output) - { - output = (input - target); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -113,21 +74,12 @@ class MeanSquaredError //! Modify the delta. OutputDataType& Delta() { return delta; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored delta object. OutputDataType delta; @@ -139,21 +91,10 @@ class MeanSquaredError OutputDataType outputParameter; }; // class MeanSquaredError -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "mean_squared_error_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/multiply_constant.hpp b/src/mlpack/methods/ann/layer/multiply_constant.hpp index e4a9e83c636..b2985b27b83 100644 --- a/src/mlpack/methods/ann/layer/multiply_constant.hpp +++ b/src/mlpack/methods/ann/layer/multiply_constant.hpp @@ -32,18 +32,7 @@ class MultiplyConstant /** * Create the MultiplyConstant object. */ -<<<<<<< HEAD -<<<<<<< HEAD MultiplyConstant(const double scalar); -======= - MultiplyConstant(const double scalar) : scalar(scalar) - { - // Nothing to do here. - } ->>>>>>> Refactor ann layer. -======= - MultiplyConstant(const double scalar); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network. Multiply the input with the @@ -53,18 +42,7 @@ class MultiplyConstant * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const InputType&& input, OutputType&& output); -======= - void Forward(const InputType&& input, OutputType&& output) - { - output = input * scalar; - } ->>>>>>> Refactor ann layer. -======= void Forward(const InputType&& input, OutputType&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The backward pass @@ -75,18 +53,7 @@ class MultiplyConstant * @param g The calculated gradient. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); -======= - void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g) - { - g = gy * scalar; - } ->>>>>>> Refactor ann layer. -======= - void Backward(const DataType&& /* input */, DataType&& gy, DataType&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -107,18 +74,7 @@ class MultiplyConstant * Serialize the layer. 
*/ template -<<<<<<< HEAD -<<<<<<< HEAD - void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(scalar, "scalar"); - } ->>>>>>> Refactor ann layer. -======= void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored constant scalar value. @@ -134,21 +90,10 @@ class MultiplyConstant OutputDataType outputParameter; }; // class MultiplyConstant -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "multiply_constant_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp index bb662fde8aa..84c6a2d7904 100644 --- a/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp +++ b/src/mlpack/methods/ann/layer/negative_log_likelihood.hpp @@ -38,15 +38,7 @@ class NegativeLogLikelihood /** * Create the NegativeLogLikelihoodLayer object. */ -<<<<<<< HEAD -<<<<<<< HEAD NegativeLogLikelihood(); -======= - NegativeLogLikelihood() { /* Nothing to do here. */ } ->>>>>>> Refactor ann layer. -======= - NegativeLogLikelihood(); ->>>>>>> Split layer modules into definition and implementation. /* * Computes the Negative log likelihood. @@ -55,29 +47,7 @@ class NegativeLogLikelihood * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - double Forward(const arma::Mat&& input, arma::Mat&& target); -======= - double Forward(const arma::Mat&& input, arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - return output; - } ->>>>>>> Refactor ann layer. -======= double Forward(const arma::Mat&& input, arma::Mat&& target); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log @@ -93,26 +63,7 @@ class NegativeLogLikelihood template void Backward(const arma::Mat&& input, const arma::Mat&& target, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& output); -======= - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < input.n_cols; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -145,19 +96,10 @@ class NegativeLogLikelihood //! Locally-stored output parameter object. OutputDataType outputParameter; }; // class NegativeLogLikelihood -<<<<<<< HEAD - -} // namespace ann -} // namespace mlpack -======= ->>>>>>> Refactor ann layer. -<<<<<<< HEAD -======= } // namespace ann } // namespace mlpack ->>>>>>> Split layer modules into definition and implementation. // Include implementation. 
#include "negative_log_likelihood_impl.hpp" diff --git a/src/mlpack/methods/ann/layer/recurrent.hpp b/src/mlpack/methods/ann/layer/recurrent.hpp index 2709ab36eb7..297127a6ae8 100644 --- a/src/mlpack/methods/ann/layer/recurrent.hpp +++ b/src/mlpack/methods/ann/layer/recurrent.hpp @@ -56,49 +56,7 @@ class Recurrent const InputModuleType& input, const FeedbackModuleType& feedback, const TransferModuleType& transfer, -<<<<<<< HEAD -<<<<<<< HEAD const size_t rho); -======= - const size_t rho) : - startModule(new StartModuleType(start)), - inputModule(new InputModuleType(input)), - feedbackModule(new FeedbackModuleType(feedback)), - transferModule(new TransferModuleType(transfer)), - rho(rho), - forwardStep(0), - backwardStep(0), - gradientStep(0), - deterministic(false) - - { - initialModule = new Sequential<>(); - mergeModule = new AddMerge<>(); - recurrentModule = new Sequential<>(false); - - boost::apply_visitor(AddVisitor(inputModule), initialModule); - boost::apply_visitor(AddVisitor(startModule), initialModule); - boost::apply_visitor(AddVisitor(transferModule), initialModule); - - boost::apply_visitor(weightSizeVisitor, startModule); - boost::apply_visitor(weightSizeVisitor, inputModule); - boost::apply_visitor(weightSizeVisitor, feedbackModule); - boost::apply_visitor(weightSizeVisitor, transferModule); - - boost::apply_visitor(AddVisitor(inputModule), mergeModule); - boost::apply_visitor(AddVisitor(feedbackModule), mergeModule); - boost::apply_visitor(AddVisitor(mergeModule), recurrentModule); - boost::apply_visitor(AddVisitor(transferModule), recurrentModule); - - network.push_back(initialModule); - network.push_back(mergeModule); - network.push_back(feedbackModule); - network.push_back(recurrentModule); - } ->>>>>>> Refactor ann layer. -======= - const size_t rho); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -108,56 +66,7 @@ class Recurrent * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - initialModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, inputModule))), - inputModule); - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, transferModule)), std::move( - boost::apply_visitor(outputParameterVisitor, feedbackModule))), - feedbackModule); - - boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)), - recurrentModule); - } - - output = boost::apply_visitor(outputParameterVisitor, transferModule); - - // Save the feedback output parameter when training the module. - if (!deterministic) - { - feedbackOutputParameter.push_back(output); - } - - forwardStep++; - if (forwardStep == rho) - { - forwardStep = 0; - backwardStep = 0; - - if (!recurrentError.is_empty()) - { - recurrentError.zeros(); - } - } - } ->>>>>>> Refactor ann layer. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -171,52 +80,7 @@ class Recurrent template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& g); -======= - arma::Mat&& g) - { - if (!recurrentError.is_empty()) - { - recurrentError += gy; - } - else - { - recurrentError = gy; - } - - if (backwardStep < (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, recurrentModule)), std::move(recurrentError), - std::move(boost::apply_visitor(deltaVisitor, recurrentModule))), - recurrentModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, inputModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move(g)), - inputModule); - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, feedbackModule)), std::move( - boost::apply_visitor(deltaVisitor, recurrentModule)), std::move( - boost::apply_visitor(deltaVisitor, feedbackModule))),feedbackModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, initialModule)), std::move(recurrentError), - std::move(g)), initialModule); - } - - recurrentError = boost::apply_visitor(deltaVisitor, feedbackModule); - backwardStep++; - } ->>>>>>> Refactor ann layer. -======= - arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -228,46 +92,7 @@ class Recurrent template void Gradient(arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - if (gradientStep < (rho - 1)) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - recurrentModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, mergeModule))), inputModule); - - boost::apply_visitor(GradientVisitor(std::move( - feedbackOutputParameter[feedbackOutputParameter.size() - 2 - - gradientStep]), std::move(boost::apply_visitor(deltaVisitor, - mergeModule))), feedbackModule); - } - else - { - boost::apply_visitor(GradientZeroVisitor(), recurrentModule); - boost::apply_visitor(GradientZeroVisitor(), inputModule); - boost::apply_visitor(GradientZeroVisitor(), feedbackModule); - - boost::apply_visitor(GradientVisitor(std::move(input), std::move( - boost::apply_visitor(deltaVisitor, startModule))), initialModule); - } - - gradientStep++; - if (gradientStep == rho) - { - gradientStep = 0; - feedbackOutputParameter.clear(); - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. //! Get the model modules. std::vector& Model() { return network; } @@ -306,18 +131,7 @@ class Recurrent * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - } ->>>>>>> Refactor ann layer. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored start module. 
@@ -393,16 +207,7 @@ class Recurrent } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "recurrent_impl.hpp" - -======= ->>>>>>> Refactor ann layer. -======= // Include implementation. #include "recurrent_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/recurrent_attention.hpp b/src/mlpack/methods/ann/layer/recurrent_attention.hpp index 7bd506328d8..ffb7320b232 100644 --- a/src/mlpack/methods/ann/layer/recurrent_attention.hpp +++ b/src/mlpack/methods/ann/layer/recurrent_attention.hpp @@ -62,26 +62,7 @@ class RecurrentAttention RecurrentAttention(const size_t outSize, const RNNModuleType& rnn, const ActionModuleType& action, -<<<<<<< HEAD -<<<<<<< HEAD const size_t rho); -======= - const size_t rho) : - outSize(outSize), - rnnModule(new RNNModuleType(rnn)), - actionModule(new ActionModuleType(action)), - rho(rho), - forwardStep(0), - backwardStep(0), - deterministic(false) - { - network.push_back(rnnModule); - network.push_back(actionModule); - } ->>>>>>> Refactor neural visual attention modules. -======= - const size_t rho); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -91,65 +72,7 @@ class RecurrentAttention * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - // Initialize the action input. - if (initialInput.is_empty()) - { - initialInput = arma::zeros(outSize, input.n_cols); - } - - // Propagate through the action and recurrent module. - for (forwardStep = 0; forwardStep < rho; ++forwardStep) - { - if (forwardStep == 0) - { - boost::apply_visitor(ForwardVisitor(std::move(initialInput), std::move( - boost::apply_visitor(outputParameterVisitor, actionModule))), - actionModule); - } - else - { - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(boost::apply_visitor( - outputParameterVisitor, actionModule))), actionModule); - } - - // Initialize the glimpse input. - arma::mat glimpseInput = arma::zeros(input.n_elem, 2); - glimpseInput.col(0) = input; - glimpseInput.submat(0, 1, boost::apply_visitor(outputParameterVisitor, - actionModule).n_elem - 1, 1) = boost::apply_visitor( - outputParameterVisitor, actionModule); - - boost::apply_visitor(ForwardVisitor(std::move(glimpseInput), - std::move(boost::apply_visitor(outputParameterVisitor, rnnModule))), - rnnModule); - - // Save the output parameter when training the module. - if (!deterministic) - { - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(SaveOutputParameterVisitor( - std::move(moduleOutputParameter)), network[l]); - } - } - } - - output = boost::apply_visitor(outputParameterVisitor, rnnModule); - - forwardStep = 0; - backwardStep = 0; - } ->>>>>>> Refactor neural visual attention modules. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. 
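Similarly, the glimpse loop removed from Forward() above can be summarised in a few lines; all names below are local to this sketch rather than members of the class:

// for (step = 0; step < rho; ++step)
// {
//   action   = (step == 0) ? actionModule(zeros)      // initial action input
//                          : actionModule(rnnState);  // from the last RNN state
//   glimpse  = [ input | action ];  // input in column 0, action in column 1
//   rnnState = rnnModule(glimpse);
// }
// output = rnnState;  // attention output after rho glimpses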
/** * Ordinary feed backward pass of a neural network, calculating the function @@ -163,89 +86,7 @@ class RecurrentAttention template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - if (intermediateGradient.is_empty() && backwardStep == 0) - { - // Initialize the attention gradients. - size_t weights = boost::apply_visitor(weightSizeVisitor, rnnModule) + - boost::apply_visitor(weightSizeVisitor, actionModule); - - intermediateGradient = arma::zeros(weights, 1); - attentionGradient = arma::zeros(weights, 1); - - // Initialize the action error. - actionError = arma::zeros( - boost::apply_visitor(outputParameterVisitor, actionModule).n_rows, - boost::apply_visitor(outputParameterVisitor, actionModule).n_cols); - } - - // Propagate the attention gradients. - if (backwardStep == 0) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), rnnModule); - boost::apply_visitor(GradientSetVisitor( - std::move(intermediateGradient), offset), actionModule); - - attentionGradient.zeros(); - } - - // Back-propagate through time. - for (; backwardStep < rho; backwardStep++) - { - if (backwardStep == 0) - { - recurrentError = gy; - } - else - { - recurrentError = actionDelta; - } - - for (size_t l = 0; l < network.size(); ++l) - { - boost::apply_visitor(LoadOutputParameterVisitor( - std::move(moduleOutputParameter)), network[network.size() - 1 - l]); - } - - if (backwardStep == (rho - 1)) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, actionModule)), std::move(actionError), - std::move(actionDelta)), actionModule); - } - else - { - boost::apply_visitor(BackwardVisitor(std::move(initialInput), - std::move(actionError), std::move(actionDelta)), actionModule); - } - - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, rnnModule)), std::move(recurrentError), - std::move(rnnDelta)), rnnModule); - - if (backwardStep == 0) - { - g = rnnDelta.col(1); - } - else - { - g += rnnDelta.col(1); - } - - IntermediateGradient(); - } - } ->>>>>>> Refactor neural visual attention modules. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -257,22 +98,7 @@ class RecurrentAttention template void Gradient(arma::Mat&& /* input */, arma::Mat&& /* error */, -<<<<<<< HEAD -<<<<<<< HEAD arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - size_t offset = 0; - offset += boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), rnnModule); - boost::apply_visitor(GradientUpdateVisitor( - std::move(attentionGradient), offset), actionModule); - } ->>>>>>> Refactor neural visual attention modules. -======= - arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. //! Get the model modules. 
std::vector& Model() { return network; } @@ -311,21 +137,7 @@ class RecurrentAttention * Serialize the layer */ template -<<<<<<< HEAD -<<<<<<< HEAD void Serialize(Archive& ar, const unsigned int /* version */); -======= - void Serialize(Archive& ar, const unsigned int /* version */) - { - ar & data::CreateNVP(rho, "rho"); - ar & data::CreateNVP(outSize, "outSize"); - ar & data::CreateNVP(forwardStep, "forwardStep"); - ar & data::CreateNVP(backwardStep, "backwardStep"); - } ->>>>>>> Refactor neural visual attention modules. -======= - void Serialize(Archive& ar, const unsigned int /* version */); ->>>>>>> Split layer modules into definition and implementation. private: //! Calculate the gradient of the attention module. @@ -445,16 +257,7 @@ class RecurrentAttention } // namespace ann } // namespace mlpack -<<<<<<< HEAD -<<<<<<< HEAD -// Include implementation. -#include "recurrent_attention_impl.hpp" - -======= ->>>>>>> Refactor neural visual attention modules. -======= // Include implementation. #include "recurrent_attention_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/reinforce_normal.hpp b/src/mlpack/methods/ann/layer/reinforce_normal.hpp index 0ec6315a331..fd192f0a1f2 100644 --- a/src/mlpack/methods/ann/layer/reinforce_normal.hpp +++ b/src/mlpack/methods/ann/layer/reinforce_normal.hpp @@ -34,18 +34,7 @@ class ReinforceNormal * * @param stdev Standard deviation used during the forward and backward pass. */ -<<<<<<< HEAD -<<<<<<< HEAD ReinforceNormal(const double stdev); -======= - ReinforceNormal(const double stdev) : stdev(stdev) - { - // Nothing to do here. - } ->>>>>>> Refactor neural visual attention modules. -======= - ReinforceNormal(const double stdev); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -55,30 +44,7 @@ class ReinforceNormal * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (!deterministic) - { - // Multiply by standard deviations and re-center the means to the mean. - output = arma::randn >(input.n_rows, input.n_cols) * - stdev + input; - - moduleInputParameter.push_back(input); - } - else - { - // Use maximum a posteriori. - output = input; - } - } ->>>>>>> Refactor neural visual attention modules. -======= void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -90,25 +56,7 @@ class ReinforceNormal * @param g The calculated gradient. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); -======= - void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g) - { - g = (input - moduleInputParameter.back()) / std::pow(stdev, 2.0); - - // Multiply by reward and multiply by -1. - g *= reward; - g *= -1; - - moduleInputParameter.pop_back(); - } - ->>>>>>> Refactor neural visual attention modules. -======= void Backward(const DataType&& input, DataType&& /* gy */, DataType&& g); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -135,21 +83,12 @@ class ReinforceNormal //! 
Modify the value of the deterministic parameter. double& Reward() { return reward; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Standard deviation used during the forward and backward pass. const double stdev; @@ -173,21 +112,10 @@ class ReinforceNormal bool deterministic; }; // class ReinforceNormal -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "reinforce_normal_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/select.hpp b/src/mlpack/methods/ann/layer/select.hpp index 4edac84578e..d3c42a008c9 100644 --- a/src/mlpack/methods/ann/layer/select.hpp +++ b/src/mlpack/methods/ann/layer/select.hpp @@ -2,15 +2,7 @@ * @file select.hpp * @author Marcus Edel * -<<<<<<< HEAD -<<<<<<< HEAD * Definition of the Select module. -======= - * Definition and implementation of the Select module. ->>>>>>> Refactor ann layer. -======= - * Definition of the Select module. ->>>>>>> Split layer modules into definition and implementation. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -44,27 +36,9 @@ class Select * Create the Select object. * * @param index The column which should be extracted from the given input. -<<<<<<< HEAD -<<<<<<< HEAD * @param elements The number of elements that should be used. */ Select(const size_t index, const size_t elements = 0); -<<<<<<< HEAD -======= - * @param index The number of elements that should be used. -======= - * @param elements The number of elements that should be used. ->>>>>>> Minor style fixes. - */ - Select(const size_t index, const size_t elements = 0) : - index(index), - elements(elements) - { - /* Nothing to do here. */ - } ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -74,25 +48,7 @@ class Select * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD void Forward(const arma::Mat&& input, arma::Mat&& output); -======= - void Forward(const arma::Mat&& input, arma::Mat&& output) - { - if (elements == 0) - { - output = input.col(index); - } - else - { - output = input.submat(0, index, elements - 1, index); - } - } ->>>>>>> Refactor ann layer. -======= - void Forward(const arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, calculating the function @@ -106,25 +62,7 @@ class Select template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - if (elements == 0) - { - g = gy; - } - else - { - g = gy.submat(0, 0, elements - 1, 0); - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. 
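The behaviour of Select is fully determined by the two small bodies removed above; restated as a sketch:

// output = (elements == 0) ? input.col(index)
//                          : input.submat(0, index, elements - 1, index);
// The backward pass passes gy through unchanged, or only its first
// `elements` rows when elements != 0.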
//! Get the input parameter. InputDataType& InputParameter() const { return inputParameter; } @@ -141,21 +79,12 @@ class Select //! Modify the delta. OutputDataType& Delta() { return delta; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& ar, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Locally-stored column index. size_t index; @@ -173,21 +102,10 @@ class Select OutputDataType outputParameter; }; // class Select -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "select_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/methods/ann/layer/sequential.hpp b/src/mlpack/methods/ann/layer/sequential.hpp index e5b81519138..ca729c9da13 100644 --- a/src/mlpack/methods/ann/layer/sequential.hpp +++ b/src/mlpack/methods/ann/layer/sequential.hpp @@ -47,36 +47,10 @@ class Sequential * * @param model Expose the all network modules. */ -<<<<<<< HEAD -<<<<<<< HEAD Sequential(const bool model = true); //! Destroy the Sequential object. ~Sequential(); -======= - Sequential(const bool model = true) : model(model), reset(false) - { - /* Nothing to do here. */ - } - - //! Destroy the Sequential object. - ~Sequential() - { - if (!model) - { - for (LayerTypes& layer : network) - { - boost::apply_visitor(deleteVisitor, layer); - } - } - } ->>>>>>> Refactor ann layer. -======= - Sequential(const bool model = true); - - //! Destroy the Sequential object. - ~Sequential(); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -86,72 +60,7 @@ class Sequential * @param output Resulting output activation. */ template -<<<<<<< HEAD -<<<<<<< HEAD - void Forward(arma::Mat&& input, arma::Mat&& output); -======= - void Forward(arma::Mat&& input, arma::Mat&& output) - { - boost::apply_visitor(ForwardVisitor(std::move(input), std::move( - boost::apply_visitor(outputParameterVisitor, network.front()))), - network.front()); - - if (!reset) - { - if (boost::apply_visitor(outputWidthVisitor, network.front()) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network.front()); - } - - if (boost::apply_visitor(outputHeightVisitor, network.front()) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network.front()); - } - } - - for (size_t i = 1; i < network.size(); ++i) - { - if (!reset) - { - // Set the input width. - boost::apply_visitor(SetInputWidthVisitor(width, true), network[i]); - - // Set the input height. - boost::apply_visitor(SetInputHeightVisitor(height, true), network[i]); - } - - boost::apply_visitor(ForwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(outputParameterVisitor, network[i]))), - network[i]); - - if (!reset) - { - // Get the output width. - if (boost::apply_visitor(outputWidthVisitor, network[i]) != 0) - { - width = boost::apply_visitor(outputWidthVisitor, network[i]); - } - - // Get the output height. 
- if (boost::apply_visitor(outputHeightVisitor, network[i]) != 0) - { - height = boost::apply_visitor(outputHeightVisitor, network[i]); - } - } - } - - if (!reset) - { - reset = true; - } - - output = boost::apply_visitor(outputParameterVisitor, network.back()); - } ->>>>>>> Refactor ann layer. -======= void Forward(arma::Mat&& input, arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network, using 3rd-order tensors as @@ -165,32 +74,7 @@ class Sequential template void Backward(const arma::Mat&& /* input */, arma::Mat&& gy, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& g); -======= - arma::Mat&& g) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network.back())), std::move(gy), - std::move(boost::apply_visitor(deltaVisitor, network.back()))), - network.back()); - - for (size_t i = 2; i < network.size() + 1; ++i) - { - boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[network.size() - i])), std::move( - boost::apply_visitor(deltaVisitor, network[network.size() - i + 1])), - std::move(boost::apply_visitor(deltaVisitor, - network[network.size() - i]))), network[network.size() - i]); - } - - g = boost::apply_visitor(deltaVisitor, network.front()); - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& g); ->>>>>>> Split layer modules into definition and implementation. /* * Calculate the gradient using the output delta and the input activation. @@ -202,26 +86,7 @@ class Sequential template void Gradient(arma::Mat&& input, arma::Mat&& error, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& /* gradient */); -======= - arma::Mat&& /* gradient */) - { - boost::apply_visitor(GradientVisitor(std::move(input), std::move(error)), - network.front()); - - for (size_t i = 1; i < network.size() - 1; ++i) - { - boost::apply_visitor(GradientVisitor(std::move(boost::apply_visitor( - outputParameterVisitor, network[i - 1])), std::move( - boost::apply_visitor(deltaVisitor, network[i + 1]))), network[i]); - } - } ->>>>>>> Refactor ann layer. -======= arma::Mat&& /* gradient */); ->>>>>>> Split layer modules into definition and implementation. /* * Add a new module to the model. @@ -273,21 +138,12 @@ class Sequential //! Modify the gradient. arma::mat& Gradient() { return gradient; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor ann layer. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! Parameter which indicates if the modules should be exposed. bool model; @@ -338,26 +194,10 @@ class Sequential size_t height; }; // class Sequential -<<<<<<< HEAD -<<<<<<< HEAD -} // namespace ann -} // namespace mlpack - -// Include implementation. -#include "sequential_impl.hpp" - -======= - -} // namespace ann -} // namespace mlpack - ->>>>>>> Refactor ann layer. -======= } // namespace ann } // namespace mlpack // Include implementation. #include "sequential_impl.hpp" ->>>>>>> Split layer modules into definition and implementation. 
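For completeness, the control flow that Sequential's removed inline bodies implement, in brief; the module names below refer to the network vector held by the class:

// Forward():  feed the input through network[0], then each network[i] from its
//             predecessor's output, propagating width/height between modules
//             on the first pass.
// Backward(): visit the modules in reverse order, handing each one the delta
//             produced by its successor; g is the delta of network[0].
// Gradient(): update every module from the stored output of its predecessor
//             and the delta of its successor.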
#endif diff --git a/src/mlpack/methods/ann/layer/vr_class_reward.hpp b/src/mlpack/methods/ann/layer/vr_class_reward.hpp index 5aa9f73d382..f820e351aa8 100644 --- a/src/mlpack/methods/ann/layer/vr_class_reward.hpp +++ b/src/mlpack/methods/ann/layer/vr_class_reward.hpp @@ -40,20 +40,7 @@ class VRClassReward * @param scale Parameter used to scale the reward. * @param sizeAverage Take the average over all batches. */ -<<<<<<< HEAD -<<<<<<< HEAD VRClassReward(const double scale = 1, const bool sizeAverage = true); -======= - VRClassReward(const double scale = 1, const bool sizeAverage = true) : - scale(scale), - sizeAverage(sizeAverage) - { - // Nothing to do here. - } ->>>>>>> Refactor neural visual attention modules. -======= - VRClassReward(const double scale = 1, const bool sizeAverage = true); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed forward pass of a neural network, evaluating the function @@ -64,43 +51,7 @@ class VRClassReward * between 1 and the number of classes. */ template -<<<<<<< HEAD -<<<<<<< HEAD - double Forward(const arma::Mat&& input, const arma::Mat&& target); -======= - double Forward(const arma::Mat&& input, const arma::Mat&& target) - { - double output = 0; - - for (size_t i = 0; i < input.n_cols - 1; ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output -= input(currentTarget, i); - } - - reward = 0; - arma::uword index = 0; - - for (size_t i = 0; i < input.n_cols - 1; i++) - { - input.unsafe_col(i).max(index); - reward = ((index + 1) == target(i)) * scale; - } - - if (sizeAverage) - { - return output - reward / (input.n_cols - 1); - } - - return output - reward; - } ->>>>>>> Refactor neural visual attention modules. -======= double Forward(const arma::Mat&& input, const arma::Mat&& target); ->>>>>>> Split layer modules into definition and implementation. /** * Ordinary feed backward pass of a neural network. The negative log @@ -116,37 +67,7 @@ class VRClassReward template void Backward(const arma::Mat&& input, const arma::Mat&& target, -<<<<<<< HEAD -<<<<<<< HEAD - arma::Mat&& output); -======= - arma::Mat&& output) - { - output = arma::zeros >(input.n_rows, input.n_cols); - for (size_t i = 0; i < (input.n_cols - 1); ++i) - { - size_t currentTarget = target(i) - 1; - Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows, - "Target class out of range."); - - output(currentTarget, i) = -1; - } - - double vrReward = reward - input(0, 1); - if (sizeAverage) - { - vrReward /= input.n_cols - 1; - } - - const double norm = sizeAverage ? 2.0 / (input.n_cols - 1) : 2.0; - - output(0, 1) = norm * (input(0, 1) - reward); - boost::apply_visitor(RewardSetVisitor(vrReward), network.back()); - } ->>>>>>> Refactor neural visual attention modules. -======= arma::Mat&& output); ->>>>>>> Split layer modules into definition and implementation. //! Get the input parameter. InputDataType& InputParameter() const {return inputParameter; } @@ -183,21 +104,12 @@ class VRClassReward */ void Add(LayerTypes layer) { network.push_back(layer); } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. /** * Serialize the layer */ template void Serialize(Archive& /* ar */, const unsigned int /* version */); -<<<<<<< HEAD -======= ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. private: //! 
Locally-stored value to scale the reward. const double scale; @@ -224,21 +136,10 @@ class VRClassReward std::vector network; }; // class VRClassReward -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Split layer modules into definition and implementation. } // namespace ann } // namespace mlpack // Include implementation. #include "vr_class_reward_impl.hpp" -<<<<<<< HEAD -======= -}; // namespace ann -}; // namespace mlpack ->>>>>>> Refactor neural visual attention modules. -======= ->>>>>>> Split layer modules into definition and implementation. #endif diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp index 481741a4cc1..831df74302d 100644 --- a/src/mlpack/tests/rmsprop_test.cpp +++ b/src/mlpack/tests/rmsprop_test.cpp @@ -16,14 +16,6 @@ #include -#include -#include -#include -#include -#include -#include -#include - #include #include "test_tools.hpp" @@ -35,8 +27,6 @@ using namespace mlpack::optimization::test; using namespace mlpack::distribution; using namespace mlpack::regression; -using namespace mlpack::ann; - BOOST_AUTO_TEST_SUITE(RMSpropTest); /** @@ -116,47 +106,4 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. } -/** - * Run RMSprop on a feedforward neural network and make sure the results are - * acceptable. - */ -BOOST_AUTO_TEST_CASE(FeedforwardTest) -{ - // Test on a non-linearly separable dataset (XOR). - arma::mat input, labels; - input << 0 << 1 << 1 << 0 << arma::endr - << 1 << 0 << 1 << 0 << arma::endr; - labels << 1 << 1 << 0 << 0; - - // Instantiate the first layer. - LinearLayer<> inputLayer(input.n_rows, 8); - BiasLayer<> biasLayer(8); - TanHLayer<> hiddenLayer0; - - // Instantiate the second layer. - LinearLayer<> hiddenLayer1(8, labels.n_rows); - TanHLayer<> outputLayer; - - // Instantiate the output layer. - BinaryClassificationLayer classOutputLayer; - - // Instantiate the feedforward network. - auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, hiddenLayer1, - outputLayer); - FFN net(modules, classOutputLayer); - - RMSprop opt(net, 0.03, 0.99, 1e-8, 300 * input.n_cols, -10); - - net.Train(input, labels, opt); - - arma::mat prediction; - net.Predict(input, prediction); - - BOOST_REQUIRE_EQUAL(prediction(0), 1); - BOOST_REQUIRE_EQUAL(prediction(1), 1); - BOOST_REQUIRE_EQUAL(prediction(2), 0); - BOOST_REQUIRE_EQUAL(prediction(3), 0); -} - BOOST_AUTO_TEST_SUITE_END(); From 07945e69b0a38bd822bdbf16b8eb3f9733d24bad Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 15 Dec 2016 20:55:19 +0100 Subject: [PATCH 74/82] Vectorise isn't supported through all armadillo versions. 
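The change below swaps arma::vectorise() for the advanced Mat constructor that wraps existing memory, since vectorise() isn't usable here across all supported Armadillo versions. A minimal standalone sketch of the same idiom (illustrative only, not part of the patch; the name `flattened` is hypothetical, `gradientTemp` mirrors the variable in Convolution::Gradient()):

    #include <armadillo>

    int main()
    {
      // Stand-in for the per-layer gradient block computed in
      // Convolution::Gradient().
      arma::mat gradientTemp(3, 4, arma::fill::randu);

      // Equivalent of arma::vectorise(gradientTemp): an n_elem x 1 matrix that
      // aliases the existing memory (copy_aux_mem = false, strict = false), so
      // no data is copied before assigning into the gradient column.
      arma::mat flattened(gradientTemp.memptr(), gradientTemp.n_elem, 1,
                          false, false);

      flattened.print("flattened:");
      return 0;
    }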
--- src/mlpack/methods/ann/layer/convolution_impl.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/layer/convolution_impl.hpp b/src/mlpack/methods/ann/layer/convolution_impl.hpp index 99164d18166..422c9947095 100644 --- a/src/mlpack/methods/ann/layer/convolution_impl.hpp +++ b/src/mlpack/methods/ann/layer/convolution_impl.hpp @@ -287,7 +287,12 @@ void Convolution< outMap, outMap)); } - gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + // gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::vectorise(gradientTemp); + gradient.submat(0, 0, weight.n_elem - 1, 0) = arma::Mat( + gradientTemp.memptr(), gradientTemp.n_elem, 1, false, false); + + + // arma::vectorise(gradientTemp); } template< From 53b485500ceef502573a6442df70d927d6ebd095 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 15 Dec 2016 21:50:09 +0100 Subject: [PATCH 75/82] Decrease the number of parallel builds. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 838397c895d..b2cd2e96fed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ before_install: - printenv - sudo cp .travis/config.hpp /usr/include/armadillo_bits/config.hpp install: - - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j4 + - mkdir build && cd build && cmake -DDEBUG=OFF -DPROFILE=OFF .. && make -j2 script: - travis_wait 30 ./bin/mlpack_test -p notifications: From eb7b26659fecd2f36a7b1733c68d1ecc53f4fd11 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 23 Dec 2016 14:15:14 +0100 Subject: [PATCH 76/82] Add Train() function that uses a default optimizer to train the model. --- src/mlpack/methods/ann/ffn.hpp | 18 +++++ src/mlpack/methods/ann/ffn_impl.hpp | 98 +++++++++++++----------- src/mlpack/methods/ann/rnn.hpp | 18 +++++ src/mlpack/methods/ann/rnn_impl.hpp | 114 +++++++++++++++------------- 4 files changed, 152 insertions(+), 96 deletions(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 6a6f013683d..260d41233ca 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -94,6 +94,24 @@ class FFN const arma::mat& responses, OptimizerType& optimizer); + /** + * Train the feedforward network on the given input data. By default, the + * RMSprop optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::SGD). + * + * This will use the existing model parameters as a starting point for the + * optimization. If this is not what you want, then you should access the + * parameters vector directly with Parameters() and modify it as desired. + * + * @tparam OptimizerType Type of optimizer to use to train the model. + * @param predictors Input training variables. + * @param responses Outputs results from input training variables. + */ + template< + template class OptimizerType = mlpack::optimization::RMSprop + > + void Train(const arma::mat& predictors, const arma::mat& responses); + /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index d640781fbb2..d462ef93aa6 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -20,9 +20,8 @@ namespace ann /** Artificial Neural Network. 
*/ { template -FFN::FFN(OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : +FFN::FFN( + OutputLayerType&& outputLayer, InitializationRuleType initializeRule) : outputLayer(std::move(outputLayer)), initializeRule(initializeRule), width(0), @@ -33,12 +32,11 @@ FFN -FFN::FFN(const arma::mat& predictors, - const arma::mat& responses, - OutputLayerType&& outputLayer, - InitializationRuleType initializeRule) : +FFN::FFN( + const arma::mat& predictors, + const arma::mat& responses, + OutputLayerType&& outputLayer, + InitializationRuleType initializeRule) : outputLayer(std::move(outputLayer)), initializeRule(initializeRule), width(0), @@ -60,9 +58,7 @@ FFN -FFN::~FFN() +FFN::~FFN() { std::for_each(network.begin(), network.end(), boost::apply_visitor(deleteVisitor)); @@ -70,11 +66,10 @@ FFN template class OptimizerType> -void FFN::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void FFN::Train( + const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer) { numFunctions = responses.n_cols; @@ -99,9 +94,37 @@ void FFN -void FFN::Predict(arma::mat& predictors, arma::mat& responses) +template class OptimizerType> +void FFN::Train( + const arma::mat& predictors, const arma::mat& responses) +{ + numFunctions = responses.n_cols; + + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + } + + OptimizerType optimizer(*this); + + // Train the model. + Timer::Start("ffn_optimization"); + const double out = optimizer.Optimize(parameter); + Timer::Stop("ffn_optimization"); + + Log::Info << "FFN::FFN(): final objective of trained model is " << out + << "." 
<< std::endl; +} + +template +void FFN::Predict( + arma::mat& predictors, arma::mat& responses) { if (parameter.is_empty()) { @@ -135,11 +158,8 @@ void FFN -double FFN::Evaluate(const arma::mat& /* parameters */, - const size_t i, - const bool deterministic) +double FFN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) { if (parameter.is_empty()) { @@ -167,11 +187,8 @@ double FFN -void FFN::Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient) +void FFN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { @@ -228,9 +245,8 @@ void FFN::ResetDeterministic() } template -void FFN::ResetGradients(arma::mat& gradient) +void FFN::ResetGradients( + arma::mat& gradient) { size_t offset = 0; for (size_t i = 0; i < network.size(); ++i) @@ -241,9 +257,7 @@ void FFN -void FFN::Forward(arma::mat&& input) +void FFN::Forward(arma::mat&& input) { boost::apply_visitor(ForwardVisitor(std::move(input), std::move( boost::apply_visitor(outputParameterVisitor, network.front()))), @@ -300,9 +314,7 @@ void FFN -void FFN::Backward() +void FFN::Backward() { boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( outputParameterVisitor, network.back())), std::move(error), std::move( @@ -319,9 +331,7 @@ void FFN -void FFN::Gradient() +void FFN::Gradient() { boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( boost::apply_visitor(deltaVisitor, network[1]))), network.front()); @@ -340,8 +350,8 @@ void FFN template -void FFN::Serialize(Archive& ar, const unsigned int /* version */) +void FFN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); ar & data::CreateNVP(width, "width"); diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp index 6c5c69968d8..c071f052483 100644 --- a/src/mlpack/methods/ann/rnn.hpp +++ b/src/mlpack/methods/ann/rnn.hpp @@ -99,6 +99,24 @@ class RNN const arma::mat& responses, OptimizerType& optimizer); + /** + * Train the recurrent neural network on the given input data. By default, the + * SGD optimization algorithm is used, but others can be specified + * (such as mlpack::optimization::RMSprop). + * + * This will use the existing model parameters as a starting point for the + * optimization. If this is not what you want, then you should access the + * parameters vector directly with Parameters() and modify it as desired. + * + * @tparam OptimizerType Type of optimizer to use to train the model. + * @param predictors Input training variables. + * @param responses Outputs results from input training variables. + */ + template< + template class OptimizerType = mlpack::optimization::SGD + > + void Train(const arma::mat& predictors, const arma::mat& responses); + /** * Predict the responses to a given set of predictors. The responses will * reflect the output of the given output layer as returned by the diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index a2abb2ce6c3..71897a221e9 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -20,12 +20,11 @@ namespace ann /** Artificial Neural Network. 
*/ { template -RNN::RNN(const size_t rho, - const bool single, - OutputLayerType outputLayer, - InitializationRuleType initializeRule) : +RNN::RNN( + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : rho(rho), outputLayer(outputLayer), initializeRule(initializeRule), @@ -39,14 +38,13 @@ RNN -RNN::RNN(const arma::mat& predictors, - const arma::mat& responses, - const size_t rho, - const bool single, - OutputLayerType outputLayer, - InitializationRuleType initializeRule) : +RNN::RNN( + const arma::mat& predictors, + const arma::mat& responses, + const size_t rho, + const bool single, + OutputLayerType outputLayer, + InitializationRuleType initializeRule) : rho(rho), outputLayer(outputLayer), initializeRule(initializeRule), @@ -72,9 +70,7 @@ RNN -RNN::~RNN() +RNN::~RNN() { for (LayerTypes& layer : network) { @@ -84,11 +80,10 @@ RNN template class OptimizerType> -void RNN::Train(const arma::mat& predictors, - const arma::mat& responses, - OptimizerType& optimizer) +void RNN::Train( + const arma::mat& predictors, + const arma::mat& responses, + OptimizerType& optimizer) { numFunctions = responses.n_cols; @@ -114,9 +109,38 @@ void RNN -void RNN::Predict(arma::mat& predictors, arma::mat& responses) +template class OptimizerType> +void RNN::Train( + const arma::mat& predictors, const arma::mat& responses) +{ + numFunctions = responses.n_cols; + + this->predictors = std::move(predictors); + this->responses = std::move(responses); + + this->deterministic = true; + ResetDeterministic(); + + if (!reset) + { + ResetParameters(); + reset = true; + } + + OptimizerType optimizer(*this); + + // Train the model. + Timer::Start("rnn_optimization"); + const double out = optimizer.Optimize(parameter); + Timer::Stop("rnn_optimization"); + + Log::Info << "RNN::RNN(): final objective of trained model is " << out + << "." 
<< std::endl; +} + +template +void RNN::Predict( + arma::mat& predictors, arma::mat& responses) { if (parameter.is_empty()) { @@ -143,9 +167,8 @@ void RNN -void RNN::SinglePredict(const arma::mat& predictors, arma::mat& responses) +void RNN::SinglePredict( + const arma::mat& predictors, arma::mat& responses) { for (size_t seqNum = 0; seqNum < rho; ++seqNum) { @@ -159,11 +182,8 @@ void RNN -double RNN::Evaluate(const arma::mat& /* parameters */, - const size_t i, - const bool deterministic) +double RNN::Evaluate( + const arma::mat& /* parameters */, const size_t i, const bool deterministic) { if (parameter.is_empty()) { @@ -221,11 +241,8 @@ double RNN -void RNN::Gradient(const arma::mat& parameters, - const size_t i, - arma::mat& gradient) +void RNN::Gradient( + const arma::mat& parameters, const size_t i, arma::mat& gradient) { if (gradient.is_empty()) { @@ -316,9 +333,8 @@ void RNN::ResetDeterministic() } template -void RNN::ResetGradients(arma::mat& gradient) +void RNN::ResetGradients( + arma::mat& gradient) { size_t offset = 0; for (LayerTypes& layer : network) @@ -329,9 +345,7 @@ void RNN -void RNN::Forward(arma::mat&& input) +void RNN::Forward(arma::mat&& input) { boost::apply_visitor(ForwardVisitor(std::move(input), std::move( boost::apply_visitor(outputParameterVisitor, network.front()))), @@ -347,9 +361,7 @@ void RNN -void RNN::Backward() +void RNN::Backward() { boost::apply_visitor(BackwardVisitor( std::move(boost::apply_visitor(outputParameterVisitor, network.back())), @@ -368,9 +380,7 @@ void RNN -void RNN::Gradient() +void RNN::Gradient() { boost::apply_visitor(GradientVisitor(std::move(currentInput), std::move( boost::apply_visitor(deltaVisitor, network[1]))), network.front()); @@ -386,8 +396,8 @@ void RNN template -void RNN::Serialize(Archive& ar, const unsigned int /* version */) +void RNN::Serialize( + Archive& ar, const unsigned int /* version */) { ar & data::CreateNVP(parameter, "parameter"); ar & data::CreateNVP(rho, "rho"); From c79f26b21d92be4ed8e470ed96d260a0050bca3d Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 23 Dec 2016 14:19:20 +0100 Subject: [PATCH 77/82] Remove comment. --- src/mlpack/methods/ann/ffn.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 260d41233ca..5e4b30fa4dc 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -29,8 +29,6 @@ namespace ann /** Artificial Neural Network. */ { * @tparam OutputLayerType The output layer type used to evaluate the network. * @tparam InitializationRuleType Rule used to initialize the weight matrix. */ - -// NguyenWidrowInitialization template< typename OutputLayerType = NegativeLogLikelihood<>, typename InitializationRuleType = RandomInitialization From 1dd7652103031f6a7252f4631979f583e5d842ee Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 19 Jan 2017 14:32:25 +0100 Subject: [PATCH 78/82] Minor style fix; remove extra space. 
--- src/mlpack/methods/ann/layer/add_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/layer/add_impl.hpp b/src/mlpack/methods/ann/layer/add_impl.hpp index 8e87078dcef..3ce562007a4 100644 --- a/src/mlpack/methods/ann/layer/add_impl.hpp +++ b/src/mlpack/methods/ann/layer/add_impl.hpp @@ -38,7 +38,7 @@ template template void Add::Backward( const arma::Mat&& /* input */, - const arma::Mat&& gy, + const arma::Mat&& gy, arma::Mat&& g) { g = gy; From 5acbcd3df338359e2159bae0e145c6a5b0ca7e2b Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 19 Jan 2017 14:36:11 +0100 Subject: [PATCH 79/82] Store/Restore the input when saving/loading the network model. --- src/mlpack/methods/ann/ffn_impl.hpp | 2 ++ src/mlpack/methods/ann/rnn_impl.hpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index d462ef93aa6..7461bcb9379 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -356,6 +356,8 @@ void FFN::Serialize( ar & data::CreateNVP(parameter, "parameter"); ar & data::CreateNVP(width, "width"); ar & data::CreateNVP(height, "height"); + ar & data::CreateNVP(currentInput, "currentInput"); + ar & data::CreateNVP(currentTarget, "currentTarget"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp index 71897a221e9..cdac030a2ec 100644 --- a/src/mlpack/methods/ann/rnn_impl.hpp +++ b/src/mlpack/methods/ann/rnn_impl.hpp @@ -405,6 +405,7 @@ void RNN::Serialize( ar & data::CreateNVP(inputSize, "inputSize"); ar & data::CreateNVP(outputSize, "outputSize"); ar & data::CreateNVP(targetSize, "targetSize"); + ar & data::CreateNVP(currentInput, "currentInput"); // If we are loading, we need to initialize the weights. if (Archive::is_loading::value) From 7e759a24de07161e2f7af3f30bdef3d5a5ccc448 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 19 Jan 2017 15:44:34 +0100 Subject: [PATCH 80/82] Simplify the input and target parameter. --- src/mlpack/methods/ann/ffn_impl.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp index 7461bcb9379..6d8ee25e1a3 100644 --- a/src/mlpack/methods/ann/ffn_impl.hpp +++ b/src/mlpack/methods/ann/ffn_impl.hpp @@ -172,14 +172,10 @@ double FFN::Evaluate( ResetDeterministic(); } - currentInput = std::move(arma::mat(predictors.colptr(i), - predictors.n_rows, 1, false, true)); + currentInput = predictors.unsafe_col(i); + currentTarget = responses.unsafe_col(i); Forward(std::move(currentInput)); - - currentTarget = arma::mat(responses.colptr(i), responses.n_rows, - 1, false, true); - double res = outputLayer.Forward(std::move(boost::apply_visitor( outputParameterVisitor, network.back())), std::move(currentTarget)); From 826a399a610d46de34f2cbaf8c0030e7cf19be78 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 19 Jan 2017 15:47:23 +0100 Subject: [PATCH 81/82] Minor style fix; move up comment to avoid potential licence parsing problems. 
--- src/mlpack/tests/convolution_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mlpack/tests/convolution_test.cpp b/src/mlpack/tests/convolution_test.cpp index 180ca8bab36..a277b9cb41b 100644 --- a/src/mlpack/tests/convolution_test.cpp +++ b/src/mlpack/tests/convolution_test.cpp @@ -3,11 +3,12 @@ * @author Shangtong Zhang * @author Marcus Edel * + * Tests for various convolution strategies. + * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. - * Tests for various convolution strategies. */ #include From ec10c750f85ac33b349e673a5dacdaf4eb28606f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 19 Jan 2017 16:00:05 +0100 Subject: [PATCH 82/82] Remove unused parameter comments. --- .../methods/ann/convolution_rules/fft_convolution.hpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp index 225626e34b8..bbcfecdaebb 100644 --- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp @@ -47,8 +47,6 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. - * @param dW Stride of filter application in the x direction. - * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< @@ -84,8 +82,6 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. - * @param dW Stride of filter application in the x direction. - * @param dH Stride of filter application in the y direction. */ template static typename std::enable_if< @@ -134,8 +130,6 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. - * @param dW Stride of filter application in the x direction. - * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input, @@ -168,8 +162,6 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. - * @param dW Stride of filter application in the x direction. - * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Mat& input, @@ -199,8 +191,6 @@ class FFTConvolution * @param input Input used to perform the convolution. * @param filter Filter used to perform the conolution. * @param output Output data that contains the results of the convolution. - * @param dW Stride of filter application in the x direction. - * @param dH Stride of filter application in the y direction. */ template static void Convolution(const arma::Cube& input,