Merge pull request #309 from mlverse/sparsemax

Sparsemax

dfalbel committed Oct 19, 2020
2 parents 544d39c + 9a906b2 commit effadbc

Showing 16 changed files with 332 additions and 13 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -134,6 +134,7 @@ export(nn_bce_with_logits_loss)
export(nn_bilinear)
export(nn_buffer)
export(nn_celu)
export(nn_contrib_sparsemax)
export(nn_conv1d)
export(nn_conv2d)
export(nn_conv3d)
@@ -243,6 +244,7 @@ export(nnf_binary_cross_entropy)
export(nnf_binary_cross_entropy_with_logits)
export(nnf_celu)
export(nnf_celu_)
export(nnf_contrib_sparsemax)
export(nnf_conv1d)
export(nnf_conv2d)
export(nnf_conv3d)
1 change: 1 addition & 0 deletions NEWS.md
@@ -28,6 +28,7 @@
- Fixed `nn_batchnorm*` so it returns the same results as PyTorch (#302)
- Fixed a bug in `nn_module$parameter` when there were shared parameters
  between layers. (#306)
- Added `nnf_contrib_sparsemax` and `nn_contrib_sparsemax`. (#309)
- Added ASGD optimizer (@krzjoa #307)

# torch 0.1.0
4 changes: 4 additions & 0 deletions R/RcppExports.R
@@ -97,6 +97,10 @@ cpp_autograd_grad <- function(outputs, inputs, grad_outputs, retain_graph, creat
.Call('_torch_cpp_autograd_grad', PACKAGE = 'torchpkg', outputs, inputs, grad_outputs, retain_graph, create_graph, allow_unused)
}

cpp_contrib_torch_sparsemax <- function(input, dim) {
.Call('_torch_cpp_contrib_torch_sparsemax', PACKAGE = 'torchpkg', input, dim)
}

cpp_cuda_is_available <- function() {
.Call('_torch_cpp_cuda_is_available', PACKAGE = 'torchpkg')
}
22 changes: 22 additions & 0 deletions R/nn-activation.R
@@ -1096,3 +1096,25 @@ nn_log_softmax <- nn_module(
nnf_log_softmax(input, self$dim)
}
)

#' Sparsemax activation
#'
#' Sparsemax activation module.
#'
#' @details
#' The sparsemax activation is described in
#' ['From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label Classification'](https://arxiv.org/abs/1602.02068).
#' The implementation is based on [aced125/sparsemax](https://github.com/aced125/sparsemax/tree/master/sparsemax).
#'
#' @param dim The dimension over which to apply the sparsemax function. Default: -1 (the last dimension).
#'
#' @export
nn_contrib_sparsemax <- nn_module(
"nn_contrib_sparsemax",
initialize = function(dim = -1) {
    self$dim <- dim
},
forward = function(input) {
nnf_contrib_sparsemax(input, self$dim)
}
)
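For context, not part of the diff: a minimal usage sketch of the new module, assuming a torch build that includes this commit is installed and attached.

library(torch)

# scores for 2 observations over 5 classes
x <- torch_randn(c(2, 5))

# sparsemax over the last dimension: each row sums to 1 and may contain exact zeros
m <- nn_contrib_sparsemax(dim = -1)
m(x)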
27 changes: 15 additions & 12 deletions R/nn.R
@@ -389,22 +389,25 @@ create_nn_module_callable <- function(instance) {
  if (is.numeric(y))
    return(x[[".__enclos_env__"]][["private"]][["modules_"]][[y]])

- if (!is.null(x[[".__enclos_env__"]][["private"]][["parameters_"]])) {
-   pars <- x[[".__enclos_env__"]][["private"]][["parameters_"]]
-   if (y %in% names(pars))
-     return(pars[[y]])
+ pars <- x[[".__enclos_env__"]][["private"]][["parameters_"]]
+ if (!is.null(pars)) {
+   o <- pars[[y]]
+   if (!is.null(o))
+     return(o)
  }

- if (!is.null(x[[".__enclos_env__"]][["private"]][["buffers_"]])) {
-   bufs <- x[[".__enclos_env__"]][["private"]][["buffers_"]]
-   if (y %in% names(bufs))
-     return(bufs[[y]])
+ bufs <- x[[".__enclos_env__"]][["private"]][["buffers_"]]
+ if (!is.null(bufs)) {
+   o <- bufs[[y]]
+   if (!is.null(o))
+     return(o)
  }

- if (!is.null(x[[".__enclos_env__"]][["private"]][["modules_"]])) {
-   mods <- x[[".__enclos_env__"]][["private"]][["modules_"]]
-   if (y %in% names(mods))
-     return(mods[[y]])
+ mods <- x[[".__enclos_env__"]][["private"]][["modules_"]]
+ if (!is.null(mods)) {
+   o <- mods[[y]]
+   if (!is.null(o))
+     return(o)
  }

NextMethod("[[", x)
27 changes: 27 additions & 0 deletions R/nnf-activation.R
@@ -739,3 +739,30 @@ nnf_sigmoid <- function(input) {
torch_sigmoid(input)
}

#' Sparsemax
#'
#' Applies the SparseMax activation.
#'
#' @details
#' The sparsemax activation is described in
#' ['From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label Classification'](https://arxiv.org/abs/1602.02068).
#' The implementation is based on [aced125/sparsemax](https://github.com/aced125/sparsemax/tree/master/sparsemax).
#'
#' @param input The input tensor.
#' @param dim The dimension over which to apply the sparsemax function. Default: -1 (the last dimension).
#'
#' @export
nnf_contrib_sparsemax <- function(input, dim = -1) {
if (!is_torch_tensor(input))
    value_error("Input should be a tensor, got '{class(input)}'.")

dim <- as_1_based_dim(dim)

ptr <- cpp_contrib_torch_sparsemax(input$ptr, dim)

Tensor$new(ptr = ptr)
}
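As an illustrative sketch (not part of the diff), sparsemax differs from softmax in that it can assign exactly zero probability to low-scoring entries:

x <- torch_tensor(matrix(c( 3, 1, 0.2, -2,
                           -2, 0, 0,    0), nrow = 2, byrow = TRUE))
nnf_contrib_sparsemax(x, dim = -1)  # some entries are exactly 0
nnf_softmax(x, dim = 2)             # all entries are strictly positive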




2 changes: 1 addition & 1 deletion R/scalar.R
@@ -1,4 +1,4 @@
-Scalar <- R6::R6Class(
+Scalar <- R7Class(
classname = "torch_scalar",

public = list(
1 change: 1 addition & 0 deletions lantern/CMakeLists.txt
@@ -94,6 +94,7 @@ add_library(lantern SHARED
src/NNUtilsRnn.cpp
src/Storage.cpp
src/Save.cpp
src/Contrib/Sparsemax.cpp
)
add_library(lantern::library ALIAS lantern)

10 changes: 10 additions & 0 deletions lantern/include/lantern/lantern.h
@@ -37,6 +37,7 @@

#include <stdint.h>
#include <stdio.h>
#include <string>

extern int lanternLogEnabled;
#define LLOG(...) if ((lanternLogEnabled & 1) == 1) { \
@@ -545,6 +546,14 @@ extern "C"
LANTERN_HOST_HANDLER;
}

LANTERN_API void * (LANTERN_PTR _lantern_contrib_torch_sparsemax) (void * input, int dim);
HOST_API void * lantern_contrib_torch_sparsemax (void* input, int dim)
{
void * ret = _lantern_contrib_torch_sparsemax(input, dim);
LANTERN_HOST_HANDLER;
return ret;
}
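  // _lantern_contrib_torch_sparsemax is a function pointer: lanternInit() resolves it
  // from the lantern shared library via LOAD_SYMBOL (see further below), and R-facing
  // code calls it through the lantern_contrib_torch_sparsemax() wrapper above.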

/* Autogen Headers -- Start */
LANTERN_API void* (LANTERN_PTR _lantern__cast_byte_tensor_bool)(void* self, void* non_blocking);
HOST_API void* lantern__cast_byte_tensor_bool(void* self, void* non_blocking) { void* ret = _lantern__cast_byte_tensor_bool(self, non_blocking); LANTERN_HOST_HANDLER return ret; }
@@ -4263,6 +4272,7 @@ bool lanternInit(const std::string &libPath, std::string *pError)
LOAD_SYMBOL(_lantern_Tensor_names);
LOAD_SYMBOL(_lantern_string_new);
LOAD_SYMBOL(_lantern_string_delete);
LOAD_SYMBOL(_lantern_contrib_torch_sparsemax);
/* Autogen Symbols -- Start */
LOAD_SYMBOL(_lantern__cast_byte_tensor_bool)
LOAD_SYMBOL(_lantern__cast_char_tensor_bool)
133 changes: 133 additions & 0 deletions lantern/src/Contrib/Sparsemax.cpp
@@ -0,0 +1,133 @@
#define LANTERN_BUILD
#include "lantern/lantern.h"
#include <torch/torch.h>
#include <string>
#include <iostream>
#include "../utils.hpp"
#include <stdexcept> // std::out_of_range

using namespace torch::autograd;

// Inherit from Function
class SparseMaxFunction : public Function<SparseMaxFunction> {
public:

static torch::Tensor forward(AutogradContext *ctx, torch::Tensor input, int dim) {

auto input_dim = input.dim();
if (input_dim <= dim || dim < -input_dim)
{
throw std::out_of_range("Dimension out of range");
}

bool needs_reshaping = input_dim > 2;
auto original_size = input.sizes().vec();

if (needs_reshaping)
{
      // transpose batch and nth dim
      input = input.transpose(0, dim);

      // capture the transposed shape so the output can be restored after flattening
      original_size = input.sizes().vec();

      // Flatten all dimensions except nth dim
      input = input.reshape({input.size(0), -1});

// Transpose flattened dimensions to 0th dim, nth dim to last dim
input = input.transpose(0, -1);
}

// Translate by max for numerical stability
input = input - std::get<0>(input.max(-1, true)).expand_as(input);

auto zs = std::get<0>(input.sort(-1, true));
auto range = torch::arange(1, input.size(-1) + 1);
range = range.expand_as(input).to(input);

// Determine sparsity of projection
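      // With each row sorted as z_(1) >= ... >= z_(K), coordinate j is in the support
      // iff 1 + j * z_(j) > cumsum(z)_(j); k is the largest such j per row. The output
      // below is then max(0, z - tau), the Euclidean projection of z onto the
      // probability simplex, with tau = (sum of the k largest entries - 1) / k.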
auto bound = 1 + range * zs;
auto is_gt = bound.gt(zs.cumsum(-1)).to(input.dtype());
auto k = std::get<0>((is_gt * range).max(-1, true));

// Compute threshold
auto zs_sparse = is_gt * zs;

// Compute taus
auto taus = (zs_sparse.sum(-1, true) - 1) / k;
taus = taus.expand_as(input);

auto output = torch::max(torch::zeros_like(input), input - taus);

// Save context
ctx->save_for_backward({output});
ctx->saved_data["needs_reshaping"] = needs_reshaping;
ctx->saved_data["dim"] = dim;

if (needs_reshaping)
{
      // Transpose flattened dim to last dim, nth dim to 0th dim
output = output.transpose(0, 1);

// Reshape to original size
output = output.reshape(original_size);

// Swap batch dim and nth dim
output = output.transpose(0, dim);
}

return output;
}

static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
auto saved = ctx->get_saved_variables();
auto output = saved[0];
auto grad_output = grad_outputs[0];

bool needs_reshaping = ctx->saved_data["needs_reshaping"].toBool();
int dim = ctx->saved_data["dim"].toInt();
auto original_size = grad_output.sizes().vec();

if (needs_reshaping)
{
      // transpose batch and nth dim
      grad_output = grad_output.transpose(0, dim);

      // capture the transposed shape so grad_input can be restored after flattening
      original_size = grad_output.sizes().vec();

      // Flatten all dimensions except nth dim
      grad_output = grad_output.reshape({grad_output.size(0), -1});

// Transpose flattened dimensions to 0th dim, nth dim to last dim
grad_output = grad_output.transpose(0, -1);
}

// Compute gradient
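      // Jacobian-vector product of sparsemax: inside the support (nonzero outputs) the
      // gradient is grad_output minus the mean of grad_output over the support;
      // outside the support it is zero.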
auto nonzeros = torch::ne(output, 0);
auto num_nonzeros = nonzeros.sum(-1, true);
auto sum = (grad_output * nonzeros).sum(-1, true) / num_nonzeros;
auto grad_input = nonzeros * (grad_output - sum.expand_as(grad_output));

if (needs_reshaping)
{
      // Transpose flattened dim to last dim, nth dim to 0th dim
grad_input = grad_input.transpose(0, 1);

// Reshape to original size
grad_input = grad_input.reshape(original_size);

// Swap batch dim and nth dim
grad_input = grad_input.transpose(0, dim);
}

auto o = torch::autograd::variable_list(2);
o[0] = grad_input;
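      // o[1] stays undefined: the second forward argument (`dim`) is an integer,
      // not a tensor, so no gradient flows to it.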

return o;
}
};

void * _lantern_contrib_torch_sparsemax (void * input, int dim)
{
LANTERN_FUNCTION_START
torch::Tensor t = reinterpret_cast<LanternObject<torch::Tensor> *>(input)->get();
torch::Tensor res = SparseMaxFunction::apply(t, dim);
return (void*) new LanternObject<torch::Tensor>(res);
LANTERN_FUNCTION_END
}
19 changes: 19 additions & 0 deletions man/nn_contrib_sparsemax.Rd

Some generated files are not rendered by default.

21 changes: 21 additions & 0 deletions man/nnf_contrib_sparsemax.Rd

Some generated files are not rendered by default.

13 changes: 13 additions & 0 deletions src/RcppExports.cpp
@@ -282,6 +282,18 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// cpp_contrib_torch_sparsemax
Rcpp::XPtr<XPtrTorchTensor> cpp_contrib_torch_sparsemax(Rcpp::XPtr<XPtrTorchTensor> input, int dim);
RcppExport SEXP _torch_cpp_contrib_torch_sparsemax(SEXP inputSEXP, SEXP dimSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::XPtr<XPtrTorchTensor> >::type input(inputSEXP);
Rcpp::traits::input_parameter< int >::type dim(dimSEXP);
rcpp_result_gen = Rcpp::wrap(cpp_contrib_torch_sparsemax(input, dim));
return rcpp_result_gen;
END_RCPP
}
// cpp_cuda_is_available
bool cpp_cuda_is_available();
RcppExport SEXP _torch_cpp_cuda_is_available() {
@@ -23836,6 +23848,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_torch_cpp_autograd_node_next_edges", (DL_FUNC) &_torch_cpp_autograd_node_next_edges, 1},
{"_torch_cpp_autograd_edge_function", (DL_FUNC) &_torch_cpp_autograd_edge_function, 1},
{"_torch_cpp_autograd_grad", (DL_FUNC) &_torch_cpp_autograd_grad, 6},
{"_torch_cpp_contrib_torch_sparsemax", (DL_FUNC) &_torch_cpp_contrib_torch_sparsemax, 2},
{"_torch_cpp_cuda_is_available", (DL_FUNC) &_torch_cpp_cuda_is_available, 0},
{"_torch_cpp_cuda_device_count", (DL_FUNC) &_torch_cpp_cuda_device_count, 0},
{"_torch_cpp_cuda_current_device", (DL_FUNC) &_torch_cpp_cuda_current_device, 0},
9 changes: 9 additions & 0 deletions src/contrib.cpp
@@ -0,0 +1,9 @@
#include "torch_types.h"
#include "utils.h"

// [[Rcpp::export]]
Rcpp::XPtr<XPtrTorchTensor> cpp_contrib_torch_sparsemax (Rcpp::XPtr<XPtrTorchTensor> input, int dim)
{
XPtrTorchTensor out = lantern_contrib_torch_sparsemax(input->get(), dim);
return make_xptr<XPtrTorchTensor>(out);
}
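// Call chain for the new activation: nnf_contrib_sparsemax() in R calls
// cpp_contrib_torch_sparsemax() (exported here via Rcpp), which calls
// lantern_contrib_torch_sparsemax() declared in lantern/include/lantern/lantern.h,
// which dispatches to _lantern_contrib_torch_sparsemax() implemented in
// lantern/src/Contrib/Sparsemax.cpp.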
