diff --git a/DESCRIPTION b/DESCRIPTION index e36fe06..2fd5edf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bigsparser Title: Sparse Matrix Format with Data on Disk -Version: 0.7.0 +Version: 0.7.1 Authors@R: person(given = "Florian", family = "Privé", role = c("aut", "cre"), email = "florian.prive.21@gmail.com") @@ -13,7 +13,7 @@ Description: Provide a sparse matrix format with data stored on disk, to be License: GPL-3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 URL: https://github.com/privefl/bigsparser BugReports: https://github.com/privefl/bigsparser/issues Depends: R (>= 3.1) diff --git a/R/RcppExports.R b/R/RcppExports.R index 1c40900..cbdb940 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -13,6 +13,18 @@ getXPtrSFBM_corr_compact <- function(path, n, m, p, first_i) { .Call(`_bigsparser_getXPtrSFBM_corr_compact`, path, n, m, p, first_i) } +access_dense_subset <- function(X, ind_row, ind_col) { + .Call(`_bigsparser_access_dense_subset`, X, ind_row, ind_col) +} + +access_dense_subset_compact <- function(X, ind_row, ind_col) { + .Call(`_bigsparser_access_dense_subset_compact`, X, ind_row, ind_col) +} + +access_dense_subset_corr_compact <- function(X, ind_row, ind_col) { + .Call(`_bigsparser_access_dense_subset_corr_compact`, X, ind_row, ind_col) +} + prodVec <- function(X, y) { .Call(`_bigsparser_prodVec`, X, y) } diff --git a/R/SFBM.R b/R/SFBM.R index 2e5d5c1..d6f1e07 100644 --- a/R/SFBM.R +++ b/R/SFBM.R @@ -39,6 +39,8 @@ assert_sparse_matrix <- function(spmat) { #' And some methods: #' - `$save()`: Save the SFBM object in `$rds`. Returns the SFBM. #' - `$add_columns()`: Add new columns from a 'dgCMatrix' or a 'dsCMatrix'. +#' - `$dense_acc()`: Equivalent to `as.matrix(.[ind_row, ind_col])`. Use with +#' caution; `ind_row` and `ind_col` must be positive indices within range. #' #' @importFrom bigassertr assert_class assert_pos assert_one_int stop2 #' @importFrom methods new @@ -137,6 +139,10 @@ SFBM_RC <- methods::setRefClass( .self }, + dense_acc = function(ind_row, ind_col) { + access_dense_subset(.self, ind_row, ind_col) + }, + show = function() { cat(sprintf( "A Sparse Filebacked Big Matrix with %s rows and %s columns.\n", @@ -280,6 +286,10 @@ SFBM_compact_RC <- methods::setRefClass( .self }, + dense_acc = function(ind_row, ind_col) { + access_dense_subset_compact(.self, ind_row, ind_col) + }, + show = function() { cat(sprintf( "A compact Sparse Filebacked Big Matrix with %s rows and %s columns.\n", @@ -382,6 +392,10 @@ SFBM_corr_compact_RC <- methods::setRefClass( .self }, + dense_acc = function(ind_row, ind_col) { + access_dense_subset_corr_compact(.self, ind_row, ind_col) + }, + show = function() { cat(sprintf( "A compact SFBM correlation with %s rows and %s columns.\n", diff --git a/R/spmat-accessor.R b/R/spmat-accessor.R index 90799d4..8264d6b 100644 --- a/R/spmat-accessor.R +++ b/R/spmat-accessor.R @@ -88,9 +88,11 @@ spmat_accessor <- function(compact, corr = FALSE) { #' X[1:3, 2:3] #' X[, 4] # parameter drop is not implemented #' X[-1, 3:4] +#' X$dense_acc(2:4, 3:4) #' #' X2 <- as_SFBM(spmat, compact = TRUE) #' X2[1:3, 2:3] +#' X2$dense_acc(1:3, 2:3) #' setMethod('[', signature(x = "SFBM"), spmat_accessor(compact = FALSE)) diff --git a/man/SFBM-class.Rd b/man/SFBM-class.Rd index 6105b62..2cadc8d 100644 --- a/man/SFBM-class.Rd +++ b/man/SFBM-class.Rd @@ -48,6 +48,8 @@ And some methods: \itemize{ \item \verb{$save()}: Save the SFBM object in \verb{$rds}. Returns the SFBM. \item \verb{$add_columns()}: Add new columns from a 'dgCMatrix' or a 'dsCMatrix'. +\item \verb{$dense_acc()}: Equivalent to \code{as.matrix(.[ind_row, ind_col])}. Use with +caution; \code{ind_row} and \code{ind_col} must be positive indices within range. } } diff --git a/man/crochet.Rd b/man/crochet.Rd index 506627a..22388a5 100644 --- a/man/crochet.Rd +++ b/man/crochet.Rd @@ -39,8 +39,10 @@ X <- as_SFBM(spmat) X[1:3, 2:3] X[, 4] # parameter drop is not implemented X[-1, 3:4] +X$dense_acc(2:4, 3:4) X2 <- as_SFBM(spmat, compact = TRUE) X2[1:3, 2:3] +X2$dense_acc(1:3, 2:3) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d50d523..0924d09 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -55,6 +55,45 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// access_dense_subset +NumericMatrix access_dense_subset(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col); +RcppExport SEXP _bigsparser_access_dense_subset(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Environment >::type X(XSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP); + rcpp_result_gen = Rcpp::wrap(access_dense_subset(X, ind_row, ind_col)); + return rcpp_result_gen; +END_RCPP +} +// access_dense_subset_compact +NumericMatrix access_dense_subset_compact(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col); +RcppExport SEXP _bigsparser_access_dense_subset_compact(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Environment >::type X(XSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP); + rcpp_result_gen = Rcpp::wrap(access_dense_subset_compact(X, ind_row, ind_col)); + return rcpp_result_gen; +END_RCPP +} +// access_dense_subset_corr_compact +NumericMatrix access_dense_subset_corr_compact(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col); +RcppExport SEXP _bigsparser_access_dense_subset_corr_compact(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Environment >::type X(XSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP); + rcpp_result_gen = Rcpp::wrap(access_dense_subset_corr_compact(X, ind_row, ind_col)); + return rcpp_result_gen; +END_RCPP +} // prodVec NumericVector prodVec(Environment X, const NumericVector& y); RcppExport SEXP _bigsparser_prodVec(SEXP XSEXP, SEXP ySEXP) { @@ -265,6 +304,9 @@ static const R_CallMethodDef CallEntries[] = { {"_bigsparser_getXPtrSFBM", (DL_FUNC) &_bigsparser_getXPtrSFBM, 4}, {"_bigsparser_getXPtrSFBM_compact", (DL_FUNC) &_bigsparser_getXPtrSFBM_compact, 5}, {"_bigsparser_getXPtrSFBM_corr_compact", (DL_FUNC) &_bigsparser_getXPtrSFBM_corr_compact, 5}, + {"_bigsparser_access_dense_subset", (DL_FUNC) &_bigsparser_access_dense_subset, 3}, + {"_bigsparser_access_dense_subset_compact", (DL_FUNC) &_bigsparser_access_dense_subset_compact, 3}, + {"_bigsparser_access_dense_subset_corr_compact", (DL_FUNC) &_bigsparser_access_dense_subset_corr_compact, 3}, {"_bigsparser_prodVec", (DL_FUNC) &_bigsparser_prodVec, 2}, {"_bigsparser_cprodVec", (DL_FUNC) &_bigsparser_cprodVec, 2}, {"_bigsparser_corr_prodVec", (DL_FUNC) &_bigsparser_corr_prodVec, 2}, diff --git a/src/dense-accessor.cpp b/src/dense-accessor.cpp new file mode 100644 index 0000000..460fd4a --- /dev/null +++ b/src/dense-accessor.cpp @@ -0,0 +1,142 @@ +/******************************************************************************/ + +// Enable C++11 via this plugin (Rcpp 0.10.3 or later) +// [[Rcpp::plugins(cpp11)]] + +#include +#include + +using namespace Rcpp; + +/******************************************************************************/ + +// [[Rcpp::export]] +NumericMatrix access_dense_subset(Environment X, + const IntegerVector& ind_row, + const IntegerVector& ind_col) { + + XPtr sfbm = X["address"]; + const NumericVector p = X["p"]; + const double * data = sfbm->i_x(); + const IntegerVector ind_row2 = ind_row - 1; + int N = sfbm->nrow(); + NumericVector cur_col(N); + + int n = ind_row.size(); + int m = ind_col.size(); + NumericMatrix mat(n, m); + + for (int j = 0; j < m; j++) { + + int j2 = ind_col[j] - 1; + + size_t lo = 2 * p[j2]; + size_t up = 2 * p[j2 + 1]; + + // first pass: store all elements from column + for (size_t k = lo; k < up; k += 2) { + int ind = data[k]; + double val = data[k + 1]; + cur_col[ind] = val; + } + + // second pass: read requested indices, and store corresponding values + for (int i = 0; i < n; i++) + mat(i, j) = cur_col[ind_row2[i]]; + + // third pass: reset column + for (size_t k = lo; k < up; k += 2) { + int ind = data[k]; + cur_col[ind] = 0; + } + } + + return mat; +} + +/******************************************************************************/ + +// [[Rcpp::export]] +NumericMatrix access_dense_subset_compact(Environment X, + const IntegerVector& ind_row, + const IntegerVector& ind_col) { + + XPtr sfbm = X["address"]; + const NumericVector p = X["p"]; + const IntegerVector first_i = X["first_i"]; + const double * data = sfbm->i_x(); + const IntegerVector ind_row2 = ind_row - 1; + + int n = ind_row.size(); + int m = ind_col.size(); + NumericMatrix mat(n, m); + + for (int j = 0; j < m; j++) { + + int j2 = ind_col[j] - 1; + + int min_i = first_i[j2]; + if (min_i >= 0) { + + size_t lo = p[j2]; + int nb = p[j2 + 1] - lo; + + for (int i = 0; i < n; i++) { + + int i2 = ind_row2[i]; + if (i2 < min_i) continue; // before range + int shift = i2 - min_i; + if (shift >= nb) continue; // after range + + mat(i, j) = data[lo + shift]; + } + } + } + + return mat; +} + +/******************************************************************************/ + +// [[Rcpp::export]] +NumericMatrix access_dense_subset_corr_compact(Environment X, + const IntegerVector& ind_row, + const IntegerVector& ind_col) { + + XPtr sfbm = X["address"]; + const NumericVector p = X["p"]; + const IntegerVector first_i = X["first_i"]; + const int16_t * data = sfbm->i_x(); + const IntegerVector ind_row2 = ind_row - 1; + + int n = ind_row.size(); + int m = ind_col.size(); + NumericMatrix mat(n, m); + + for (int j = 0; j < m; j++) { + + int j2 = ind_col[j] - 1; + + int min_i = first_i[j2]; + if (min_i >= 0) { + + size_t lo = p[j2]; + int nb = p[j2 + 1] - lo; + + for (int i = 0; i < n; i++) { + + int i2 = ind_row2[i]; + if (i2 < min_i) continue; // before range + int shift = i2 - min_i; + if (shift >= nb) continue; // after range + + int16_t val = data[lo + shift]; + mat(i, j) = val / 32767.0; + } + } + } + + return mat; +} + +/******************************************************************************/ diff --git a/tests/testthat/test-accessor.R b/tests/testthat/test-accessor.R index f4871c7..5bc4257 100644 --- a/tests/testthat/test-accessor.R +++ b/tests/testthat/test-accessor.R @@ -19,7 +19,7 @@ test_that("sparse matrix accessors work", { spmat[3, 4] <- 7 list_ind_row <- list_ind_col <- - list(2, 1:5, c(TRUE, FALSE, TRUE, FALSE, TRUE), -3, -(3:5), rep(1, 5)) + list(2, 1:5, c(TRUE, FALSE, TRUE, FALSE, TRUE), -3, -(3:5), sample(5, replace = TRUE)) for (compact in c(TRUE, FALSE)) { X <- as_SFBM(spmat, compact = compact) @@ -33,6 +33,49 @@ test_that("sparse matrix accessors work", { expect_identical(X[], spmat) expect_identical(X[,], spmat) } + + spmat2 <- Matrix::rsparsematrix(5, 5, density = 0.5, symmetric = TRUE) + diag(spmat2) <- max(abs(spmat2)); spmat3 <- Matrix::cov2cor(spmat2) + X2 <- as_SFBM_corr_compact(spmat3) + for (ind_row in list_ind_row) { + for (ind_col in list_ind_col) { + expect_lt(max(abs(X2[ind_row, ind_col] - spmat3[ind_row, ind_col, drop = FALSE])), + 1 / 32767) + expect_error(X[ind_row, ind_col, drop = FALSE], "not implemented") + } + } +}) + +################################################################################ + +test_that("dense matrix accessors work", { + + spmat <- Matrix::Diagonal(5, c(1, 0:3)) + spmat[4, 3] <- 5 + spmat[1, 4] <- 6 + spmat[3, 4] <- 7 + + list_ind_row <- list_ind_col <- list(2, 1:5, sample(5, replace = TRUE)) + + for (compact in c(TRUE, FALSE)) { + X <- as_SFBM(spmat, compact = compact) + for (ind_row in list_ind_row) { + for (ind_col in list_ind_col) { + expect_identical(X$dense_acc(ind_row, ind_col), + as.matrix(spmat[ind_row, ind_col, drop = FALSE])) + } + } + } + + spmat2 <- Matrix::rsparsematrix(10, 10, density = 0.5, symmetric = TRUE) + diag(spmat2) <- 10 + X2 <- as_SFBM_corr_compact(Matrix::cov2cor(spmat2)) + for (ind_row in list_ind_row) { + for (ind_col in list_ind_col) { + expect_identical(X2$dense_acc(ind_row, ind_col), + as.matrix(X2[ind_row, ind_col])) + } + } }) ################################################################################