Skip to content

Commit

Permalink
implem dense accessors
Browse files Browse the repository at this point in the history
  • Loading branch information
privefl committed Feb 26, 2024
1 parent 4d87c8d commit b940c7e
Show file tree
Hide file tree
Showing 9 changed files with 262 additions and 3 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: bigsparser
Title: Sparse Matrix Format with Data on Disk
Version: 0.7.0
Version: 0.7.1
Authors@R:
person(given = "Florian", family = "Privé", role = c("aut", "cre"),
email = "florian.prive.21@gmail.com")
Expand All @@ -13,7 +13,7 @@ Description: Provide a sparse matrix format with data stored on disk, to be
License: GPL-3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.2
RoxygenNote: 7.2.3
URL: https://github.com/privefl/bigsparser
BugReports: https://github.com/privefl/bigsparser/issues
Depends: R (>= 3.1)
Expand Down
12 changes: 12 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ getXPtrSFBM_corr_compact <- function(path, n, m, p, first_i) {
.Call(`_bigsparser_getXPtrSFBM_corr_compact`, path, n, m, p, first_i)
}

access_dense_subset <- function(X, ind_row, ind_col) {
.Call(`_bigsparser_access_dense_subset`, X, ind_row, ind_col)
}

access_dense_subset_compact <- function(X, ind_row, ind_col) {
.Call(`_bigsparser_access_dense_subset_compact`, X, ind_row, ind_col)
}

access_dense_subset_corr_compact <- function(X, ind_row, ind_col) {
.Call(`_bigsparser_access_dense_subset_corr_compact`, X, ind_row, ind_col)
}

prodVec <- function(X, y) {
.Call(`_bigsparser_prodVec`, X, y)
}
Expand Down
14 changes: 14 additions & 0 deletions R/SFBM.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ assert_sparse_matrix <- function(spmat) {
#' And some methods:
#' - `$save()`: Save the SFBM object in `$rds`. Returns the SFBM.
#' - `$add_columns()`: Add new columns from a 'dgCMatrix' or a 'dsCMatrix'.
#' - `$dense_acc()`: Equivalent to `as.matrix(.[ind_row, ind_col])`. Use with
#' caution; `ind_row` and `ind_col` must be positive indices within range.
#'
#' @importFrom bigassertr assert_class assert_pos assert_one_int stop2
#' @importFrom methods new
Expand Down Expand Up @@ -137,6 +139,10 @@ SFBM_RC <- methods::setRefClass(
.self
},

dense_acc = function(ind_row, ind_col) {
access_dense_subset(.self, ind_row, ind_col)
},

show = function() {
cat(sprintf(
"A Sparse Filebacked Big Matrix with %s rows and %s columns.\n",
Expand Down Expand Up @@ -280,6 +286,10 @@ SFBM_compact_RC <- methods::setRefClass(
.self
},

dense_acc = function(ind_row, ind_col) {
access_dense_subset_compact(.self, ind_row, ind_col)
},

show = function() {
cat(sprintf(
"A compact Sparse Filebacked Big Matrix with %s rows and %s columns.\n",
Expand Down Expand Up @@ -382,6 +392,10 @@ SFBM_corr_compact_RC <- methods::setRefClass(
.self
},

dense_acc = function(ind_row, ind_col) {
access_dense_subset_corr_compact(.self, ind_row, ind_col)
},

show = function() {
cat(sprintf(
"A compact SFBM correlation with %s rows and %s columns.\n",
Expand Down
2 changes: 2 additions & 0 deletions R/spmat-accessor.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ spmat_accessor <- function(compact, corr = FALSE) {
#' X[1:3, 2:3]
#' X[, 4] # parameter drop is not implemented
#' X[-1, 3:4]
#' X$dense_acc(2:4, 3:4)
#'
#' X2 <- as_SFBM(spmat, compact = TRUE)
#' X2[1:3, 2:3]
#' X2$dense_acc(1:3, 2:3)
#'
setMethod('[', signature(x = "SFBM"), spmat_accessor(compact = FALSE))

Expand Down
2 changes: 2 additions & 0 deletions man/SFBM-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/crochet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,45 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// access_dense_subset
NumericMatrix access_dense_subset(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col);
RcppExport SEXP _bigsparser_access_dense_subset(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Environment >::type X(XSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP);
rcpp_result_gen = Rcpp::wrap(access_dense_subset(X, ind_row, ind_col));
return rcpp_result_gen;
END_RCPP
}
// access_dense_subset_compact
NumericMatrix access_dense_subset_compact(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col);
RcppExport SEXP _bigsparser_access_dense_subset_compact(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Environment >::type X(XSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP);
rcpp_result_gen = Rcpp::wrap(access_dense_subset_compact(X, ind_row, ind_col));
return rcpp_result_gen;
END_RCPP
}
// access_dense_subset_corr_compact
NumericMatrix access_dense_subset_corr_compact(Environment X, const IntegerVector& ind_row, const IntegerVector& ind_col);
RcppExport SEXP _bigsparser_access_dense_subset_corr_compact(SEXP XSEXP, SEXP ind_rowSEXP, SEXP ind_colSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Environment >::type X(XSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_row(ind_rowSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type ind_col(ind_colSEXP);
rcpp_result_gen = Rcpp::wrap(access_dense_subset_corr_compact(X, ind_row, ind_col));
return rcpp_result_gen;
END_RCPP
}
// prodVec
NumericVector prodVec(Environment X, const NumericVector& y);
RcppExport SEXP _bigsparser_prodVec(SEXP XSEXP, SEXP ySEXP) {
Expand Down Expand Up @@ -265,6 +304,9 @@ static const R_CallMethodDef CallEntries[] = {
{"_bigsparser_getXPtrSFBM", (DL_FUNC) &_bigsparser_getXPtrSFBM, 4},
{"_bigsparser_getXPtrSFBM_compact", (DL_FUNC) &_bigsparser_getXPtrSFBM_compact, 5},
{"_bigsparser_getXPtrSFBM_corr_compact", (DL_FUNC) &_bigsparser_getXPtrSFBM_corr_compact, 5},
{"_bigsparser_access_dense_subset", (DL_FUNC) &_bigsparser_access_dense_subset, 3},
{"_bigsparser_access_dense_subset_compact", (DL_FUNC) &_bigsparser_access_dense_subset_compact, 3},
{"_bigsparser_access_dense_subset_corr_compact", (DL_FUNC) &_bigsparser_access_dense_subset_corr_compact, 3},
{"_bigsparser_prodVec", (DL_FUNC) &_bigsparser_prodVec, 2},
{"_bigsparser_cprodVec", (DL_FUNC) &_bigsparser_cprodVec, 2},
{"_bigsparser_corr_prodVec", (DL_FUNC) &_bigsparser_corr_prodVec, 2},
Expand Down
142 changes: 142 additions & 0 deletions src/dense-accessor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/******************************************************************************/

// Enable C++11 via this plugin (Rcpp 0.10.3 or later)
// [[Rcpp::plugins(cpp11)]]

#include <bigsparser/SFBM.h>
#include <bigsparser/SFBM-corr-compact.h>

using namespace Rcpp;

/******************************************************************************/

// [[Rcpp::export]]
NumericMatrix access_dense_subset(Environment X,
const IntegerVector& ind_row,
const IntegerVector& ind_col) {

XPtr<SFBM> sfbm = X["address"];
const NumericVector p = X["p"];
const double * data = sfbm->i_x();
const IntegerVector ind_row2 = ind_row - 1;
int N = sfbm->nrow();
NumericVector cur_col(N);

int n = ind_row.size();
int m = ind_col.size();
NumericMatrix mat(n, m);

for (int j = 0; j < m; j++) {

int j2 = ind_col[j] - 1;

size_t lo = 2 * p[j2];
size_t up = 2 * p[j2 + 1];

// first pass: store all elements from column
for (size_t k = lo; k < up; k += 2) {
int ind = data[k];
double val = data[k + 1];
cur_col[ind] = val;
}

// second pass: read requested indices, and store corresponding values
for (int i = 0; i < n; i++)
mat(i, j) = cur_col[ind_row2[i]];

// third pass: reset column
for (size_t k = lo; k < up; k += 2) {
int ind = data[k];
cur_col[ind] = 0;
}
}

return mat;
}

/******************************************************************************/

// [[Rcpp::export]]
NumericMatrix access_dense_subset_compact(Environment X,
const IntegerVector& ind_row,
const IntegerVector& ind_col) {

XPtr<SFBM> sfbm = X["address"];
const NumericVector p = X["p"];
const IntegerVector first_i = X["first_i"];
const double * data = sfbm->i_x();
const IntegerVector ind_row2 = ind_row - 1;

int n = ind_row.size();
int m = ind_col.size();
NumericMatrix mat(n, m);

for (int j = 0; j < m; j++) {

int j2 = ind_col[j] - 1;

int min_i = first_i[j2];
if (min_i >= 0) {

size_t lo = p[j2];
int nb = p[j2 + 1] - lo;

for (int i = 0; i < n; i++) {

int i2 = ind_row2[i];
if (i2 < min_i) continue; // before range
int shift = i2 - min_i;
if (shift >= nb) continue; // after range

mat(i, j) = data[lo + shift];
}
}
}

return mat;
}

/******************************************************************************/

// [[Rcpp::export]]
NumericMatrix access_dense_subset_corr_compact(Environment X,
const IntegerVector& ind_row,
const IntegerVector& ind_col) {

XPtr<SFBM_corr_compact> sfbm = X["address"];
const NumericVector p = X["p"];
const IntegerVector first_i = X["first_i"];
const int16_t * data = sfbm->i_x();
const IntegerVector ind_row2 = ind_row - 1;

int n = ind_row.size();
int m = ind_col.size();
NumericMatrix mat(n, m);

for (int j = 0; j < m; j++) {

int j2 = ind_col[j] - 1;

int min_i = first_i[j2];
if (min_i >= 0) {

size_t lo = p[j2];
int nb = p[j2 + 1] - lo;

for (int i = 0; i < n; i++) {

int i2 = ind_row2[i];
if (i2 < min_i) continue; // before range
int shift = i2 - min_i;
if (shift >= nb) continue; // after range

int16_t val = data[lo + shift];
mat(i, j) = val / 32767.0;
}
}
}

return mat;
}

/******************************************************************************/
45 changes: 44 additions & 1 deletion tests/testthat/test-accessor.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ test_that("sparse matrix accessors work", {
spmat[3, 4] <- 7

list_ind_row <- list_ind_col <-
list(2, 1:5, c(TRUE, FALSE, TRUE, FALSE, TRUE), -3, -(3:5), rep(1, 5))
list(2, 1:5, c(TRUE, FALSE, TRUE, FALSE, TRUE), -3, -(3:5), sample(5, replace = TRUE))

for (compact in c(TRUE, FALSE)) {
X <- as_SFBM(spmat, compact = compact)
Expand All @@ -33,6 +33,49 @@ test_that("sparse matrix accessors work", {
expect_identical(X[], spmat)
expect_identical(X[,], spmat)
}

spmat2 <- Matrix::rsparsematrix(5, 5, density = 0.5, symmetric = TRUE)
diag(spmat2) <- max(abs(spmat2)); spmat3 <- Matrix::cov2cor(spmat2)
X2 <- as_SFBM_corr_compact(spmat3)
for (ind_row in list_ind_row) {
for (ind_col in list_ind_col) {
expect_lt(max(abs(X2[ind_row, ind_col] - spmat3[ind_row, ind_col, drop = FALSE])),
1 / 32767)
expect_error(X[ind_row, ind_col, drop = FALSE], "not implemented")
}
}
})

################################################################################

test_that("dense matrix accessors work", {

spmat <- Matrix::Diagonal(5, c(1, 0:3))
spmat[4, 3] <- 5
spmat[1, 4] <- 6
spmat[3, 4] <- 7

list_ind_row <- list_ind_col <- list(2, 1:5, sample(5, replace = TRUE))

for (compact in c(TRUE, FALSE)) {
X <- as_SFBM(spmat, compact = compact)
for (ind_row in list_ind_row) {
for (ind_col in list_ind_col) {
expect_identical(X$dense_acc(ind_row, ind_col),
as.matrix(spmat[ind_row, ind_col, drop = FALSE]))
}
}
}

spmat2 <- Matrix::rsparsematrix(10, 10, density = 0.5, symmetric = TRUE)
diag(spmat2) <- 10
X2 <- as_SFBM_corr_compact(Matrix::cov2cor(spmat2))
for (ind_row in list_ind_row) {
for (ind_col in list_ind_col) {
expect_identical(X2$dense_acc(ind_row, ind_col),
as.matrix(X2[ind_row, ind_col]))
}
}
})

################################################################################

0 comments on commit b940c7e

Please sign in to comment.