From 0f598584d3736800c6f86f00b6ab69b62218a317 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Feb 2025 12:07:09 -0800 Subject: [PATCH 1/2] add wts argument to sparse_mean() --- R/sparse_mean.R | 20 ++++++++++++--- man/sparse_mean.Rd | 4 ++- tests/testthat/test-sparse_mean.R | 41 +++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/R/sparse_mean.R b/R/sparse_mean.R index 615ed9a..0bdaac6 100644 --- a/R/sparse_mean.R +++ b/R/sparse_mean.R @@ -1,8 +1,10 @@ #' Calculate mean from sparse vectors #' #' @param x A sparse numeric vector. +#' @param wts A numeric vector, should be same length as `x`. #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. #' +#' #' @details #' This function, as with any of the other helper functions assumes that the #' input `x` is a sparse numeric vector. This is done for performance reasons, @@ -33,7 +35,11 @@ #' ) #' #' @export -sparse_mean <- function(x, na_rm = FALSE) { +sparse_mean <- function(x, wts = NULL, na_rm = FALSE) { + if (!is.null(wts)) { + x <- sparse_multiplication(x, wts) + } + default <- sparse_default(x) values <- sparse_values(x) len_values <- length(values) @@ -46,7 +52,7 @@ sparse_mean <- function(x, na_rm = FALSE) { res <- sum(values, na.rm = na_rm) - if (default != 0) { + if (!is.na(default) && default != 0) { res <- res + (x_len - len_values) * default } @@ -54,7 +60,15 @@ sparse_mean <- function(x, na_rm = FALSE) { x_len <- x_len - sum(is.na(values)) } - res <- res / x_len + if (is.null(wts)) { + res <- res / x_len + } else { + na_loc <- sparse_which_na(x) + if (length(na_loc) > 0) { + wts <- wts[-na_loc] + } + res <- res / sum(wts) + } res } diff --git a/man/sparse_mean.Rd b/man/sparse_mean.Rd index 818f72d..658402b 100644 --- a/man/sparse_mean.Rd +++ b/man/sparse_mean.Rd @@ -4,11 +4,13 @@ \alias{sparse_mean} \title{Calculate mean from sparse vectors} \usage{ -sparse_mean(x, na_rm = FALSE) +sparse_mean(x, wts = NULL, na_rm = FALSE) } \arguments{ \item{x}{A sparse numeric vector.} +\item{wts}{A numeric vector, should be same length as \code{x}.} + \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} } \value{ diff --git a/tests/testthat/test-sparse_mean.R b/tests/testthat/test-sparse_mean.R index e0e2c02..ad6a2fa 100644 --- a/tests/testthat/test-sparse_mean.R +++ b/tests/testthat/test-sparse_mean.R @@ -35,3 +35,44 @@ test_that("sparse_mean() works", { expect_equal(mean(x), sparse_mean(x)) }) + +test_that("sparse_mean() works with wts argument", { + x <- sparse_double(10, 5, 1000) + wts <- (1:1000)[] + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) + + x <- sparse_double(c(10, -10), c(5, 100), 1000) + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal( + weighted.mean(x, wts, na.rm = TRUE), + sparse_mean(x, wts = wts, na_rm = TRUE) + ) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal( + weighted.mean(x, wts, na.rm = TRUE), + sparse_mean(x, wts = wts, na_rm = TRUE) + ) + + x <- sparse_double(numeric(), integer(), 1000) + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) + + x <- sparse_double(numeric(), integer(), 1000, default = 100) + + expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts)) +}) From a7c8e5e1fc9f70d9979871584a73e582b0afeaca Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Feb 2025 12:26:27 -0800 Subject: [PATCH 2/2] update description and news --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 16db70a..48dca72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sparsevctrs Title: Sparse Vectors for Use in Data Frames -Version: 0.2.0.9003 +Version: 0.2.0.9004 Authors@R: c( person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")), diff --git a/NEWS.md b/NEWS.md index ce984ad..cc65782 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ * Adding the arithmatic function `sparse_multiplication()`. (#93) +* Adding `wts` argument to `sparse_mean()`. (#95) + # sparsevctrs 0.2.0 ## New Functions