From d46e39848c74c86b2c391e0f94e4d41f67a3fb3e Mon Sep 17 00:00:00 2001 From: Paul Staab Date: Mon, 5 Oct 2015 14:49:36 +0200 Subject: [PATCH] fix calculation of sfs with an outgroup present fixes #96 --- DESCRIPTION | 4 ++-- NEWS | 10 ++++++++-- R/model_getters.R | 19 +++++++++++++++---- R/sumstat_sfs.R | 6 +----- tests/testthat/test-model-class.R | 11 ++++++++++- tests/testthat/test-sumstat-jsfs.R | 27 +++++++++++++++++++++++++-- tests/testthat/test-sumstat-sfs.R | 20 ++++++++++++++++++++ 7 files changed, 81 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b7b2ffc..93fc342 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: coala -Version: 0.1.1.9005 -Date: 2015-09-15 +Version: 0.1.1.9006 +Date: 2015-10-05 License: MIT + file LICENSE Title: A Framework for Coalescent Simulation Authors@R: c( diff --git a/NEWS b/NEWS index 053a9d3..e8f5fa0 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +coala 0.1.1.9006 +---------------- + +* Fix site frequency calculation when an outgroup is present (#96) + + coala 0.1.1.9005 ---------------- @@ -8,7 +14,7 @@ coala 0.1.1.9004 ---------------- * Fix: Remove SNPs that were not polymorphic in the given population before - calulating EHH based statistics. + calculating EHH based statistics. coala 0.1.1.9003 @@ -23,7 +29,7 @@ coala 0.1.1.9002 ---------------- * Fixes buggy iHS calculation when `max_snps` and `position` was used - simultainously. The argument `max_snps` is now ignored if a position + simultaneously. The argument `max_snps` is now ignored if a position is given (#98). diff --git a/R/model_getters.R b/R/model_getters.R index 2a78dee..7ae4b31 100644 --- a/R/model_getters.R +++ b/R/model_getters.R @@ -128,15 +128,26 @@ get_locus_number <- function(model, group = NA, ignore_variation = FALSE) { #' @describeIn get_features Returns the index of the individuals of one -#' population +#' population. Ignores outgroups, so that it can be used for indexing +#' segregating sites. #' @param zero_indexed If true, the names of the populations are started from #' 0 instead of from 1. +#' @param allow_outgroup If set to false, an error is thrown if \code{pop} is +#' marked as outgroup. #' @export -get_population_indiviuals <- function(model, pop, zero_indexed = FALSE) { - if (pop == "all") return(1:sum(get_sample_size(model))) +get_population_indiviuals <- function(model, pop, + zero_indexed = FALSE) { + sample_size <- get_sample_size(model) + outgroup <- get_outgroup(model) + + if (!is.na(outgroup)) { + if (pop == outgroup) stop("Calculating summary statistics for the outgroup") + sample_size[outgroup] <- 0 + } + + if (pop == "all") return(1:sum(sample_size)) if (!pop %in% get_populations(model)) stop("Invalid population") - sample_size <- get_sample_size(model) from <- cumsum(c(0, sample_size)) + 1 to <- cumsum(sample_size) from[pop]:to[pop] diff --git a/R/sumstat_sfs.R b/R/sumstat_sfs.R index 4191a9e..1b2a4a2 100644 --- a/R/sumstat_sfs.R +++ b/R/sumstat_sfs.R @@ -11,11 +11,7 @@ stat_sfs_class <- R6Class("stat_sfs", inherit = sumstat_class, super$initialize(name) }, calculate = function(seg_sites, trees, files, model) { - if ("all" %in% private$population) { - individuals <- 1:sum(get_sample_size(model)) - } else { - individuals <- get_population_indiviuals(model, private$population) - } + individuals <- get_population_indiviuals(model, private$population) sfs <- as.vector(calc_jsfs(seg_sites, individuals, numeric())) sfs[c(-1, -length(sfs))] } diff --git a/tests/testthat/test-model-class.R b/tests/testthat/test-model-class.R index 1b1d087..4d5d088 100644 --- a/tests/testthat/test-model-class.R +++ b/tests/testthat/test-model-class.R @@ -151,12 +151,21 @@ test_that("get population individuals works", { expect_error(get_population_indiviuals(model_theta_tau(), 3)) expect_error(get_population_indiviuals(model_theta_tau(), "al")) + # With an outgroup + expect_equal(get_population_indiviuals(model_hky(), "all"), 1:6) expect_equal(get_population_indiviuals(model_hky(), 1), 1:3) expect_equal(get_population_indiviuals(model_hky(), 2), 4:6) - expect_equal(get_population_indiviuals(model_hky(), 3), 7) + expect_error(get_population_indiviuals(model_hky(), 3)) + + model <- coal_model(1:3) + feat_outgroup(2) + expect_equal(get_population_indiviuals(model, "all"), 1:4) + expect_equal(get_population_indiviuals(model, 1), 1) + expect_error(get_population_indiviuals(model, 2)) + expect_equal(get_population_indiviuals(model, 3), 2:4) }) + test_that("getting the ploidy and individuals works", { model <- model_theta_tau() expect_equal(get_ploidy(model), 1L) diff --git a/tests/testthat/test-sumstat-jsfs.R b/tests/testthat/test-sumstat-jsfs.R index 239f49a..fe728c1 100644 --- a/tests/testthat/test-sumstat-jsfs.R +++ b/tests/testthat/test-sumstat-jsfs.R @@ -84,8 +84,8 @@ test_that("calc_jsfs works with trios", { test_that("JSFS sumstat works", { - stat <- sumstat_jsfs("jsfs_test", c(1,2)) - model <- coal_model(c(2,2), 1) + stat <- sumstat_jsfs("jsfs_test", c(1, 2)) + model <- coal_model(c(2, 2), 1) seg_sites <- list(matrix(c(1, 0, 0, 0, 1, 1, 0, 1, @@ -99,3 +99,26 @@ test_that("JSFS sumstat works", { 1, 0, 1, 0, 0, 1), 3, 3, byrow = TRUE)) }) + + +test_that("JSFS is caluculated with an outgroup present", { + stat <- sumstat_jsfs("jsfs", c(1, 2)) + model <- coal_model(c(2, 1, 1), 1) + feat_outgroup(3) + seg_sites <- list(matrix(c(1, 0, 0, 0, + 1, 1, 0, 1, + 1, 0, 0, 1), 4, 4, byrow = TRUE)) + + expect_equal(stat$calculate(seg_sites, NULL, NULL, model), + matrix(c(1, 0, + 1, 1, + 0, 1), 3, 2, byrow = TRUE)) + + model <- coal_model(c(2, 1, 1), 1) + feat_outgroup(2) + expect_error(stat$calculate(seg_sites, NULL, NULL, model)) + + stat <- sumstat_jsfs("jsfs", c(1, 3)) + expect_equal(stat$calculate(seg_sites, NULL, NULL, model), + matrix(c(1, 0, + 1, 1, + 0, 1), 3, 2, byrow = TRUE)) +}) diff --git a/tests/testthat/test-sumstat-sfs.R b/tests/testthat/test-sumstat-sfs.R index e70e86f..d1c5086 100644 --- a/tests/testthat/test-sumstat-sfs.R +++ b/tests/testthat/test-sumstat-sfs.R @@ -48,3 +48,23 @@ test_that("calculation of sfs works with trios", { stat_sfs_all <- sumstat_sfs(population = "all") expect_equal(stat_sfs_all$calculate(seg.sites, NULL, NULL, model), c(1, 0, 1)) }) + + +test_that("SFS is calculate with an outgroup present", { + model <- coal_model(c(2, 1, 1), 1) + feat_outgroup(3) + stat <- sumstat_sfs("sfs", "all") + + seg_sites <- list(matrix(c(1, 0, 0, 0, + 1, 1, 0, 1, + 1, 0, 0, 1), 3, 4, byrow = TRUE)) + expect_equal(stat$calculate(seg_sites, NULL, NULL, model), c(1, 1)) + + stat <- sumstat_sfs("jsfs", 3) + expect_error(stat$calculate(seg_sites, NULL, NULL, model)) + + model <- coal_model(c(2, 1, 1), 1) + feat_outgroup(2) + expect_equal(sumstat_sfs("jsfs", 1)$calculate(seg_sites, NULL, NULL, model), + 2) + expect_equal(sumstat_sfs("jsfs", 3)$calculate(seg_sites, NULL, NULL, model), + numeric(0)) +})