Skip to content

Commit

Permalink
rename & modify omega prime to MCMF
Browse files Browse the repository at this point in the history
  • Loading branch information
paulstaab committed Jul 31, 2015
1 parent e2ba765 commit aa0e1e7
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 73 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: coala
Version: 0.1.1.9000
Version: 0.1.0.9001
Date: 2015-07-17
License: MIT + file LICENSE
Title: A Framework for Coalescent Simulation
Expand Down Expand Up @@ -77,9 +77,9 @@ Collate:
'sumstat_four_gamete.R'
'sumstat_ihh.R'
'sumstat_jsfs.R'
'sumstat_mcmf.R'
'sumstat_nsl.R'
'sumstat_nucleotide_div.R'
'sumstat_omegaprime.R'
'sumstat_seg_sites.R'
'sumstat_sfs.R'
'sumstat_tajimas_d.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ export(sumstat_file)
export(sumstat_four_gamete)
export(sumstat_ihh)
export(sumstat_jsfs)
export(sumstat_mcmf)
export(sumstat_nSL)
export(sumstat_nucleotide_div)
export(sumstat_seg_sites)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
coala 0.1.1.9000
----------------

* Add MCMF summary statistic ().


coala 0.1.1
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ calc_nucleotide_div <- function(seg_sites, individuals) {
.Call('coala_calc_nucleotide_div', PACKAGE = 'coala', seg_sites, individuals)
}

calc_omegaprime <- function(seg_sites, individuals) {
.Call('coala_calc_omegaprime', PACKAGE = 'coala', seg_sites, individuals)
calc_mcmf <- function(seg_sites, individuals, has_trios = TRUE) {
.Call('coala_calc_mcmf', PACKAGE = 'coala', seg_sites, individuals, has_trios)
}

unphase_segsites <- function(seg_sites, ploidy, samples_per_ind) {
Expand Down
31 changes: 31 additions & 0 deletions R/sumstat_mcmf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' @importFrom R6 R6Class
stat_mcmf_class <- R6Class("stat_mcmf", inherit = sumstat_class,
private = list(
population = NULL,
req_segsites = TRUE
),
public = list(
initialize = function(name, population) {
assert_that(is.numeric(population))
assert_that(length(population) == 1)
private$population <- population
super$initialize(name)
},
calculate = function(seg_sites, trees, files, model) {
calc_mcmf(seg_sites,
get_population_indiviuals(model, private$population),
has_trios(model))
}
)
)

#' The MCMF Summary Statistic
#'
#' When adding this to a model, the "Most Common Mutation Frequecy" summary
#' statistic is calculated from the simulation output.
#'
#' @inheritParams sumstat_four_gamete
#' @export
sumstat_mcmf <- function(name = "mcmf", population = 1) {
stat_mcmf_class$new(name, population)
}
26 changes: 0 additions & 26 deletions R/sumstat_omegaprime.R

This file was deleted.

13 changes: 7 additions & 6 deletions man/sumstat_omegaprime.Rd → man/sumstat_mcmf.Rd
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/sumstat_omegaprime.R
\name{sumstat_omegaprime}
\alias{sumstat_omegaprime}
\title{Calculates the (experimental) Omega" Statistic}
% Please edit documentation in R/sumstat_mcmf.R
\name{sumstat_mcmf}
\alias{sumstat_mcmf}
\title{The MCMF Summary Statistic}
\usage{
sumstat_omegaprime(name = "omegaprime", population = 1)
sumstat_mcmf(name = "mcmf", population = 1)
}
\arguments{
\item{name}{The name of the summary statistic. When simulating a model,
Expand All @@ -14,6 +14,7 @@ with this name. Summary statistic names must be unique in a model.}
\item{population}{The population for which the statistic is calculated.}
}
\description{
Calculates the (experimental) Omega" Statistic
When adding this to a model, the "Most Common Mutation Frequecy" summary
statistic is calculated from the simulation output.
}

13 changes: 7 additions & 6 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,16 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// calc_omegaprime
NumericVector calc_omegaprime(List seg_sites, NumericVector individuals);
RcppExport SEXP coala_calc_omegaprime(SEXP seg_sitesSEXP, SEXP individualsSEXP) {
// calc_mcmf
NumericVector calc_mcmf(const List seg_sites, const NumericVector individuals, const bool has_trios);
RcppExport SEXP coala_calc_mcmf(SEXP seg_sitesSEXP, SEXP individualsSEXP, SEXP has_triosSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< List >::type seg_sites(seg_sitesSEXP);
Rcpp::traits::input_parameter< NumericVector >::type individuals(individualsSEXP);
__result = Rcpp::wrap(calc_omegaprime(seg_sites, individuals));
Rcpp::traits::input_parameter< const List >::type seg_sites(seg_sitesSEXP);
Rcpp::traits::input_parameter< const NumericVector >::type individuals(individualsSEXP);
Rcpp::traits::input_parameter< const bool >::type has_trios(has_triosSEXP);
__result = Rcpp::wrap(calc_mcmf(seg_sites, individuals, has_trios));
return __result;
END_RCPP
}
Expand Down
52 changes: 36 additions & 16 deletions src/stat_omegaprime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,33 @@
using namespace Rcpp;


int maxsplit(const NumericMatrix ss,
const int trio_locus,
const NumericVector individuals) {
void maxsplit(const NumericMatrix ss,
const int trio_locus,
const NumericVector individuals,
int & max_number,
int & snp_number) {

NumericVector trio_locus_vec = getTrioLocus(ss);

std::map<unsigned int,unsigned int> m;
std::map<unsigned int, unsigned int> m;
unsigned int key;
int max_key_value = std::pow(2, individuals.size());

for (int snp = 0; snp < ss.ncol(); ++snp) {
if (trio_locus_vec[snp] != trio_locus) continue;

// For each SNP, create a binary representation of the SNP...
key=0;
key = 0;
for(int k=1; k < individuals.size(); ++k) {
key *= 2;
key += (ss(individuals[k]-1, snp) != ss(individuals[0]-1, snp));
}

// Ignore snps that are not segregating in individuals
if (key == 0) continue;
// Ignore snps that are not segregating in the given individuals
if (key == 0 || key == max_key_value) continue;

// and increase the corresponding counter
++snp_number;
std::map<unsigned int,unsigned int>::iterator p=m.find(key);
if(p==m.end()) {
m[key]=1;
Expand All @@ -36,30 +40,46 @@ int maxsplit(const NumericMatrix ss,
}

// Return the maximal counter
unsigned int maxi=0;
unsigned int maxi = 0;
for (std::map<unsigned int,unsigned int>::iterator p=m.begin(); p!=m.end(); ++p) {
if(p->second > maxi) maxi = p->second;
}
return maxi;
max_number += maxi;
}


// [[Rcpp::export]]
NumericVector calc_omegaprime(List seg_sites, NumericVector individuals) {
NumericVector calc_mcmf(const List seg_sites,
const NumericVector individuals,
const bool has_trios = true) {

size_t n_loci = seg_sites.size();
NumericVector omega_prime(n_loci);
NumericVector mcmf(n_loci);

NumericMatrix ss;
int max_split = 0, snp_number = 0, ignore_result = 0;

for (size_t locus = 0; locus < n_loci; ++locus) {
ss = as<NumericMatrix>(seg_sites[locus]);
if (max(individuals) > ss.nrow()) stop("Invalid individuals");
if (ss.ncol() == 0) omega_prime[locus] = NA_REAL;
else {
omega_prime[locus] = (double)(maxsplit(ss, -1, individuals) +
maxsplit(ss, 1, individuals)) / ss.ncol();

max_split = 0;
snp_number = 0;

if (has_trios) {
maxsplit(ss, -1, individuals, max_split, snp_number);
maxsplit(ss, 1, individuals, max_split, snp_number);
maxsplit(ss, 0, individuals, ignore_result, snp_number);
} else {
maxsplit(ss, 0, individuals, max_split, snp_number);
}

if (snp_number == 0) {
mcmf[locus] = NA_REAL;
continue;
}
mcmf[locus] = (double)(max_split) / snp_number;
}

return(omega_prime);
return(mcmf);
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
context("SumStat OmegaPrime")
context("SumStat MCMF")

test_that("calculation is correct", {
ss <- matrix(c(1, 0, 0, 1,
1, 1, 0, 0,
0, 1, 0, 0,
1, 0, 1, 0,
1, 0, 0, 0), 4, 4, byrow = TRUE)

# No trios
seg_sites <- list(ss)
attr(seg_sites[[1]], "positions") <- c(0.1, 0.2, 0.5, 0.7)
attr(seg_sites[[1]], "locus") <- rep(0, 4)
expect_equal(calc_mcmf(seg_sites, 1:4, FALSE), .5)
expect_equal(calc_mcmf(seg_sites, c(1, 3, 4), FALSE), .5)
expect_equal(calc_mcmf(seg_sites, 2:4, FALSE), 2/3)
expect_equal(calc_mcmf(seg_sites, 3:4, FALSE), 1)

# With trios
seg_sites <- list(cbind(ss, ss, ss))
attr(seg_sites[[1]], "positions") <- rep(c(0.1, 0.2, 0.5, 0.7), 4)
attr(seg_sites[[1]], "locus") <- rep(c(-1, 0, 1), each = 4)
expect_equal(calc_omegaprime(seg_sites, 1:4), c(2 / 12))
expect_equal(calc_mcmf(seg_sites, 1:4), c(4 / 12))
expect_equal(calc_mcmf(seg_sites, 2:4), c(4 / 9))
expect_equal(calc_mcmf(seg_sites, 3:4), c(2 / 3))

ss <- matrix(c(0, 0, 0, 1,
0, 0, 1, 0,
Expand All @@ -18,16 +30,14 @@ test_that("calculation is correct", {
seg_sites[[2]] <- cbind(ss, ss, ss)
attr(seg_sites[[2]], "positions") <- rep(c(0.1, 0.2, 0.5, 0.7), 4)
attr(seg_sites[[2]], "locus") <- rep(c(-1, 0, 1), each = 4)
expect_equal(calc_omegaprime(seg_sites, 1:4), c(2 / 12, 4 / 12))

expect_equal(calc_omegaprime(seg_sites, c(1, 3, 4)), c(2 / 12, 4 / 12))
expect_equal(calc_omegaprime(seg_sites, c(1)), c(0 / 12, 0 / 12))
expect_error(calc_omegaprime(seg_sites, 1:5))
expect_equal(calc_mcmf(seg_sites, 1:4), c(c(4 / 12), c(4 / 6)))
expect_equal(calc_mcmf(seg_sites, 2:4), c(c(4 / 9), NA))
expect_error(calc_mcmf(seg_sites, 1:5))

seg_sites <- list(matrix(0, 4, 0))
attr(seg_sites[[1]], "locus") <- numeric()
attr(seg_sites[[1]], "position") <- numeric()
expect_true(is.na(calc_omegaprime(seg_sites, 1:4)))
expect_true(is.na(calc_mcmf(seg_sites, 1:4)))
})


Expand All @@ -41,19 +51,19 @@ test_that("initialzation of statistic works", {
attr(seg_sites[[1]], "positions") <- rep(c(0.1, 0.2, 0.5, 0.7), 4)
attr(seg_sites[[1]], "locus") <- rep(c(-1, 0, 1), each = 4)

stat <- sumstat_omegaprime(name = "omega_prime", 1)
stat <- sumstat_mcmf(population = 1)
op <- stat$calculate(seg_sites, NULL, NULL, coal_model(4))
expect_that(op, is_a("numeric"))
expect_equal(length(op), 1)
expect_true(op >= 0 & op <= 1)
})


test_that("simulation of omega prime works", {
test_that("simulation of MCMF works", {
set.seed(125)
model <- model_trios() + sumstat_omegaprime(name = "omega_prime", 1)
model <- model_trios() + sumstat_mcmf(name = "mcmf", population = 1)
stats <- simulate(model)
expect_that(stats$omega_prime, is_a("numeric"))
expect_equal(length(stats$omega_prime), 1)
expect_true(all(stats$omega_prime >= 0 & stats$omega_prime <= 1))
expect_that(stats$mcmf, is_a("numeric"))
expect_equal(length(stats$mcmf), 1)
expect_true(all(stats$mcmf >= 0 & stats$mcmf <= 1))
})

0 comments on commit aa0e1e7

Please sign in to comment.