Skip to content

Commit

Permalink
combine output parsing into a single function
Browse files Browse the repository at this point in the history
  • Loading branch information
paulstaab committed Jun 26, 2015
1 parent 779e4a9 commit 9ebbe77
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 221 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: coala
Version: 0.0.9902
Version: 0.0.9903
Date: 2015-06-25
License: GPL (>= 3)
Title: A Framework for Coalescent Simulation
Expand Down
4 changes: 0 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ parse_ms_output <- function(file_names, sample_size, loci_number) {
.Call('coala_parse_ms_output', PACKAGE = 'coala', file_names, sample_size, loci_number)
}

parse_ms_trees <- function(files, loci_number) {
.Call('coala_parse_ms_trees', PACKAGE = 'coala', files, loci_number)
}

parse_sg_output <- function(file_names, sample_size, sequence_length, loci_number, outgroup_size = 1L, calc_seg_sites = TRUE) {
.Call('coala_parse_sg_output', PACKAGE = 'coala', file_names, sample_size, sequence_length, loci_number, outgroup_size, calc_seg_sites)
}
Expand Down
24 changes: 7 additions & 17 deletions R/simulator_ms.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,30 +83,20 @@ ms_class <- R6Class("ms", inherit = simulator_class,
setwd(wd)

# Parse the output and calculate summary statistics
if (requires_segsites(model)) {
seg_sites <- parse_ms_output(files,
get_sample_size(model, for_sim = TRUE),
get_locus_number(model))

if (has_trios(model)) {
seg_sites <- conv_for_trios(seg_sites, model)
}
if (requires_segsites(model) || requires_trees(model)) {
output <- parse_ms_output(files, #nolint
get_sample_size(model, for_sim = TRUE),
get_locus_number(model))
} else {
seg_sites <- NULL
}

if (requires_trees(model)) {
trees <- parse_ms_trees(files, get_locus_number(model))
} else {
trees <- NULL
output <- list(seg_sites = NULL, trees = NULL)
}

cmds <- lapply(sim_cmds, function(cmd) {
paste("ms", sample_size, cmd[ , 1], cmd[ , 2])
})

sum_stats <- calc_sumstats(seg_sites, trees, files, model, parameters,
cmds, self)
sum_stats <- calc_sumstats(output$segsites, output$trees, files, model,
parameters, cmds, self)

# Clean Up
unlink(unlist(files))
Expand Down
24 changes: 7 additions & 17 deletions R/simulator_msms.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,30 +88,20 @@ msms_class <- R6Class("Msms", inherit = simulator_class,
})

# Parse the output and calculate summary statistics
if (requires_segsites(model)) {
seg_sites <- parse_ms_output(files, #nolint
get_sample_size(model, for_sim = TRUE),
get_locus_number(model))

if (has_trios(model)) {
seg_sites <- conv_for_trios(seg_sites, model)
}
if (requires_segsites(model) || requires_trees(model)) {
output <- parse_ms_output(files, #nolint
get_sample_size(model, for_sim = TRUE),
get_locus_number(model))
} else {
seg_sites <- NULL
}

if (requires_trees(model)) {
trees <- parse_ms_trees(files, get_locus_number(model))
} else {
trees <- NULL
output <- list(seg_sites = NULL, trees = NULL)
}

cmds <- lapply(sim_cmds, function(cmd) {
paste("msms", sample_size, cmd[ , 1], cmd[ , 2])
})

sum_stats <- calc_sumstats(seg_sites, trees, files, model, parameters,
cmds, self)
sum_stats <- calc_sumstats(output$segsites, output$trees, files, model,
parameters, cmds, self)

# Clean Up
unlink(unlist(files))
Expand Down
16 changes: 11 additions & 5 deletions R/sumstat.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ Sumstat <- R6Class('Sumstat',
stop("Overwrite this function with the calculation of the statistic.")
},
get_name = function() private$name,
get_group = function() private$group,
requires_files = function() private$req_files,
requires_segsites = function() private$req_segsites,
requires_trees = function() private$req_trees,
Expand Down Expand Up @@ -72,10 +71,17 @@ calc_sumstats <- function(seg_sites, trees, files, model,
cmds = cmds,
simulator = simulator$get_info())

if (is_unphased(model) && requires_segsites(model)) {
seg_sites <- unphase_segsites(seg_sites,
get_ploidy(model),
get_samples_per_ind(model))
# Process seg_sites for trios and unphase if neccessary
if (requires_segsites(model)) {
if (has_trios(model) && simulator$get_name() != "seqgen") {
seg_sites <- conv_for_trios(seg_sites, model)
}

if (is_unphased(model)) {
seg_sites <- unphase_segsites(seg_sites,
get_ploidy(model),
get_samples_per_ind(model))
}
}

for (stat in model$sum_stats) {
Expand Down
12 changes: 0 additions & 12 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,6 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// parse_ms_trees
List parse_ms_trees(const List files, const int loci_number);
RcppExport SEXP coala_parse_ms_trees(SEXP filesSEXP, SEXP loci_numberSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< const List >::type files(filesSEXP);
Rcpp::traits::input_parameter< const int >::type loci_number(loci_numberSEXP);
__result = Rcpp::wrap(parse_ms_trees(files, loci_number));
return __result;
END_RCPP
}
// parse_sg_output
List parse_sg_output(const List file_names, const int sample_size, const NumericMatrix sequence_length, const int loci_number, const int outgroup_size, const bool calc_seg_sites);
RcppExport SEXP coala_parse_sg_output(SEXP file_namesSEXP, SEXP sample_sizeSEXP, SEXP sequence_lengthSEXP, SEXP loci_numberSEXP, SEXP outgroup_sizeSEXP, SEXP calc_seg_sitesSEXP) {
Expand Down
35 changes: 30 additions & 5 deletions src/parse_ms_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ List parse_ms_output(const List file_names,
size_t individuals = sum(sample_size);

List seg_sites(loci_number);
List trees(loci_number);
CharacterVector locus_trees;

int locus = -1;

for (int i = 0; i < file_names.size(); ++i) {
Expand All @@ -46,19 +49,24 @@ List parse_ms_output(const List file_names,
std::ifstream output(as<std::string>(file_name(j)).c_str(),
std::ifstream::in);

if (!output.is_open()) {
if (!output.good()) {
stop(std::string("Cannot open file ") + file_name(0));
}

std::getline(output, line);
// Read it line by line and read the relevant parts
while( output.good() ) {
std::getline(output, line);
while (output.good()) {
// Rcout << "Line: " << line << std::endl;
if (line == "//") {
++locus;
if (locus >= loci_number) stop("Too many loci in ms output");
// Rcout << "Locus: " << locus << std::endl;
std::getline(output, line);
}

// Parse Segregating Sites
else if (line.substr(0, 9) == "segsites:") {

// Rcout << "Parsing Seg. Sites" << std::endl;
if (line.substr(0, 11) == "segsites: 0") {
NumericMatrix ss = NumericMatrix(individuals, 0);
ss.attr("positions") = NumericVector(0);
Expand All @@ -80,10 +88,27 @@ List parse_ms_output(const List file_names,

seg_sites[locus] = ss;
}
std::getline(output, line);
}

// Parse Trees
else if (line.substr(0, 1) == "[" || line.substr(0, 1) == "(") {
// Rcout << "Parsing Trees" << std::endl;
while (line.substr(0, 1) == "[" || line.substr(0, 1) == "(") {
locus_trees.push_back(line);
std::getline(output, line);
}
trees[locus] = locus_trees;
locus_trees = CharacterVector();
}

else std::getline(output, line);
}
}
}

return seg_sites;
if (locus != loci_number - 1) stop("Too few loci in ms output");

return List::create(_["segsites"] = seg_sites,
_["trees"] = trees);
}
48 changes: 0 additions & 48 deletions src/parse_ms_trees.cpp

This file was deleted.

Loading

0 comments on commit 9ebbe77

Please sign in to comment.