diff --git a/NAMESPACE b/NAMESPACE index 9bd38274..f82fc80c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(.extract_GT_to_CM) export(.extract_haps) export(.grepa) export(.gt_to_popsum) +export(.rank_variants) export(.read_body_gz) export(.read_meta_gz) export(.seq_to_rects) @@ -58,6 +59,7 @@ export(null.plot) export(pairwise_genetic_diff) export(proc.chromR) export(queryMETA) +export(rank.variants.chromR) export(read.vcfR) export(regex.win) export(seq2chromR) diff --git a/R/RcppExports.R b/R/RcppExports.R index 72c542d5..2ed396e0 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -48,6 +48,10 @@ AD_frequency <- function(ad, delim = ",", allele = 1L, sum_type = 0L, decreasing .Call(`_vcfR_AD_frequency`, ad, delim, allele, sum_type, decreasing) } +write_fasta <- function(seq, seqname, filename, rowlength = 80L, verbose = 1L, depr = 1L) { + invisible(.Call(`_vcfR_write_fasta`, seq, seqname, filename, rowlength, verbose, depr)) +} + #' @export .extract_GT_to_CM <- function(fix, gt, element = "DP", alleles = 0L, extract = 1L, convertNA = 1L) { .Call(`_vcfR_extract_GT_to_CM`, fix, gt, element, alleles, extract, convertNA) @@ -241,7 +245,8 @@ pair_sort <- function() { .Call(`_vcfR_pair_sort`) } -rank_variants <- function(variants, ends, score) { +#' @export +.rank_variants <- function(variants, ends, score) { .Call(`_vcfR_rank_variants`, variants, ends, score) } diff --git a/tools/R/ranking.R b/R/ranking.R similarity index 84% rename from tools/R/ranking.R rename to R/ranking.R index 96e7d08d..88c1bdbf 100644 --- a/tools/R/ranking.R +++ b/R/ranking.R @@ -37,8 +37,7 @@ rank.variants.chromR <- function(x, scores){ stop(msg) } - x@var.info <- .Call('vcfR_rank_variants', PACKAGE = 'vcfR', x@var.info, x@win.info$end, scores) - # vars <- .Call('vcfR_rank_variants', PACKAGE = 'vcfR', pinf_mt@var.info, pinf_mt@win.info$end, testv) + x@var.info <- .rank_variants(x@var.info, x@win.info$end, scores) return(x) } diff --git a/man/ranking.Rd b/man/ranking.Rd new file mode 100644 index 00000000..e87b8cfd --- /dev/null +++ b/man/ranking.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ranking.R +\name{Ranking} +\alias{Ranking} +\alias{rank.variants.chromR} +\title{Ranking variants within windows} +\usage{ +rank.variants.chromR(x, scores) +} +\arguments{ +\item{x}{an object of class Crhom or a data.frame containing...} + +\item{scores}{a vector of scores for each variant to be used to rank the data} +} +\description{ +Rank variants within windows. +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 1eac9001..c6fb9a79 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -20,6 +20,21 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// write_fasta +void write_fasta(Rcpp::CharacterVector seq, std::string seqname, std::string filename, int rowlength, int verbose, int depr); +RcppExport SEXP _vcfR_write_fasta(SEXP seqSEXP, SEXP seqnameSEXP, SEXP filenameSEXP, SEXP rowlengthSEXP, SEXP verboseSEXP, SEXP deprSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type seq(seqSEXP); + Rcpp::traits::input_parameter< std::string >::type seqname(seqnameSEXP); + Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); + Rcpp::traits::input_parameter< int >::type rowlength(rowlengthSEXP); + Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); + Rcpp::traits::input_parameter< int >::type depr(deprSEXP); + write_fasta(seq, seqname, filename, rowlength, verbose, depr); + return R_NilValue; +END_RCPP +} // extract_GT_to_CM Rcpp::StringMatrix extract_GT_to_CM(Rcpp::StringMatrix fix, Rcpp::StringMatrix gt, std::string element, int alleles, int extract, int convertNA); RcppExport SEXP _vcfR_extract_GT_to_CM(SEXP fixSEXP, SEXP gtSEXP, SEXP elementSEXP, SEXP allelesSEXP, SEXP extractSEXP, SEXP convertNASEXP) { @@ -299,6 +314,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_vcfR_AD_frequency", (DL_FUNC) &_vcfR_AD_frequency, 5}, + {"_vcfR_write_fasta", (DL_FUNC) &_vcfR_write_fasta, 6}, {"_vcfR_extract_GT_to_CM", (DL_FUNC) &_vcfR_extract_GT_to_CM, 6}, {"_vcfR_CM_to_NM", (DL_FUNC) &_vcfR_CM_to_NM, 1}, {"_vcfR_extract_haps", (DL_FUNC) &_vcfR_extract_haps, 5}, diff --git a/src/deprecated_funcs.cpp b/src/deprecated_funcs.cpp new file mode 100644 index 00000000..dfb4c8fc --- /dev/null +++ b/src/deprecated_funcs.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include "vcfRCommon.h" + +// These are deprecated functions I hope to remove. + + +Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df, int depr = 1 ){ + + Rcpp::StringVector sv = df(0); + Rcpp::StringMatrix sm(sv.size(), df.size()); + + sm.attr("col.names") = df.attr("col.names"); + sm.attr("row.names") = df.attr("row.names"); + + if( depr == 1 ){ + Rcpp::Rcerr << "The Rcpp function DataFrame_to_StringMatrix was deprecated in vcfR 1.6.0" << std::endl; + Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl; + Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl; + Rcpp::Rcerr << "maintainer('vcfR')" << std::endl; + return sm; + } + + for(int i=0; i < df.size(); i++){ + sv = df(i); + for(int j=0; j < sv.size(); j++){ + sm(j, i) = sv(j); + } + } + + return sm; +} + + +/* Write data to fasta file */ + +// [[Rcpp::export]] +void write_fasta( Rcpp::CharacterVector seq, + std::string seqname, + std::string filename, + int rowlength=80, + int verbose=1, int depr = 1) { +// rowlength=rowlength-1; + FILE * pFile; +// pFile=fopen(filename.c_str(),"wt"); + pFile=fopen(filename.c_str(),"at"); + int i = 0; +// unsigned int i = 0; + + if( depr == 1 ){ + Rcpp::Rcerr << "The function write_fasta was deprecated in vcfR 1.6.0" << std::endl; + Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl; + Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl; + Rcpp::Rcerr << "maintainer('vcfR')" << std::endl; + Rcpp::stop(""); + } + + if(verbose == 1){ + Rcpp::Rcout << "Processing sample: " << seqname << "\n"; + } + + putc ('>' , pFile); + for(i=0; (unsigned)i(seq[0]) , pFile); + for(i=1; i(seq[i]) , pFile); + if(i % nreport == 0 && verbose == 1){ + Rcpp::Rcout << "\rNucleotide " << i << " processed"; + } + } + putc('\n', pFile); + fclose (pFile); + if(verbose == 1){ + Rcpp::Rcout << "\rNucleotide " << i << " processed\n"; + } +// return 0; +} + + +double extractElementD(Rcpp::String x, int number=1, int depr = 1){ + // + // x is a string similar to: + // GT:GQ:DP:RO:QR:AO:QA:GL + // + // number is the position in the colon delimited + // string which needs to be extracted. + // +// int count = 0; + int start = 0; + int pos = 1; + std::string istring = x; + std::string teststring; + unsigned int i = 0; + + if( depr == 1 ){ + Rcpp::Rcerr << "The function extractElementD was deprecated in vcfR 1.6.0" << std::endl; + Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl; + Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl; + Rcpp::Rcerr << "maintainer('vcfR')" << std::endl; + Rcpp::stop(""); + } + + for(i=1; i <= istring.size(); i++){ + if(istring[i] == ':'){ + if(pos == number){ + teststring = istring.substr(start, i-start); + double teststring2 = atof(teststring.c_str()); + return teststring2; +// return std::stod(teststring); + } else { + start = i+1; + pos++; + i++; + } + } + } + // If we get here we did not find the element. + return(0); +} + + + diff --git a/src/rank_variants.cpp b/src/rank_variants.cpp index bde8c280..e201ab70 100644 --- a/src/rank_variants.cpp +++ b/src/rank_variants.cpp @@ -18,8 +18,8 @@ bool minimize ( const mypair& l, const mypair& r) { return l.first < r.first; } - -// [[Rcpp::export]] +//' @export +// [[Rcpp::export(name=".rank_variants")]] Rcpp::DataFrame rank_variants(Rcpp::DataFrame variants, Rcpp::NumericVector ends, Rcpp::NumericVector score){ diff --git a/tools/testthat/test_rank_variants.R b/tests/testthat/test_rank_variants.R similarity index 100% rename from tools/testthat/test_rank_variants.R rename to tests/testthat/test_rank_variants.R diff --git a/tools/testthat/test_summary_tables.R b/tests/testthat/test_summary_tables.R similarity index 84% rename from tools/testthat/test_summary_tables.R rename to tests/testthat/test_summary_tables.R index 4a625538..68b1d7ec 100644 --- a/tools/testthat/test_summary_tables.R +++ b/tests/testthat/test_summary_tables.R @@ -15,7 +15,7 @@ test_that("write.var.info works",{ setwd(tempdir()) - write.var.info(myChrom, file = "test_var_info.csv") + write.var.info(chrom, file = "test_var_info.csv") expect_true(file.exists("test_var_info.csv")) unlink("test_var_info.csv") }) @@ -28,7 +28,7 @@ test_that("write.var.info works, mask == TRUE",{ setwd(tempdir()) - write.var.info(myChrom, file = "test_var_info.csv", mask = TRUE) + write.var.info(chrom, file = "test_var_info.csv", mask = TRUE) expect_true(file.exists("test_var_info.csv")) unlink("test_var_info.csv") }) @@ -40,7 +40,7 @@ test_that("write.win.info works",{ # data(vcfR_test) # myChrom <- create.chromR(vcfR_test, verbose = FALSE) data("chromR_example") - myChrom <- proc.chromR(myChrom, verbose = FALSE) + myChrom <- proc.chromR(chrom, verbose = FALSE) setwd(tempdir()) write.win.info(myChrom, file = "test_win_info.csv") diff --git a/tools/src/extractGT2NM.cpp b/tools/src/extractGT2NM.cpp index aa1a1152..d685425a 100644 --- a/tools/src/extractGT2NM.cpp +++ b/tools/src/extractGT2NM.cpp @@ -10,43 +10,8 @@ using namespace Rcpp; - -double extractElementD(String x, int number=1){ - // - // x is a string similar to: - // GT:GQ:DP:RO:QR:AO:QA:GL - // - // number is the position in the colon delimited - // string which needs to be extracted. - // -// int count = 0; - int start = 0; - int pos = 1; - std::string istring = x; - std::string teststring; - unsigned int i = 0; - - for(i=1; i <= istring.size(); i++){ - if(istring[i] == ':'){ - if(pos == number){ - teststring = istring.substr(start, i-start); - double teststring2 = atof(teststring.c_str()); - return teststring2; -// return std::stod(teststring); - } else { - start = i+1; - pos++; - i++; - } - } - } - // If we get here we did not find the element. - return(0); -} - - // [[Rcpp::export]] -Rcpp::CharacterMatrix extract_GT_to_CM(Rcpp::DataFrame x, std::string element="DP") { +Rcpp::CharacterMatrix extract_GT_to_CM_B(Rcpp::DataFrame x, std::string element="DP", int depr = 1) { int i = 0; int j = 0; Rcpp::StringVector column = x(0); // Vector to check out DataFrame columns to @@ -58,6 +23,14 @@ Rcpp::CharacterMatrix extract_GT_to_CM(Rcpp::DataFrame x, std::string element="D colnames.erase(0); cm.attr("dimnames") = Rcpp::List::create(Rcpp::CharacterVector::create(), colnames); + if( depr == 1 ){ + Rcpp::Rcerr << "The function extract_GT_to_CM was deprecated in vcfR 1.6.0" << std::endl; + Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl; + Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl; + Rcpp::Rcerr << "maintainer('vcfR')" << std::endl; + Rcpp::stop(""); + } + // Determine the position where the query element is // located in each row (variant) for(i=0; i' , pFile); - for(i=0; (unsigned)i(seq[0]) , pFile); - for(i=1; i(seq[i]) , pFile); - if(i % nreport == 0 && verbose == 1){ - Rcpp::Rcout << "\rNucleotide " << i << " processed"; - } - } - putc('\n', pFile); - fclose (pFile); - if(verbose == 1){ - Rcpp::Rcout << "\rNucleotide " << i << " processed\n"; - } -// return 0; -} - - - diff --git a/vignettes/converting_data.Rmd b/vignettes/converting_data.Rmd index 04a13521..038b2ee5 100644 --- a/vignettes/converting_data.Rmd +++ b/vignettes/converting_data.Rmd @@ -1,4 +1,4 @@ -# --- +--- title: "Converting vcfR objects to other forms" author: "Brian J. Knaus" date: "`r Sys.Date()`"