Skip to content

Commit

Permalink
added ranking
Browse files Browse the repository at this point in the history
  • Loading branch information
knausb committed Sep 15, 2017
1 parent de7fdf9 commit 150b362
Show file tree
Hide file tree
Showing 12 changed files with 195 additions and 122 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(.extract_GT_to_CM)
export(.extract_haps)
export(.grepa)
export(.gt_to_popsum)
export(.rank_variants)
export(.read_body_gz)
export(.read_meta_gz)
export(.seq_to_rects)
Expand Down Expand Up @@ -58,6 +59,7 @@ export(null.plot)
export(pairwise_genetic_diff)
export(proc.chromR)
export(queryMETA)
export(rank.variants.chromR)
export(read.vcfR)
export(regex.win)
export(seq2chromR)
Expand Down
7 changes: 6 additions & 1 deletion R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ AD_frequency <- function(ad, delim = ",", allele = 1L, sum_type = 0L, decreasing
.Call(`_vcfR_AD_frequency`, ad, delim, allele, sum_type, decreasing)
}

write_fasta <- function(seq, seqname, filename, rowlength = 80L, verbose = 1L, depr = 1L) {
invisible(.Call(`_vcfR_write_fasta`, seq, seqname, filename, rowlength, verbose, depr))
}

#' @export
.extract_GT_to_CM <- function(fix, gt, element = "DP", alleles = 0L, extract = 1L, convertNA = 1L) {
.Call(`_vcfR_extract_GT_to_CM`, fix, gt, element, alleles, extract, convertNA)
Expand Down Expand Up @@ -241,7 +245,8 @@ pair_sort <- function() {
.Call(`_vcfR_pair_sort`)
}

rank_variants <- function(variants, ends, score) {
#' @export
.rank_variants <- function(variants, ends, score) {
.Call(`_vcfR_rank_variants`, variants, ends, score)
}

Expand Down
3 changes: 1 addition & 2 deletions tools/R/ranking.R → R/ranking.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ rank.variants.chromR <- function(x, scores){
stop(msg)
}

x@var.info <- .Call('vcfR_rank_variants', PACKAGE = 'vcfR', x@var.info, x@win.info$end, scores)
# vars <- .Call('vcfR_rank_variants', PACKAGE = 'vcfR', pinf_mt@var.info, pinf_mt@win.info$end, testv)
x@var.info <- .rank_variants(x@var.info, x@win.info$end, scores)

return(x)
}
Expand Down
17 changes: 17 additions & 0 deletions man/ranking.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// write_fasta
void write_fasta(Rcpp::CharacterVector seq, std::string seqname, std::string filename, int rowlength, int verbose, int depr);
RcppExport SEXP _vcfR_write_fasta(SEXP seqSEXP, SEXP seqnameSEXP, SEXP filenameSEXP, SEXP rowlengthSEXP, SEXP verboseSEXP, SEXP deprSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type seq(seqSEXP);
Rcpp::traits::input_parameter< std::string >::type seqname(seqnameSEXP);
Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
Rcpp::traits::input_parameter< int >::type rowlength(rowlengthSEXP);
Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
Rcpp::traits::input_parameter< int >::type depr(deprSEXP);
write_fasta(seq, seqname, filename, rowlength, verbose, depr);
return R_NilValue;
END_RCPP
}
// extract_GT_to_CM
Rcpp::StringMatrix extract_GT_to_CM(Rcpp::StringMatrix fix, Rcpp::StringMatrix gt, std::string element, int alleles, int extract, int convertNA);
RcppExport SEXP _vcfR_extract_GT_to_CM(SEXP fixSEXP, SEXP gtSEXP, SEXP elementSEXP, SEXP allelesSEXP, SEXP extractSEXP, SEXP convertNASEXP) {
Expand Down Expand Up @@ -299,6 +314,7 @@ END_RCPP

static const R_CallMethodDef CallEntries[] = {
{"_vcfR_AD_frequency", (DL_FUNC) &_vcfR_AD_frequency, 5},
{"_vcfR_write_fasta", (DL_FUNC) &_vcfR_write_fasta, 6},
{"_vcfR_extract_GT_to_CM", (DL_FUNC) &_vcfR_extract_GT_to_CM, 6},
{"_vcfR_CM_to_NM", (DL_FUNC) &_vcfR_CM_to_NM, 1},
{"_vcfR_extract_haps", (DL_FUNC) &_vcfR_extract_haps, 5},
Expand Down
135 changes: 135 additions & 0 deletions src/deprecated_funcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include <Rcpp.h>
#include <fstream>
#include <zlib.h>
#include "vcfRCommon.h"

// These are deprecated functions I hope to remove.


Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df, int depr = 1 ){

Rcpp::StringVector sv = df(0);
Rcpp::StringMatrix sm(sv.size(), df.size());

sm.attr("col.names") = df.attr("col.names");
sm.attr("row.names") = df.attr("row.names");

if( depr == 1 ){
Rcpp::Rcerr << "The Rcpp function DataFrame_to_StringMatrix was deprecated in vcfR 1.6.0" << std::endl;
Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl;
Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl;
Rcpp::Rcerr << "maintainer('vcfR')" << std::endl;
return sm;
}

for(int i=0; i < df.size(); i++){
sv = df(i);
for(int j=0; j < sv.size(); j++){
sm(j, i) = sv(j);
}
}

return sm;
}


/* Write data to fasta file */

// [[Rcpp::export]]
void write_fasta( Rcpp::CharacterVector seq,
std::string seqname,
std::string filename,
int rowlength=80,
int verbose=1, int depr = 1) {
// rowlength=rowlength-1;
FILE * pFile;
// pFile=fopen(filename.c_str(),"wt");
pFile=fopen(filename.c_str(),"at");
int i = 0;
// unsigned int i = 0;

if( depr == 1 ){
Rcpp::Rcerr << "The function write_fasta was deprecated in vcfR 1.6.0" << std::endl;
Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl;
Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl;
Rcpp::Rcerr << "maintainer('vcfR')" << std::endl;
Rcpp::stop("");
}

if(verbose == 1){
Rcpp::Rcout << "Processing sample: " << seqname << "\n";
}

putc ('>' , pFile);
for(i=0; (unsigned)i<seqname.size(); i++){
putc (seqname[i] , pFile);
}
putc ('\n' , pFile);

// Number of records to report progress at.
const int nreport = 1000;

putc (Rcpp::as< char >(seq[0]) , pFile);
for(i=1; i<seq.size(); i++){
Rcpp::checkUserInterrupt();
// putc (seq[i][0] , pFile);
if( i % rowlength == 0){
putc('\n', pFile);
}
putc (Rcpp::as< char >(seq[i]) , pFile);
if(i % nreport == 0 && verbose == 1){
Rcpp::Rcout << "\rNucleotide " << i << " processed";
}
}
putc('\n', pFile);
fclose (pFile);
if(verbose == 1){
Rcpp::Rcout << "\rNucleotide " << i << " processed\n";
}
// return 0;
}


double extractElementD(Rcpp::String x, int number=1, int depr = 1){
//
// x is a string similar to:
// GT:GQ:DP:RO:QR:AO:QA:GL
//
// number is the position in the colon delimited
// string which needs to be extracted.
//
// int count = 0;
int start = 0;
int pos = 1;
std::string istring = x;
std::string teststring;
unsigned int i = 0;

if( depr == 1 ){
Rcpp::Rcerr << "The function extractElementD was deprecated in vcfR 1.6.0" << std::endl;
Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl;
Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl;
Rcpp::Rcerr << "maintainer('vcfR')" << std::endl;
Rcpp::stop("");
}

for(i=1; i <= istring.size(); i++){
if(istring[i] == ':'){
if(pos == number){
teststring = istring.substr(start, i-start);
double teststring2 = atof(teststring.c_str());
return teststring2;
// return std::stod(teststring);
} else {
start = i+1;
pos++;
i++;
}
}
}
// If we get here we did not find the element.
return(0);
}



4 changes: 2 additions & 2 deletions src/rank_variants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ bool minimize ( const mypair& l, const mypair& r)
{ return l.first < r.first; }



// [[Rcpp::export]]
//' @export
// [[Rcpp::export(name=".rank_variants")]]
Rcpp::DataFrame rank_variants(Rcpp::DataFrame variants,
Rcpp::NumericVector ends,
Rcpp::NumericVector score){
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_that("write.var.info works",{

setwd(tempdir())

write.var.info(myChrom, file = "test_var_info.csv")
write.var.info(chrom, file = "test_var_info.csv")
expect_true(file.exists("test_var_info.csv"))
unlink("test_var_info.csv")
})
Expand All @@ -28,7 +28,7 @@ test_that("write.var.info works, mask == TRUE",{

setwd(tempdir())

write.var.info(myChrom, file = "test_var_info.csv", mask = TRUE)
write.var.info(chrom, file = "test_var_info.csv", mask = TRUE)
expect_true(file.exists("test_var_info.csv"))
unlink("test_var_info.csv")
})
Expand All @@ -40,7 +40,7 @@ test_that("write.win.info works",{
# data(vcfR_test)
# myChrom <- create.chromR(vcfR_test, verbose = FALSE)
data("chromR_example")
myChrom <- proc.chromR(myChrom, verbose = FALSE)
myChrom <- proc.chromR(chrom, verbose = FALSE)

setwd(tempdir())
write.win.info(myChrom, file = "test_win_info.csv")
Expand Down
48 changes: 12 additions & 36 deletions tools/src/extractGT2NM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,43 +10,8 @@ using namespace Rcpp;




double extractElementD(String x, int number=1){
//
// x is a string similar to:
// GT:GQ:DP:RO:QR:AO:QA:GL
//
// number is the position in the colon delimited
// string which needs to be extracted.
//
// int count = 0;
int start = 0;
int pos = 1;
std::string istring = x;
std::string teststring;
unsigned int i = 0;

for(i=1; i <= istring.size(); i++){
if(istring[i] == ':'){
if(pos == number){
teststring = istring.substr(start, i-start);
double teststring2 = atof(teststring.c_str());
return teststring2;
// return std::stod(teststring);
} else {
start = i+1;
pos++;
i++;
}
}
}
// If we get here we did not find the element.
return(0);
}


// [[Rcpp::export]]
Rcpp::CharacterMatrix extract_GT_to_CM(Rcpp::DataFrame x, std::string element="DP") {
Rcpp::CharacterMatrix extract_GT_to_CM_B(Rcpp::DataFrame x, std::string element="DP", int depr = 1) {
int i = 0;
int j = 0;
Rcpp::StringVector column = x(0); // Vector to check out DataFrame columns to
Expand All @@ -58,6 +23,14 @@ Rcpp::CharacterMatrix extract_GT_to_CM(Rcpp::DataFrame x, std::string element="D
colnames.erase(0);
cm.attr("dimnames") = Rcpp::List::create(Rcpp::CharacterVector::create(), colnames);

if( depr == 1 ){
Rcpp::Rcerr << "The function extract_GT_to_CM was deprecated in vcfR 1.6.0" << std::endl;
Rcpp::Rcerr << "If you use this function and you would like to advocate its persistence, please contact the maintainer." << std::endl;
Rcpp::Rcerr << "The maintainer of this package can be found with" << std::endl;
Rcpp::Rcerr << "maintainer('vcfR')" << std::endl;
Rcpp::stop("");
}

// Determine the position where the query element is
// located in each row (variant)
for(i=0; i<column.size(); i++){
Expand Down Expand Up @@ -90,3 +63,6 @@ Rcpp::CharacterMatrix extract_GT_to_CM(Rcpp::DataFrame x, std::string element="D






0 comments on commit 150b362

Please sign in to comment.