Skip to content

Commit

Permalink
limit the number of random sequences if too many non ACGT se…
Browse files Browse the repository at this point in the history
…quences
  • Loading branch information
gunturus committed Jul 8, 2020
1 parent bf1b3f3 commit c155bd1
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions enveomics/References.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,21 @@ void References::intializeReferences(FastqReader &fastqReader) {
double kerr;
unsigned long long int hashcode;
size_t i = 0;
size_t error_seq = 0;
size_t error_seq_limit = 3 * this->refSize;
// deal with reads shorter than the kmer length
for(i=0;i<this->refSize;i++) {
fastqReader.getRandomSeq(temp);
//Hashcode for forward kmer
if(error_seq > error_seq_limit)
error("Sequence file has low quality with many non ACGT bases");
if(temp.sequence.length() < this->ksize)
error("Reads are required to have a minimum length of kmer size");
kmer = temp.sequence.substr(0,this->ksize);
flag = getHashCode(kmer,hashcode);
if(flag == -1) {
i--;
error_seq++;
continue;
}
refKmerMap[hashcode] = 0;
Expand All @@ -95,14 +100,19 @@ void References::intializeReferences(FastaReader &fastaReader) {
int flag;
unsigned long long int hashcode;
size_t i = 0;
size_t error_seq = 0;
size_t error_seq_limit = 3 * this->refSize;
for(i=0;i<this->refSize;i++) {
fastaReader.getRandomSeq(temp);
if(error_seq > error_seq_limit)
error("Sequence file has low quality with many non ACGT bases");
if(temp.sequence.length() < this->ksize)
error("Reads are required to have a minimum length of kmer size");
kmer = temp.sequence.substr(0,this->ksize);
flag = getHashCode(kmer, hashcode);
if(flag == -1) {
i--;
error_seq++;
continue;
}
refKmerMap[hashcode] = 0;
Expand Down

0 comments on commit c155bd1

Please sign in to comment.