Permalink
Browse files

Adds rdiffs to pcr.seqs

  • Loading branch information...
1 parent 8c16ba6 commit 2e89f31246436a4e6ed810b26cb5edcefe3360d3 @mothur-westcott mothur-westcott committed Jul 14, 2016
Showing with 35 additions and 27 deletions.
  1. +12 −8 source/commands/pcrseqscommand.cpp
  2. +8 −7 source/commands/pcrseqscommand.h
  3. +13 −10 source/trimoligos.cpp
  4. +2 −2 source/trimoligos.h
@@ -22,7 +22,7 @@ vector<string> PcrSeqsCommand::setParameters(){
CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
-
+ CommandParameter prdiffs("rdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(prdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
@@ -44,15 +44,16 @@ string PcrSeqsCommand::getHelpString(){
try {
string helpString = "";
helpString += "The pcr.seqs command reads a fasta file.\n";
- helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, rdiffs, processors, keepprimer and keepdots.\n";
helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
helpString += "The start parameter allows you to provide a starting position to trim to.\n";
helpString += "The end parameter allows you to provide a ending position to trim from.\n";
helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n";
helpString += "The processors parameter allows you to use multiple processors.\n";
helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
- helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the forward primer. The default is 0.\n";
+ helpString += "The rdiffs parameter is used to specify the number of differences allowed in the reverse primer. The default is 0.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
@@ -269,6 +270,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option) {
temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "rdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, rdiffs);
nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
@@ -587,7 +591,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, rdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -714,7 +718,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
revPrimer = oligos.getReversePrimers();
}
- TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
+ TrimOligos trim(pdiffs, rdiffs, 0, primers, barcodes, revPrimer);
while (!done) {
@@ -792,9 +796,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
int primerStart = 0; int primerEnd = 0;
vector<int> results = trim.findReverse(currSeq, primerStart, primerEnd);
bool good = true;
- if (results[0] > pdiffs) { good = false; }
+ if (results[0] > rdiffs) { good = false; }
totalDiffs += results[0];
- commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pdiffs) + ") ";
+ commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], rdiffs) + ") ";
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
@@ -875,7 +879,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
currSeq.setComment("\t" + commentString + "\t" + seqComment);
}
- if (totalDiffs > pdiffs) { trashCode += "t"; goodSeq = false; }
+ if (totalDiffs > (pdiffs + rdiffs)) { trashCode += "t"; goodSeq = false; }
//trimming removed all bases
if (currSeq.getUnaligned() == "") { goodSeq = false; }
@@ -40,7 +40,7 @@ class PcrSeqsCommand : public Command {
vector<linePair> lines;
bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
- int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
+ int start, end, processors, length, pdiffs, rdiffs, numFPrimers, numRPrimers;
Oligos oligos;
vector<string> outputNames;
@@ -68,13 +68,13 @@ struct pcrData {
string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
unsigned long long fstart;
unsigned long long fend;
- int count, start, end, length, pdiffs, pstart, pend;
+ int count, start, end, length, pdiffs, pstart, pend, rdiffs;
MothurOut* m;
set<string> badSeqNames;
bool keepprimer, keepdots, fileAligned, adjustNeeded;
pcrData(){}
- pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
+ pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, int rd, unsigned long long fst, unsigned long long fen) {
filename = f;
goodFasta = gf;
badFasta = bfn;
@@ -90,6 +90,7 @@ struct pcrData {
fstart = fst;
fend = fen;
pdiffs = pd;
+ rdiffs = rd;
locationsName = loc;
count = 0;
fileAligned = true;
@@ -152,7 +153,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
numFPrimers = primers.size();
}
- TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
+ TrimOligos trim(pDataArray->pdiffs, pDataArray->rdiffs, 0, primers, barcodes, revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
pDataArray->count++;
@@ -243,9 +244,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
int primerStart = 0; int primerEnd = 0;
vector<int> results = trim.findReverse(currSeq, primerStart, primerEnd);
bool good = true;
- if (results[0] > pDataArray->pdiffs) { good = false; }
+ if (results[0] > pDataArray->rdiffs) { good = false; }
totalDiffs += results[0];
- commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pDataArray->pdiffs) + ") ";
+ commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pDataArray->rdiffs) + ") ";
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
@@ -324,7 +325,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
currSeq.setComment("\t" + commentString + "\t" + seqComment);
}
- if (totalDiffs > pDataArray->pdiffs) { trashCode += "t"; goodSeq = false; }
+ if (totalDiffs > (pDataArray->pdiffs + pDataArray->rdiffs)) { trashCode += "t"; goodSeq = false; }
//trimming removed all bases
if (currSeq.getUnaligned() == "") { goodSeq = false; }
View
@@ -24,6 +24,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
bdiffs = b;
ldiffs = l;
sdiffs = s;
+ rdiffs = 0;
barcodes = br;
primers = pr;
@@ -73,6 +74,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
bdiffs = b;
ldiffs = l;
sdiffs = s;
+ rdiffs = 0;
paired = true;
hasIndex = hi;
@@ -138,12 +140,13 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
}
/********************************************************************/
//strip, pdiffs, bdiffs, primers, barcodes, revPrimers
-TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
+TrimOligos::TrimOligos(int p, int rd, int b, map<string, int> pr, map<string, int> br, vector<string> r){
try {
m = MothurOut::getInstance();
pdiffs = p;
bdiffs = b;
+ rdiffs = rd;
barcodes = br;
primers = pr;
@@ -282,7 +285,7 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
string rawSequence = seq.getUnaligned();
int maxRevPrimerLength = revPrimer[0].length();
vector<int> success;
- success.push_back(pdiffs + 1000); //guilty until proven innocent
+ success.push_back(rdiffs + 1000); //guilty until proven innocent
success.push_back(1e6); //no matches found
for(int i=0;i<revPrimer.size();i++){
@@ -310,10 +313,10 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
}
//cout << maxRevPrimerLength << endl;
//if you found the barcode or if you don't want to allow for diffs
- if ((pdiffs == 0) || (success[0] == 0)) { return success; }
+ if ((rdiffs == 0) || (success[0] == 0)) { return success; }
else { //try aligning and see if you can find it
Alignment* alignment;
- if (revPrimer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRevPrimerLength+pdiffs+1)); }
+ if (revPrimer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRevPrimerLength+rdiffs+1)); }
else{ alignment = NULL; }
//can you find the revPrimer
@@ -324,15 +327,15 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
for(int i=0;i<revPrimer.size();i++){
- if (rawSequence.length() < revPrimer[i].length()+pdiffs) {} //ignore primers too long for this seq
+ if (rawSequence.length() < revPrimer[i].length()+rdiffs) {} //ignore primers too long for this seq
else{
//undefined if not forced into an int.
- int stopSpot = rawRSequence.length()-(revPrimer[i].length()+pdiffs);
+ int stopSpot = rawRSequence.length()-(revPrimer[i].length()+rdiffs);
for (int j = 0; j < stopSpot; j++){
string oligo = reverseOligo(revPrimer[i]);
- string rawChunk = rawRSequence.substr(j,oligo.length()+pdiffs);
+ string rawChunk = rawRSequence.substr(j,oligo.length()+rdiffs);
//cout << "r before = " << oligo << '\t' << rawChunk << endl;
// cout << oligo << '\t' << olength << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
@@ -350,7 +353,7 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
oligo = oligo.substr(0,alnLength);
temp = temp.substr(0,alnLength);
int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
+ if (alnLength == 0) { numDiff = rdiffs + 1000; }
//cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
if(numDiff < minDiff){
@@ -367,8 +370,8 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
if (alignment != NULL) { delete alignment; }
- if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = 1e6; return success; } //no good matches
- else if(minCount > 1) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = pdiffs + 10000; return success; } //can't tell the difference between multiple primers
+ if(minDiff > rdiffs) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = 1e6; return success; } //no good matches
+ else if(minCount > 1) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = rdiffs + 10000; return success; } //can't tell the difference between multiple primers
else{ success[0] = minDiff; success[1] = 0; return success; }
}
View
@@ -19,7 +19,7 @@
class TrimOligos {
public:
- TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+ TrimOligos(int,int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
TrimOligos(int,int, int, int, map<int, oligosPair>, map<int, oligosPair>, bool); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex
~TrimOligos();
@@ -55,7 +55,7 @@ class TrimOligos {
string getCodeValue(int, int);
private:
- int pdiffs, bdiffs, ldiffs, sdiffs;
+ int pdiffs, bdiffs, ldiffs, sdiffs, rdiffs;
bool paired, hasIndex;
map<string, int> barcodes;

0 comments on commit 2e89f31

Please sign in to comment.