- helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n";
- helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List, repfasta, repnames or count, and constaxonomy are required.\n";
+ helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, and optional *.rep.fasta, *.rep.names, groupfile, or count file and creates a database file.\n";
+ helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List or shared and constaxonomy are required.\n";
helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine();
+ m->mothurOut("[ERROR]: No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine();
+ if ((repnamesfile != "") && (repfastafile == "")) { m->mothurOut("[ERROR]: You must provide a repfasta file if you are using a repnames file."); m->mothurOutEndLine();
//the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below
map<string, string> tempRepNames;
@@ -298,26 +289,29 @@ int CreateDatabaseCommand::execute(){
repNames.erase(it++);
}
repNames = tempRepNames;
- }else {
+ }else if (countfile != ""){
ct.readTable(countfile, true, false);
numUniqueNamesFile = ct.getNumUniqueSeqs();
nameMap = ct.getNameMap();
}
- //are there the same number of otus in the fasta and name files
- if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
-
- if (m->control_pressed) { return 0; }
-
- //are there the same number of OTUs in the tax and fasta file
- if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
-
if (m->control_pressed) { return 0; }
- //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
- for (int i = 0; i < classifyOtuSizes.size(); i++) {
- if (classifyOtuSizes[i] != repOtusSizes[i]) {
- m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ". These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;
+ if (repfastafile != "") {
+
+ //are there the same number of otus in the fasta and name files
+ if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
+
+ //are there the same number of OTUs in the tax and fasta file
+ if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
+
+ if (m->control_pressed) { return 0; }
+
+ //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
+ for (int i = 0; i < classifyOtuSizes.size(); i++) {
+ if (classifyOtuSizes[i] != repOtusSizes[i]) {
+ m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ". These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;
+ }
}
}
@@ -362,7 +356,9 @@ int CreateDatabaseCommand::execute(){
for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += '\t' + (ct.getNamesOfGroups())[i]; }
+ if (repfastafile != "") { header += "\trepSeqName\trepSeq"; }
+ header += "\tOTUConTaxonomy";
out << header << endl;
vector<string> binLabels = list->getLabels();
@@ -380,9 +376,9 @@ int CreateDatabaseCommand::execute(){
m->splitAtComma(bin, binNames);
string seqRepName = "";
- int numSeqsRep = 0;
+ int numSeqsRep = binNames.size();
- if (countfile == "") {
+ if (repnamesfile != "") {
sort(binNames.begin(), binNames.end());
bin = "";
for (int j = 0; j < binNames.size()-1; j++) {
@@ -399,14 +395,14 @@ int CreateDatabaseCommand::execute(){
if (binNames.size() != classifyOtuSizes[index]) {
m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
map<string, int>::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file.
if (itNameMap != nameMap.end()) {
seqRepName = itNameMap->first;
numSeqsRep = itNameMap->second;
- j += binNames.size();
+ j += binNames.size();//exit loop
}
}
@@ -440,15 +436,25 @@ int CreateDatabaseCommand::execute(){
for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << '\t' << counts[(groupmap->getNamesOfGroups())[j]]; }
0 comments on commit
2ff2527