From 21daaeb9dafc0cfeeda23e4e081f07bb81657978 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Fri, 29 Jun 2012 11:09:41 +0100 Subject: [PATCH] sga-graphdiff now directly outputs the final set of calls. --- src/GraphDiff/DindelRealignWindow.cpp | 25 +++++++++++++++++++------ src/GraphDiff/DindelUtil.cpp | 20 ++++++++++++++++++-- src/GraphDiff/DindelUtil.h | 3 ++- src/GraphDiff/GraphCompare.cpp | 15 +++++++++++++-- src/GraphDiff/GraphCompare.h | 2 ++ src/GraphDiff/VCFTester.cpp | 8 ++++++-- src/GraphDiff/VCFTester.h | 1 + src/Util/VCFUtil.cpp | 19 +++++++++++++++++-- src/Util/VCFUtil.h | 1 + 9 files changed, 79 insertions(+), 15 deletions(-) diff --git a/src/GraphDiff/DindelRealignWindow.cpp b/src/GraphDiff/DindelRealignWindow.cpp index 2529cf9a..e3b7147a 100644 --- a/src/GraphDiff/DindelRealignWindow.cpp +++ b/src/GraphDiff/DindelRealignWindow.cpp @@ -1435,24 +1435,37 @@ void DindelRealignWindowResult::Inference::outputAsVCF(const DindelVariant & var record.addComment("SB", this->strandBias); record.addComment("NumLib", this->numLibraries); record.addComment("NumFragments", this->numReadNames); - out.push_back(record); - /* // HistDist std::stringstream histdist_ss; - std::copy(histDistance.begin(), histDistance.end(), std::ostream_iterator(histdist_ss, ",")); + for (size_t x=0;x0) + histdist_ss << ","; + histdist_ss << histDistance[x]; + } record.addComment("HistDist", histdist_ss.str()); // HistLik std::stringstream histlik_ss; - std::copy(histAlignLik.begin(), histAlignLik.end(), std::ostream_iterator(histdist_ss, ",")); + for (size_t x=0;x0) + histlik_ss << ","; + histlik_ss << histAlignLik[x]; + } record.addComment("HistLik", histlik_ss.str()); // HistMapQ std::stringstream histmap_ss; - std::copy(histMapQ.begin(), histMapQ.end(), std::ostream_iterator(histMapQ_ss, ",")); - record.addComment("HistMapQ", histlik_ss.str()); + for (size_t x=0;x0) + histmap_ss << ","; + histmap_ss << histMapQ[x]; + } + record.addComment("HistMapQ", histmap_ss.str()); + + out.push_back(record); + /* out.precision(5); out.setf(std::ios::fixed,std::ios::floatfield); out << var.getChrom() << "\t" << var.getPos() << "\t"; diff --git a/src/GraphDiff/DindelUtil.cpp b/src/GraphDiff/DindelUtil.cpp index 6dacc647..8359308b 100644 --- a/src/GraphDiff/DindelUtil.cpp +++ b/src/GraphDiff/DindelUtil.cpp @@ -21,7 +21,8 @@ DindelReturnCode DindelUtil::runDindelPairMatePair(const std::string& id, const StringVector& variant_haplotypes, const GraphCompareParameters& parameters, std::ostream& baseOut, - std::ostream& variantOut) + std::ostream& variantOut, + std::ostream& callsOut) { PROFILE_FUNC("runDindelPairMatePair") @@ -283,7 +284,7 @@ DindelReturnCode DindelUtil::runDindelPairMatePair(const std::string& id, pPreviousResult = pThisResult; } - // Copy VCFRecords to output + // Copy raw VCFRecords to output for(size_t i = 0; i <= 1; ++i) { std::ostream& curr_out = i == 0 ? baseOut : variantOut; @@ -291,6 +292,21 @@ DindelReturnCode DindelUtil::runDindelPairMatePair(const std::string& id, curr_out << vcfRecords[i][j] << "\n"; } + // Make comparative calls + size_t VARIANT_IDX = 1; + size_t BASE_IDX = 0; + bool has_base_calls = !vcfRecords[BASE_IDX].empty(); + for(size_t i = 0; i < vcfRecords[1].size(); ++i) + { + bool not_called_in_base = true; + if(has_base_calls) + not_called_in_base = vcfRecords[BASE_IDX][i].passStr == "NoCall"; + + bool called_in_variant = vcfRecords[VARIANT_IDX][i].passStr == "PASS"; + if(called_in_variant && not_called_in_base) + callsOut << vcfRecords[VARIANT_IDX][i] << "\n"; + } + baseOut.flush(); variantOut.flush(); diff --git a/src/GraphDiff/DindelUtil.h b/src/GraphDiff/DindelUtil.h index f0408e58..a0b68d3c 100644 --- a/src/GraphDiff/DindelUtil.h +++ b/src/GraphDiff/DindelUtil.h @@ -36,7 +36,8 @@ namespace DindelUtil const StringVector& variant_haplotypes, const GraphCompareParameters& parameters, std::ostream& baseOut, - std::ostream& variantOut); + std::ostream& variantOut, + std::ostream& callsOut); // Run a naive caller DindelReturnCode runNaiveCaller(const std::string& normalString, diff --git a/src/GraphDiff/GraphCompare.cpp b/src/GraphDiff/GraphCompare.cpp index 01185eeb..939b93a3 100644 --- a/src/GraphDiff/GraphCompare.cpp +++ b/src/GraphDiff/GraphCompare.cpp @@ -175,12 +175,14 @@ GraphCompareResult GraphCompare::process(const SequenceWorkItem& item) std::stringstream baseVCFSS; std::stringstream variantVCFSS; + std::stringstream callsVCFSS; DindelReturnCode drc = DindelUtil::runDindelPairMatePair(kmer, build_result.base_haplotypes, build_result.variant_haplotypes, m_parameters, baseVCFSS, - variantVCFSS); + variantVCFSS, + callsVCFSS); // if(m_parameters.verbose > 0 || 1) @@ -195,6 +197,7 @@ GraphCompareResult GraphCompare::process(const SequenceWorkItem& item) { result.baseVCFStrings.push_back(baseVCFSS.str()); result.variantVCFStrings.push_back(variantVCFSS.str()); + result.calledVCFStrings.push_back(callsVCFSS.str()); } } } @@ -498,13 +501,15 @@ void GraphCompare::testKmer(const std::string& kmer) std::stringstream baseVCFSS; std::stringstream variantVCFSS; + std::stringstream callsVCFSS; DindelReturnCode drc = DindelUtil::runDindelPairMatePair(kmer, build_result.base_haplotypes, build_result.variant_haplotypes, m_parameters, baseVCFSS, - variantVCFSS); + variantVCFSS, + callsVCFSS); std::cout << "base: " << baseVCFSS.str() << "\n"; std::cout << "variant: " << variantVCFSS.str() << "\n"; @@ -533,6 +538,7 @@ void GraphCompare::showMappingLocations(const std::string& str) // GraphCompareAggregateResults::GraphCompareAggregateResults(const std::string& fileprefix) : m_baseVCFFile(fileprefix + ".base.vcf","w"), m_variantVCFFile(fileprefix + ".variant.vcf","w"), + m_callsVCFFile(fileprefix + ".calls.vcf","w"), m_numVariants(0) { // @@ -541,6 +547,7 @@ GraphCompareAggregateResults::GraphCompareAggregateResults(const std::string& fi // m_baseVCFFile.outputHeader("stub", "stub"); m_variantVCFFile.outputHeader("stub", "stub"); + m_callsVCFFile.outputHeader("stub", "stub"); // Initialize mutex int ret = pthread_mutex_init(&m_mutex, NULL); @@ -601,6 +608,10 @@ void GraphCompareAggregateResults::process(const SequenceWorkItem& /*item*/, con m_baseVCFFile.getOutputStream() << result.baseVCFStrings[i]; m_variantVCFFile.getOutputStream() << result.variantVCFStrings[i]; } + + // Write out the final calls + for(size_t i = 0; i < result.calledVCFStrings.size(); ++i) + m_callsVCFFile.getOutputStream() << result.calledVCFStrings[i]; } // diff --git a/src/GraphDiff/GraphCompare.h b/src/GraphDiff/GraphCompare.h index 65f10bb6..15e7827f 100644 --- a/src/GraphDiff/GraphCompare.h +++ b/src/GraphDiff/GraphCompare.h @@ -82,6 +82,7 @@ struct GraphCompareResult StringVector baseVCFStrings; StringVector variantVCFStrings; + StringVector calledVCFStrings; }; // @@ -204,6 +205,7 @@ class GraphCompareAggregateResults // VCF output files VCFFile m_baseVCFFile; VCFFile m_variantVCFFile; + VCFFile m_callsVCFFile; size_t m_numVariants; }; diff --git a/src/GraphDiff/VCFTester.cpp b/src/GraphDiff/VCFTester.cpp index e54aab09..2e33e03f 100644 --- a/src/GraphDiff/VCFTester.cpp +++ b/src/GraphDiff/VCFTester.cpp @@ -19,10 +19,12 @@ VCFTester::VCFTester(const GraphCompareParameters& params) : m_parameters(params), m_graphComparer(params), m_baseVCFFile("vtest.base.vcf","w"), - m_variantVCFFile("vtest.variant.vcf","w") + m_variantVCFFile("vtest.variant.vcf","w"), + m_callsVCFFile("vtest.calls.vcf","w") { m_baseVCFFile.outputHeader("stub", "stub"); m_variantVCFFile.outputHeader("stub", "stub"); + m_callsVCFFile.outputHeader("stub", "stub"); DindelUtil::initializeCodeCounts(m_returnCodes); } @@ -63,6 +65,7 @@ void VCFTester::process(const VCFFile::VCFEntry& record) // Run dindel std::stringstream baseSS; std::stringstream variantSS; + std::stringstream callsSS; std::string id = "."; StringVector ref_haplotypes; @@ -75,7 +78,8 @@ void VCFTester::process(const VCFFile::VCFEntry& record) var_haplotypes, m_parameters, baseSS, - variantSS); + variantSS, + callsSS); m_returnCodes[code] += 1; diff --git a/src/GraphDiff/VCFTester.h b/src/GraphDiff/VCFTester.h index 860083a0..f8e0c2bb 100644 --- a/src/GraphDiff/VCFTester.h +++ b/src/GraphDiff/VCFTester.h @@ -49,6 +49,7 @@ class VCFTester // Output file VCFFile m_baseVCFFile; VCFFile m_variantVCFFile; + VCFFile m_callsVCFFile; }; #endif diff --git a/src/Util/VCFUtil.cpp b/src/Util/VCFUtil.cpp index efd0e50d..057483a8 100644 --- a/src/Util/VCFUtil.cpp +++ b/src/Util/VCFUtil.cpp @@ -40,6 +40,16 @@ void VCFRecord::addComment(const std::string& key, const int& value) addComment(key, v_str.str()); } +void VCFRecord::addComment(const std::string& key, const double& value) +{ + std::stringstream v_str; + v_str.precision(5); + v_str.setf(std::ios::fixed,std::ios::floatfield); + v_str << value; + addComment(key, v_str.str()); +} + + void VCFRecord::setPassStr(const std::string str) { passStr = str; @@ -71,8 +81,13 @@ std::ostream& operator<<(std::ostream& o, VCFRecord& record) o << record.varStr << "\t"; o << (int)record.quality << "\t"; o << record.passStr << "\t"; - std::copy( record.comments.begin(), record.comments.end(), - std::ostream_iterator(o, ";")); // comments + + for(size_t i = 0; i < record.comments.size(); ++i) + { + if(i > 0) + o << ";"; + o << record.comments[i]; + } return o; } diff --git a/src/Util/VCFUtil.h b/src/Util/VCFUtil.h index 116ae999..74bf5bd6 100644 --- a/src/Util/VCFUtil.h +++ b/src/Util/VCFUtil.h @@ -42,6 +42,7 @@ struct VCFRecord VCFRecord(); void addComment(const std::string& key, const std::string& value); void addComment(const std::string& key, const int& value); + void addComment(const std::string& key, const double& value); void setPassStr(const std::string str); void setQuality(const double q);