Skip to content

Commit

Permalink
Have the basic bam prep and seed collection parts of the tools comple…
Browse files Browse the repository at this point in the history
…te. Just need to implement the actual algorithm now! Also may want to add some unit tests for the seed collector just to make sure there are no problems.
  • Loading branch information
Daniel Mapleson committed May 15, 2014
1 parent 1ff8b45 commit 630fb3d
Show file tree
Hide file tree
Showing 10 changed files with 250 additions and 105 deletions.
Binary file added Post_alignment_RNA-Seq_filtering_tool.pdf
Binary file not shown.
49 changes: 29 additions & 20 deletions configure.ac
Expand Up @@ -78,26 +78,10 @@ AC_CHECK_HEADERS([getopt.h])
AC_CHECK_LIB(pthread, pthread_exit)
AC_CHECK_LIB(z, gzread)

## Check for boost
BOOST_REQUIRE([1.52])

# Header only
BOOST_STRING_ALGO
BOOST_FOREACH
BOOST_UNORDERED

# Header + lib
BOOST_FILESYSTEM
BOOST_PROGRAM_OPTIONS
BOOST_SYSTEM
BOOST_THREADS
BOOST_TEST

# Combine BOOST variables (apart for BOOST_TEST)
BOOST_LDFLAGS="${BOOST_FILESYSTEM_LDFLAGS} ${BOOST_PROGRAM_OPTIONS_LDFLAGS} ${BOOST_SYSTEM_LDFLAGS} ${BOOST_THREAD_LDFLAGS}"
BOOST_LIBS="${BOOST_FILESYSTEM_LIBS} ${BOOST_PROGRAM_OPTIONS_LIBS} ${BOOST_SYSTEM_LIBS} ${BOOST_THREAD_LIBS}"

# Allow users to specify custom directories for seqan library and doxygen. This might save some users from having to set the environment variables manually.
# Allow users to specify custom directories for samtools and bamtools.
# This might save some users from having to set the environment variables manually.

AC_ARG_WITH([samtools],
[AS_HELP_STRING([--with-samtools=PREFIX],
Expand Down Expand Up @@ -133,15 +117,40 @@ LDFLAGS="${BAMTOOLS_LDFLAGS} ${SAMTOOLS_LDFLAGS} ${AM_LDFLAGS} ${LDFLAGS}"

#### Put any header or lib checks here ####

AC_CHECK_HEADER([api/BamMultiReader.h], [], [
AC_MSG_ERROR([Bamtools not found. Please ensure that the bamtools include directory can be found on the CPPFLAGS env var. Also check that the Bamtools lib directory can be found on the LDFLAGS env var.])
])

AC_CHECK_LIB([bam], [fai_build], [], [echo "Samtools not found" exit -1], [])
AC_CHECK_HEADER([api/BamMultiReader.h], [], [echo "Bamtools not found" exit -1], [])
AC_CHECK_LIB(bam, fai_build, [], [
AC_MSG_ERROR([Samtools not found. Please ensure that the Samtools include directory can be found on the CPPFLAGS env var. Also check that the Samtools lib directory can be found on the LDFLAGS env var.])
])

# Restore the previous CPPFLAGS
CPPFLAGS=${OLD_CPPFLAGS}
LDFLAGS=${OLD_LDFLAGS}


## Check for boost
BOOST_REQUIRE([1.52])

# Header only
BOOST_STRING_ALGO
BOOST_FOREACH
BOOST_UNORDERED

# Header + lib
BOOST_FILESYSTEM
BOOST_PROGRAM_OPTIONS
BOOST_SYSTEM
BOOST_THREADS
BOOST_TEST

# Combine BOOST variables (apart for BOOST_TEST)
BOOST_LDFLAGS="${BOOST_FILESYSTEM_LDFLAGS} ${BOOST_PROGRAM_OPTIONS_LDFLAGS} ${BOOST_SYSTEM_LDFLAGS} ${BOOST_THREAD_LDFLAGS}"
BOOST_LIBS="${BOOST_FILESYSTEM_LIBS} ${BOOST_PROGRAM_OPTIONS_LIBS} ${BOOST_SYSTEM_LIBS} ${BOOST_THREAD_LIBS}"



#########

AC_SUBST([AM_CPPFLAGS])
Expand Down
81 changes: 56 additions & 25 deletions include/seed_collector.hpp
Expand Up @@ -22,7 +22,7 @@
#include <boost/foreach.hpp>

#include <api/BamReader.h>
#include <api/BamMultiReader.h>
#include <api/BamWriter.h>

using std::vector;

Expand All @@ -33,46 +33,77 @@ namespace portculis {
class SeedCollector {
private:

vector<string> bamFiles;
bool collectiveMode;

vector<BamReader> singleReaders;
BamMultiReader multiReader;
string sortedBam;
string seedFile;
bool verbose;

BamReader reader;
BamWriter writer;

SamHeader header;
RefVector refs;

protected:

bool isPotentialSeed(BamAlignment& al) {
BOOST_FOREACH(CigarOp op, al.CigarData) {
if (op.Type == 'N') {
return true;
}
}

return false;
}

public:

SeedCollector(vector<string> _bamFiles, bool _collectiveMode) :
bamFiles(_bamFiles), collectiveMode(_collectiveMode) {
SeedCollector(string _sortedBam, string _seedFile, bool _verbose) :
sortedBam(_sortedBam), seedFile(_seedFile), verbose(_verbose) {

if (collectiveMode) {
if (!multiReader.Open(bamFiles)) {
throw "Could not open bam files";
}

header = reader.GetHeader();
refs = reader.GetReferenceData();
if (!reader.Open(sortedBam)) {
throw "Could not open bam reader";
}
else {
BOOST_FOREACH(string bamFile, bamFiles) {
BamReader reader;
if (!reader.Open(bamFile)) {
throw "Could not open bam files";
}
singleReaders.push_back(reader);
}
}

if (!writer.Open(seedFile, header, refs)) {
throw "Could not open bam writer";
}

header = reader.GetHeader();
refs = reader.GetReferenceData();

if (verbose) {

cout << endl << "Collecting seeds from: " << sortedBam << endl;
cout << "Header:" << endl << header.ToString() << endl;
cout << "Refs:" << endl;

BOOST_FOREACH(RefData ref, refs) {
cout << ref.RefLength << " - " << ref.RefName << endl;
}
}
}

virtual ~SeedCollector() {
reader.Close();
writer.Close();
}

void sort() {
uint64_t collect() {

BamAlignment al;
uint64_t count = 0;
while(reader.GetNextAlignment(al))
{
if (isPotentialSeed(al)) {
writer.SaveAlignment(al);
count++;
}
}

return count;
}


};
}

4 changes: 3 additions & 1 deletion src/Makefile.am
Expand Up @@ -9,7 +9,9 @@ portculis_CPPFLAGS = -DCPLUSPLUS @BAMTOOLS_CPPFLAGS@ @SAMTOOLS_CPPFLAGS@ @BOOST_
portculis_LDFLAGS = @BAMTOOLS_LDFLAGS@ @SAMTOOLS_LDFLAGS@ @BOOST_LDFLAGS@ @AM_LDFLAGS@
portculis_LDADD = -lbam -lbamtools @BOOST_LIBS@ -lpthread -lz
portculis_SOURCES = \
genome_mapper.hpp \
seed_collector.hpp \
bamtools_sort.h \
bamtools_sort.cpp \
bamtools_sort.cpp \
portculis.cc

0 comments on commit 630fb3d

Please sign in to comment.