Skip to content

Commit

Permalink
Merge pull request #544 from maxibor/dev
Browse files Browse the repository at this point in the history
Add bam filtering on fragment length script
  • Loading branch information
jfy133 committed Sep 11, 2020
2 parents baa0d60 + a40ae71 commit 249d5f4
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
* [#512](https://github.com/nf-core/eager/issues/512) Added flexible trimming of bams by library type. 'half' and 'none' UDG libraries can now be trimmed differentially within a single eager run.
* Added a `.dockstore.yml` config file for automatic workflow registration with [dockstore.org](https://dockstore.org/)
* Updated template to nf-core/tools 1.10.2
* [#544](https://github.com/nf-core/eager/pull/544) Add script to perform bam filtering on fragment length

### `Fixed`

Expand Down
82 changes: 82 additions & 0 deletions bin/filter_bam_fragment_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3

import argparse
import pysam


def get_args():
"""This function parses and return arguments passed in"""
parser = argparse.ArgumentParser(
prog="bam_filter", description="Filter bam on fragment length"
)
parser.add_argument("bam", help="Bam aligment file")
parser.add_argument(
"-l",
dest="fraglen",
default=35,
type=int,
help="Minimum fragment length. Default = 35",
)
parser.add_argument(
"-a",
dest="all",
default=False,
action="store_true",
help="Include all reads, even unmapped",
)
parser.add_argument(
"-o",
dest="output",
default=None,
help="Output bam basename. Default = {bam_basename}.filtered.bam",
)

args = parser.parse_args()

bam = args.bam
fraglen = args.fraglen
allreads = args.all
outfile = args.output

return (bam, fraglen, allreads, outfile)


def getBasename(file_name):
if ("/") in file_name:
basename = file_name.split("/")[-1].split(".")[0]
else:
basename = file_name.split(".")[0]
return basename


def filter_bam(infile, outfile, fraglen, allreads):
"""Write bam to file
Args:
infile (stream): pysam stream
outfile (str): Path to output bam
fraglen(int): Minimum fragment length to keep
allreads(bool): Apply on all reads, not only mapped
"""
bamfile = pysam.AlignmentFile(infile, "rb")
bamwrite = pysam.AlignmentFile(outfile + ".filtered.bam", "wb", template=bamfile)

for read in bamfile.fetch(until_eof=True):
if allreads:
if read.query_length >= fraglen:
bamwrite.write(read)
else:
if read.is_unmapped == False and read.query_length >= fraglen:
bamwrite.write(read)


if __name__ == "__main__":
BAM, FRAGLEN, ALLREADS, OUTFILE = get_args()

BAMFILE = pysam.AlignmentFile(BAM, "rb")

if OUTFILE is None:
OUTFILE = getBasename(BAM)

filter_bam(BAM, OUTFILE, FRAGLEN, ALLREADS)

0 comments on commit 249d5f4

Please sign in to comment.