Skip to content

Commit

Permalink
-a
Browse files Browse the repository at this point in the history
  • Loading branch information
mtw committed Nov 27, 2014
1 parent 34a4525 commit 21e34fa
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 92 deletions.
6 changes: 4 additions & 2 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,14 @@ Revision history for Perl extension ViennaNGS.
- everything is exported via @EXPORT_OK now
- updated README

0.10
0.10 Thu Nov 27 2014
- requires Perl >= 5.12.0
- added sortbed()
- added uniquify_bam()
- added scripts/bam_uniq.pl
- added scripts/sj_visualizer.pl
- refactored scripts/motiffinda.pl
- updated scripts/assembly_hub_constructer.pl
- removed development version of scripts/assembly_hub_constructer.pl
- made scripts/gff2bed.pl work with OO Bio::ViennaNGS::AnnoC
- removed get_stranded_subsequence (now lives in Bio::ViennaNGS::Fasta)
- updated POD
4 changes: 4 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ statistics. Optionally create BED output, as well as normalized bedGraph
and bigWig files for coverage visualization in genome browsers (see
dependencies on third-patry tools below).

bam_uniq.pl
Extract unique and multi mapping reads from BAM alignment files and create
a separate BAM file for both uniqe (.uniq.) and multi (.mult.) mappers.

gff2bed.pl:
Convert RefSeq GFF3 annotation files to BED12 format. Individual BED12
files are created for each feature type (CDS/tRNA/rRNA/etc.). Tested with
Expand Down
86 changes: 80 additions & 6 deletions lib/Bio/ViennaNGS.pm
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# -*-CPerl-*-
# Last changed Time-stamp: <2014-10-22 15:49:09 mtw>
# Last changed Time-stamp: <2014-11-27 14:21:31 mtw>

package Bio::ViennaNGS;

use 5.12.0;
use Exporter;
use version; our $VERSION = qv('0.10_03');
use version; our $VERSION = qv('0.10');
use strict;
use warnings;
use Bio::Perl 1.006924;
Expand All @@ -19,7 +19,7 @@ use Carp;

our @ISA = qw(Exporter);
our @EXPORT = ();
our @EXPORT_OK = qw ( split_bam bam2bw bed2bw sortbed
our @EXPORT_OK = qw ( split_bam uniquify_bam bam2bw bed2bw sortbed
bed2bigBed computeTPM featCount_data
parse_multicov write_multicov totalreads );

Expand Down Expand Up @@ -291,6 +291,68 @@ sub split_bam {
return @processed_files;
}

# uniquify_bam ($bam,$dest,$log)
# Deconvolute BAM files into unique and multi mappers
# Returns array of filenames to .uniq. and .mult. BAM files
sub uniquify_bam {
my ($bamfile,$dest,$log) = @_;
my ($bam, $bn,$path,$ext,$read,$header);
my ($tmp_uniq,$tmp_mult,$fn_uniq,$fn_mult,$bam_uniq,$bam_mult);
my ($count_all,$count_uniq,$count_mult) = (0)x3;
my @processed_files = ();
my $this_function = (caller(0))[3];

croak "ERROR [$this_function] Cannot find $bamfile\n"
unless (-e $bamfile);
croak "ERROR [$this_function] $dest does not exist\n"
unless (-d $dest);

($bn,$path,$ext) = fileparse($bamfile, qr /\..*/);

(undef,$tmp_uniq) = tempfile('BAM_UNIQ_XXXXXXX',UNLINK=>0);
(undef,$tmp_mult) = tempfile('BAM_MULT_XXXXXXX',UNLINK=>0);

$bam = Bio::DB::Bam->open($bamfile, "r");
$fn_uniq = file($dest,$bn.".uniq".$ext);
$fn_mult = file($dest,$bn.".mult".$ext);
$header = $bam->header; # TODO: modify header, leave traces ...

$bam_uniq = Bio::DB::Bam->open($tmp_uniq,'w')
or croak "ERROR [$this_function] Cannot open temp file for writing: $!";
$bam_mult = Bio::DB::Bam->open($tmp_mult,'w')
or croak "ERROR [$this_function] Cannot open temp file for writing: $!";
$bam_uniq->header_write($header);
$bam_mult->header_write($header);

while ($read = $bam->read1() ) {
$count_all++;
if ($read->aux_get("NH") == 1){ # uniquely mapped reads
$bam_uniq->write1($read);
$count_uniq++;
}
else { # multiply mapped reads
$bam_mult->write1($read);
$count_mult++;
}
}

croak "ERROR [$this_function] Read counts don't match\n"
unless ($count_uniq + $count_mult == $count_all);

rename ($tmp_uniq, $fn_uniq);
rename ($tmp_mult, $fn_mult);
push (@processed_files, $fn_uniq);
push (@processed_files, $fn_mult);

if (defined $log){
my $lf = file($dest,$log);
open(LOG, ">>", $lf) or croak $!;
printf LOG "%15d reads total\n%15d unique reads\n%15d multiple reads\n",
$count_all,$count_uniq,$count_mult;
close(LOG);
}
}

# bam2bw ( $bam,$chromsizes )
# Generate BedGraph and BigWig coverage from BAM via two third-party tools:
# genomeCoverageBed from BEDtools
Expand Down Expand Up @@ -577,6 +639,9 @@ analysis
# split a single-end or paired-end BAM file by strands
@result = split_bam($bam_in,$rev,$want_uniq,$want_bed,$destdir,$logfile);
# extract unique and multi mappers from a BAM file
@result = uniquify_bam($bam_in,$outdir,$logfile);
# make bigWig from BAM
bam2bw($bam_in,$chromsizes)
Expand All @@ -600,10 +665,11 @@ analysis
=head1 DESCRIPTION
ViennaNGS is a collection of utilities and subroutines for building
Next-Generation Sequencing (NGS) data analysis pipelines.
Bio::ViennaNGS is a collection of utilities and subroutines for
building efficient Next-Generation Sequencing (NGS) data analysis
pipelines.
=over
=over
=item split_bam($bam,$reverse,$want_uniq,$want_bed,$dest_dir,$log)
Expand Down Expand Up @@ -633,6 +699,14 @@ separately, thus allowing for scenarios where eg. one read is a
multi-mapper, whereas its associate mate is a unique mapper, resulting
in an ambiguous alignment of the entire fragment.
=item uniquify_bam($bam,$dest,$log)
Extract I<unique> and I<multi> mapping reads from BAM file
C<$bam>. New BAM files for unique and multi mappers are created in the
output folder C<$dest>, which are named B<basename.uniq.bam> and
B<basename.mult.bam>, respectively. If defined, a logfile named
C<$log> is created in the output folder.
=item bam2bw($bam,$chromsizes)
Creates BedGraph and BigWig coverage profiles from BAM files. These
Expand Down
84 changes: 0 additions & 84 deletions scripts/assembly_hub_constructer.pl

This file was deleted.

0 comments on commit 21e34fa

Please sign in to comment.