Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 24e586c797
Fetching contributors…

Cannot retrieve contributors at this time

182 lines (164 sloc) 6.48 kb
function [coverage reads_ok intron_list read_starts paired_reads] = get_coverage_per_read(CFG, gene, reverse_ret)
% GET_COVERAGE_PER_READ   Gets the reads from the BAM file covering the gene region.
%
%   [coverage reads_ok intron_list read_starts paired_reads] = get_coverage_per_read(CFG, gene, reverse_ret)
%
%   -- input --
%   CFG:          configuration struct. Fields read here: paired, read_len,
%                 tracks_fn (per chromosome: cell array of BAM file names) and,
%                 optionally, both_strands, tracks_max_intron_len,
%                 tracks_min_exon_len, tracks_max_mismatches (defaults are
%                 filled in below when a field is missing)
%   gene:         struct defining a gene; fields read here: chr, chr_num,
%                 strand, eidx (exonic positions) and exonic_len
%   reverse_ret:  if true, the returned coverage is flipped for genes on the
%                 reverse strand (default: 0)
%
%   -- output --
%   coverage:     sparse column vector (exonic positions x 1) with the number
%                 of reads covering each exonic position. Internally a
%                 positions x reads matrix is built when per-read information
%                 is needed, but it is always summed over reads before return.
%                 Empty if get_reads failed.
%   reads_ok:     indicates success of file parsing (0 if get_reads failed)
%   intron_list:  nx4 list of introns
%                 (intron start, intron stop, confirmation, strand)
%                 NOTE: if get_reads fails, the introns collected so far are
%                 returned as an nx2 list (start, stop) only.
%   read_starts:  vector of number of reads starting at the given exonic positions
%   paired_reads: struct including starts, stops and mates for paired-end reads
%                 (only available for CFG.paired==1)
%
%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 3 of the License, or
%   (at your option) any later version.
%
%   Written (W) 2009-2011 Regina Bohnert, Gunnar Raetsch
%   Copyright (C) 2009-2011 Max Planck Society
%

  % initialisation
  coverage = [];
  reads_ok = 1;
  intron_list = zeros(2, 0);
  read_starts = zeros(0, 1);
  paired_reads.starts = zeros(1, 0);
  paired_reads.stops = zeros(1, 0);
  paired_reads.mates = zeros(2, 0);

  % per-read information is only required for paired data or when read
  % starts / paired reads are requested; otherwise get_reads is asked for
  % an already collapsed coverage vector
  collapse = double(~CFG.paired & nargout<4);

  if nargin<3
    reverse_ret = 0;
  end
  if nargout==5
    assert(CFG.paired==1);
  end

  if ~isfield(CFG, 'both_strands')
    CFG.both_strands = 0;
  end
  if CFG.both_strands
    strand = '0';
  else
    strand = gene.strand;
  end

  % parameters for get_reads
  if ~isfield(CFG, 'tracks_max_intron_len')
    CFG.tracks_max_intron_len = 1e9;
  end
  if ~isfield(CFG, 'tracks_min_exon_len')
    CFG.tracks_min_exon_len = -1;
  end
  if ~isfield(CFG, 'tracks_max_mismatches')
    CFG.tracks_max_mismatches = CFG.read_len;
  end
  subsample = 1000; mapped = 1; spliced = 1; maxminlen = 0;

  % extend the queried region by one read length on both sides of the gene
  % (clipped at position 1 on the left)
  win = CFG.read_len;
  left_flank = max(gene.eidx(1)-win,1):gene.eidx(1)-1;
  eidx = [left_flank, gene.eidx, gene.eidx(end)+1:gene.eidx(end)+win];
  win_size = length(left_flank);
  region_len = eidx(end)-eidx(1)+1;
  % rows of the queried region that correspond to the exonic positions
  exon_rows = eidx(win_size+1:win_size+gene.exonic_len)-eidx(1)+1;

  num_files = length(CFG.tracks_fn{gene.chr_num});
  % preallocated so that an empty file list does not leave it undefined
  coverage_idx_tmp = cell(1, num_files);
  for f = 1:num_files,
    fname = CFG.tracks_fn{gene.chr_num}{f};
    if ~exist(fname, 'file'),
      warning('BAM file %s does not exist', fname);
    end
    % reset per file; they stay empty when the corresponding output of
    % get_reads is not requested
    intron_list_tmp = [];
    mates_tmp = [];
    get_reads_args = {fname, gene.chr, eidx(1), eidx(end), strand, collapse, subsample, ...
                      CFG.tracks_max_intron_len, CFG.tracks_min_exon_len, CFG.tracks_max_mismatches, ...
                      mapped, spliced, maxminlen, CFG.paired};
    try
      if nargout==5
        [coverage_idx_tmp{f}, intron_list_tmp, cov_paired_tmp, mates_tmp] = get_reads(get_reads_args{:});
        clear cov_paired_tmp;
      elseif nargout==3 || nargout==4
        [coverage_idx_tmp{f}, intron_list_tmp] = get_reads(get_reads_args{:});
      else
        [coverage_idx_tmp{f}] = get_reads(get_reads_args{:});
      end
    catch
      warning('get_reads failed');
      intron_list = intron_list';
      reads_ok = 0;
      return;
    end
    if ~isempty(intron_list_tmp),
      intron_list = [intron_list intron_list_tmp];
    end
    if ~isempty(mates_tmp),
      % +1 shifts the mate indices returned by get_reads by one
      % (presumably from 0-based to 1-based read indices -- TODO confirm)
      paired_reads.mates = [paired_reads.mates mates_tmp+1];
    end
  end

  % process coverage: convert to exonic position indices
  if ~collapse
    % per-read mode: row 1 holds read indices, row 2 positions within the
    % queried region (this is how they are used as sparse subscripts below)
    % NOTE(review): read indices of different track files are concatenated
    % without an offset, so with more than one file they may refer to
    % different reads -- confirm against get_reads.
    coverage_idx = [coverage_idx_tmp{:}];
    coverage_idx = unique(coverage_idx', 'rows')'; % no overlapping reads
    if ~isempty(coverage_idx)
      coverage = sparse(coverage_idx(1,:), coverage_idx(2,:), 1, max(coverage_idx(1,:)), region_len)';
    else
      coverage = sparse([], [], 1, region_len, 0);
    end
    coverage = coverage(exon_rows, :);
    % no overlapping reads
    assert(~any(any(full(coverage>1))));
  else
    % collapsed mode: one coverage vector per track file. Stack them
    % vertically so that the sum adds up the coverage of all files; a
    % horizontal concatenation would make the sum a no-op and only the
    % first file would be used.
    coverage_stack = vertcat(coverage_idx_tmp{:});
    if isempty(coverage_stack)
      % nothing returned (e.g. no track files listed): zero coverage
      coverage_stack = zeros(1, region_len);
    end
    coverage = sum(coverage_stack, 1);
    coverage = sparse(coverage(exon_rows)');
  end

  % process intron list (1: intron start, 2: intron stop, 3: confirmation, 4: strand)
  if nargout>2
    num_introns = size(intron_list, 2);
    intron_list = [intron_list', zeros(num_introns, 1), (gene.strand=='-')*ones(num_introns, 1)];
    if ~isempty(intron_list)
      % confirmation = number of occurrences of each distinct intron
      [intron_list_unique, first_idx, group_idx] = unique(intron_list, 'rows');
      intron_list_unique(:,3) = accumarray(group_idx(:), 1);
      intron_list = intron_list_unique;
      clear intron_list_unique first_idx group_idx;
    end
  end

  % process read starts: count reads starting at each exonic position
  % NOTE(review): read_starts is not flipped when reverse_ret is set, unlike
  % coverage -- confirm that callers expect this.
  if nargout>3
    read_starts = zeros(gene.exonic_len, 1);
    for c = 1:size(coverage, 2),
      fidx = find(coverage(:,c)~=0, 1, 'first');
      if ~isempty(fidx),
        % a read touching the first exonic position with fewer than read_len
        % covered positions is skipped (presumably it starts upstream)
        if fidx==1 && sum(coverage(:,c),1)<CFG.read_len, continue; end
        read_starts(fidx) = read_starts(fidx) + 1;
      end
    end
  end

  % process read starts and stops: store indices (eidx based)
  if nargout>4
    paired_reads.starts = nan(1, size(coverage, 2));
    paired_reads.stops = nan(1, size(coverage, 2));
    for c = 1:size(coverage, 2),
      fidx = find(coverage(:,c)~=0, 1, 'first');
      lidx = find(coverage(:,c)~=0, 1, 'last');
      if ~isempty(fidx) && ~isempty(lidx)
        % reads touching either end of the exonic region with fewer than
        % read_len covered positions keep NaN start/stop
        if (fidx==1 || lidx==size(coverage, 1)) && sum(coverage(:,c),1)<CFG.read_len, continue; end
        paired_reads.starts(c) = gene.eidx(fidx);
        paired_reads.stops(c) = gene.eidx(lidx);
      end
    end
    % keep only mate pairs for which both reads have a valid start and stop;
    % a membership mask is used so that repeated read indices are handled
    % column by column
    valid_reads = find(~isnan(paired_reads.starts) & ~isnan(paired_reads.stops));
    keep = ismember(paired_reads.mates(1,:), valid_reads) & ismember(paired_reads.mates(2,:), valid_reads);
    paired_reads.mates = paired_reads.mates(:, keep);
  end

  % collapse coverage as single reads are not required anymore
  if CFG.paired || nargout>3
    coverage = sum(coverage, 2);
  end

  % reverse for minus strand
  if reverse_ret && gene.strand=='-'
    rev_idx = size(coverage,1):-1:1;
    coverage = coverage(rev_idx,:);
  end
Jump to Line
Something went wrong with that request. Please try again.