In [1]:
%pylab inline
import pyBigWig

def _multiprocess_gene_coverage(data):
    """Compute mean-normalized coverage for one gene group from a bigwig.

    WigReader is not pickleable when passed as an argument, so the bigwig is
    received as a string and the reader is opened (and always closed) inside
    this function.

    Parameters
    ----------
    data: tuple
          (gene_group, bigwig, offset_5p, offset_3p, max_positions,
          orientation), in that order.
          `bigwig` is whatever WigReader accepts (a path string according to
          the note above). `orientation` must be '5prime' or '3prime'.
          `max_positions` may be None, in which case the coverage is not
          truncated.

    Returns
    -------
    norm_cov: Series
              coverage divided by its mean, with NaN replaced by 0

    Raises
    ------
    ValueError
        If `orientation` is neither '5prime' nor '3prime'.
    AssertionError
        If the smallest coverage index is not equal to -gene_offset_5p.
    """
    gene_group, bw, offset_5p, offset_3p, max_positions, orientation = data
    bw = WigReader(bw)
    # try/finally so the reader is released even when gene_coverage(), an
    # assertion or a label lookup raises; otherwise a failing worker would
    # leave the bigwig handle open.
    try:
        coverage, gene_offset_5p, gene_offset_3p = gene_coverage(
            gene_group, bw, offset_5p, offset_3p)
        coverage = coverage.fillna(0)

        if orientation not in ('5prime', '3prime'):
            raise ValueError(
                '{} orientation not supported'.format(orientation))

        # Pre-bind so the final assertion message can always be formatted,
        # even when the truncation step below is skipped.
        min_index = None
        max_index = None

        if max_positions is not None and len(coverage.index) > 0:
            # min_index will correspond to the gene_offset_5p in general
            min_index = coverage.index.min()
            max_index = coverage.index.max()
            assert min_index == -gene_offset_5p, 'min_index and gene_offset_5p are not same| min_index: {} | gene_offset_5p: {}'.format(
                min_index, -gene_offset_5p)

            if orientation == '5prime':
                # NOTE(review): np.arange excludes its end point, so the
                # label min(max_index, max_positions) itself is dropped --
                # confirm this is intended.
                coverage = coverage[np.arange(min_index,
                                              min(max_index, max_positions))]
            else:
                # orientation == '3prime'
                # We now want to be tracking things from the end position.
                # Per the original author: gene_coverage() takes care of the
                # strand, so the 3prime end is always the tail of the array,
                # and the stop codon sits gene_offset_3p upstream of
                # max_index.
                # NOTE(review): Series.reindex aligns on existing labels
                # (labels absent from the old index become NaN); it does not
                # relabel positions -- confirm this matches the intent.
                coverage = coverage.reindex(
                    np.arange(-max_index, -min_index, 1))
                coverage = coverage[np.arange(-max_positions, gene_offset_3p)]

        assert coverage is not None, 'coverage is none | max_index={} | min_index={}| gene_offset_3p={} | gene_offset_5p={}'.format(
            max_index, min_index, gene_offset_3p, gene_offset_5p)
        coverage_mean = coverage.mean()
        norm_cov = coverage / coverage_mean
        # 0/0 (all-zero coverage) yields NaN; report it as zero coverage.
        norm_cov = norm_cov.fillna(0)
        return norm_cov
    finally:
        bw.close()

Populating the interactive namespace from numpy and matplotlib


ModuleNotFoundError: No module named 'pygtf'