We can import the package using:

In [6]:
import pyBigWig
from pyfasta import Fasta

Now we are ready to play around with pyBigWig! We can open a BigWig file using:

In [2]:
# Location of the bigWig file explored throughout this notebook.
BIGWIG_PATH = "/home/iker/GenomeDK/nnPib2019/data/training_data/C14M18.bigwig"
bw = pyBigWig.open(BIGWIG_PATH)

Let's first check if `bw` is really a BigWig file:

In [3]:
bw.isBigWig()

True

It is! Now, we can look at the different chromosomes in the file:

In [4]:
bw.chroms()

{'chr1': 248956422,
 'chr10': 133797422,
 'chr11': 135086622,
 'chr11_KI270721v1_random': 100316,
 'chr12': 133275309,
 'chr13': 114364328,
 'chr14': 107043718,
 'chr14_GL000009v2_random': 201709,
 'chr14_GL000194v1_random': 191469,
 'chr14_GL000225v1_random': 211173,
 'chr14_KI270722v1_random': 194050,
 'chr14_KI270723v1_random': 38115,
 'chr14_KI270724v1_random': 39555,
 'chr14_KI270725v1_random': 172810,
 'chr14_KI270726v1_random': 43739,
 'chr15': 101991189,
 'chr15_KI270727v1_random': 448248,
 'chr16': 90338345,
 'chr16_KI270728v1_random': 1872759,
 'chr17': 83257441,
 'chr17_GL000205v2_random': 185591,
 'chr17_KI270729v1_random': 280839,
 'chr17_KI270730v1_random': 112551,
 'chr18': 80373285,
 'chr19': 58617616,
 'chr1_KI270706v1_random': 175055,
 'chr1_KI270708v1_random': 127682,
 'chr1_KI270709v1_random': 66860,
 'chr1_KI270711v1_random': 42210,
 'chr1_KI270712v1_random': 176043,
 'chr1_KI270713v1_random': 40745,
 'chr1_KI270714v1_random': 41717,
 'chr2': 242193529,
 'chr20': 6

If we want to know the length of a single chromosome we can use:

In [5]:
bw.chroms("chr22")

50818468

We can also check the header of the BigWig file:

In [6]:
bw.header()

{'version': 4,
 'nLevels': 10,
 'nBasesCovered': 3095449528,
 'minVal': 0,
 'maxVal': 36,
 'sumData': 1122619497,
 'sumSquared': 1773530499}

We can begin exploring our file. First, we can consult individual values by using:

In [7]:
bw.values("chr22", 0, 3)

[0.17124000191688538, 0.17124000191688538, 0.17124000191688538]

This gives us the first three values of chromosome 22. Instead, we might be interested in knowing the intervals, because some values might be repeated over a range.

In [8]:
bw.intervals("chr22", 0, 10712324)

((0, 10711304, 0.17124000191688538),
 (10711304, 10711373, 0.983299970626831),
 (10711373, 10712279, 0.17124000191688538),
 (10712279, 10712324, 0.07452999800443649))

The above code retrieves the intervals from the beginning of the chromosome up to position 10712324. If we want to include one more interval, we can extend the end position by one:

In [9]:
bw.intervals("chr22", 0, 10712325)

((0, 10711304, 0.17124000191688538),
 (10711304, 10711373, 0.983299970626831),
 (10711373, 10712279, 0.17124000191688538),
 (10712279, 10712324, 0.07452999800443649),
 (10712324, 10712428, 0.04546000063419342))

Many times we are interested in calculating the average over a range of values. For example:

In [10]:
bw.stats("chr22", 0, 10712324)

[0.17137697442745817]

We can also compute the maximum value over a range by using:

In [11]:
bw.stats("chr22", 10711304, 10712324, type='max')

[0.983299970626831]

In [12]:
# Length (end - start) of each interval reported for chr22.
lst = [end - start for start, end, _value in bw.intervals("chr22")]

In [13]:
# Close the bigWig file; the interval lengths already stored in `lst` are
# a plain Python list and are still used by the cells that follow.
bw.close()

In [16]:
# Number of intervals collected for chr22 (one list entry per interval).
len(lst)

1088622

In [17]:
# NOTE(review): this displays the entire list (over a million entries, per
# the len() result above); consider a slice such as lst[:10] to keep the
# output readable.
lst

[10711304,
 69,
 906,
 45,
 104,
 23,
 11,
 1159,
 132,
 31,
 84,
 4,
 43,
 1072,
 63,
 75,
 26,
 1468,
 66,
 17,
 24,
 12,
 46,
 13,
 5,
 36,
 30,
 17,
 60,
 22,
 13,
 5,
 106,
 37,
 6,
 12,
 205,
 42,
 56,
 14,
 68,
 100,
 745,
 23,
 142,
 8,
 3780,
 69,
 58,
 25,
 73,
 59,
 350,
 70,
 67,
 46,
 3822,
 19,
 60,
 27,
 1,
 47,
 29,
 50,
 329,
 46,
 218,
 103,
 24,
 29,
 1,
 65,
 29,
 15,
 5,
 10,
 5,
 17,
 6,
 5,
 25,
 1,
 13,
 52,
 4,
 25,
 15,
 5,
 127,
 12,
 28,
 42,
 21,
 5,
 22,
 37,
 238,
 4,
 12,
 27,
 112,
 5,
 23,
 4,
 89,
 61,
 64,
 95,
 183,
 303,
 131,
 517,
 107,
 8,
 50,
 16,
 2,
 76,
 39,
 859,
 183,
 7396,
 113,
 611,
 111,
 10109,
 77,
 1086,
 35,
 144,
 1,
 10256,
 181,
 6889,
 39,
 67,
 23,
 6025,
 127,
 85,
 163,
 467,
 150,
 2373,
 73,
 1431,
 1,
 11,
 127,
 38,
 6,
 303,
 97,
 59,
 6,
 26,
 136,
 1073,
 130,
 582,
 5,
 21,
 126,
 562,
 112,
 153945,
 144,
 723,
 56,
 3135,
 29,
 2142,
 116,
 2796,
 18,
 8054,
 3,
 145,
 8,
 4941,
 74,
 1686,
 107,
 119,
 104,
 36,

In [8]:
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so
# `f` was never assigned. Presumably pyfasta was still preparing its index
# for this genome-sized FASTA -- confirm, and let the cell run to completion.
f = Fasta('/home/iker/GenomeDK/nnPib2019/data/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta')


KeyboardInterrupt: 

In [9]:
# NOTE(review): raised NameError in the recorded run because the preceding
# Fasta(...) cell was interrupted before `f` was bound; re-run that cell to
# completion before executing this one.
sorted(f.keys())

NameError: name 'f' is not defined