In [1]:
import os
import numpy as np
import pyBigWig
from collections import defaultdict
import matplotlib.pyplot as plt

In [2]:
# Absolute path to the project root. NOTE(review): the path-building code
# visible in this notebook uses relative paths and does not prepend this.
proj_root = "/users/kcochran/projects/procapnet/"

# Genome assembly whose chrom.sizes file and bigWigs are inspected below.
which_genome = "t2t"

# The 22 numbered chromosomes followed by the two sex chromosomes.
chroms_to_look_at = ["chr" + str(n) for n in range(1, 23)]
chroms_to_look_at = chroms_to_look_at + ["chrX", "chrY"]

# Cell types that are expected to have genome-wide bigWigs.
cell_types = [
    "K562",
    "A673",
    "CACO2",
    "CALU3",
    "HUVEC",
    "MCF10A",
]

# Relative path of the form "<genome>/<genome>.chrom.sizes".
chrom_sizes_filepath = "/".join([which_genome, which_genome + ".chrom.sizes"])

In [3]:
def load_chrom_sizes(chrom_sizes_filepath):
    """Read a tab-separated chromosome-sizes file into a dict.

    Each non-blank line must hold a chromosome name in its first
    tab-separated column and an integer length in its second; any further
    columns are ignored. Blank lines (e.g. a trailing empty line) are skipped.

    Args:
        chrom_sizes_filepath: path to the chromosome-sizes text file.

    Returns:
        dict mapping chromosome name (str) to its size (int), in file order.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if a non-blank line has fewer than two columns.
        ValueError: if the second column is not an integer.
    """
    chrom_sizes = {}
    with open(chrom_sizes_filepath) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines instead of failing on a missing column.
                continue
            fields = stripped.split('\t')
            chrom_sizes[fields[0]] = int(fields[1])
    return chrom_sizes

# Parse the chrom.sizes file for the selected genome into a name -> length dict.
chrom_sizes = load_chrom_sizes(chrom_sizes_filepath)

In [4]:
# Display the parsed chromosome sizes as a sanity check.
chrom_sizes

{'chr1': 248387328,
 'chr2': 242696752,
 'chr3': 201105948,
 'chr4': 193574945,
 'chr5': 182045439,
 'chr6': 172126628,
 'chr7': 160567428,
 'chrX': 154259566,
 'chr9': 150617247,
 'chr8': 146259331,
 'chr11': 135127769,
 'chr10': 134758134,
 'chr12': 133324548,
 'chr13': 113566686,
 'chr14': 101161492,
 'chr15': 99753195,
 'chr16': 96330374,
 'chr17': 84276897,
 'chr18': 80542538,
 'chr20': 66210255,
 'chrY': 62460029,
 'chr19': 61707364,
 'chr22': 51324926,
 'chr21': 45090682,
 'chrM': 16569}

In [5]:
def get_bw_path(cell_type, pos_or_neg, which_genome = which_genome, proj_root=proj_root):
    """Return the relative path of a cell type's genome-wide strand bigWig.

    The path has the form
    "bigwigs/<which_genome>/<cell_type>/genomewide/<cell_type>.<pos_or_neg>.bigWig".

    Args:
        cell_type: cell type name used for both the directory and the file stem.
        pos_or_neg: strand label; must be "pos" or "neg".
        which_genome: genome assembly directory name.
        proj_root: accepted but not used when building the path.

    Returns:
        The bigWig path as a string.

    Raises:
        AssertionError: if pos_or_neg is not "pos"/"neg", or if the resulting
            path does not exist on disk.
    """
    assert pos_or_neg in ("pos", "neg"), pos_or_neg

    genomewide_dir = "/".join(["bigwigs", which_genome, cell_type, "genomewide"]) + "/"
    bw_filename = cell_type + "." + pos_or_neg + ".bigWig"
    bw_path = genomewide_dir + bw_filename

    # Fail early, with the offending path as the message, if the file is missing.
    assert os.path.exists(bw_path), bw_path
    return bw_path

In [7]:
def _print_bw_values(bw, chrom, start, end):
    """Print the bigWig values for one interval, reporting (not hiding) read errors."""
    try:
        vals = bw.values(chrom, start, end, numpy=True)
    except Exception as err:
        # Name the failing chromosome and the actual error rather than a bare "error".
        print("error reading " + chrom + ":", repr(err))
    else:
        print("Some " + chrom + " values:", vals)


# Intervals to spot-check in every bigWig: two fixed loci first, then the
# same small window on each chromosome of interest.
probe_intervals = [("chr1", 103525338, 103525341), ("chr17", 1000000, 1000003)]
probe_intervals += [(chrom, 1900000, 1900003) for chrom in chroms_to_look_at]

for cell_type in cell_types:
    for strand in ["pos", "neg"]:
        print("\n")
        print(cell_type, strand)

        bw_path = get_bw_path(cell_type, strand)
        print("BigWig path:", bw_path)
        print("Exists?", os.path.exists(bw_path))

        # for hg38, this is about 24 GB
        print("File size:", "%0.1f" % (os.path.getsize(bw_path) / 1000000000) + "GB")

        # Open each file once instead of once per probe.
        try:
            bw = pyBigWig.open(bw_path, "r")
        except Exception as err:
            print("error opening bigWig:", repr(err))
            continue

        try:
            # Each interval is handled independently, so one missing chromosome
            # does not abort the remaining checks for this file.
            for chrom, start, end in probe_intervals:
                _print_bw_values(bw, chrom, start, end)
        finally:
            # Always release the file handle, even if a probe fails unexpectedly.
            bw.close()



K562 pos
BigWig path: bigwigs/t2t/K562/genomewide/K562.pos.bigWig
Exists? True
File size: 25.0GB
Some chr1 values: [0.00753028 0.00284478 0.00536302]
Some chr17 values: [0.00079164 0.01530167 0.00129722]
Some chr1 values: [0.00499748 0.10381041 0.0878109 ]
Some chr2 values: [0.00130087 0.00225842 0.00168608]
Some chr3 values: [0.04884481 0.00470593 0.00281491]
Some chr4 values: [0.02596099 0.00507441 0.0039937 ]
Some chr5 values: [0.0032201  0.00423002 0.03024908]
Some chr6 values: [0.00052564 0.00097019 0.00200137]
Some chr7 values: [0.00165306 0.00048719 0.00079431]
Some chr8 values: [0.00459772 0.00109251 0.00910152]
Some chr9 values: [0.03973553 0.06789248 0.03199045]
Some chr10 values: [0.00400329 0.01045551 0.00527009]
Some chr11 values: [0.00113676 0.0015545  0.00144256]
Some chr12 values: [0.03563515 0.00514021 0.00671986]
Some chr13 values: [0.00034687 0.00018882 0.00034256]
Some chr14 values: [0.0014553  0.00193492 0.00132024]
Some chr15 values: [0.00695324 0.0031697  0.048

Some chr9 values: [0.00412319 0.01280338 0.01145639]
Some chr10 values: [0.02680632 0.02016312 0.01277045]
Some chr11 values: [0.00018154 0.00039773 0.00033093]
Some chr12 values: [0.00055436 0.0015727  0.0005787 ]
Some chr13 values: [3.8254380e-05 6.8678783e-05 5.4237084e-04]
Some chr14 values: [0.00117518 0.00199958 0.00296218]
Some chr15 values: [0.0003352  0.00237943 0.00019151]
Some chr16 values: [0.00037477 0.0008456  0.00102245]
Some chr17 values: [0.00127321 0.00667747 0.00091641]
Some chr18 values: [7.4784963e-05 4.4797114e-05 3.5446697e-05]
Some chr19 values: [0.001491   0.01129636 0.00028779]
Some chr20 values: [0.0001005  0.00010336 0.00291914]
Some chr21 values: [3.0435247e-06 4.0793278e-05 2.1465814e-06]
Some chr22 values: [0.04460002 0.01152858 0.0915734 ]
Some chrX values: [0.00010952 0.00036257 0.00045089]
Some chrY values: [9.381722e-04 4.516258e-05 7.832865e-04]


CALU3 pos
BigWig path: bigwigs/t2t/CALU3/genomewide/CALU3.pos.bigWig
Exists? True
File size: 25.2GB
Some