In [1]:
import higlass as hg
import cooler
import os
from clodius.cli import aggregate

In [2]:
def compute_domain(coords, clr: cooler.Cooler, padding=2.0):
    """Derive a (start, end) viewing window for ``coords`` from ``clr``.

    The pixel table for ``coords`` is fetched from the cooler and the smallest
    and largest ``bin1_id`` values are multiplied by ``clr.binsize`` to obtain
    the window bounds.

    Parameters
    ----------
    coords
        Region query handed unchanged to ``clr.matrix(as_pixels=True).fetch``.
    clr
        Cooler the pixels are read from.
    padding
        Scale factor applied to the window extent; the extra width is split
        evenly between both sides. ``0.0`` skips the rescaling step entirely
        (``1.0`` leaves the window unchanged as well).

    Returns
    -------
    tuple
        ``(start, end)``; ``start`` is clamped at 0, ``end`` is not clamped.

    Notes
    -----
    NOTE(review): bin ids are multiplied by the bin size directly, so the
    result is only as accurate as "bin id x bin size" is for the coordinate
    system the caller expects -- confirm against the HiGlass view.
    NOTE(review): the upper bound is the *start* of the last bin seen in
    ``bin1_id``, not its end -- confirm this is intended.
    """
    pixels = clr.matrix(as_pixels=True).fetch(coords)
    first_axis_bins = pixels["bin1_id"]

    lower = first_axis_bins.min() * clr.binsize
    upper = first_axis_bins.max() * clr.binsize

    if padding != 0.0:
        span = upper - lower
        # Half of the width gained (or lost) by rescaling goes to each side.
        margin = (int(padding * span) - span) // 2
        lower = lower - margin
        upper = upper + margin

    return max(0, lower), upper


def parse_coords(coords):
    """Split a ``"chrom:start-end"`` string into ``(chrom, start, end)``.

    Thousands separators (``,``) inside the numbers are dropped before the
    conversion to ``int``. A string without a parsable start or end raises
    ``ValueError`` from ``int``.
    """
    chrom, _, interval = coords.partition(":")
    start_text, _, end_text = interval.partition("-")

    # Convert start first, then end, after stripping the comma separators.
    start, end = (int(text.replace(",", "")) for text in (start_text, end_text))

    return chrom, start, end


def expand_coords(coords, chroms, padding=1_000_000):
    """Widen a ``"chrom:start-end"`` region by ``padding`` in total.

    Half of ``padding`` is added on each side. The new start is clamped at 0
    and the new end at ``chroms[chrom]``.

    Parameters
    ----------
    coords
        Region string understood by ``parse_coords``.
    chroms
        Mapping from chromosome name to the upper bound used for clamping.
    padding
        Total amount added to the region, split evenly between both sides.

    Returns
    -------
    str
        The widened region as ``"chrom:start-end"``, with both positions
        formatted without decimals.
    """
    chrom, start, end = parse_coords(coords)
    half_padding = padding / 2

    padded_start = max(0, start - half_padding)
    padded_end = min(chroms[chrom], end + half_padding)

    return "{}:{:.0f}-{:.0f}".format(chrom, padded_start, padded_end)

def prepare_tads(path_to_tads, outdir, assembly="hg38"):
    """Aggregate a domain BED file with clodius and return the output path.

    The output file is placed in ``outdir`` and named after the input file,
    with a trailing ``.bed.gz`` removed and ``.beddb`` appended. The same three
    columns are passed for both anchors of each record.

    Parameters
    ----------
    path_to_tads
        Path to the input domain file.
    outdir
        Directory that receives the aggregated file; created if missing.
    assembly
        Assembly name forwarded to ``aggregate._bedpe``. Defaults to
        ``"hg38"``, the value that used to be hard-coded.

    Returns
    -------
    str
        Path of the aggregated file.
    """
    # An empty outdir means "current directory"; makedirs("") would raise.
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    name = os.path.basename(path_to_tads).removesuffix(".bed.gz")
    out_path = os.path.join(outdir, f"{name}.beddb")

    # NOTE(review): the column indices are passed through as in the original
    # call -- confirm whether aggregate._bedpe expects 0- or 1-based columns.
    aggregate._bedpe(
        path_to_tads,
        out_path,
        assembly=assembly,
        chr1_col=1,
        from1_col=2,
        to1_col=3,
        chr2_col=1,
        from2_col=2,
        to2_col=3
    )
    return out_path

In [3]:
# Remote tileset (served from higlass.io) used for the gene-annotation tracks.
gene_annotation_tile = hg.remote(
    uid="P0PLbQMwTYGy-5uPIQid7A",
    server="https://higlass.io/api/v1/",
    name="hg38 Gene annotation",
)

# One .mcool file per sample, keyed by sample label.
coolers = {
    "WT": "../data/output/nfcore_hic/mcools/hg38_MCF10A_WT_merged.mcool",
    "T1": "../data/output/nfcore_hic/mcools/hg38_MCF10A_T1_merged.mcool",
    "C1": "../data/output/nfcore_hic/mcools/hg38_MCF10A_C1_merged.mcool"
}

# Two domain BED files (REP1, REP2) per sample, keyed like `coolers`.
tads = {
    "WT": ["../data/output/tad_analysis/ICE/50000/hg38_001_MCF10A_WT_REP1_domains.bed.gz",
           "../data/output/tad_analysis/ICE/50000/hg38_002_MCF10A_WT_REP2_domains.bed.gz"],
    "T1": ["../data/output/tad_analysis/ICE/50000/hg38_003_MCF10A_T1_REP1_domains.bed.gz",
           "../data/output/tad_analysis/ICE/50000/hg38_004_MCF10A_T1_REP2_domains.bed.gz"],
    "C1": ["../data/output/tad_analysis/ICE/50000/hg38_005_MCF10A_C1_REP1_domains.bed.gz",
           "../data/output/tad_analysis/ICE/50000/hg38_006_MCF10A_C1_REP2_domains.bed.gz"],
}

# Regions of interest keyed by gene symbol, written as "chrom:start-end" with
# thousands separators (the format parse_coords accepts).
# NOTE(review): presumably hg38 coordinates, matching the files above -- confirm.
genes = {
    "BRCA1": "chr17:43,044,295-43,170,245",
    "BRCA2": "chr13:32,315,086-32,400,268",
    "TP53": "chr17:7,661,779-7,687,546",
    "PTEN": "chr10:87,862,638-87,971,930",
    "PIK3CA": "chr3:179,148,114-179,240,093",
    "ERBB2": "chr17:39,687,914-39,730,426",
    "MYC": "chr8:127,735,434-127,742,951"
}

# Scratch directory passed to prepare_tads as its output directory.
tmpdir = "/tmp/visualizing_tads_with_bc_oncogenes"
os.makedirs(tmpdir, exist_ok=True)

# Empty mapping; none of the visible cells populate it.
domains_beddb = {}


# Chromosome name -> length, taken from the /resolutions/1000000 group of the
# first file listed in `coolers`.
chroms = (
    cooler.Cooler(f"{list(coolers.values())[0]}::/resolutions/1000000")
    .chroms()[:]
    .set_index("name")["length"]
    .to_dict()
)


# Per-sample track registries, each keyed by sample label.
cooler_tracks, domains_tracks = {}, {}
chrom_sizes_tracks = {}
hga_tracks, vga_tracks = {}, {}


# Aggregate both replicate domain files of every sample and wrap each result
# in a "bedlike" track; the list order follows (path1, path2).
for sample, (path1, path2) in tads.items():
    domains_tracks[sample] = [
        hg.bed2ddb(prepare_tads(path, tmpdir)).track("bedlike", height=25)
        for path in (path1, path2)
    ]


tracks = {}

# Build, for every sample, the heatmap, chromosome-label and gene-annotation
# tracks. `clr` is the path to the sample's .mcool file.
for sample, clr in coolers.items():
    cooler_tracks[sample] = hg.cooler(clr).track(
        "linear-heatmap",
        height=200,
        options={"labelPosition": "hidden"},
    )

    chrom_sizes_tracks[sample] = hg.cooler(clr).track(
        "chromosome-labels",
        height=25,
        options={"fontSize": 16},
    )
    hga_tracks[sample] = gene_annotation_tile.track(
        "horizontal-gene-annotations",
        height=150,
        options={"fontSize": 16},
    )
    vga_tracks[sample] = gene_annotation_tile.track(
        "vertical-gene-annotations",
        width=150,
    )


# URI of the cooler used to compute view domains: the first file in `coolers`,
# narrowed to the first group reported by list_coolers when it is multi-res.
# NOTE(review): which resolution grps[0] refers to depends on the ordering
# returned by cooler.fileops.list_coolers -- confirm it is the intended one.
uri = list(coolers.values())[0]
if cooler.fileops.is_multires_file(uri):
    grps = cooler.fileops.list_coolers(uri)
    uri = "{}::{}".format(uri, grps[0])

In [4]:
# BRCA1: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["BRCA1"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [5]:
# BRCA2: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["BRCA2"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [6]:
# TP53: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["TP53"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [7]:
# PTEN: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["PTEN"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [8]:
# PIK3CA: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["PIK3CA"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [9]:
# ERBB2: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["ERBB2"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view

In [10]:
# MYC: gene annotations and chromosome labels on top, then for each sample
# its heatmap followed by the two replicate domain tracks.
domain = compute_domain(expand_coords(genes["MYC"], chroms), cooler.Cooler(uri))

width = 6

view = hg.view(
    (hga_tracks["WT"], "top"),
    (chrom_sizes_tracks["WT"], "top"),
    *(
        (track, "top")
        for sample_label in ("WT", "T1", "C1")
        for track in (cooler_tracks[sample_label], *domains_tracks[sample_label])
    ),
    width=width,
).domain(x=domain, y=domain)

view