![FISH diagram](assets/diagram.png)

In [1]:
import pandas as pd
import bioframe as bf
import higlass as hg
import cooler
import os

In [2]:
# Region strings (chrom:start-end, with thousands separators) for the windows
# of interest on chr8 and chr10.
chr8_window = "chr8:126,337,000-128,236,000"
chr10_window = "chr10:71,279,000-73,312,000"
# NOTE(review): this window starts at 73,312,001 whereas probe3_window below
# starts at 73,312,000 -- confirm the one-base difference is intended.
chr10_outside_window = "chr10:73,312,001-74,000,000"

# Windows from which candidate clones for each probe are drawn.
probe1_window = "chr8:127,500,000-128,236,000"
probe2_window = "chr10:72,500,000-73,312,000"
probe3_window = "chr10:73,312,000-74,000,000"


# Both clone tables get their chromStart/chromEnd columns renamed to start/end,
# which are the column names the later cells index by.
interval_column_names = {"chromStart": "start", "chromEnd": "end"}

rp11 = pd.read_table("../data/input/bac_probes/rp11_clones.tsv.gz", sep="\t")
rp11 = rp11.rename(columns=interval_column_names)

fish = pd.read_table("../data/input/bac_probes/fish_clones.tsv.gz", sep="\t")
fish = fish.rename(columns=interval_column_names)

rp11

Unnamed: 0,bin,chrom,start,end,name,score,strand,thickStart,thickEnd,reserved,blockCount,blockSizes,chromStarts
0,0,chr1,66961816,67140793,RP11-1077I9,1000,+,66961816,67140793,0,2,834716,0178261
1,0,chr1,67045504,67194804,RP11-120G19,1000,-,67045504,67194804,0,2,613526,0148774
2,0,chr1,67063753,67239840,RP11-590K22,1000,+,67063753,67239840,0,2,497441,0175646
3,0,chr1,67063756,67205131,RP11-772I3,1000,+,67063756,67205131,0,2,214498,0140877
4,0,chr1,201151313,201331954,RP11-101I9,1000,+,201151313,201331954,0,2,518753,0179888
...,...,...,...,...,...,...,...,...,...,...,...,...,...
85898,73,chr22_KI270734v1_random,110454,133537,RP11-1053O2,750,-,110454,133537,0,2,470572,022511
85899,585,chr22_KI270734v1_random,17812,108718,RP11-1151I6,500,-,17812,108718,0,2,874584,090322
85900,585,chr22_KI270734v1_random,17812,61167,RP11-991H7,750,-,17812,61167,0,2,545531,042824
85901,585,chr22_KI270734v1_random,36761,61778,RP11-94B18,500,-,36761,61778,0,2,539601,024416


In [3]:
# Annotate every RP11 clone with
#   * FISH_clone: whether its name also occurs in the `fish` table, and
#   * size: end - start,
# then keep only the columns used by the following cells.
#
# `assign` builds a new frame instead of writing columns into `rp11` in place.
# That keeps the cell safe to re-run: after the first run `rp11` is a column
# subset, and in-place column assignment on such a subset triggers pandas'
# SettingWithCopyWarning (on pandas versions without copy-on-write).
rp11 = rp11.assign(
    FISH_clone=rp11["name"].isin(fish["name"]),
    size=rp11["end"] - rp11["start"],
)[["chrom", "start", "end", "name", "score", "strand", "size", "FISH_clone"]]

rp11

Unnamed: 0,chrom,start,end,name,score,strand,size,FISH_clone
0,chr1,66961816,67140793,RP11-1077I9,1000,+,178977,False
1,chr1,67045504,67194804,RP11-120G19,1000,-,149300,False
2,chr1,67063753,67239840,RP11-590K22,1000,+,176087,False
3,chr1,67063756,67205131,RP11-772I3,1000,+,141375,False
4,chr1,201151313,201331954,RP11-101I9,1000,+,180641,False
...,...,...,...,...,...,...,...,...
85898,chr22_KI270734v1_random,110454,133537,RP11-1053O2,750,-,23083,False
85899,chr22_KI270734v1_random,17812,108718,RP11-1151I6,500,-,90906,False
85900,chr22_KI270734v1_random,17812,61167,RP11-991H7,750,-,43355,False
85901,chr22_KI270734v1_random,36761,61778,RP11-94B18,500,-,25017,False


In [4]:
def select(df, query):
    """Return the rows picked by ``bf.select`` that end before the query's end.

    ``query`` is a region string of the form ``chrom:start-end``; thousands
    separators (``,``) in the end coordinate are accepted. Rows returned by
    ``bf.select(df, query)`` are kept only when their ``end`` value is strictly
    smaller than the end coordinate of ``query``.

    Only the end of each row is checked here: the row's ``start`` is not
    compared with the start of ``query``.
    """
    overlapping = bf.select(df, query)

    # The end coordinate is whatever follows the last "-" in the region string.
    window_end = int(query.rsplit("-", 1)[-1].replace(",", ""))

    ends_inside = overlapping["end"] < window_end
    return overlapping[ends_inside]

In [5]:
# Candidate clones for each probe window.
probe1_candidates = select(rp11, probe1_window)
probe2_candidates = select(rp11, probe2_window)
probe3_candidates = select(rp11, probe3_window)

candidate_dir = "../data/output/bac_clones/candidate_probes"
os.makedirs(candidate_dir, exist_ok=True)

# Column subset written to the headerless .bed files.
cols = ["chrom", "start", "end", "name", "score", "strand"]

# File stem -> table. "probes" is the concatenation of the three candidate
# sets; probes.bed is the file the clodius cell aggregates.
candidate_tables = {
    "probe1": probe1_candidates,
    "probe2": probe2_candidates,
    "probe3": probe3_candidates,
    "probes": pd.concat([probe1_candidates, probe2_candidates, probe3_candidates]),
}

for stem, table in candidate_tables.items():
    # <stem>.bed: the `cols` subset, tab-separated, no header, no index.
    table[cols].to_csv(os.path.join(candidate_dir, f"{stem}.bed"), sep="\t", header=False, index=False)
    # <stem>.tsv: every column, tab-separated, with a header row.
    table.to_csv(os.path.join(candidate_dir, f"{stem}.tsv"), sep="\t", index=False)

probe1_candidates

Unnamed: 0,chrom,start,end,name,score,strand,size,FISH_clone
42815,chr8,127853174,128024415,RP11-125A17,1000,+,171241,False
42816,chr8,127897080,128079309,RP11-164J24,1000,-,182229,False
42817,chr8,127918163,128090054,RP11-946L14,1000,+,171891,False
46166,chr8,127372954,127538977,RP11-1150B6,1000,+,166023,False
46167,chr8,127447349,127614698,RP11-367L7,1000,-,167349,False
46168,chr8,127540610,127684674,RP11-1145O20,1000,+,144064,False
46169,chr8,127543566,127702694,RP11-1136L8,1000,-,159128,False
46170,chr8,127989740,128184068,RP11-748F3,1000,-,194328,False


In [6]:
%%bash
# Abort the cell as soon as any command fails (for example if the output
# directory does not exist), instead of running clodius from the wrong place.
set -euo pipefail

cd ../data/output/bac_clones/candidate_probes/

# probes.bed is a plain BED file, so both anchors of every record are read
# from the same columns (1 = chrom, 2 = start, 3 = end).
clodius aggregate bedpe \
    --chr1-col 1 \
    --chr2-col 1 \
    --from1-col 2 \
    --from2-col 2 \
    --to1-col 3 \
    --to2-col 3 \
    --assembly hg38 \
    probes.bed


In [7]:
def compute_domain(coords, clr: "cooler.Cooler", padding=2.0):
    """Compute a padded ``(start, end)`` span around the pixels found in ``coords``.

    The pixels returned by ``clr.matrix(as_pixels=True).fetch(coords)`` are
    inspected, and the smallest and largest ``bin1_id`` are multiplied by
    ``clr.binsize`` to obtain the span. The span is then rescaled around its
    centre by ``padding``.

    Parameters
    ----------
    coords
        Region passed unchanged to ``fetch``.
    clr
        Object providing ``matrix(as_pixels=True).fetch(...)`` and ``binsize``.
    padding
        Factor by which the span is rescaled (``2.0`` doubles it, adding half
        of the original span on each side). ``0.0`` disables rescaling and the
        raw span is returned.

    Returns
    -------
    tuple
        ``(start, end)``; ``start`` is clamped so that it is never negative.

    Raises
    ------
    ValueError
        If ``coords`` contains no pixels, in which case no span can be derived.
    """
    pixels = clr.matrix(as_pixels=True).fetch(coords)

    # Without this guard min()/max() yield NaN and the failure surfaces later
    # as an opaque "cannot convert float NaN to integer" (or as a NaN domain).
    if len(pixels) == 0:
        raise ValueError(f"no pixels found for {coords!r}: cannot compute a domain")

    # NOTE(review): positions are bin1_id * binsize, so they live in whatever
    # numbering bin1_id uses (possibly genome-wide rather than per-chromosome)
    # -- confirm this matches what the caller expects.
    # The upper bound is the start of the last bin, not its end.
    pos1 = pixels["bin1_id"].min() * clr.binsize
    pos2 = pixels["bin1_id"].max() * clr.binsize

    if padding != 0.0:
        extent = pos2 - pos1
        margin = (int(padding * extent) - extent) // 2
        pos1 -= margin
        pos2 += margin

    return max(0, pos1), pos2

In [8]:
# Gene-annotation tileset fetched from a remote HiGlass server.
gene_annotation_tile = hg.remote(
    server="https://higlass.io/api/v1/",
    uid="P0PLbQMwTYGy-5uPIQid7A",
    name="hg38 Gene annotation",
)

# Multi-resolution contact maps to show side by side (T1 is currently disabled).
coolers = [
    "../data/output/nfcore_hic/mcools/hg38_MCF10A_WT_merged.mcool",
    # "../data/output/nfcore_hic/mcools/hg38_MCF10A_T1_merged.mcool",
    "../data/output/nfcore_hic/mcools/hg38_MCF10A_C1_merged.mcool",
]

# NOTE(review): presumably the file written by the `clodius aggregate bedpe`
# cell -- confirm the output name matches.
probes = "../data/output/bac_clones/candidate_probes/probes.bedpedb"

# One tileset and one 500px-high heatmap track per contact map, keyed by path.
cooler_tiles = {}
cooler_tracks = {}
for clr in coolers:
    tileset = hg.cooler(clr)
    cooler_tiles[clr] = tileset
    cooler_tracks[clr] = tileset.track("heatmap", height=500)

probes_track = hg.bed2ddb(probes).track("bedlike")

# Chromosome labels are taken from the first contact map's tileset.
first_tileset = next(iter(cooler_tiles.values()))
chrom_sizes_track = first_tileset.track("chromosome-labels")
hga_track = gene_annotation_tile.track("horizontal-gene-annotations")
vga_track = gene_annotation_tile.track("vertical-gene-annotations")

# Top edge: gene annotations + probes, then chromosome labels on top of that.
top_track = hg.combine(
    hg.combine(hga_track, probes_track, height=150),
    chrom_sizes_track,
    height=200,
)

# Left edge: the same stack built from the vertical gene-annotation track.
left_track = hg.combine(
    hg.combine(vga_track, probes_track),
    chrom_sizes_track,
    width=200,
)

In [9]:
# The displayed span is derived from the first contact map. For a
# multi-resolution file the first listed cooler group is used.
uri = coolers[0]
if cooler.fileops.is_multires_file(uri):
    grps = cooler.fileops.list_coolers(uri)
    uri = f"{uri}::{grps[0]}"
domain = compute_domain(chr8_window, cooler.Cooler(uri))

# Share the available width equally between the heatmaps.
# NOTE(review): 12 is presumably the number of layout columns -- confirm.
width = 12 // len(cooler_tracks)

# One view per contact map, all opened on the same x/y domain.
views = {}
for key, track in cooler_tracks.items():
    panel = hg.view((left_track, "left"), (top_track, "top"), (track, "center"), width=width)
    views[key] = panel.domain(x=domain, y=domain)

# Place the views next to each other and lock them together.
panels = list(views.values())
view = panels[0]
for v in panels[1:]:
    view = hg.concat("horizontal", view, v)
view = view.locks(hg.lock(*panels))
view