In [1]:
!pip install git+https://github.com/manzt/hg.git
!pip install bioframe
!pip install jupyter-scatter
!pip install hg
!pip install pillow
!pip install pyarrow

In [2]:
# from IPython.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))

# Bioframe for application programming

Here we use bioframe to link two dynamic widgets:

* An interactive scatter plot where each point represents a 50-kb-long genomic locus
* An interactive genome browser

The goal is to enable brushing points in the scatter plot to automatically highlight the corresponding locations in the genome browser.

In the callback code, adjacent intervals selected in the scatter plot are combined into larger windows using `bioframe.merge` before updating the browser widget.

### Load HiGlass config

In [4]:
import hg
# Saved HiGlass view configuration used as the starting layout for the browser.
VIEWCONF_URL = "https://higlass.io/api/v1/viewconf?d=LT2GVwbmTUWYWoG0yJy5CA"

conf = hg.Viewconf.from_url(VIEWCONF_URL)
conf  # bare last expression so the notebook displays the loaded config

### Load columnar data for dynamic tileset and jupyter-scatter

In [5]:
from dataclasses import dataclass, field
import uuid
from PIL import ImageColor

import numpy as np
import bioframe as bf
import pyarrow.parquet
import pandas as pd

# Remote inputs (all hosted in the open2c/bioframe-poster repository).
CLUSTERS_URL = 'https://github.com/open2c/bioframe-poster/blob/main/data/HCT116_Unsynchronized_Auxin360mins.hg38.50000.clusters.E1-E10.eignorm_sqrt.chr1-22.kmeans8_5.tsv?raw=true'
EIGS_URL = 'https://github.com/open2c/bioframe-poster/blob/main/data/HCT116_Unsynchronized_Auxin360mins.hg38.50000.E0-E128.trans.eigvecs.pq?raw=true'
BINS_URL = 'https://github.com/open2c/bioframe-poster/blob/main/data/hg38.bins.50000.pq?raw=true'

# Tab-separated cluster table; later cells read its chrom/start/end/name/color columns.
clusters = pd.read_csv(CLUSTERS_URL, sep="\t")

# Only the columns needed downstream are read from each parquet file.
eigs = pd.read_parquet(EIGS_URL, columns=["GC", "E0", "E1", "E2"])
bins = pd.read_parquet(BINS_URL, columns=["centel"])

# Align the three tables column-wise, make the label-like columns categorical,
# drop incomplete rows, and renumber the rows 0..n-1 (the previous index is
# kept as an "index" column by reset_index).
df = (
    pd.concat([clusters, eigs, bins], axis=1)
    .astype({"chrom": "category", "name": "category", "color": "category"})
    .dropna()
    .reset_index()
)

def get_cmap(df):
    """Build the cluster color lookup.

    Returns a dict mapping each cluster name to a ``(hex_color, "r,g,b")``
    pair, where the second element is the same color written as
    comma-separated RGB components.

    NOTE(review): the ``df`` argument is currently unused -- the colors are
    read from the module-level ``clusters`` table (first color seen per name).
    """
    first_color_by_name = clusters.groupby(["name"]).first()["color"]

    lookup = {}
    for cluster_name, hex_code in first_color_by_name.items():
        rgb = ImageColor.getcolor(hex_code, "RGB")
        lookup[cluster_name] = (hex_code, ",".join(str(channel) for channel in rgb))
    return lookup

cmap = get_cmap(df)

### Define a custom tileset for HiGlass (`hg`)

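`Tileset.update(df)` takes a dataframe of intervals (for example, the rows selected in the scatter plot), merges the intervals that share a cluster name with `bioframe.merge`, and creates a new set of `_tiles` to serve to HiGlass.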

In [6]:
import uuid

# only way I could figure out how to color the tracks correctly
def to_bed(chrom, start, end, name, *_):
    """Format one interval as a 9-column BED-like row.

    The first four columns are the interval itself, the next four are "."
    placeholders, and the last column is the "r,g,b" color string looked up
    for ``name`` in the module-level ``cmap``. Any extra positional
    arguments are ignored.
    """
    row = [chrom, int(start), int(end), name]
    row.extend(["."] * 4)  # score, strand, and two further unused columns
    row.append(cmap[name][1])  # element 1 of the cmap entry is the "r,g,b" string
    return row

def get_lens(chromsizes: pd.Series):
    """Return the absolute start offset of each chromosome.

    Parameters
    ----------
    chromsizes : pd.Series
        Chromosome lengths indexed by chromosome name, in genome order.

    Returns
    -------
    dict
        Maps each chromosome name to the summed length of all chromosomes
        that precede it (so the first chromosome maps to 0). An empty input
        yields an empty dict instead of raising.
    """
    # Exclusive prefix sum: shift the running total down by one position and
    # start from 0. Unlike building ``[0] + cumsum[:-1]`` by hand, this also
    # works when ``chromsizes`` is empty.
    starts = chromsizes.cumsum().shift(1, fill_value=0)
    return dict(starts)

def prepare_tiles(df, starts):
    """Convert intervals into the list of tile entries served to HiGlass.

    Intervals in ``df`` are first merged per cluster ``name`` with
    ``bioframe.merge`` and rows containing missing values are dropped. Each
    merged interval then becomes one dict whose coordinates are shifted by
    the chromosome's offset from ``starts`` (chromosome name -> offset).
    """
    merged = bf.merge(df[["chrom", "start", "end", "name"]], on=["name"]).dropna()

    entries = []
    for record in merged.to_records(index=False):
        chrom, start, end = record[0], record[1], record[2]
        offset = starts[chrom]
        entries.append(
            {
                "chrOffset": int(offset),
                "xStart": int(offset + start),
                "xEnd": int(offset + end),
                "importance": 0,
                "uid": str(uuid.uuid4()),  # fresh random id per entry
                "fields": to_bed(*record),  # extra merge columns are ignored by to_bed
            }
        )
    return entries

# Get the trackid/viewid pairs to specify which tracks should be reloaded
def get_selectors(view, pos):
    """List the track/view id pairs for every track at one position of a view.

    ``pos`` is the attribute name on ``view.tracks`` to read (e.g. "top").
    Returns one ``{"trackId": ..., "viewId": ...}`` dict per track, in order.
    """
    selectors = []
    for track in getattr(view.tracks, pos):
        selectors.append({"trackId": track.uid, "viewId": view.uid})
    return selectors

@dataclass
class Tileset:
    """Custom "bedlike" tileset whose contents can be replaced at runtime.

    All intervals are held in memory and returned as a single tile at zoom
    level 0; ``update`` swaps the intervals for a new set.
    """

    chromsizes: pd.Series
    uid: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Un-annotated, so @dataclass treats it as a plain class attribute
    # rather than an __init__ field.
    datatype = "bedlike"

    def __post_init__(self):
        """Precompute chromosome offsets and start with no tiles."""
        self._starts = get_lens(self.chromsizes)
        self._tiles = []

    def update(self, df):
        """Rebuild the served tile entries from the intervals in ``df``."""
        self._tiles = prepare_tiles(df, self._starts)

    def info(self):
        """Return the tileset metadata dict (id, extent, and zoom depth)."""
        total_length = int(np.sum(self.chromsizes.values))
        return dict(
            uuid=self.uid,
            max_width=total_length,
            min_pos=[1],
            max_pos=[total_length],
            max_zoom=0,
        )

    def tiles(self, _tileids):
        """Return the one and only tile, regardless of which ids were requested."""
        tile_id = f"{self.uid}.0.0"
        return [(tile_id, self._tiles)]

        

### Create custom tileset and add to `hg.server`

In [7]:
import hg
from matplotlib.colors import Normalize
import jscatter

# Scatter plot: points placed by E1/E2 and colored by cluster name, using the
# hex colors (first element of each cmap entry) as the palette.
hex_colors = [hex_code for hex_code, _rgb in cmap.values()]
s = jscatter.Scatter(
    data=df,
    x='E1',
    y='E2',
    color_by="name",
    color_map=hex_colors,
)

# Tileset: built on the hg38 chromosome sizes up to and including chrY,
# filled with every row of df, then registered with the hg server.
chromsizes = bf.fetch_chromsizes('hg38').loc[:'chrY']
t = Tileset(chromsizes)
t.update(df)
ts = hg.server.add(t)

### Override the tilesets for the clustering track in the original config with our custom tileset

In [8]:
# Point the last top track and the last left track (the "cluster" tracks)
# at the custom tileset.
view = conf.views[0]
for pos in ("top", "left"):
    getattr(view.tracks, pos)[-1].tileset(ts, inplace=True)

# Remove all bottom tracks from the existing view config.
view.tracks.bottom = []
# Optional: keep only a subset of the top tracks.
#view.tracks.top = [
#    view.tracks.top[0],  # chromosome labels
#    view.tracks.top[1],  # gene annotations
#    view.tracks.top[-1], # clustering assignment
# ]

w = conf.widget()  # HiGlass widget for the modified config

### Callback to link the `HiGlassWidget` with `JScatter`

In [9]:
def on_selection_change(change):
    """Refresh the browser's cluster track when the scatter selection changes.

    ``change["new"]`` holds the positions of the selected rows of ``df``.
    A non-empty selection replaces the tileset contents with those rows; an
    empty selection leaves the tileset as it was. The track is reloaded in
    both cases.
    """
    selected_rows = change["new"]
    has_selection = len(selected_rows) > 0
    if has_selection:
        t.update(df.iloc[selected_rows])

    # Only the last top track (the clustering track) is reloaded.
    # BUG: some issue with JS where only one selector is working atm...
    top_selectors = get_selectors(conf.views[0], "top")
    w.reload(top_selectors[-1])

# Run the callback whenever the scatter widget's "selection" trait changes,
# then show both widgets together.
s.widget.observe(on_selection_change, names="selection")
display(w, s.show())

HiGlassWidget()

HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(width='36px'), style…