In [1]:
import pandas as pd
import re

from ipywidgets.widgets import HTML

In [2]:
# Maps each one-character chromatin type code to the CSS color name used when
# rendering it. "?" is the fallback code for entries with no recognized type.
chromatin_types = {"A": "crimson", "B": "royalblue", "u": "dimgray", "?": "magenta"}


def extract_type(tags: str) -> str:
    """Return the first chromatin type code found in a comma-separated tag string.

    Args:
        tags: Comma-separated tags, e.g. ``"B"`` or ``"x,A"``. Non-string
            values (such as the NaN pandas produces for an empty cell) are
            accepted and treated as carrying no type.

    Returns:
        The first tag that is a key of ``chromatin_types``, or ``"?"`` when
        there is none.
    """
    # A missing cell arrives as a float NaN (or None), which has no `.split`;
    # report it as "unknown" instead of raising from inside `Series.map`.
    if not isinstance(tags, str):
        return "?"
    for typ in tags.split(","):
        if typ in chromatin_types:
            return typ
    return "?"


def format_type_string(s: str) -> str:
    """Render a string of type codes as color-coded HTML.

    Every maximal run of one repeated character becomes a single ``<span>``
    whose color is looked up in ``chromatin_types``.

    Raises:
        KeyError: If ``s`` contains a character that is not a key of
            ``chromatin_types``.
    """
    spans = []
    # Group 1 is the whole run, group 2 the single character it repeats.
    for run in re.finditer(r"((.)\2*)", s):
        segment, typ = run.groups()
        color = chromatin_types[typ]
        spans.append(f"""<span style="color: {color}">{segment}</span>""")
    return "".join(spans)
    

def split_into_chunks(text: str, size: int) -> list[str]:
    """Cut ``text`` into consecutive pieces of ``size`` characters.

    The last piece is shorter when ``len(text)`` is not a multiple of
    ``size``; an empty ``text`` yields an empty list.
    """
    chunks = []
    for offset in range(0, len(text), size):
        chunks.append(text[offset:offset + size])
    return chunks

---

In [3]:
chains_filename = "chains.tsv"

# Load the tab-separated chains table.
chains_table = pd.read_csv(chains_filename, sep="\t")

# Take the most frequent bin width as the bin size. The single-element
# unpacking raises if the mode is not unique.
bin_widths = chains_table.eval("end - start")
(binsize,) = bin_widths.mode()

chains_table.head(2)

Unnamed: 0,chain,start,end,A,B,tags
0,chr1:a,0,100000,0.0,1.0,B
1,chr1:a,100000,200000,0.0,1.0,B


### Type distribution

In [4]:
# Classify every row by its type code and tally how often each code occurs.
site_types = chains_table["tags"].map(extract_type)
type_counts = site_types.value_counts()
site_count = len(chains_table)

# Report each type with its absolute count and its share of all rows.
line_template = "{}: {} ({:.1f} %)"
for typ, count in type_counts.items():
    percentage = 100 * count / site_count
    print(line_template.format(typ, count, percentage))

A: 25407 (41.9 %)
B: 18287 (30.2 %)
u: 16948 (27.9 %)


### Annotation

In [5]:
# Region to annotate: chromosome name plus the coordinate bounds applied to
# the table's start/end columns.
chrom = "chr20"
start = 0
end = 1e10
chain = f"{chrom}:a"

# Layout of the rendered text: type codes per row, and per space-separated
# chunk within a row.
row_length = 50
chunk_length = 10

In [6]:
# Select the rows of the requested chain that lie entirely inside
# [start, end]. In the query, `@name` refers to the Python variable of that
# name, while a bare name refers to the table column.
section = chains_table.query(
    "chain == @chain and start >= @start and end <= @end"
)
type_string = "".join(section["tags"].map(extract_type))

# Format the long type string as a tabular text: one row per `row_length`
# entries, each labelled with the start coordinate of its first entry.
row_starts = section["start"].iloc[::row_length]
row_strings = split_into_chunks(type_string, row_length)
assert len(row_starts) == len(row_strings), (
    "row labels and row strings are out of sync"
)

# Collect the pieces in a list and join once instead of growing a string.
html_parts = [
    """<pre style="font: 16px/1.5 'Lucida Console'">""",
    f"<strong>{chrom}</strong> ({binsize:,d}bp bins)\n",
]

for row_start, row_string in zip(row_starts, row_strings):
    chunks = split_into_chunks(row_string, chunk_length)
    html_parts.append(f"{row_start:12,d}bp   ")
    html_parts.append(" ".join(format_type_string(chunk) for chunk in chunks))
    html_parts.append("\n")

html_parts.append("</pre>")
html = "".join(html_parts)

HTML(html)

HTML(value='<pre style="font: 16px/1.5 \'Lucida Console\'"><strong>chr20</strong> (100,000bp bins)\n          …