In [3]:
import bioframe as bf
import pandas as pd
import pathlib
import numpy.typing as npt
import numpy as np
from collections import Counter
from typing import Any, Dict, List, Tuple, Union

In [10]:
def get_subcompartment_ranks() -> dict:
    """Return a mapping from subcompartment label to its 0-based rank.

    The rank of a label is its position in the sequence
    B3, B2, B1, B0, A0, A1, A2, A3.
    """
    ordered_labels = ("B3", "B2", "B1", "B0", "A0", "A1", "A2", "A3")
    return dict(zip(ordered_labels, range(len(ordered_labels))))


def get_compartment_ranks() -> dict:
    """Return a mapping from compartment label to its 0-based rank.

    "B" is ranked 0 and "A" is ranked 1.
    """
    ordered_labels = ("B", "A")
    return {label: rank for rank, label in enumerate(ordered_labels)}


def group_and_sort_subcompartments(df: pd.DataFrame, aggregate_subcompartments: bool) -> pd.DataFrame:
    """Count the rows for each combination of state labels and sort by label rank.

    Args:
        df: frame whose columns all hold state labels as strings; every
            column is used as a grouping key.
        aggregate_subcompartments: when True, each label is reduced to its
            first character before grouping and the result is ordered with
            get_compartment_ranks(). When False, the labels are kept as-is
            and ordered with get_subcompartment_ranks().

    Returns:
        A new frame with one row per observed label combination, the original
        columns plus a "size" column holding the number of rows in each
        group, sorted by the rank of the labels (columns compared left to
        right) and re-indexed from 0.
    """
    if aggregate_subcompartments:
        # Keep only the first character of each label (e.g. "A2" -> "A").
        df = df.apply(lambda x: x.str[0])
        ranks = get_compartment_ranks()
    else:
        # Full labels are not keys of the compartment table; ranking them with
        # it would map every label to a missing value and leave the groups
        # effectively unsorted, so the subcompartment table is used instead.
        ranks = get_subcompartment_ranks()

    counts = df.groupby(df.columns.tolist(), as_index=False).size()

    label_cols = [col for col in counts.columns if col != "size"]
    # The key is applied to each sort column independently; labels absent
    # from the rank table become NaN and are therefore placed last.
    return counts.sort_values(
        by=label_cols,
        key=lambda labels: labels.map(ranks),
        ignore_index=True,
        kind="stable",
    )

In [11]:
# Read the subcompartment bedGraph, index it by genomic interval, and keep
# only the columns whose name matches the ".state" pattern.
df = pd.read_table("../data/output/compartment_analysis/10000/MCF10A_WT_T1_C1_10000.subcompartments.bedGraph.gz")
df = df.set_index(["chrom", "start", "end"])
df = df.filter(regex=r".*\.state")

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,MCF10A_WT.state,MCF10A_T1.state,MCF10A_C1.state
chrom,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
chr1,40000,50000,A0,A1,A0
chr1,50000,60000,A0,A1,A0
chr1,60000,70000,A0,A1,A0
chr1,70000,80000,A0,A1,A0
chr1,80000,90000,A0,A1,A0
...,...,...,...,...,...
chrX,155660000,155670000,B1,A3,B0
chrX,155670000,155680000,B1,A3,B0
chrX,155680000,155690000,B1,A3,B0
chrX,155690000,155700000,B1,A3,B0


In [13]:
# Count the bins for each combination of aggregated labels, then derive:
#   cov     - size * 10_000 / 1e6 (presumably Mbp covered, with 10_000 being
#             the bin size in bp -- TODO confirm against the input resolution)
#   rel_cov - fraction of all counted bins that fall in the group
df1 = group_and_sort_subcompartments(df, True).assign(
    cov=lambda counts: (counts["size"] * 10_000) / 1.0e6,
    rel_cov=lambda counts: counts["size"] / counts["size"].sum(),
)
df1

Unnamed: 0,MCF10A_WT.state,MCF10A_T1.state,MCF10A_C1.state,size,cov,rel_cov
0,B,B,B,102422,1024.22,0.375772
1,B,B,A,11923,119.23,0.043744
2,B,A,B,7610,76.1,0.02792
3,B,A,A,14615,146.15,0.05362
4,A,B,B,10157,101.57,0.037265
5,A,B,A,6085,60.85,0.022325
6,A,A,B,9791,97.91,0.035922
7,A,A,A,109961,1099.61,0.403432
