# Compartments and edges

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format='svg'

def plot_intervals(query=None, annot=None, **kwargs):
    """Draw interval tracks as thick horizontal bars, one subplot per chromosome.

    Every keyword argument other than ``vlines`` and ``figsize`` is a track:
    the keyword is the track label and the value is a DataFrame with
    ``chrom``, ``start`` and ``end`` columns. Tracks are stacked so that the
    first keyword ends up at the top of each subplot.

    Parameters
    ----------
    query, annot
        Accepted for backward compatibility; not used.
    vlines : sequence of float, optional
        x positions at which a thin vertical line is drawn on every subplot.
    figsize : tuple, optional
        Figure size passed to ``plt.subplots``. Defaults to a width of 8 and
        a height that grows with the number of tracks.
    **kwargs : pandas.DataFrame
        The tracks to plot. The frames themselves are not modified.

    Returns
    -------
    numpy.ndarray or list
        The matplotlib axes, one per chromosome (a one-element list when
        there is only one chromosome).

    Raises
    ------
    ValueError
        If no track is given.
    """
    import matplotlib.pyplot as plt

    vlines = kwargs.pop('vlines', [])
    # The default is evaluated after 'vlines' is removed, so len(kwargs)
    # counts tracks only whenever the default is actually used.
    figsize = kwargs.pop('figsize', (8, 1.5*len(kwargs)-1))

    if not kwargs:
        raise ValueError('plot_intervals needs at least one track (label=DataFrame)')

    # Reversed so the first keyword gets the highest y value, i.e. the top row.
    labels = list(reversed(kwargs))
    # assign() returns a new frame, so the caller's DataFrames stay untouched.
    bigdf = pd.concat([kwargs[label].assign(label=label) for label in labels])

    # Categorical with categories in order of appearance keeps the subplots
    # in input order instead of sorted order.
    bigdf['chrom'] = pd.Categorical(bigdf['chrom'], bigdf['chrom'].unique())
    gr = bigdf.groupby('chrom', observed=False)

    fig, axes = plt.subplots(gr.ngroups, 1, figsize=figsize, sharey=True)
    if not isinstance(axes, np.ndarray):
        # A single subplot is returned as a bare Axes, not an array.
        axes = [axes]

    n_tracks = len(labels)
    for i, chrom in enumerate(gr.groups):
        ax = axes[i]
        chrom_df = gr.get_group(chrom)
        # The y position is the label's index in `labels`, which is also what
        # the tick labels below use, so bars and tick labels always line up.
        for y, label in enumerate(labels):
            track = chrom_df.loc[chrom_df['label'] == label]
            if track.empty:
                continue
            ax.hlines(np.repeat(y, track.index.size),
                      track['start'].tolist(), track['end'].tolist(),
                      lw=10, colors=f'C{y}', capstyle='butt')

        for side in ('top', 'left', 'right'):
            ax.spines[side].set_visible(False)

        ax.set_yticks(list(range(n_tracks)), labels)
        ax.tick_params(axis='y', which='both', left=False)
        ax.set_ylim(-1, n_tracks-0.7)
        if i != gr.ngroups-1:
            # Only the bottom subplot keeps its x tick marks.
            ax.tick_params(axis='x', which='both', bottom=False)

        ax.set_title(chrom, loc='left', fontsize=10)

    # Draw the vertical markers on every subplot, spanning its full y range.
    for ax in axes:
        ax.vlines(vlines, *ax.get_ylim(), lw=0.1, colors='black', zorder=0)

    plt.tight_layout()
    return axes

def stairs(df, start='start', end='end', pos='pos', endtrim=0):
    """Duplicate every interval row into two rows that carry a position.

    One copy of each row gets ``pos`` set to its ``start`` value, the other
    to its ``end`` value minus ``endtrim``. The stacked result is sorted by
    (``start``, ``end``) so it can be plotted as a step curve. ``df`` itself
    is left unchanged.
    """
    at_start = df.copy(deep=True)
    at_start[pos] = at_start[start]

    at_end = df.copy(deep=True)
    at_end[pos] = at_end[end] - endtrim

    stacked = pd.concat([at_start, at_end])
    return stacked.sort_values(by=[start, end])

## Read in 100kb windows

In [None]:
# Load the per-window E1 values and give every row a 100 kb window:
# row k is assigned the interval [k * 100_000, (k + 1) * 100_000).
e1_100kb = pd.read_csv('rec_compartments/round_spermatid_e1_100kb_arms.csv')
e1_100kb['start'] = np.arange(len(e1_100kb), dtype=np.int64) * 100_000
e1_100kb['end'] = e1_100kb['start'] + 100_000
e1_100kb['sign'] = np.sign(e1_100kb['e1'])

# Step-curve view of E1 along the windows, filled towards zero.
plot_df = stairs(e1_100kb)
plt.figure(figsize=(12, 3))
plt.fill_between(plot_df['pos'], plot_df['e1']) ;

## Call compartments

In [None]:
# Start a new segment at every window whose sign differs from the previous
# window's; the running sum of those change flags is the segment id.
# NaN never compares equal, so each window with a NaN sign gets its own segment.
e1_100kb['segment_id'] = ((e1_100kb.sign.shift() != e1_100kb.sign)).cumsum()

# Collapse each segment into one row: mean and summed e1, the smallest start
# and largest end, plus the segment's id and sign (both constant within a
# segment, so their mean is just that value).
comp = e1_100kb.groupby('segment_id', as_index=False).agg(dict(
     e1=['mean', 'sum'], 
     start='min', 
     end='max', 
     segment_id='mean', 
     sign='mean'
))
# The multi-function agg yields two-level column names; join the levels into
# flat names such as 'e1_mean' and 'start_min'.
comp.columns = ['_'.join(col).strip() for col in comp.columns.values]
# Drop the aggregate suffix from the columns that had a single aggregate.
comp = comp.rename(
    columns={'start_min':'start',
             'end_max':'end', 
             'segment_id_mean':'segment_id', 
             'sign_mean':'sign'}
)
# Positive sign -> 'A'. Everything else lands in 'B', including a zero or
# NaN sign (segments without data).
comp['comp'] = ['A' if x > 0 else 'B' for x in comp.sign]
# reset_index() without drop=True also keeps the old index as a column.
comp = comp.reset_index()
# assumes the input file holds a single chromosome, chrX — TODO confirm
comp['chrom'] = 'chrX'

## Call edges where both the A and B flanking 100kb windows have data

In [None]:
# Keep a segment boundary as an edge only when the segments on both sides of
# it have data. Boundaries touching a segment without data are blanked (NaN).
# start/end are cast to float up front so NaN can be stored without relying on
# implicit int -> float upcasting during assignment (deprecated in pandas).
_comp = comp.astype({'start': float, 'end': float})
no_data = _comp['e1_mean'].isnull()
# shift(1) exposes the previous segment's flag, shift(-1) the next one's.
# The first / last segment has no such neighbour, hence fill_value=False.
# Unlike a loop over range(1, n-1), this also checks the first segment's end
# and the last segment's start.
_comp.loc[no_data.shift(1, fill_value=False), 'start'] = np.nan
_comp.loc[no_data.shift(-1, fill_value=False), 'end'] = np.nan
_comp = _comp.loc[~no_data, :]
_comp = _comp.reset_index()
# NOTE(review): the start of the first and the end of the last retained
# segment are kept as edges although they have only one neighbour — confirm
# that this is intended.
# Blanked boundaries are dropped so `edges` holds integer positions only.
edges = (
    pd.concat([_comp['start'], _comp['end']])
    .dropna()
    .astype('int64')
    .sort_values()
    .unique()
)

## Plot compartments and defined edges

In [None]:
# Keep only segments that have data. Segments without an e1 value carry the
# fallback label 'B' and must not be drawn (or saved) as B compartments.
compartments = comp.loc[~comp.e1_mean.isnull()].copy()
compartments['start'] = compartments.start.astype(int)
compartments['end'] = compartments.end.astype(int)

# Plot the filtered table, one track per compartment type. plot_intervals
# gets copies so the table that is saved later cannot be altered by it.
axes = plot_intervals(
    A=compartments.loc[compartments.comp == 'A', :].copy(),
    B=compartments.loc[compartments.comp == 'B', :].copy(),
    # A NaN edge is a blanked boundary, not a position: do not pass it on.
    vlines=edges[~pd.isnull(edges)].tolist(),
    figsize=(12, 2)
)

In [None]:
# Write the compartment table to disk; to_csv also writes the DataFrame
# index as the first column by default.
compartments.to_csv('compartments.csv')

In [None]:
# Build a window reaching 100 kb to either side of every edge.
edge_segments = pd.DataFrame(np.column_stack((edges-100_000, edges+100_000)), columns=['start', 'end'])
# A NaN edge is a blanked boundary, not a position: drop its all-NaN row and
# store the remaining coordinates as integers.
edge_segments = edge_segments.dropna().astype('int64').reset_index(drop=True)
# NOTE(review): an edge closer than 100 kb to position 0 gives a negative
# start — confirm whether downstream code needs it clipped.
edge_segments['chrom'] = 'chrX'
edge_segments.to_csv('edge_segments.csv')