In [1]:
import os
from collections import defaultdict
import pickle 
import traceback

import h5py
import json
from copy import deepcopy as dcopy

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.gridspec import GridSpec
import seaborn as sns

import numpy as np
import numpy.ma as ma
import pandas as pd
import scipy
from scipy.ndimage.filters import gaussian_filter1d
from scipy.interpolate import interp1d, interp2d

import polychrom
from polychrom import polymer_analyses, contactmaps, polymerutils
from polychrom.hdf5_format import list_URIs, load_URI, HDF5Reporter

import cooltools.lib.plotting
from mpl_toolkits.axes_grid1 import make_axes_locatable
from cooltools.lib import numutils

import simutils

  from scipy.ndimage.filters import gaussian_filter1d


In [2]:
def filter_diags(hmap, ignore_diags):
    """Return a copy of ``hmap`` with its central diagonals set to NaN.

    Diagonals at offsets ``0 .. ignore_diags - 1`` on both sides of the main
    diagonal are overwritten with ``np.nan``. The input array is not modified.

    Parameters
    ----------
    hmap : array-like, 2-D
        Matrix to filter. It must be able to hold NaN (i.e. a float dtype)
        whenever ``ignore_diags`` is positive.
    ignore_diags : int
        Number of diagonals to blank, counted from the main diagonal outwards.

    Returns
    -------
    numpy.ndarray
        Copy of ``hmap`` with the selected diagonals replaced by NaN.
    """
    masked = np.array(hmap, copy=True)
    for offset in range(ignore_diags):
        # Both slices are views into ``masked``: the main diagonal of a view
        # shifted by ``offset`` columns (rows) is the ``offset``-th upper
        # (lower) diagonal of the full matrix.
        above = masked[:, offset:]
        below = masked[offset:, :]
        for band in (above, below):
            np.fill_diagonal(band, np.nan)
    return masked

def calc_ixns(subcomps_coarsened, ooe, n_diags=(0, 0)):
    """Average observed/expected contacts between compartment classes.

    Parameters
    ----------
    subcomps_coarsened : array-like of int, length ``ooe.shape[0]``
        Compartment label of every bin. Labels 0, 1 and 2 are used; label 2
        is the class handled specially below (called "X" here).
    ooe : numpy.ndarray, square
        Observed-over-expected contact matrix.
    n_diags : sequence of two ints, or a falsy value
        ``n_diags[0]`` diagonals are blanked (via ``filter_diags``) before the
        intra-X average, ``n_diags[1]`` before every other average. A falsy
        value (``None``, empty sequence) disables diagonal filtering.

    Returns
    -------
    numpy.ndarray, shape (4, 4)
        ``[i, j]`` for ``i, j`` in 0..2 (except ``[2, 2]``) is the NaN-aware
        mean of ``ooe`` over rows labelled ``i`` and columns labelled ``j``.
        ``[2, 2]`` is the mean over pairs of X bins that have at least one
        non-X bin between them; ``[3, 3]`` is the mean over pairs of X bins
        lying in the same uninterrupted run of X bins (including each bin
        with itself). Only pairs with ``row <= column`` enter these two
        averages. All remaining entries are NaN.
    """
    subcomps_coarsened = np.asarray(subcomps_coarsened)

    if n_diags:
        trans_ooe = filter_diags(ooe, n_diags[1])
        cis_ooe = filter_diags(ooe, n_diags[0])
    else:
        trans_ooe = cis_ooe = ooe

    mean_ixns = np.full((4, 4), np.nan)
    for i in range(3):
        for j in range(3):
            if i == 2 and j == 2:
                # X-X is split into inter-/intra-run contributions below.
                continue
            mean_ixns[i, j] = np.nanmean(
                trans_ooe[subcomps_coarsened == i].T[subcomps_coarsened == j]
            )

    x_loc = subcomps_coarsened == 2
    x_idx = np.flatnonzero(x_loc)

    # Number of non-X bins seen up to each X bin. Two X bins belong to the
    # same uninterrupted run exactly when this count is equal for both, so no
    # per-pair slice sum is needed.
    run_id = np.cumsum(~x_loc)[x_idx]
    same_run = run_id[:, None] == run_id[None, :]
    upper = x_idx[:, None] <= x_idx[None, :]

    n_bins = ooe.shape[0]
    XX_inter_mask = np.zeros((n_bins, n_bins), dtype=bool)
    XX_intra_mask = np.zeros((n_bins, n_bins), dtype=bool)
    x_block = np.ix_(x_idx, x_idx)
    XX_intra_mask[x_block] = upper & same_run
    XX_inter_mask[x_block] = upper & ~same_run

    mean_ixns[2, 2] = np.nanmean(trans_ooe[XX_inter_mask])
    mean_ixns[3, 3] = np.nanmean(cis_ooe[XX_intra_mask])

    return mean_ixns

def make_ixn_df(sim_group_path, mtx_fh, AB_self_attr, XX=0, sim_dir='Xboundaries'):
    """Collect saved mean-interaction matrices of a sweep into one DataFrame.

    For every value ``AB`` in ``AB_self_attr`` the file
    ``{sim_group_path}/AA{AB}_BB{AB}_XX{XX}/{sim_dir}/results/heatmaps/{mtx_fh}``
    (attraction values formatted with two decimals) is loaded with
    ``np.load`` and selected entries are copied into one row. Sweep points
    whose file does not exist yield a row of NaN.

    Parameters
    ----------
    sim_group_path : str
        Directory that contains the per-parameter simulation directories.
    mtx_fh : str
        File name of the saved matrix inside each ``heatmaps`` directory.
    AB_self_attr : sequence of float
        Values used for both the AA and the BB attraction.
    XX : float, optional
        XX attraction used in the directory name and the ``SS_attr`` column.
    sim_dir : str, optional
        Name of the simulation sub-directory; defaults to ``'Xboundaries'``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``AB_self_attr`` with columns ``AA``, ``AB``,
        ``BB``, ``AS``, ``BS``, ``SS_inter``, ``SS_intra``, ``AA_attr``,
        ``BB_attr`` and ``SS_attr``.
    """
    # Output column -> index into the saved matrix.
    matrix_entry = {
        "AA": (0, 0),
        "AB": (0, 1),
        "BB": (1, 1),
        "AS": (0, 2),
        "BS": (1, 2),
        "SS_inter": (2, 2),
        "SS_intra": (3, 3),
    }

    rows = []
    for AB in AB_self_attr:
        comp_dir = f'AA{AB:.2f}_BB{AB:.2f}_XX{XX:.2f}'
        mtx_path = f'{sim_group_path}/{comp_dir}/{sim_dir}/results/heatmaps/{mtx_fh}'
        if os.path.exists(mtx_path):
            mean_ixns = np.load(mtx_path)
            rows.append([mean_ixns[entry] for entry in matrix_entry.values()])
        else:
            # Keep the row so the frame stays aligned with AB_self_attr.
            rows.append([np.nan] * len(matrix_entry))

    ixn_df = pd.DataFrame(rows, columns=list(matrix_entry))

    ixn_df['AA_attr'] = AB_self_attr
    ixn_df['BB_attr'] = AB_self_attr
    ixn_df['SS_attr'] = XX

    return ixn_df

In [3]:
# Parameters that appear in the heatmap / OOE file names used below.
cutoff_rad, binSize = 5, 5

# Root directory of the simulation outputs and the sub-directory name of the
# simulation variant analysed in this notebook.
base_path = '/net/levsha/share/emily/notebooks/sims/bombyx/toy_models'
sim_dir = 'SC-10pol'

In [4]:
# Read the pickled polymer description that carries the compartment labels.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open('../polymer_info.pkl', 'rb') as f:
    monInfo = pickle.load(f)

L = monInfo['L']
chroms = monInfo['chroms']
n_per_chain = L // binSize
n_chains, n_chains_per_sphere = 30, 10

# One label per bin: the label of the first monomer of every binSize-wide bin.
mon_id_tmp = monInfo['compartment_ID']
mon_id = np.array([mon_id_tmp[start] for start in range(0, L, binSize)])

# Attraction values swept over; B deliberately shares A's list object.
A_self_attr = [0.00, 0.025, 0.0375, 0.05, 0.075, 0.1]
B_self_attr = A_self_attr
X_self_attr = [0.00, 0.05]

In [5]:
# Zoomify all of the matrices: coarse-grain every binSize-1 contact map to
# n_per_chain x n_per_chain bins and cache the result next to the input.
for AA in A_self_attr:
    for BB in B_self_attr:
        for XX in X_self_attr:
            comp_dir = f'AA{AA:.2f}_BB{BB:.2f}_XX{XX:.2f}'
            sim = f'{comp_dir}__{sim_dir}'
            sim_path = f'{base_path}/compartments_only/sweep_output/{comp_dir}/{sim_dir}'
            hmap_fh = f'{sim}__cutoff-{cutoff_rad:04.1f}_binSize-{binSize}_IC_chainMap.npy'

            # Binned map already cached: nothing to do for this sweep point.
            if os.path.exists(f'{sim_path}/results/heatmaps/{hmap_fh}'):
                continue

            hmap_fh_unbinned = f'{sim}__cutoff-{cutoff_rad:04.1f}_binSize-1_IC_chainMap.npy'
            if not os.path.exists(f'{sim_path}/results/heatmaps/{hmap_fh_unbinned}'):
                continue

            mat = np.load(f'{sim_path}/results/heatmaps/{hmap_fh_unbinned}')
            # Normalise so the first off-diagonal averages to 1.
            mat = mat / np.mean(np.diag(mat, k=1))
            # Integer target shape (same as used for the expected map); the
            # former L/binSize produced floats and was only valid when L is an
            # exact multiple of binSize.
            mat_binned = numutils.zoom_array(mat, (n_per_chain, n_per_chain))

            with open(f'{sim_path}/results/heatmaps/{hmap_fh}', 'wb') as f:
                np.save(f, mat_binned)

In [6]:
# Taking expected from neutral chain: the P(s) table of the simulation with
# all attractions set to 0.00 is used to build the expected contact matrix.
Ps_comp_dir = f'AA{0:.2f}_BB{0:.2f}_XX{0:.2f}'
Ps_sim_dir = 'SC-10pol'
Ps_sim_path = f'{base_path}/compartments_only/sweep_output/{Ps_comp_dir}/{Ps_sim_dir}'
# NOTE(review): this file name uses "cutoff" without the dash that the heatmap
# file names use ("cutoff-") — confirm this matches the files on disk.
Ps_fh = f'{Ps_comp_dir}__{Ps_sim_dir}__cutoff{cutoff_rad:04.1f}.txt'
# Tab-separated table; the 'dist' and 'Ps' columns are read below.
Ps = pd.read_csv(f'{Ps_sim_path}/results/Ps_scaling/{Ps_fh}', sep='\t')

# simutils.interpolate_Ps is a project helper; presumably it resamples P(s)
# onto every integer separation up to L — TODO confirm.
x, y = simutils.interpolate_Ps(Ps['dist'].values, Ps['Ps'].values, L)
# NOTE(review): if chroms is a scalar, this division cancels in the
# normalisation on the next line (up to rounding) — confirm it is needed.
y = y/chroms
# Normalise so the value at index 1 equals 1.
y = y/y[1]
# Toeplitz matrix built from y; slicing with [:, :] presumably materialises
# the lazy object as a dense array before zooming — TODO confirm.
exp_2bin = numutils.LazyToeplitz(y)
# Coarse-grain the expected matrix to n_per_chain x n_per_chain bins.
exp = numutils.zoom_array(exp_2bin[:,:], (n_per_chain,n_per_chain))

  x = np.log10(bins)
  y = np.log10(Ps)
  interp_x = np.log10(np.arange(last_bin))


In [7]:
# Observed-over-expected maps: divide every binned, normalised contact map by
# the expected matrix `exp` and cache the result next to the input map.
for AA in A_self_attr:
    for BB in B_self_attr:
        for XX in X_self_attr:
            comp_dir = f'AA{AA:.2f}_BB{BB:.2f}_XX{XX:.2f}'
            sim = f'{comp_dir}__{sim_dir}'
            sim_path = f'{base_path}/compartments_only/sweep_output/{comp_dir}/{sim_dir}'
            ooe_fh = f'{sim}__cutoff-{cutoff_rad:04.1f}_binSize-{binSize}_IC_OOE_chainMap.npy'

            # Skip sweep points whose OOE map has already been written.
            if os.path.exists(f'{sim_path}/results/heatmaps/{ooe_fh}'):
                continue

            # Skip sweep points that have no binned contact map to start from.
            hmap_fh = f'{sim}__cutoff-{cutoff_rad:04.1f}_binSize-{binSize}_IC_chainMap.npy'
            if not os.path.exists(f'{sim_path}/results/heatmaps/{hmap_fh}'):
                continue

            mat = np.load(f'{sim_path}/results/heatmaps/{hmap_fh}')
            # Rescale so the first off-diagonal averages to 1.
            mat = mat / np.mean(np.diag(mat, k=1))

            ooe = mat / exp[:, :]

            with open(f'{sim_path}/results/heatmaps/{ooe_fh}', 'wb') as f:
                np.save(f, ooe)

In [8]:
# Lengths of the uninterrupted runs of identical labels in mon_id. A run
# starts at index 0 and right after every label change, and the last run ends
# at len(mon_id). (The previous expression diffed the change positions
# directly, which undercounted the first run by one bin and dropped the last
# run entirely, so biggest_comp could be too small.)
run_edges = np.concatenate(([0], np.flatnonzero(np.diff(mon_id) != 0) + 1, [len(mon_id)]))
comp_sizes = np.diff(run_edges)
biggest_comp = max(comp_sizes)

# NOTE: matrices cached by earlier runs are not recomputed; delete them if
# they must reflect the corrected biggest_comp.
mtx_fh = 'mean_ixns.npy'
for AA in A_self_attr:
    for BB in B_self_attr:
        for XX in X_self_attr:
            comp_dir = f'AA{AA:.2f}_BB{BB:.2f}_XX{XX:.2f}'
            sim = f'{comp_dir}__{sim_dir}'
            sim_path = f'{base_path}/compartments_only/sweep_output/{comp_dir}/{sim_dir}'
            ooe_fh = f'{sim}__cutoff-{cutoff_rad:04.1f}_binSize-{binSize}_IC_OOE_chainMap.npy'

            # Sweep points without an OOE map are skipped silently.
            try:
                ooe = np.load(f'{sim_path}/results/heatmaps/{ooe_fh}')
            except FileNotFoundError:
                continue

            if os.path.exists(f'{sim_path}/results/heatmaps/{mtx_fh}'):
                print(f'mean interactions for {sim} already exist')
                continue

            print(f'computing ixns for {sim_path}')
            # Blank 2 diagonals for the intra-X average and biggest_comp
            # diagonals for all other averages (see calc_ixns).
            mean_ixns = calc_ixns(subcomps_coarsened=mon_id, ooe=ooe, n_diags=[2, biggest_comp])
            with open(f'{sim_path}/results/heatmaps/{mtx_fh}', 'wb') as o:
                np.save(o, mean_ixns)

mean interactions for AA0.00_BB0.00_XX0.00__SC-10pol already exist
mean interactions for AA0.00_BB0.00_XX0.03__SC-10pol already exist
mean interactions for AA0.00_BB0.00_XX0.05__SC-10pol already exist
mean interactions for AA0.00_BB0.03_XX0.00__SC-10pol already exist
mean interactions for AA0.00_BB0.03_XX0.03__SC-10pol already exist
mean interactions for AA0.00_BB0.03_XX0.05__SC-10pol already exist
mean interactions for AA0.00_BB0.04_XX0.00__SC-10pol already exist
mean interactions for AA0.00_BB0.04_XX0.03__SC-10pol already exist
mean interactions for AA0.00_BB0.04_XX0.05__SC-10pol already exist
mean interactions for AA0.00_BB0.05_XX0.00__SC-10pol already exist
mean interactions for AA0.00_BB0.05_XX0.03__SC-10pol already exist
mean interactions for AA0.00_BB0.05_XX0.05__SC-10pol already exist
mean interactions for AA0.00_BB0.07_XX0.00__SC-10pol already exist
mean interactions for AA0.00_BB0.07_XX0.03__SC-10pol already exist
mean interactions for AA0.00_BB0.07_XX0.05__SC-10pol already e