In [1]:
import numpy as np
import sys
import pandas as pd
sys.path.append('../')
import matplotlib.pyplot as plt
from scipy import signal

from ALT_F_BIND_chromo_vertex_nuclear_competitive_2_density import eval_f_bind_competitive_2, eval_f_bind_competitive_arr_2

In [2]:
def calc_s_bind_comp(ws, mus, f_binds):
    f_bind_10, f_bind_01, f_bind_20, f_bind_02, f_bind_11 = f_binds
    mu_1, mu_2 = mus
    # sigma1, sigma2 = sigmas
    w_1, w_2 = ws

    # boltzmann probability of each binding state [s_HP1, s_PRC1]
    p_00 = 1
    p_10 = np.exp(-f_bind_10 + mu_1*1 - w_1*1) 
    p_01 = np.exp(-f_bind_01 + mu_2*1 - w_2*1)
    p_20 = np.exp(-f_bind_20 + mu_1*2 - w_1*2)
    p_02 = np.exp(-f_bind_02 + mu_2*2 - w_2*2)
    p_11 = np.exp(-f_bind_10 + mu_1*1 - w_1*1  - f_bind_01 + mu_2*1 - w_2*1)
    
    q_bind = p_00 + p_01 + p_10 + p_02 + p_20 + p_11 #1. + np.exp(-eps * sigma + mu - w)
    s_bind1 = (1*p_10 + 2*p_20 + 1*p_11) / q_bind #(f_bind_20 + mu_1 - w_1) + 1*(f_bind_11 + mu_1 - w_1 + mu_2 - w_2) / q_bind
    s_bind2 = (1*p_01 + 2*p_02 + 1*p_11) / q_bind #np.exp(1*(f_bind_01 + mu_2 - w_2) + 2*(f_bind_02 + mu_2 - w_2) + 1(*f_bind_11 + mu_1 - w_1 + mu_2 - w_2) / q_bind

    s_binds = [s_bind1, s_bind2]
    return s_binds

In [3]:
# x: genomic distance
# a = 2.6065
# a = 2.6075, mu = -4.8, v_int = -2 : get bridging
def calc_p_loop(x, a=1):
    
    p_loop = a / np.abs(x) ** (1.5)
    
    return p_loop

# Self-consistent binding state solution 

In [40]:
%%time
# 2 marks competitive, no for loop

# mus = [-4.8,-4.8]
mus = [-4.8,-4.8]
# eps = -2
# j = -2

# sigma_1 = np.random.randint(1, size = n_nuc)
# sigma_1[20:40] = 2
# sigma_1[60:80] = 2

# sigma_2 = np.random.randint(1, size = n_nuc)
# sigma_2[40:60] = 2
# sigma_2[80:100] = 2

sigma_1 = np.loadtxt("HNCFF683HCZ_H3K9me3_methyl.txt")
sigma_2 = np.loadtxt("ENCFF919DOR_H3K27me3_methyl.txt")
sigmas = [sigma_1, sigma_2]

n_nuc = len(sigma_1)
n_iter = 100

e_m = [1.52, 1.52]
v_int = np.array([[-4,0],[0,-4]])
# e_m = [2, 2]
# v_int = np.array([[-2,0],[0,-2]])

w_1 = np.zeros(n_nuc)
w_2 = np.zeros(n_nuc)
ws = [w_1, w_2]
ws_old = np.copy(ws)


f_bind_10 = eval_f_bind_competitive_arr_2([1,0], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
f_bind_01 = eval_f_bind_competitive_arr_2([0,1], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
f_bind_20 = eval_f_bind_competitive_arr_2([2,0], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
f_bind_02 = eval_f_bind_competitive_arr_2([0,2], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
f_bind_11 = eval_f_bind_competitive_arr_2([1,1], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
f_binds_comp = [f_bind_10, f_bind_01, f_bind_20, f_bind_02, f_bind_11]

print("convergence:")
for i in range(n_iter):
    # convergence test
    if (i != 0 and (i%10 == 0)) or (i==1) or (i==n_iter-1):
        ws_1_conv = np.sum(np.abs(ws[0] - ws_old[0]))
        ws_2_conv = np.sum(np.abs(ws[1] - ws_old[1]))
        # print(np.abs(np.sum(ws[0] - ws_old[0])))
        # print(np.abs(np.sum(ws[1] - ws_old[1])))
        print(ws_1_conv + ws_2_conv)
        
    ws_old = np.copy(ws)
    s_old = np.copy(s_arr)
    
    s_arr = calc_s_bind_comp(ws, mus, f_binds_comp) # calcultae binding state of HP1 and PRC1
    # s = calc_s_bind(w, sigma, eps, mu)

    # calculate array of looping probabilitys by nucleosome, with padding for convolution
    i = 0
    p_loop_ini = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i)))
    p_loop_reflect = np.concatenate((p_loop_ini[1:][::-1], p_loop_ini))

    # calculate part of interaction energy
    p_s_sum1 = signal.convolve(s_arr[0], p_loop_reflect, mode = "valid") # equivalent to [np.sum(p_loop * s_arr[0])] for i in range (n_nuc)
    p_s_sum2 = signal.convolve(s_arr[1], p_loop_reflect, mode = "valid") # equivalent to [np.sum(p_loop * s_arr[1])] for i in range (n_nuc)

    # calculate mean-field protein conjugate field via saddle point equation
    w_1_fast = v_int[0,0] * p_s_sum1 +  v_int[0,1] * p_s_sum2
    w_2_fast = v_int[1,1] * p_s_sum2 +  v_int[0,1] * p_s_sum1
    ws = [w_1_fast, w_2_fast]

convergence:
234947.45490884638
735365.6626429909
100924.01397062656
50802.69670371679
29571.036149145017
17946.062755618957
21186.841346101162
14371.660687863223
28472.12726103305
9131.82627598481
7936.161819989586
CPU times: total: 17.7 s
Wall time: 37 s


In [42]:
print(np.sum(np.abs(s_arr[0] - s_old[0]))) # change in binding at last iteration
print(np.sum(np.abs(s_arr[1] - s_old[1])))
print(len(s_arr[0]))

344.9788955164958
175.2046132449968
451692


In [48]:
np.outer(s_arr[0], s_arr[1])

MemoryError: Unable to allocate 1.48 TiB for an array with shape (451692, 451692) and data type float64

# Co-occupancy maps

### Coarse grain/ average binding states

In [60]:
len(s_arr[0])

451692

In [74]:
test = np.arange(1,13,1)

np.mean(test.reshape(-1, 3), axis=1)

array([ 2.,  5.,  8., 11.])

In [100]:
s_1, s_2 = s_arr
factor = 36
s_1_avgd = np.mean(s_1.reshape(-1, factor), axis=1)
s_2_avgd = np.mean(s_2.reshape(-1, factor), axis=1)

co_occ_raw = np.outer(s_1_avgd, s_2_avgd)

In [104]:
np.min(co_occ_raw)

9.404171006239338e-40

In [None]:
# QUESTION: why is final s_bind ("s") this way? middle unmarked section has binding
plt.figure()
font = {'family' : 'serif',
        'weight':'normal',
        'size': 18}
plt.rc('font', **font)

plt.plot(s_arr[0], 'bo-', label = "$s^{(1)HP1}_i$")
# plt.plot(s_arr[1], 'ro-', label = "$s^{(2)PRC1}_i$")

plt.plot(sigma_1, 'ko-', label = "$\sigma^{(1)H3K9me3}_i$")
# plt.plot(sigma_2, 'go-', label = "$\sigma^{(2)H3K27me3}_i$")
plt.xlabel("nucleosome postition")
plt.ylabel("#")
plt.legend()

plt.show()

In [None]:
# TODO
# compare to andy result DONE
# OPTIMIZE- change form of looping probabililty to only consider 200 nearest? DONE
# confrim convergence of ws DONE

# Co-occupancy map. 
        # compress results- average binding of every n beads DONE
        # determine threshold (different phase behvior at low mu both, mow mu one, low mu other
        # compare to chromo? understand phase behavior?
# generate 2d (average) density maps vs mu? to compare with theory

# look at different v_int systems - understand relationship between mu, v_int, and a (looping prob magnitude)


# Andys work

In [None]:
# single nucleosome binding partition function
def calc_q_bind(w, sigma, eps = 0, mu = 0):
    q_bind = 1. + np.exp(-eps * sigma + mu - w)
    
    return q_bind

In [None]:
# average s_bind
# sigma: mark state!
# QUESTION what is epsilon (binding param?)
def calc_s_bind(w, sigma, eps = 0, mu = 0):
    q_bind = 1. + np.exp(-eps * sigma + mu - w)
    s_bind = np.exp(-eps * sigma + mu - w) / q_bind
    
    return s_bind



In [None]:
# x: genomic distance
def calc_p_loop(x, a = 1):
    
    p_loop = a / np.abs(x) ** (1.5)
    
    return p_loop

In [None]:
n_nuc = 100
n_iter = 70

w = np.zeros(n_nuc)

mu = -4.8
eps = -2
sigma = np.random.randint(1, size = n_nuc)
sigma[20:40] = 1
sigma[60:80] = 1
j = -2

for i in range(n_iter):
    s = calc_s_bind(w, sigma, eps, mu)
    for i in range(n_nuc):
        p_loop = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i))) 
        # gives array of looping probs centered at ith nucleosome (at which probability = 0) 
        w[i] = j * np.sum(p_loop * s)
        # QUESTION: why is this the expression for w? saddle point equation?

In [None]:
# ANDY with convergence test
n_nuc = 100
n_iter = 70

w = np.zeros(n_nuc)
w_old = np.zeros(n_nuc)

mu = -4.8
eps = -2
sigma = np.random.randint(1, size = n_nuc)
sigma[20:40] = 1
sigma[60:80] = 1
j = -2

for i in range(n_iter):
    print("convergence test:")
    # print("w: ", w[0:10])
    # print("w_old: ", w_old[0:10])    
    print(np.sum(w-w_old))
    s = calc_s_bind(w, sigma, eps, mu)
    w_old = np.copy(w)
    for i in range(n_nuc):
        p_loop = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i))) 
        # gives array of looping probs centered at ith nucleosome (at which probability = 0) 
        w[i] = j * np.sum(p_loop * s)
        # QUESTION: why is this the expression for w? saddle point equation?

In [None]:
sigma

In [None]:
# QUESTION: why is final s_bind ("s") this way? middle unmarked section has binding
plt.figure()
font = {'family' : 'serif',
        'weight':'normal',
        'size': 18}
plt.rc('font', **font)

plt.plot(s, 'ko-')
plt.plot(sigma, 'bo-')

plt.show()

In [None]:
n_nuc = 10000
n_iter = 20

w = np.zeros(n_nuc)

mu = -4.5
eps = -2
sigma = np.random.randint(2, size = n_nuc)
sigma[40:60] = 1
j = -2

for i in range(n_iter):
    s = calc_s_bind(w, sigma, eps, mu)
    for i in range(n_nuc):
        p_loop = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i)))
        w[i] = j * np.sum(p_loop * s)


In [None]:
plt.figure()
font = {'family' : 'serif',
        'weight':'normal',
        'size': 18}
plt.rc('font', **font)

plt.plot(s, 'ko-')
plt.plot(sigma, 'bo-')

plt.show()

# Old efficiency testing code

In [None]:


# # TESTING for efficiency


# # 2 marks competitive

# n_nuc = 4
# n_iter = 1

# w_1 = np.zeros(n_nuc)
# w_2 = np.zeros(n_nuc)
# ws = [w_1, w_2]

# # mus = [-4.8,-4.8]
# mus = [-4.8,-10]
# # eps = -2
# # j = -2

# sigma_1 = [0,2,1,2]#np.random.randint(1, size = n_nuc)
# # sigma_1[2:4] = 2
# # sigma_1[60:80] = 2

# sigma_2 = np.random.randint(1, size = n_nuc)
# # sigma_2[40:60] = 2
# # sigma_2[80:100] = 2

# sigmas = [sigma_1, sigma_2]

# # e_m = [1.52, 1.52]
# # v_int = np.array([[-4,0],[0,-4]])

# e_m = [2, 2]
# v_int = np.array([[-2,0],[0,-2]])

# f_bind_10 = eval_f_bind_competitive_arr_2([1,0], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
# f_bind_01 = eval_f_bind_competitive_arr_2([0,1], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
# f_bind_20 = eval_f_bind_competitive_arr_2([2,0], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
# f_bind_02 = eval_f_bind_competitive_arr_2([0,2], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
# f_bind_11 = eval_f_bind_competitive_arr_2([1,1], sigma_1, sigma_2, e_m, v_int[0, 0],v_int[0, 1],v_int[1, 1])
# f_binds_comp = [f_bind_10, f_bind_01, f_bind_20, f_bind_02, f_bind_11]

# p_loop_initial = None

# for i in range(n_iter):
#     s_arr = calc_s_bind_comp(ws, mus, f_binds_comp) # binding state of HP1 and PRC1
#     # s = calc_s_bind(w, sigma, eps, mu)
    
#     n_nuc = 4
#     for i in range(n_nuc):
#         p_loop = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i)))
#         if i == 0:
#             p_loop_initial = p_loop
#         print("-------------------------")
#         print("p_loop:", p_loop)
#         print("s_bind:", s_arr[0])
#         print("sum(s_bind*p_loop):", np.sum(s_arr[0]*p_loop))
#         print("conv:", signal.convolve(s_arr[0], p_loop, mode = "valid"))
#         w_1[i] = v_int[0,0] * np.sum(p_loop * s_arr[0]) +  v_int[0,1] * np.sum(p_loop * s_arr[1])
#         w_2[i] = v_int[1,1] * np.sum(p_loop * s_arr[1]) +  v_int[0,1] * np.sum(p_loop * s_arr[0])
#         w_s = [w_1, w_2]

#         p_loop_reflect = np.concatenate((p_loop_initial[1:][::-1], p_loop_initial))
#         print("conv reflect:", signal.convolve(s_arr[0], p_loop_reflect, mode = "valid"))        
#         # print("sum(s_bind*p_loop):", np.sum(s_arr[0]*p_loop))
# print("w1:", w_1)
# print("w2:", w_2)

# i = 0
# p_loop_ini = np.concatenate((calc_p_loop(np.arange(0,i)-i),np.array([0]),calc_p_loop(np.arange(i + 1, n_nuc)-i)))
# p_loop_reflect = np.concatenate((p_loop_ini[1:][::-1], p_loop_ini))

# p_s_sum1 = signal.convolve(s_arr[0], p_loop_reflect, mode = "valid") # equivalent to [np.sum(p_loop * s_arr[0])] for i in range (n_nuc)
# p_s_sum2 = signal.convolve(s_arr[1], p_loop_reflect, mode = "valid") # equivalent to [np.sum(p_loop * s_arr[1])] for i in range (n_nuc)
# w_1_fast = v_int[0,0] * p_s_sum1 +  v_int[0,1] * p_s_sum2
# w_2_fast = v_int[1,1] * p_s_sum2 +  v_int[0,1] * p_s_sum1

# print("new w1:", w_1_fast)
# print("new w2:", w_2_fast)
